Skip to content

Commit

Permalink
Add a workflow to check docs URLs
Browse files Browse the repository at this point in the history
Like the workflow to check for necessary redirects to match renamed or
deleted docs pages, this workflow takes the path to a
`gravitational/teleport` clone. It finds mentions of Teleport docs site
paths in each file changed by a PR and ensures that each path has a
corresponding docs page or redirect source.
  • Loading branch information
ptgott committed Nov 11, 2024
1 parent d9e5a50 commit c0bf70c
Show file tree
Hide file tree
Showing 5 changed files with 321 additions and 6 deletions.
7 changes: 5 additions & 2 deletions bot/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@ go 1.21
require (
github.com/google/go-github/v37 v37.0.0
github.com/gravitational/trace v1.3.1
github.com/spf13/afero v1.11.0
github.com/stretchr/testify v1.8.4
golang.org/x/oauth2 v0.13.0
golang.org/x/oauth2 v0.15.0
)

require golang.org/x/text v0.14.0 // indirect

require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/golang/protobuf v1.5.3 // indirect
Expand All @@ -18,7 +21,7 @@ require (
github.com/sirupsen/logrus v1.9.3 // indirect
golang.org/x/crypto v0.17.0 // indirect
golang.org/x/exp v0.0.0-20231006140011-7918f672742d
golang.org/x/net v0.17.0 // indirect
golang.org/x/net v0.19.0 // indirect
golang.org/x/sys v0.15.0 // indirect
golang.org/x/term v0.15.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
Expand Down
12 changes: 8 additions & 4 deletions bot/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8=
github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
Expand Down Expand Up @@ -101,12 +103,12 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c=
golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.13.0 h1:jDDenyj+WgFtmV3zYVoi8aE2BwtXFLWOA67ZfNWftiY=
golang.org/x/oauth2 v0.13.0/go.mod h1:/JMhi4ZRXAf4HG9LiNmxvk+45+96RUlVThiH8FzNBn0=
golang.org/x/oauth2 v0.15.0 h1:s8pnnxNVzjWyrvYdFUQq5llS1PX2zhPXmccZv99h7uQ=
golang.org/x/oauth2 v0.15.0/go.mod h1:q48ptWNTY5XWf+JNten23lcvHpLJ0ZSxF5ttTHKVCAM=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
Expand Down Expand Up @@ -139,6 +141,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
Expand Down
145 changes: 145 additions & 0 deletions bot/internal/bot/doclinks.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
package bot

import (
"bufio"
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"path"
"regexp"
"strings"
"text/template"

"github.com/gravitational/trace"
"github.com/spf13/afero"
)

type staleDocURL struct {
Text string
Line int
}

// CheckDocsURLs opens each file changed by a pull request and checks whether
// any URL paths for the Teleport documentation site within the file are
// incorrect. An incorrect URL path is one that does not correspond to either:
// - a docs page in the docs/pages directory within teleportClonePath
// - the source of a redirect named in the docs configuration file at
// docs/config.json within teleportClonePath
func (b *Bot) CheckDocsURLs(ctx context.Context, teleportClonePath string) error {
if teleportClonePath == "" {
return trace.Wrap(errors.New("unable to load Teleport documentation config with an empty path"))
}

docsConfigPath := path.Join(teleportClonePath, "docs", "config.json")
f, err := os.Open(docsConfigPath)
if err != nil {
return trace.Wrap(fmt.Errorf("unable to open docs config file at %v: %v", docsConfigPath, err))
}
defer f.Close()

var c DocsConfig
if err := json.NewDecoder(f).Decode(&c); err != nil {
return trace.Wrap(fmt.Errorf("unable to load redirect configuration from %v: %v", docsConfigPath, err))
}

files, err := b.c.GitHub.ListFiles(ctx, b.c.Environment.Organization, b.c.Environment.Repository, b.c.Environment.Number)
if err != nil {
return trace.Wrap(fmt.Errorf("unable to fetch files for PR %v: %v", b.c.Environment.Number, err))
}

data := make(staleDocsURLData)
for _, f := range files {
l, err := os.Open(f.Name)
if err != nil {
return trace.Wrap(fmt.Errorf("unable to open PR file %v: %v", f.Name, err))
}
defer l.Close()

stale := staleDocsURLs(afero.NewOsFs(), c.Redirects, l, teleportClonePath)
if len(stale) == 0 {
continue
}
data[f.Name] = stale
}

if len(data) != 0 {
return trace.Wrap(fmt.Errorf("found incorrect documentation URLs in the following files:\n%v", data))
}

return nil
}

type staleDocsURLData map[string][]staleDocURL

const staleURLDataTempl = `{{ range $key, $val := . }}{{ $key }}:
{{ range $val }} - line {{ .Line }}: {{ .Text }}
{{ end }}
{{ end }}`

func (s staleDocsURLData) String() string {
var buf bytes.Buffer
template.Must(template.New("").Parse(staleURLDataTempl)).Execute(&buf, s)
return buf.String()
}

var docURLRegexp = regexp.MustCompile(`goteleport.com(/([a-zA-Z0-9\._-]+/?)+)`)

// docURLPathToFilePath returns the name of the docs file path, relative to the
// root of a gravitational/teleport clone directory, that corresponds to the
// provided URL path, which is a valid redirect source.
//
// For example, "/admin-guides/database-access/introduction/" would map to
// "docs/pages/admin-guides/database-access/introduction.mdx".
func docURLPathToFilePath(urlpath string) string {
return docsPrefix + strings.TrimSuffix(urlpath, "/") + ".mdx"
}

// staleDocsURLs examines file to detect stale docs site URLs. A style docs site
// URL is a URL of a docs page that neither corresponds to a file in fs nor a
// redirect source in conf.
//
// staleDocsURLs assumes that docs paths are in fs and relative to
// teleportClonePath.
func staleDocsURLs(fs afero.Fs, conf RedirectConfig, file io.Reader, teleportClonePath string) []staleDocURL {
sources := make(map[string]struct{})
for _, s := range conf {
sources[s.Source] = struct{}{}
}

res := []staleDocURL{}
sc := bufio.NewScanner(file)
sc.Split(bufio.ScanLines)
var line int
for sc.Scan() {
line++
m := docURLRegexp.FindAllStringSubmatch(sc.Text(), -1)
for _, a := range m {
urlpath := a[1]
// Redirect sources include a trailing "/".
if !strings.HasSuffix(urlpath, "/") {
urlpath += "/"
}

// While goteleport.com/docs URLs include the /docs/
// path segment, redirect sources do not.
urlpath = strings.TrimPrefix(urlpath, "/docs")

_, ok := sources[urlpath]
_, err := fs.Stat(path.Join(teleportClonePath, docURLPathToFilePath(urlpath)))
if ok || err == nil {
continue
}

res = append(res, staleDocURL{
Text: a[0],
Line: line,
})
}
}

return res
}
161 changes: 161 additions & 0 deletions bot/internal/bot/doclinks_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
package bot

import (
"path"
"strings"
"testing"

"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
)

func TestStaleDocsURLs(t *testing.T) {
cases := []struct {
description string
sourceFile string
docsFilenames []string
redirects RedirectConfig
expected []staleDocURL
}{
{
description: "no errors, no redirects",
sourceFile: `https://www.goteleport.com/docs/enroll-databases`,
docsFilenames: []string{
"docs/pages/enroll-databases.mdx",
},
redirects: RedirectConfig{},
expected: []staleDocURL{},
},
{
description: "one error, no redirects",
sourceFile: `https://www.goteleport.com/docs/enroll-databases`,
docsFilenames: []string{
"docs/pages/enroll-dbs.mdx",
},
redirects: RedirectConfig{},
expected: []staleDocURL{
{
Text: "goteleport.com/docs/enroll-databases",
Line: 1,
},
},
},
{
description: "no errors, one redirect",
sourceFile: `https://www.goteleport.com/docs/enroll-databases`,
docsFilenames: []string{
"docs/pages/enroll-dbs.mdx",
},
redirects: RedirectConfig{
{
Source: "/enroll-databases/",
Destination: "/enroll-dbs/",
Permanent: true,
},
},
expected: []staleDocURL{},
},
{
description: "error on line 3",
sourceFile: `This is a paragraph.
This is a link to a [docs page](https://www.goteleport.com/docs/enroll-databases).`,
docsFilenames: []string{
"docs/pages/enroll-dbs.mdx",
},
redirects: RedirectConfig{},
expected: []staleDocURL{
{
Text: "goteleport.com/docs/enroll-databases",
Line: 3,
},
},
},
{
description: "no subdomain in docs link, no error",
sourceFile: `https://goteleport.com/docs/enroll-databases`,
docsFilenames: []string{
"docs/pages/enroll-databases.mdx",
},
redirects: RedirectConfig{},
expected: []staleDocURL{},
},
{
description: "query string in docs link, no error",
sourceFile: `https://www.goteleport.com/docs/enroll-databases?scope="enterprise"`,
docsFilenames: []string{
"docs/pages/enroll-databases.mdx",
},
redirects: RedirectConfig{},
expected: []staleDocURL{},
},
{
description: "fragment in docs link, no error",
sourceFile: `https://www.goteleport.com/docs/enroll-databases#step14-install-teleport`,
docsFilenames: []string{
"docs/pages/enroll-databases.mdx",
},
redirects: RedirectConfig{},
expected: []staleDocURL{},
},
{
description: "trailing slash in docs link, no error",
sourceFile: `https://www.goteleport.com/docs/enroll-databases/`,
docsFilenames: []string{
"docs/pages/enroll-databases.mdx",
},
redirects: RedirectConfig{},
expected: []staleDocURL{},
},
}

for _, c := range cases {
fs := afero.NewMemMapFs()
for _, n := range c.docsFilenames {
_, err := fs.Create(path.Join("docs", n))
assert.NoError(t, err)
}

t.Run(c.description, func(t *testing.T) {
urls := staleDocsURLs(fs, c.redirects, strings.NewReader(c.sourceFile), "docs")
assert.Equal(t, c.expected, urls)
})
}
}

func TestString(t *testing.T) {
expected := `my/file/1.go:
- line 1: https://goteleport.com/docs/page1
- line 10: https://goteleport.com/docs/page2
- line 5: https://goteleport.com/docs/page3
my/file/2.go:
- line 304: https://goteleport.com/docs/page1
- line 1003: https://goteleport.com/docs/page2
my/file/3.go:
- line 19: https://goteleport.com/docs/page1
- line 253: https://goteleport.com/docs/page2
`

data := staleDocsURLData{
"my/file/1.go": []staleDocURL{
{Line: 1, Text: "https://goteleport.com/docs/page1"},
{Line: 10, Text: "https://goteleport.com/docs/page2"},
{Line: 5, Text: "https://goteleport.com/docs/page3"},
},

"my/file/2.go": []staleDocURL{
{Line: 304, Text: "https://goteleport.com/docs/page1"},
{Line: 1003, Text: "https://goteleport.com/docs/page2"},
},

"my/file/3.go": []staleDocURL{
{Line: 19, Text: "https://goteleport.com/docs/page1"},
{Line: 253, Text: "https://goteleport.com/docs/page2"},
},
}

assert.Equal(t, expected, data.String())
}
2 changes: 2 additions & 0 deletions bot/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ func main() {
err = b.CheckChangelog(ctx)
case "docpaths":
err = b.CheckDocsPathsForMissingRedirects(ctx, flags.teleportClonePath)
case "docurls":
err = b.CheckDocsURLs(ctx, flags.teleportClonePath)
default:
err = trace.BadParameter("unknown workflow: %v", flags.workflow)
}
Expand Down

0 comments on commit c0bf70c

Please sign in to comment.