Skip to content

Commit

Permalink
Simplified the parent-path-finding logic to be strictly lexical.
Browse files Browse the repository at this point in the history
We no longer query the filesystem to check if each of the parent paths exist,
which improves efficiency and avoids unnecessary failures when a parent path
cannot be listed (e.g., due to execute-only permissions on a directory).
Checks for an existing directory have been moved into the list* functions
themselves, which is arguably a more appropriate place for them anyway.

This change means that a user can now use the /list/files endpoint to list
files through a symlinked directory inside a registered directory. Technically,
this is inconsistent with our policy of not traversing symlinked directories,
but if the user attempts to do so, that's their problem, not ours.
  • Loading branch information
LTLA committed Oct 20, 2024
1 parent cc833ca commit b1ce4f8
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 97 deletions.
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -344,8 +344,6 @@ curl -L ${SEWER_RAT_URL}/retrieve/file -G --data-urlencode "path=${path}"

On success, the contents of the target file are returned with a content type guessed from its name.
If `path` is a symbolic link to a file, the contents of the target file will be returned by this endpoint.
However, if a registered directory contains a symbolic link to a directory, the contents of the target directory cannot be retrieved if `path` needs to traverse that symbolic link.
This is consistent with the registration policy whereby symbolic links to directories are not recursively traversed during indexing.

On error, the exact response may either be `text/plain` content containing the error message directly,
or `application/json` content encoding a JSON object with the `reason` for the error.
Expand Down
32 changes: 4 additions & 28 deletions database.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"encoding/json"
"path/filepath"
"io/fs"
"net/http"
"database/sql"
"strconv"
"context"
Expand Down Expand Up @@ -778,7 +777,7 @@ func listRegisteredDirectories(db * sql.DB, query *listRegisteredDirectoriesQuer
}

if query.ContainsPath != nil {
collected, err := stripPaths(*(query.ContainsPath))
collected, err := getParentPaths(*(query.ContainsPath))
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -820,35 +819,12 @@ func listRegisteredDirectories(db * sql.DB, query *listRegisteredDirectoriesQuer
return output, nil
}

func stripPaths(path string) ([]interface{}, error) {
func getParentPaths(path string) ([]interface{}, error) {
collected := []interface{}{}
for {
info, err := os.Lstat(path) // Lstat() is deliberate as we need to distinguish symlinks, see below.
if errors.Is(err, os.ErrNotExist) {
return nil, newHttpError(http.StatusNotFound, fmt.Errorf("path at %q does not exist", path))
} else if err != nil {
return nil, fmt.Errorf("inaccessible path at %q; %v", path, err)
}

if info.Mode() & fs.ModeSymlink != 0 {
// Symlinks to directories within a registered directory are not
// followed during registration or updates. This allows us to quit
// the loop and search on the current 'collected'; if any of these
// are registered, all is fine as the symlink occurs in the
// parents. Had we kept on taking the dirnames, all would NOT be
// fine as the symlink would have been inside the registered
// directory of subsequent additions to 'collected'.
break
}

if !info.IsDir() {
return nil, newHttpError(http.StatusBadRequest, errors.New("path should refer to a directory"))
}

// Incidentally, note that there's no need to defend against '..', as
// Note that there's no need to defend against '..', as
// it is assumed that all paths are Cleaned before this point.
collected = append(collected, path)

newpath := filepath.Dir(path)
if newpath == path {
break
Expand All @@ -860,7 +836,7 @@ func stripPaths(path string) ([]interface{}, error) {
}

func isDirectoryRegistered(db * sql.DB, path string) (bool, error) {
collected, err := stripPaths(path)
collected, err := getParentPaths(path)
if err != nil {
return false, err
}
Expand Down
56 changes: 1 addition & 55 deletions database_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1882,68 +1882,14 @@ func TestIsDirectoryRegistered(t *testing.T) {
}
})

t.Run("failure", func(t *testing.T) {
// Directory is absent.
_, err := isDirectoryRegistered(dbconn, to_add + "_missing")
if err == nil || !strings.Contains(err.Error(), "not exist") {
t.Fatal(err)
}

// Directory is present but not registered.
t.Run("absent", func(t *testing.T) {
found, err := isDirectoryRegistered(dbconn, tmp)
if err != nil {
t.Fatal(err)
}
if found {
t.Fatal("should not have found a matching path")
}

// Referring to a file.
_, err = isDirectoryRegistered(dbconn, filepath.Join(to_add, "metadata.json"))
if err == nil || !strings.Contains(err.Error(), "refer to a directory") {
t.Fatal(err)
}
})

// Link within the registration directory should be ignored, but link to a
// parent of the registration directory should be fine.
slink := filepath.Join(to_add, "stuff2")
err = os.Symlink(filepath.Join(to_add, "stuff"), slink)
if err != nil {
t.Fatal(err)
}

linkparent := filepath.Join(tmp, "redirect")
err = os.Symlink(to_add, linkparent)
if err != nil {
t.Fatal(err)
}

to_add2 := filepath.Join(linkparent, "stuff")
comments, err = addNewDirectory(dbconn, to_add2, []string{ "metadata.json", "other.json" }, "myself", tokr)
if err != nil {
t.Fatal(err)
}
if len(comments) > 0 {
t.Fatalf("unexpected comments from the directory addition %v", comments)
}

t.Run("symlink checks", func(t *testing.T) {
found, err := isDirectoryRegistered(dbconn, slink)
if err != nil {
t.Fatal(err)
}
if found {
t.Fatal("should not have found a matching path")
}

found, err = isDirectoryRegistered(dbconn, to_add2)
if err != nil {
t.Fatal(err)
}
if !found {
t.Fatal("should have found a matching path")
}
})
}

Expand Down
33 changes: 30 additions & 3 deletions list.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,34 @@ import (
"path/filepath"
"strings"
"errors"
"net/http"
)

func verifyDirectory(dir string) error {
info, err := os.Stat(dir)
if errors.Is(err, os.ErrNotExist) {
return newHttpError(http.StatusNotFound, fmt.Errorf("path %q does not exist", dir))
}

if err != nil {
return fmt.Errorf("failed to check %q; %w", dir, err)
}

if !info.IsDir() {
return newHttpError(http.StatusBadRequest, fmt.Errorf("%q is not a directory", dir))
}

return nil
}

func listFiles(dir string, recursive bool) ([]string, error) {
to_report := []string{}
err := verifyDirectory(dir)
if err != nil {
return nil, err
}

err := filepath.WalkDir(dir, func(path string, info fs.DirEntry, err error) error {
to_report := []string{}
err = filepath.WalkDir(dir, func(path string, info fs.DirEntry, err error) error {
if err != nil {
return err
}
Expand Down Expand Up @@ -46,6 +68,11 @@ func listFiles(dir string, recursive bool) ([]string, error) {
}

func listMetadata(dir string, base_names []string) (map[string]fs.FileInfo, []string, error) {
err := verifyDirectory(dir)
if err != nil {
return nil, nil, err
}

curcontents := map[string]fs.FileInfo{}
curfailures := []string{}
curnames := map[string]bool{}
Expand All @@ -54,7 +81,7 @@ func listMetadata(dir string, base_names []string) (map[string]fs.FileInfo, []st
}

// Just skip any directories that we can't access, no need to check the error.
err := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
err = filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
if err != nil {
curfailures = append(curfailures, fmt.Sprintf("failed to walk %q; %v", path, err))
return nil
Expand Down
15 changes: 6 additions & 9 deletions list_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,17 +168,14 @@ func TestListMetadata(t *testing.T) {
})

t.Run("walk failure", func(t *testing.T) {
found, fails, err := listMetadata("missing", []string{ "A.json", "B.json" })
if err != nil {
t.Fatal(err)
_, _, err := listMetadata("missing", []string{ "A.json", "B.json" })
if err == nil || !strings.Contains(err.Error(), "does not exist") {
t.Fatalf("unexpected lack of failure; %v", err)
}

if len(fails) != 1 || !strings.Contains(fails[0], "failed to walk") {
t.Fatalf("expected a walking failure %v", fails)
}

if len(found) != 0 {
t.Fatal("unexpected file")
_, _, err = listMetadata(filepath.Join(dir, "A.json"), []string{ "A.json", "B.json" })
if err == nil || !strings.Contains(err.Error(), "not a directory") {
t.Fatalf("unexpected lack of failure; %v", err)
}
})
}
Expand Down

0 comments on commit b1ce4f8

Please sign in to comment.