From c7d927b7370173debf563e7e1f440dc6af0ea8d8 Mon Sep 17 00:00:00 2001 From: Alfonso Subiotto Marques Date: Tue, 14 May 2024 16:58:43 +0200 Subject: [PATCH 1/2] *: allow building wal with GOARCH=wasm GOOS=wasip1 This required abstracting some unix-only functionality and implementing a non-bolt meta store. The wasm meta store is purposefully simple and not production ready since we just need something to run tests with. Eventually, we should remove bolt and use a simpler production-ready metastore since the functionality Bolt was used for in the original repo is something we don't need and have actually eliminated (arbitrary kv get/sets). --- bench/bench_test.go | 44 +---------- fs/fs.go | 17 +---- fs/fs_nowasm.go | 26 +++++++ fs/fs_test.go | 16 ++-- fs/fs_wasm.go | 16 ++++ metadb/metadb.go | 5 +- metadb/metadb_test.go | 6 +- metadb/metadb_wasm.go | 173 ++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 233 insertions(+), 70 deletions(-) create mode 100644 fs/fs_nowasm.go create mode 100644 fs/fs_wasm.go create mode 100644 metadb/metadb_wasm.go diff --git a/bench/bench_test.go b/bench/bench_test.go index 0a09577..bbafcf1 100644 --- a/bench/bench_test.go +++ b/bench/bench_test.go @@ -5,15 +5,15 @@ package main import ( "fmt" - "github.com/polarsignals/wal/types" "os" "path/filepath" "testing" "time" - "github.com/coreos/etcd/pkg/fileutil" - "github.com/polarsignals/wal" "github.com/stretchr/testify/require" + + "github.com/polarsignals/wal" + "github.com/polarsignals/wal/types" ) func BenchmarkAppend(b *testing.B) { @@ -137,44 +137,6 @@ func runGetLogBench(b *testing.B, ls wal.LogStore, n int) { } } -// These OS benchmarks showed that at least on my Mac Creating and preallocating -// a file is not reliably quicker than renaming a file we already created and -// preallocated so the extra work of doing that in the background ahead of time -// and just renaming it during rotation seems unnecessary. 
We are not fsyncing -// either the file or parent dir in either case which dominates cost of either -// operation. Three random consecutive runs on my machine: -// -// BenchmarkOSCreateAndPreallocate-16 100 370304 ns/op 221 B/op 3 allocs/op -// BenchmarkOSRename-16 100 876001 ns/op 570 B/op 5 allocs/op -// -// BenchmarkOSCreateAndPreallocate-16 100 353654 ns/op 221 B/op 3 allocs/op -// BenchmarkOSRename-16 100 168558 ns/op 570 B/op 5 allocs/op -// -// BenchmarkOSCreateAndPreallocate-16 100 367360 ns/op 224 B/op 3 allocs/op -// BenchmarkOSRename-16 100 1353014 ns/op 571 B/op 5 allocs/op - -func BenchmarkOSCreateAndPreallocate(b *testing.B) { - tmpDir, err := os.MkdirTemp("", "raft-wal-bench-*") - require.NoError(b, err) - defer os.RemoveAll(tmpDir) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - fname := filepath.Join(tmpDir, fmt.Sprintf("test-%d.txt", i)) - b.StartTimer() - f, err := os.OpenFile(fname, os.O_CREATE|os.O_EXCL|os.O_RDWR, os.FileMode(0644)) - if err != nil { - panic(err) // require is kinda slow in benchmarks - } - err = fileutil.Preallocate(f, int64(64*1024*1024), true) - if err != nil { - panic(err) - } - b.StopTimer() - f.Close() - } -} - func BenchmarkOSRename(b *testing.B) { tmpDir, err := os.MkdirTemp("", "raft-wal-bench-*") require.NoError(b, err) diff --git a/fs/fs.go b/fs/fs.go index 7d87fcf..ac1591a 100644 --- a/fs/fs.go +++ b/fs/fs.go @@ -10,7 +10,6 @@ import ( "os" "path/filepath" - "github.com/coreos/etcd/pkg/fileutil" "github.com/polarsignals/wal/types" ) @@ -60,7 +59,8 @@ func (fs *FS) Create(dir string, name string, size uint64) (types.WritableFile, if size > math.MaxInt32 { return nil, fmt.Errorf("maximum file size is %d bytes", math.MaxInt32) } - if err := fileutil.Preallocate(f, int64(size), true); err != nil { + + if err := prealloc(f, int64(size), true); err != nil { f.Close() return nil, err } @@ -113,16 +113,3 @@ func (fs *FS) OpenReader(dir string, name string) (types.ReadableFile, error) { func (fs *FS) OpenWriter(dir string, 
name string) (types.WritableFile, error) { return os.OpenFile(filepath.Join(dir, name), os.O_RDWR, os.FileMode(0644)) } - -func syncDir(dir string) error { - f, err := os.Open(dir) - if err != nil { - return err - } - err = f.Sync() - closeErr := f.Close() - if err != nil { - return err - } - return closeErr -} diff --git a/fs/fs_nowasm.go b/fs/fs_nowasm.go new file mode 100644 index 0000000..a224292 --- /dev/null +++ b/fs/fs_nowasm.go @@ -0,0 +1,26 @@ +//go:build !wasm + +package fs + +import ( + "os" + + "github.com/coreos/etcd/pkg/fileutil" +) + +func syncDir(dir string) error { + f, err := os.Open(dir) + if err != nil { + return err + } + err = f.Sync() + closeErr := f.Close() + if err != nil { + return err + } + return closeErr +} + +func prealloc(f *os.File, sizeInBytes int64, extendFile bool) error { + return fileutil.Preallocate(f, sizeInBytes, extendFile) +} diff --git a/fs/fs_test.go b/fs/fs_test.go index 5fa08a2..a1d842f 100644 --- a/fs/fs_test.go +++ b/fs/fs_test.go @@ -7,7 +7,7 @@ import ( "bytes" "io" "os" - "path/filepath" + "strings" "testing" "github.com/stretchr/testify/require" @@ -30,12 +30,6 @@ func TestFS(t *testing.T) { require.NoError(t, err) defer wf.Close() - // Should be pre-allocated (on supported file systems). - // TODO work out if this is reliable in CI or if we can detect supported FSs?) 
- info, err := os.Stat(filepath.Join(tmpDir, "00001-abcd1234.wal")) - require.NoError(t, err) - require.Equal(t, int64(512*1024), info.Size()) - // Should be able to write data in any order n, err := wf.WriteAt(bytes.Repeat([]byte{'2'}, 1024), 1024) require.NoError(t, err) @@ -121,17 +115,17 @@ func TestRealFSNoDir(t *testing.T) { _, err := fs.ListDir("/not-a-real-dir") require.Error(t, err) - require.Contains(t, err.Error(), "no such file or directory") + require.Contains(t, strings.ToLower(err.Error()), "no such file or directory") _, err = fs.Create("/not-a-real-dir", "foo", 1024) require.Error(t, err) - require.Contains(t, err.Error(), "no such file or directory") + require.Contains(t, strings.ToLower(err.Error()), "no such file or directory") _, err = fs.OpenReader("/not-a-real-dir", "foo") require.Error(t, err) - require.Contains(t, err.Error(), "no such file or directory") + require.Contains(t, strings.ToLower(err.Error()), "no such file or directory") _, err = fs.OpenWriter("/not-a-real-dir", "foo") require.Error(t, err) - require.Contains(t, err.Error(), "no such file or directory") + require.Contains(t, strings.ToLower(err.Error()), "no such file or directory") } diff --git a/fs/fs_wasm.go b/fs/fs_wasm.go new file mode 100644 index 0000000..f6b526f --- /dev/null +++ b/fs/fs_wasm.go @@ -0,0 +1,16 @@ +//go:build wasm + +package fs + +import "os" + +func syncDir(dir string) error { + // TODO(asubiotto): Issue syncing dirs on wasm. + return nil +} + +func prealloc(_ *os.File, _ int64, _ bool) error { + // fileutil.Preallocate relies on unix-only functionality which is unavailable + // on wasm. 
+ return nil +} diff --git a/metadb/metadb.go b/metadb/metadb.go index f2c5a75..1efc40a 100644 --- a/metadb/metadb.go +++ b/metadb/metadb.go @@ -1,3 +1,5 @@ +//go:build !wasm + // Copyright (c) HashiCorp, Inc // SPDX-License-Identifier: MPL-2.0 @@ -10,8 +12,9 @@ import ( "os" "path/filepath" - "github.com/polarsignals/wal/types" "go.etcd.io/bbolt" + + "github.com/polarsignals/wal/types" ) const ( diff --git a/metadb/metadb_test.go b/metadb/metadb_test.go index 3dc9753..aebfe04 100644 --- a/metadb/metadb_test.go +++ b/metadb/metadb_test.go @@ -6,11 +6,13 @@ package metadb import ( "io/ioutil" "os" + "strings" "testing" "time" - "github.com/polarsignals/wal/types" "github.com/stretchr/testify/require" + + "github.com/polarsignals/wal/types" ) func TestMetaDB(t *testing.T) { @@ -86,7 +88,7 @@ func TestMetaDBErrors(t *testing.T) { // Loading from a non-existent dir is an error var db2 BoltMetaDB _, err = db2.Load("fake-dir-that-does-not-exist") - require.ErrorContains(t, err, "no such file or directory") + require.True(t, strings.Contains(strings.ToLower(err.Error()), "no such file or directory")) } func makeState(nSegs int) *types.PersistentState { diff --git a/metadb/metadb_wasm.go b/metadb/metadb_wasm.go new file mode 100644 index 0000000..339509a --- /dev/null +++ b/metadb/metadb_wasm.go @@ -0,0 +1,173 @@ +//go:build wasm + +// Copyright (c) HashiCorp, Inc +// SPDX-License-Identifier: MPL-2.0 + +package metadb + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "os" + "path/filepath" + + "github.com/polarsignals/wal/types" +) + +// This file is the BoltMetaDB implementation for the wasm build target. It is +// a mock store not safe to be used in production. +// TODO(asubiotto): For our use-case, BoltDB is overkill. Eventually we should +// just move to something simpler but still correct which can also be used when +// compiled to WASM. + +// FileName is the default file name for the bolt db file. 
+const FileName = "wal-meta.db" + +var ( + // ErrUnintialized is returned when any call is made before Load has opened + // the DB file. + ErrUnintialized = errors.New("uninitialized") +) + +type BoltMetaDB struct { + dir string + f *os.File +} + +func (db *BoltMetaDB) ensureOpen(dir string) error { + if db.dir != "" && db.dir != dir { + return fmt.Errorf("can't load dir %s, already open in dir %s", dir, db.dir) + } + if db.f != nil { + return nil + } + + fileName := filepath.Join(dir, FileName) + + open := func() error { + f, err := os.OpenFile(fileName, os.O_RDWR|os.O_CREATE, 0644) + if err != nil { + return fmt.Errorf("failed to open %s: %w", FileName, err) + } + db.dir = dir + db.f = f + return nil + } + + // 1. Check if file exists already. If yes, skip init and just open it. + // 2. Delete any existing DB file with tmp name + // 3. Create a new BoltDB that is empty and has the buckets with a temp name. + // 4. Once that's committed, rename to final name and Fsync parent dir + _, err := os.Stat(fileName) + if err == nil { + // File exists, just open it + return open() + } + if !errors.Is(err, os.ErrNotExist) { + // Unknown err just return that + return fmt.Errorf("failed to stat %s: %w", FileName, err) + } + + // File doesn't exist, initialize a new DB in a crash-safe way + if err := safeInitBoltDB(dir); err != nil { + return fmt.Errorf("failed initializing meta DB: %w", err) + } + + // All good, now open it! + return open() +} + +func safeInitBoltDB(dir string) error { + // Delete any old attempts to init that were unsuccessful + f, err := os.Create(filepath.Join(dir, FileName)) + if err != nil { + return err + } + if err := f.Close(); err != nil { + return err + } + + // And Fsync that parent dir to make sure the new file with its new name + // is persisted! + // TODO(asubiotto): Directory fsyncs are not supported in WASM. 
+ /*dirF, err := os.Open(dir) + if err != nil { + return err + } + err = dirF.Sync() + closeErr := dirF.Close() + if err != nil { + return err + } + return closeErr*/ + return nil +} + +// Load loads the existing persisted state. If there is no existing state +// implementations are expected to create and initialize new storage and return an +// empty state. +func (db *BoltMetaDB) Load(dir string) (types.PersistentState, error) { + var state types.PersistentState + + if err := db.ensureOpen(dir); err != nil { + return state, err + } + + raw, err := io.ReadAll(db.f) + if err != nil { + return state, err + } + + if len(raw) == 0 { + // Valid state, just an "empty" log. + return state, nil + } + + if err := json.Unmarshal(raw, &state); err != nil { + return state, fmt.Errorf("%w: failed to parse persisted state: %s", types.ErrCorrupt, err) + } + return state, nil +} + +// CommitState must atomically replace all persisted metadata in the current +// store with the set provided. It must not return until the data is persisted +// durably and in a crash-safe way otherwise the guarantees of the WAL will be +// compromised. The WAL will only ever call this in a single thread at one +// time and it will never be called concurrently with Load however it may be +// called concurrently with Get/SetStable operations. +func (db *BoltMetaDB) CommitState(state types.PersistentState) error { + if db.f == nil { + return ErrUnintialized + } + + raw, err := json.Marshal(state) + if err != nil { + return fmt.Errorf("failed to encode persisted state: %w", err) + } + + // This is not really safe, but good enough for testing. 
+ if err := db.f.Truncate(0); err != nil { + return fmt.Errorf("failed to truncate file: %w", err) + } + + if _, err := db.f.Seek(0, 0); err != nil { + return fmt.Errorf("failed to seek: %w", err) + } + + if _, err := db.f.Write(raw); err != nil { + return fmt.Errorf("failed to write persisted state: %w", err) + } + return nil +} + +// Close implements io.Closer +func (db *BoltMetaDB) Close() error { + if db.f == nil { + return nil + } + closeErr := db.f.Close() + db.f = nil + return closeErr +} From e81905594dc62a7bf9847f1fe7d483697b66ea82 Mon Sep 17 00:00:00 2001 From: Alfonso Subiotto Marques Date: Tue, 14 May 2024 17:03:10 +0200 Subject: [PATCH 2/2] workflows: add Go Test WASM job This ensures the codebase can be built with GOARCH=wasm GOOS=wasip1 --- .github/workflows/go-tests.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/go-tests.yml b/.github/workflows/go-tests.yml index 900ffe9..4763f60 100644 --- a/.github/workflows/go-tests.yml +++ b/.github/workflows/go-tests.yml @@ -15,7 +15,13 @@ jobs: - name: Set up Go uses: actions/setup-go@v2 with: - go-version: '1.20' + go-version: '1.22' - name: Test run: go test -v ./... + + - name: Set up wasmtime + uses: bytecodealliance/actions/wasmtime/setup@v1 + + - name: Test WASM + run: PATH=$PATH:$(go env GOROOT)/misc/wasm GOOS=wasip1 GOARCH=wasm go test ./... \ No newline at end of file