Skip to content

Commit

Permalink
initial go version
Browse files Browse the repository at this point in the history
  • Loading branch information
laktak committed Aug 16, 2024
1 parent bb33b02 commit 9e6a42f
Show file tree
Hide file tree
Showing 29 changed files with 1,532 additions and 40 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# bin
/chkbit
dist
63 changes: 23 additions & 40 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Remember to always maintain multiple backups for comprehensive data protection.
```
brew install chkbit
```
- Download for [Linux, macOS or Windows](https://github.com/laktak/chkbit-py/releases).
- Download for [Linux, macOS or Windows](https://github.com/laktak/chkbit/releases).


## Usage
Expand All @@ -47,45 +47,28 @@ chkbit will
Run `chkbit PATH` to verify only.

```
usage: chkbit [-h] [-u] [--show-ignored-only] [--algo ALGO] [-f] [-s] [-l FILE] [--log-verbose] [--index-name NAME] [--ignore-name NAME] [-w N] [--plain] [-q] [-v] [PATH ...]
Checks the data integrity of your files. See https://github.com/laktak/chkbit-py
positional arguments:
PATH directories to check
options:
-h, --help show this help message and exit
-u, --update update indices (without this chkbit will verify files in readonly mode)
--show-ignored-only only show ignored files
--algo ALGO hash algorithm: md5, sha512, blake3 (default: blake3)
-f, --force force update of damaged items
-s, --skip-symlinks do not follow symlinks
-l FILE, --log-file FILE
write to a logfile if specified
--log-verbose verbose logging
--index-name NAME filename where chkbit stores its hashes, needs to start with '.' (default: .chkbit)
--ignore-name NAME filename that chkbit reads its ignore list from, needs to start with '.' (default: .chkbitignore)
-w N, --workers N number of workers to use (default: 5)
--plain show plain status instead of being fancy
-q, --quiet quiet, don't show progress/information
-v, --verbose verbose output
.chkbitignore rules:
each line should contain exactly one name
you may use Unix shell-style wildcards (see README)
lines starting with `#` are skipped
lines starting with `/` are only applied to the current directory
Status codes:
DMG: error, data damage detected
EIX: error, index damaged
old: warning, file replaced by an older version
new: new file
upd: file updated
ok : check ok
ign: ignored (see .chkbitignore)
EXC: internal exception
Usage: chkbit [<paths> ...] [flags]
Arguments:
[<paths> ...] directories to check
Flags:
-h, --help Show context-sensitive help.
-H, --tips Show tips.
-u, --update update indices (without this chkbit will verify files in readonly mode)
--show-ignored-only only show ignored files
--algo="blake3" hash algorithm: md5, sha512, blake3 (default: blake3)
-f, --force force update of damaged items
-s, --skip-symlinks do not follow symlinks
-l, --log-file=STRING write to a logfile if specified
--log-verbose verbose logging
--index-name=".chkbit" filename where chkbit stores its hashes, needs to start with '.' (default: .chkbit)
--ignore-name=".chkbitignore" filename that chkbit reads its ignore list from, needs to start with '.' (default: .chkbitignore)
-w, --workers=5 number of workers to use (default: 5)
--plain show plain status instead of being fancy
-q, --quiet quiet, don't show progress/information
-v, --verbose verbose output
-V, --version show version information
```

chkbit is set to use only 5 workers by default so it will not slow your system to a crawl. You can specify a higher number to make it a lot faster if the IO throughput can also keep up.
Expand Down
153 changes: 153 additions & 0 deletions check/context.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package check

import (
"errors"
"os"
"path/filepath"
"sync"
)

// Context holds the settings and the queues shared by one run of the checker.
type Context struct {
// NumWorkers is the number of worker goroutines Start launches; NewContext
// also sizes the queue buffers as a multiple of it.
NumWorkers int
// Force is stored as passed to NewContext; it is not read in this file.
// NOTE(review): presumably the --force flag (force update of damaged items) - confirm.
Force bool
// Update is stored as passed to NewContext; it is not read in this file.
// NOTE(review): presumably the --update flag (update indices) - confirm.
Update bool
// ShowIgnoredOnly makes scanDir log skipped dot-entries (other than
// chkbit's own files) with STATUS_IGNORE.
ShowIgnoredOnly bool
// HashAlgo is the hash algorithm name; NewContext accepts "md5", "sha512"
// and "blake3".
HashAlgo string
// SkipSymlinks stops scanDir from following symlinks when deciding whether
// an entry is a directory.
SkipSymlinks bool
// IndexFilename is the name of the index file; NewContext requires a
// leading dot.
IndexFilename string
// IgnoreFilename is the name of the ignore file; NewContext requires a
// leading dot.
IgnoreFilename string
// WorkQueue carries one WorkItem per scanned directory; endWork sends nil on it.
WorkQueue chan *WorkItem
// LogQueue carries log events; Start sends nil on it after all workers returned.
LogQueue chan *LogEvent
// PerfQueue carries file and byte counters.
PerfQueue chan *PerfEvent
// wg is not used by the code in this file (Start uses a local WaitGroup).
wg sync.WaitGroup
}

// NewContext validates the given settings and returns a Context whose work,
// log and perf queues are allocated with buffers scaled by numWorkers.
//
// An error is returned when indexFilename or ignoreFilename does not start
// with a dot, or when hashAlgo is not one of "md5", "sha512" or "blake3".
// An empty filename is rejected with the same error instead of panicking on
// the index expression.
func NewContext(numWorkers int, force bool, update bool, showIgnoredOnly bool, hashAlgo string, skipSymlinks bool, indexFilename string, ignoreFilename string) (*Context, error) {
	// The length check must come first: indexing an empty string panics.
	if len(indexFilename) == 0 || indexFilename[0] != '.' {
		return nil, errors.New("The index filename must start with a dot!")
	}
	if len(ignoreFilename) == 0 || ignoreFilename[0] != '.' {
		return nil, errors.New("The ignore filename must start with a dot!")
	}
	if hashAlgo != "md5" && hashAlgo != "sha512" && hashAlgo != "blake3" {
		return nil, errors.New(hashAlgo + " is unknown.")
	}
	return &Context{
		NumWorkers:      numWorkers,
		Force:           force,
		Update:          update,
		ShowIgnoredOnly: showIgnoredOnly,
		HashAlgo:        hashAlgo,
		SkipSymlinks:    skipSymlinks,
		IndexFilename:   indexFilename,
		IgnoreFilename:  ignoreFilename,
		WorkQueue:       make(chan *WorkItem, numWorkers*10),
		LogQueue:        make(chan *LogEvent, numWorkers*100),
		PerfQueue:       make(chan *PerfEvent, numWorkers*10),
	}, nil
}

// log queues a LogEvent built from stat and message on LogQueue.
func (context *Context) log(stat Status, message string) {
	event := &LogEvent{stat, message}
	context.LogQueue <- event
}

// logErr queues a STATUS_PANIC LogEvent whose message is "<path>: <error text>".
func (context *Context) logErr(path string, err error) {
	message := path + ": " + err.Error()
	context.LogQueue <- &LogEvent{STATUS_PANIC, message}
}

// perfMonFiles queues a PerfEvent with numFiles as its first value and 0 as
// its second.
func (context *Context) perfMonFiles(numFiles int64) {
	event := &PerfEvent{numFiles, 0}
	context.PerfQueue <- event
}

// perfMonBytes queues a PerfEvent with 0 as its first value and numBytes as
// its second.
func (context *Context) perfMonBytes(numBytes int64) {
	event := &PerfEvent{0, numBytes}
	context.PerfQueue <- event
}

// addWork queues a WorkItem for the directory path with the files to index
// and the ignore rules that apply to it.
func (context *Context) addWork(path string, filesToIndex []string, ignore *Ignore) {
	item := &WorkItem{path, filesToIndex, ignore}
	context.WorkQueue <- item
}

// endWork sends a nil WorkItem on WorkQueue; Start sends one per worker after
// all paths have been scanned.
func (context *Context) endWork() {
	var endMarker *WorkItem // nil pointer, used as the end-of-work marker
	context.WorkQueue <- endMarker
}

// isChkbitFile reports whether name equals the configured index or ignore
// filename.
func (context *Context) isChkbitFile(name string) bool {
	switch name {
	case context.IndexFilename, context.IgnoreFilename:
		return true
	}
	return false
}

// Start launches NumWorkers goroutines running RunWorker and, in a separate
// goroutine, scans every path in pathList and then queues one end marker per
// worker. It returns once all workers have finished, after sending nil on
// LogQueue.
func (context *Context) Start(pathList []string) {
	var workers sync.WaitGroup
	workers.Add(context.NumWorkers)
	for id := 0; id < context.NumWorkers; id++ {
		go func(workerID int) {
			defer workers.Done()
			context.RunWorker(workerID)
		}(id)
	}

	// The producer runs concurrently so workers can drain WorkQueue while the
	// directory tree is still being scanned.
	produce := func() {
		for _, dir := range pathList {
			context.scanDir(dir, nil)
		}
		for sent := 0; sent < context.NumWorkers; sent++ {
			context.endWork()
		}
	}
	go produce()

	workers.Wait()

	var endOfLog *LogEvent // nil marks the end of the log stream
	context.LogQueue <- endOfLog
}

// scanDir reads the directory root, queues one work item containing its
// regular files and then recurses into its subdirectories.
//
// Entries whose name starts with a dot are skipped; when ShowIgnoredOnly is
// set, those that are not chkbit's own files are logged with STATUS_IGNORE.
// Subdirectories matched by the ignore rules are logged with STATUS_IGNORE
// and not descended into. parentIgnore holds the rules of the enclosing
// directory (nil for a top-level path).
//
// Errors reading the directory are logged and end the scan of root. Errors
// loading the ignore file are logged and the scan continues with no rules of
// its own for root (parent rules still apply).
func (context *Context) scanDir(root string, parentIgnore *Ignore) {
	files, err := os.ReadDir(root)
	if err != nil {
		context.logErr(root+"/", err)
		return
	}

	// isDir reports whether the entry is a directory; unless SkipSymlinks is
	// set, a symlink counts as a directory when its resolved target is one.
	isDir := func(file os.DirEntry, path string) bool {
		if file.IsDir() {
			return true
		}
		ft := file.Type()
		if !context.SkipSymlinks && ft&os.ModeSymlink != 0 {
			rpath, err := filepath.EvalSymlinks(path)
			if err == nil {
				fi, err := os.Lstat(rpath)
				return err == nil && fi.IsDir()
			}
		}
		return false
	}

	var dirList []string
	var filesToIndex []string

	for _, file := range files {
		path := filepath.Join(root, file.Name())
		if file.Name()[0] == '.' {
			if context.ShowIgnoredOnly && !context.isChkbitFile(file.Name()) {
				context.log(STATUS_IGNORE, path)
			}
			continue
		}
		if isDir(file, path) {
			dirList = append(dirList, file.Name())
		} else if file.Type().IsRegular() {
			filesToIndex = append(filesToIndex, file.Name())
		}
	}

	ignore, err := GetIgnore(context, root, parentIgnore)
	if err != nil {
		context.logErr(root+"/", err)
		// GetIgnore returns a nil *Ignore on failure; calling shouldIgnore on
		// it below would dereference nil. Continue with an empty rule set for
		// this directory that still defers to the parent rules.
		ignore = &Ignore{
			parentIgnore: parentIgnore,
			context:      context,
			path:         root,
			name:         filepath.Base(root) + "/",
		}
	}

	context.addWork(root, filesToIndex, ignore)

	for _, name := range dirList {
		if !ignore.shouldIgnore(name) {
			context.scanDir(filepath.Join(root, name), ignore)
		} else {
			context.log(STATUS_IGNORE, name+"/")
		}
	}
}
57 changes: 57 additions & 0 deletions check/hashfile.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package check

import (
"crypto/md5"
"crypto/sha512"
"encoding/hex"
"errors"
"hash"
"io"
"os"

"lukechampine.com/blake3"
)

// BLOCKSIZE is the size in bytes of the buffer Hashfile reads into
// (262144 bytes, i.e. 256 KiB).
const BLOCKSIZE = 256 << 10

// Hashfile returns the hex-encoded digest of the file at path, computed with
// hashAlgo ("md5", "sha512" or "blake3").
//
// The file is read in chunks of BLOCKSIZE bytes. If perfMonBytes is non-nil
// it is called after each non-empty chunk with the number of bytes read.
// An unknown hashAlgo is reported before the file is opened; open and read
// errors are returned unchanged.
func Hashfile(path string, hashAlgo string, perfMonBytes func(int64)) (string, error) {
	var hasher hash.Hash
	switch hashAlgo {
	default:
		return "", errors.New("algo '" + hashAlgo + "' is unknown.")
	case "blake3":
		hasher = blake3.New(32, nil)
	case "sha512":
		hasher = sha512.New()
	case "md5":
		hasher = md5.New()
	}

	src, openErr := os.Open(path)
	if openErr != nil {
		return "", openErr
	}
	defer src.Close()

	chunk := make([]byte, BLOCKSIZE)
	for {
		n, readErr := src.Read(chunk)
		if readErr != nil && readErr != io.EOF {
			return "", readErr
		}
		// A read that yields no bytes ends the loop.
		if n == 0 {
			return hex.EncodeToString(hasher.Sum(nil)), nil
		}
		hasher.Write(chunk[:n])
		if perfMonBytes != nil {
			perfMonBytes(int64(n))
		}
	}
}

// HashMd5 returns the hex-encoded MD5 digest of data.
func HashMd5(data []byte) string {
	digest := md5.Sum(data)
	return hex.EncodeToString(digest[:])
}
90 changes: 90 additions & 0 deletions check/ignore.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package check

import (
"bufio"
"os"
"path/filepath"
"strings"
)

// Ignore holds the ignore patterns of one directory together with a link to
// the rules of its parent directory.
type Ignore struct {
// parentIgnore is consulted by shouldIgnore2 when no pattern of this
// directory matches; nil when there is no parent.
parentIgnore *Ignore
// context supplies the ignore filename (Context.IgnoreFilename).
context *Context
// path is the directory the ignore file is looked up in.
path string
// name is the base name of path followed by "/"; shouldIgnore2 prepends it
// when delegating to parentIgnore.
name string
// itemList holds the trimmed, non-empty lines of the ignore file that do
// not start with '#'.
itemList []string
}

// GetIgnore builds the Ignore for the directory path, chained to
// parentIgnore, and loads the directory's ignore file. It returns a nil
// Ignore together with the error when loading fails.
func GetIgnore(context *Context, path string, parentIgnore *Ignore) (*Ignore, error) {
	dirName := filepath.Base(path) + "/"
	result := &Ignore{
		parentIgnore: parentIgnore,
		context:      context,
		path:         path,
		name:         dirName,
	}
	if err := result.loadIgnore(); err != nil {
		return nil, err
	}
	return result, nil
}

// getIgnoreFilepath returns the location of the ignore file inside this
// directory.
func (ignore *Ignore) getIgnoreFilepath() string {
	dir, filename := ignore.path, ignore.context.IgnoreFilename
	return filepath.Join(dir, filename)
}

// loadIgnore reads the directory's ignore file and appends every trimmed,
// non-empty line that does not start with '#' to itemList.
//
// A missing ignore file is not an error. Any other open error, and any error
// reported by the scanner, is returned.
func (ignore *Ignore) loadIgnore() error {
	// Open directly instead of Stat followed by Open: one syscall less, and
	// a file removed in between is still treated as "no ignore file".
	file, err := os.Open(ignore.getIgnoreFilepath())
	if err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line != "" && line[0] != '#' {
			ignore.itemList = append(ignore.itemList, line)
		}
	}
	return scanner.Err()
}

// shouldIgnore reports whether name is matched by the ignore rules; it calls
// shouldIgnore2 with an empty fullname.
func (ignore *Ignore) shouldIgnore(name string) bool {
	const noFullname = ""
	return ignore.shouldIgnore2(name, noFullname)
}

// shouldIgnore2 reports whether name, or fullname when it is non-empty, is
// matched by a pattern of this directory or of one of its parents.
//
// Patterns starting with '/' are only applied when fullname is empty; the
// leading slash is stripped before matching. When nothing matches here the
// question is passed to parentIgnore with this directory's name prepended.
// Errors from filepath.Match are ignored and count as no match.
func (ignore *Ignore) shouldIgnore2(name string, fullname string) bool {
	hasFullname := fullname != ""

	for _, pattern := range ignore.itemList {
		if pattern[0] == '/' {
			if hasFullname {
				continue
			}
			pattern = pattern[1:]
		}
		if ok, _ := filepath.Match(pattern, name); ok {
			return true
		}
		if !hasFullname {
			continue
		}
		if ok, _ := filepath.Match(pattern, fullname); ok {
			return true
		}
	}

	if ignore.parentIgnore == nil {
		return false
	}

	// The parent sees the more specific of the two names, and that same name
	// prefixed with this directory's name as the new fullname.
	relative := name
	if hasFullname {
		relative = fullname
	}
	return ignore.parentIgnore.shouldIgnore2(relative, ignore.name+relative)
}
Loading

0 comments on commit 9e6a42f

Please sign in to comment.