Skip to content

Commit

Permalink
Make sure double-hashes are detected and blocked correctly
Browse files Browse the repository at this point in the history
Assume a double-hash could be parsed as both a b58btc string and a hex string,
but handle cases when that happens and it's obviously wrong:

- Multihashes of type Identity
- Multihashes of unknown hash functions
- Hex strings of length different than 64 chars
  • Loading branch information
hsanjuan committed Oct 25, 2023
1 parent a8ac7a7 commit 25d9833
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 34 deletions.
130 changes: 96 additions & 34 deletions denylist.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/ipfs/go-cid"
"github.com/multiformats/go-multicodec"
"github.com/multiformats/go-multihash"
mhreg "github.com/multiformats/go-multihash/core"
"go.uber.org/multierr"
"gopkg.in/yaml.v3"
)
Expand Down Expand Up @@ -99,7 +100,7 @@ type Denylist struct {

IPFSBlocksDB *BlocksDB
IPNSBlocksDB *BlocksDB
DoubleHashBlocksDB map[uint64]*BlocksDB // codec -> blocks using that codec
DoubleHashBlocksDB map[uint64]*BlocksDB // mhCode -> blocks using that code
PathBlocksDB *BlocksDB
PathPrefixBlocks Entries
// MimeBlocksDB
Expand Down Expand Up @@ -368,47 +369,100 @@ func (dl *Denylist) parseLine(line string, number uint64) error {
switch {
case strings.HasPrefix(rule, "//"):
// Double-hash rule.
// It can be a Multihash (CIDv0) or a sha256-hex-encoded string.
// It can be a Multihash or a sha256-hex-encoded string.

var mhType uint64
// attempt to parse a b58btc-encoded multihash
rule = strings.TrimPrefix(rule, "//")
mh, err := multihash.FromB58String(rule)
if err == nil {

// parseMultihash tries to interpret mhStr as a b58btc-encoded multihash
// that could be used as a double-hash.
//
// It returns the multihash function code and the parsed multihash. An
// error is returned when mhStr is not valid base58, does not decode as a
// multihash, uses the identity function, or uses a function for which
// no hasher can be obtained.
parseMultihash := func(mhStr string) (uint64, multihash.Multihash, error) {
	// Parse the argument, not the captured rule variable, so the
	// closure honours its own parameter.
	mh, err := multihash.FromB58String(mhStr)
	if err != nil { // not a b58 string usually
		return 0, nil, err
	}
	dmh, err := multihash.Decode(mh)
	if err != nil { // looked like a mhash but it was not.
		return 0, nil, err
	}

	// Identity hash doesn't make sense for double
	// hashing. In practice it is usually a hex string
	// that has been wrongly parsed as multihash.
	if dmh.Code == 0 {
		return 0, nil, errors.New("identity hash cannot be a double hash")
	}

	// If we are here it means we have something that
	// could be interpreted as a multihash but it may
	// still be a hex-encoded string that just parsed as
	// b58 fine. In any case, we should check we know how to
	// hash for this type of multihash.
	if _, err := mhreg.GetVariableHasher(dmh.Code, dmh.Length); err != nil {
		return 0, nil, err
	}

	return dmh.Code, mh, nil
}

// parseHexString tries to interpret hexStr as a hex-encoded sha2-256
// digest that could be used as a double-hash.
//
// It returns multihash.SHA2_256 as the function code together with the
// digest wrapped as a multihash. An error is returned when hexStr is
// not exactly 64 characters long, is not valid hex, or cannot be
// encoded as a multihash.
parseHexString := func(hexStr string) (uint64, multihash.Multihash, error) {
	if len(hexStr) != 64 {
		return 0, nil, errors.New("hex string are sha2-256 hashes and must be 64 chars (32 bytes) long")
	}

	// Decode the argument, not the captured rule variable, so the
	// length check above and the decoding act on the same string.
	bs, err := hex.DecodeString(hexStr)
	if err != nil {
		return 0, nil, err
	}
	// We have a hex-encoded string and assume it is a
	// SHA2_256. TODO: could support hints here to use
	// different functions.
	mhBytes, err := multihash.Encode(bs, multihash.SHA2_256)
	if err != nil {
		return 0, nil, err
	}
	return multihash.SHA2_256, multihash.Multihash(mhBytes), nil
}

// addRule registers e as a double-hash rule for the multihash mh under
// the hash function code mhType. The entry is received by value, so the
// caller's copy keeps its own Path and Multihash fields. It currently
// always returns nil.
addRule := func(e Entry, mhType uint64, mh multihash.Multihash) error {
	// The entry gets the blocked path built from the empty string;
	// the error from NewBlockedPath is discarded.
	emptyPath, _ := NewBlockedPath("")
	e.Path = emptyPath
	e.Multihash = mh

	// Entries are grouped in one BlocksDB per hash function code and
	// keyed by the b58 string of the multihash. The DB for a code is
	// created the first time that code is seen.
	b58Key := e.Multihash.B58String()
	db := dl.DoubleHashBlocksDB[mhType]
	if db == nil {
		db = &BlocksDB{}
		dl.DoubleHashBlocksDB[mhType] = db
	}
	db.Store(b58Key, e)

	logger.Debugf("%s:%d: Double-hash rule. Func: %s. Key: %s. Entry: %s", filepath.Base(dl.Filename), number, multicodec.Code(mhType).String(), b58Key, e)
	return nil
}

// We have to assume that perhaps one day a sha256 hex string
// is going to parse as a valid multihash with a known
// hashing function etc. And vice-versa perhaps.
//
// In a case where we cannot distinguish between a b58btc
// multihash and a hex-string, we add rules for both, to make
// sure we always block what should be blocked.
code, mh, err1 := parseMultihash(rule)
if err1 == nil {
// clone the entry as add-rule modifies it.
e1 := e.Clone()
if err := addRule(e1, code, mh); err != nil {
return err
}
e.Multihash = multihash.Multihash(mhBytes)
mhType = multihash.SHA2_256
}
bpath, _ := NewBlockedPath("")
e.Path = bpath

// Store it in the appropriate BlocksDB (per mhtype).
key := e.Multihash.B58String()
if blocks := dl.DoubleHashBlocksDB[mhType]; blocks == nil {
dl.DoubleHashBlocksDB[mhType] = &BlocksDB{}
code, mh, err2 := parseHexString(rule)
if err2 == nil {
if err := addRule(e, code, mh); err != nil {
return err
}
}

if err1 != nil && err2 != nil {
return fmt.Errorf("double-hash cannot be parsed as a multihash with a supported hashing function (%w) nor as a sha256 hex-encoded string (%w) (%s:%d)", err1, err2, dl.Filename, number)
}
dl.DoubleHashBlocksDB[mhType].Store(key, e)
logger.Debugf("%s:%d: Double-hash rule. Func: %s. Key: %s. Entry: %s", filepath.Base(dl.Filename), number, multicodec.Code(mhType).String(), key, e)

case strings.HasPrefix(rule, "/ipfs/"), strings.HasPrefix(rule, "/ipld/"):
// ipfs/ipld rule. We parse the CID and use the
Expand Down Expand Up @@ -583,9 +637,11 @@ func (dl *Denylist) IsIPNSPathBlocked(name, subpath string) StatusResponse {
}

// Double-hash blocking
for codec, blocks := range dl.DoubleHashBlocksDB {
double, err := multihash.Sum([]byte(p.String()), codec, -1)
for mhCode, blocks := range dl.DoubleHashBlocksDB {
double, err := multihash.Sum([]byte(p.String()), mhCode, -1)
if err != nil {
// Usually this means an unsupported hash function was
// registered.
logger.Error(err)
continue
}
Expand Down Expand Up @@ -697,7 +753,7 @@ func (dl *Denylist) isIPFSIPLDPathBlocked(cidStr, subpath, protocol string) Stat
}

prefix := c.Prefix()
for codec, blocks := range dl.DoubleHashBlocksDB {
for mhCode, blocks := range dl.DoubleHashBlocksDB {
// <cidv1base32>/<path>
// TODO: we should be able to disable this part with an Option
// or a hint for denylists not using it.
Expand All @@ -706,15 +762,17 @@ func (dl *Denylist) isIPFSIPLDPathBlocked(cidStr, subpath, protocol string) Stat
// badbits appends / on empty subpath. and hashes that
// https://github.com/protocol/badbits.dwebops.pub/blob/main/badbits-lambda/helpers.py#L17
v1b32path += "/" + subpath
doubleLegacy, err := multihash.Sum([]byte(v1b32path), codec, -1)
doubleLegacy, err := multihash.Sum([]byte(v1b32path), mhCode, -1)
if err != nil {
// Usually this means an unsupported hash function was
// registered.
logger.Error(err)
continue
}

// encode as b58 which is the key we use for the BlocksDB.
b58 := doubleLegacy.B58String()
logger.Debugf("IsIPFFSIPLDPathBlocked load IPFS doublehash (legacy): %d %s", codec, b58)
logger.Debugf("IsIPFFSIPLDPathBlocked load IPFS doublehash (legacy): %d %s", mhCode, b58)
entries, _ := blocks.Load(b58)
status, entry := entries.CheckPathStatus("")
if status != StatusNotFound { // Hit!
Expand All @@ -731,13 +789,15 @@ func (dl *Denylist) isIPFSIPLDPathBlocked(cidStr, subpath, protocol string) Stat
if subpath != "" {
v0path += "/" + subpath
}
double, err := multihash.Sum([]byte(v0path), codec, -1)
double, err := multihash.Sum([]byte(v0path), mhCode, -1)
if err != nil {
// Usually this means an unsupported hash function was
// registered.
logger.Error(err)
continue
}
b58 = double.B58String()
logger.Debugf("IsPathBlocked load IPFS doublehash: %d %s", codec, b58)
logger.Debugf("IsPathBlocked load IPFS doublehash: %d %s", mhCode, b58)
entries, _ = blocks.Load(b58)
status, entry = entries.CheckPathStatus("")
if status != StatusNotFound { // Hit!
Expand Down Expand Up @@ -863,16 +923,18 @@ func (dl *Denylist) IsCidBlocked(c cid.Cid) StatusResponse {
}
}

// Otherwise, double-hash the multihash string with the given codecs for
// Otherwise, double-hash the multihash string with the given codes for
// which we have blocks.
for codec, blocks := range dl.DoubleHashBlocksDB {
double, err := multihash.Sum([]byte(b58), codec, -1)
for mhCode, blocks := range dl.DoubleHashBlocksDB {
double, err := multihash.Sum([]byte(b58), mhCode, -1)
if err != nil {
// Usually this means an unsupported hash function was
// registered.
logger.Error(err)
continue
}
b58 := double.B58String()
logger.Debugf("IsCidBlocked load %d doublehash: %s", codec, b58)
logger.Debugf("IsCidBlocked load %d doublehash: %s", mhCode, b58)
entries, _ := blocks.Load(b58)
status, entry := entries.CheckPathStatus("")
if status != StatusNotFound { // Hit!
Expand Down
17 changes: 17 additions & 0 deletions entry.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package nopfs

import (
"bytes"
"fmt"
"net/url"
"strings"
Expand All @@ -27,6 +28,22 @@ func (e Entry) String() string {
return fmt.Sprintf("Path: %s. Prefix: %t. AllowRule: %t.", path, e.Path.Prefix, e.AllowRule)
}

// Clone returns a copy of the entry that does not share its Hints map
// or its Multihash bytes with the receiver, so either can be modified
// without affecting the other. The returned Hints map is never nil,
// even when the receiver's is. The remaining fields are copied by
// plain assignment.
func (e Entry) Clone() Entry {
	cloned := Entry{
		Line:      e.Line,
		AllowRule: e.AllowRule,
		Hints:     make(map[string]string, len(e.Hints)),
		RawValue:  e.RawValue,
		Multihash: bytes.Clone(e.Multihash),
		Path:      e.Path,
	}
	for name, value := range e.Hints {
		cloned.Hints[name] = value
	}
	return cloned
}

// Entries is a slice of Entry.
type Entries []Entry

Expand Down

0 comments on commit 25d9833

Please sign in to comment.