From b6b80550167dafac96898e3f23a1c25c852afeb1 Mon Sep 17 00:00:00 2001 From: guillaumemichel Date: Fri, 10 Jan 2025 15:22:41 +0100 Subject: [PATCH] doc: ant locations & key generation --- README.md | 6 ++++++ keys.go | 9 +++++++++ queen.go | 50 ++++++++++++++++++++++++++++++++++++++++---------- util.go | 1 + 4 files changed, 56 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 1476005..b53c1b8 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,12 @@ You can run a health check on the honeypot by running the following command: go run . health ``` +## Ants key generation + +The queen ant periodically queries the [Nebula](https://github.com/dennis-tra/nebula) database to retrieve the list of connected DHT servers. Kademlia identifiers of these peers are then inserted into a [binary trie](https://github.com/guillaumemichel/py-binary-trie/). Using this binary trie, the queen defines keyspace zones of at most `bucket_size - 1` peers. One ant must be present in each of these zones in order to capture all DHT requests reaching the `bucket_size` closest peers to the target key. + +Kademlia identifiers are derived from a libp2p peer id, which itself is derived from a cryptographic key pair. Hence generating a key matching a specific zone of the binary trie isn't trivial and requires bruteforce. All keys generated during the bruteforce are persisted on disk, because they may be useful in the future. When an ant isn't needed anymore, its key is marked as available for reuse. This also allows reusing the same peer ids for the ants across multiple runs of the honeypot. + ## License This project is licensed under the MIT License - see the [LICENSE](./LICENSE) file for details. 
diff --git a/keys.go b/keys.go index 4e8e05a..82823fa 100644 --- a/keys.go +++ b/keys.go @@ -91,6 +91,8 @@ func (db *KeysDB) writeKeysToFile(keysTrie *trie.Trie[bit256.Key, crypto.PrivKey } } +// integrateKeysIntoTrie converts the provided privkeys into kademlia ids and +// adds them to the provided binary trie func integrateKeysIntoTrie(keysTrie *trie.Trie[bit256.Key, crypto.PrivKey], keys []crypto.PrivKey) { for _, key := range keys { if key == nil { @@ -115,6 +117,10 @@ func genKey() crypto.PrivKey { return priv } +// getMatchingKeys will return a list of private keys whose kademlia IDs match +// the provided list of prefixes, by looking for matches in the provided binary +// trie and, if there is no match, by bruteforcing new keys until a match is found. All +// keys generated during bruteforces are added to the trie. func getMatchingKeys(prefixes []bitstr.Key, keysTrie *trie.Trie[bit256.Key, crypto.PrivKey]) []crypto.PrivKey { // generate a random mask to be used as key suffix. If the same suffix is // used for all keys, the trie will be unbalanced @@ -156,6 +162,9 @@ func getMatchingKeys(prefixes []bitstr.Key, keysTrie *trie.Trie[bit256.Key, cryp return keys } +// MatchingKeys returns a list of private keys whose kademlia IDs match the +// provided list of prefixes. It also writes back to disk the returned private +// keys for future use. 
func (db *KeysDB) MatchingKeys(prefixes []bitstr.Key, returned []crypto.PrivKey) []crypto.PrivKey { // read keys from disk keysTrie := db.readKeysFromFile() diff --git a/queen.go b/queen.go index 3f68362..2757f00 100644 --- a/queen.go +++ b/queen.go @@ -132,6 +132,7 @@ func (q *Queen) freePort(port int) { } } +// Run makes the queen orchestrate the ant nest func (q *Queen) Run(ctx context.Context) error { logger.Infoln("Queen.Run started") defer logger.Infoln("Queen.Run completing") @@ -271,20 +272,26 @@ func (q *Queen) persistLiveAntsKeys() { logger.Debugf("Number of antsKeys persisted: %d", len(antsKeys)) } +// routine must be called periodically to ensure that the number and positions +// of ants are still relevant given the latest observed DHT servers. func (q *Queen) routine(ctx context.Context) { + // get online DHT servers from the Nebula database networkPeers, err := q.nebulaDB.GetLatestPeerIds(ctx) if err != nil { logger.Warn("unable to get latest peer ids from Nebula ", err) return } + // build a binary trie from the network peers networkTrie := trie.New[bit256.Key, peer.ID]() for _, peerId := range networkPeers { networkTrie.Add(PeerIDToKadID(peerId), peerId) } - // zones correspond to the prefixes of the tries that must be covered by an ant - zones := trieZones(networkTrie, q.cfg.BucketSize) + // zones correspond to the prefixes of the trie that must be covered by an + // ant. Each returned prefix MUST be matched by one ant's kademlia ID in + // order to ensure global coverage. 
+ zones := trieZones(networkTrie, q.cfg.BucketSize-1) logger.Debugf("%d zones must be covered by ants", len(zones)) // convert string zone to bitstr.Key @@ -294,20 +301,20 @@ func (q *Queen) routine(ctx context.Context) { } var excessAntsIndices []int - // remove keys covered by existing ants, and mark useless ants + // remove keys covered by existing ants, and mark ants that aren't needed anymore for index, ant := range q.ants { matchedKey := false for i, missingKey := range missingKeys { if key.CommonPrefixLength(ant.kadID, missingKey) == missingKey.BitLen() { - // remove key from missingKeys since covered by current ant + // remove key from missingKeys since covered by existing ant missingKeys = append(missingKeys[:i], missingKeys[i+1:]...) matchedKey = true break } } if !matchedKey { - // this ant is not needed anymore - // two ants end up in the same zone, the younger one is discarded + // This ant is not needed anymore. If two ants end up in the same zone, the + // younger one is discarded. 
excessAntsIndices = append(excessAntsIndices, index) } } @@ -315,7 +322,7 @@ func (q *Queen) routine(ctx context.Context) { logger.Debugf("need %d extra ants", len(missingKeys)) logger.Debugf("removing %d ants", len(excessAntsIndices)) - // remove ants + // kill ants that are not needed anymore returnedKeys := make([]crypto.PrivKey, len(excessAntsIndices)) for i, index := range excessAntsIndices { ant := q.ants[index] @@ -330,8 +337,9 @@ func (q *Queen) routine(ctx context.Context) { q.freePort(port) } - // add missing ants + // get libp2p private keys whose kademlia id matches the missing key prefixes privKeys := q.keysDB.MatchingKeys(missingKeys, returnedKeys) + // add missing ants for _, key := range privKeys { port, err := q.takeAvailablePort() if err != nil { @@ -364,19 +372,41 @@ func (q *Queen) routine(ctx context.Context) { logger.Debug("queen routine over") } +// trieZones is a recursive function returning the prefixes that the ants must +// have in order to cover the complete keyspace. The prefixes correspond to +// subtries/branches that have at most zoneSize (=bucketSize-1) peers. They +// must be the largest subtries with at most zoneSize peers. The returned +// prefixes cover the whole keyspace even if they don't all have the same +// length. +// +// e.g. ["00", "010", "011", "1"] is a valid return value since the prefixes +// cover all possible values. In this specific example, the trie would be +// unbalanced, and would have fewer peers with the prefix "1" than +// with the prefix "0". func trieZones[K kad.Key[K], T any](t *trie.Trie[K, T], zoneSize int) []string { if t.Size() < zoneSize { + // We've hit the bottom of the trie. There are fewer peers in the (sub)trie + // than the zone size, hence spawning a single ant is enough to cover this + // (sub)trie. + // + // Since we aren't aware of the subtrie location in the greater trie, it is + // the parent's responsibility to add the prefix. 
return []string{""} } + // a trie is composed of two branches, respectively starting with "0" and + // "1". Take the returned prefixes from each branch (subtrie), and add the + // corresponding prefix before returning them to the parent. zones := []string{} if !t.Branch(0).IsLeaf() { for _, zone := range trieZones(t.Branch(0), zoneSize) { zones = append(zones, "0"+zone) } } - for _, zone := range trieZones(t.Branch(1), zoneSize) { - zones = append(zones, "1"+zone) + if !t.Branch(1).IsLeaf() { + for _, zone := range trieZones(t.Branch(1), zoneSize) { + zones = append(zones, "1"+zone) + } } return zones } diff --git a/util.go b/util.go index cd6bef9..b1108eb 100644 --- a/util.go +++ b/util.go @@ -10,6 +10,7 @@ import ( "github.com/probe-lab/go-libdht/kad/key/bitstr" ) +// PeerIDToKadID converts a libp2p peer.ID to its binary kademlia identifier func PeerIDToKadID(pid peer.ID) bit256.Key { hasher, err := mhreg.GetHasher(mh.SHA2_256) if err != nil {