diff --git a/bpf/process/policy_filter.h b/bpf/process/policy_filter.h index 61563b6953a..083c0333d1e 100644 --- a/bpf/process/policy_filter.h +++ b/bpf/process/policy_filter.h @@ -8,6 +8,7 @@ #define POLICY_FILTER_MAX_POLICIES 128 #define POLICY_FILTER_MAX_NAMESPACES 1024 +#define POLICY_FILTER_MAX_CGROUP_IDS 512 struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); @@ -29,17 +30,39 @@ struct { }); } policy_filter_maps SEC(".maps"); +// This map keeps exactly the same information as policy_filter_maps +// but keeps the reverse mappings. i.e. +// policy_filter_maps maps policy_id to cgroup_ids +// policy_filter_reverse_maps maps cgroup_id to policy_ids +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, POLICY_FILTER_MAX_CGROUP_IDS); + __uint(key_size, sizeof(__u64)); /* cgroup id */ + __array( + values, struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, POLICY_FILTER_MAX_POLICIES); + __type(key, __u32); /* policy id */ + __type(value, __u8); /* empty */ + }); +} policy_filter_reverse_maps SEC(".maps"); + // policy_filter_check checks whether the policy applies on the current process. // Returns true if it does, false otherwise. FUNC_INLINE bool policy_filter_check(u32 policy_id) { void *policy_map; - __u64 cgroupid; + __u64 cgroupid = 0; if (!policy_id) return true; + // we just want to make sure that policy_filter_reverse_maps + // is part of the object file in order to read the map + // spec from the user space + map_lookup_elem(&policy_filter_reverse_maps, &cgroupid); + policy_map = map_lookup_elem(&policy_filter_maps, &policy_id); if (!policy_map) return false; diff --git a/cmd/tetra/debug/dump.go b/cmd/tetra/debug/dump.go index e10ae00bd43..09be8abb3a0 100644 --- a/cmd/tetra/debug/dump.go +++ b/cmd/tetra/debug/dump.go @@ -181,18 +181,33 @@ func PolicyfilterState(fname string) { return } - if len(data) == 0 { + fmt.Println("--- Direct Map ---") + + if len(data.Direct) == 0 { fmt.Printf("(empty)\n") - return } - for polId, cgIDs := range data { + for polId, cgIDs := range data.Direct { ids := make([]string, 0, len(cgIDs)) for id := range cgIDs { ids = append(ids, strconv.FormatUint(uint64(id), 10)) } fmt.Printf("%d: %s\n", polId, strings.Join(ids, ",")) } + + fmt.Println("--- Reverse Map ---") + + if len(data.Reverse) == 0 { + fmt.Printf("(empty)\n") + } + + for cgIDs, polIds := range data.Reverse { + ids := make([]string, 0, len(polIds)) + for id := range polIds { + ids = append(ids, strconv.FormatUint(uint64(id), 10)) + } + fmt.Printf("%d: %s\n", cgIDs, strings.Join(ids, ",")) + } } func NamespaceState(fname string) error { diff --git a/pkg/policyfilter/map.go b/pkg/policyfilter/map.go index 99bffc8ba37..78569a890e3 100644 --- a/pkg/policyfilter/map.go +++ b/pkg/policyfilter/map.go @@ -17,14 +17,16 @@ import ( ) const ( - MapName = "policy_filter_maps" + MapName = "policy_filter_maps" + RevMapName = "policy_filter_reverse_maps" ) // map operations used by policyfilter. // PfMap is a simple wrapper for ebpf.Map so that we can write methods for it type PfMap struct { - *ebpf.Map + dir *ebpf.Map // policy_filter_maps + rev *ebpf.Map // policy_filter_reverse_maps } // newMap returns a new policy filter map. @@ -46,7 +48,7 @@ func newPfMap() (PfMap, error) { // fix the spec here. policyMapSpec.InnerMap.MaxEntries = polMapSize - ret, err := ebpf.NewMap(policyMapSpec) + retDir, err := ebpf.NewMap(policyMapSpec) if err != nil { return PfMap{}, err } @@ -55,26 +57,110 @@ func newPfMap() (PfMap, error) { pinPath := filepath.Join(mapDir, MapName) os.Remove(pinPath) os.Mkdir(mapDir, os.ModeDir) - err = ret.Pin(pinPath) + err = retDir.Pin(pinPath) if err != nil { - ret.Close() + retDir.Close() return PfMap{}, fmt.Errorf("failed to pin policy filter map in %s: %w", pinPath, err) } - return PfMap{ret}, err + policyMapRevSpec, ok := spec.Maps[RevMapName] + if !ok { + return PfMap{}, fmt.Errorf("%s not found in %s", RevMapName, objPath) + } + + retRev, err := ebpf.NewMap(policyMapRevSpec) + if err != nil { + return PfMap{}, err + } + + pinRevPath := filepath.Join(mapDir, RevMapName) + os.Remove(pinRevPath) + err = retRev.Pin(pinRevPath) + if err != nil { + retDir.Close() + retRev.Clone() + return PfMap{}, fmt.Errorf("failed to pin policy filter map in %s: %w", pinPath, err) + } + + return PfMap{dir: retDir, rev: retRev}, err } // release closes the policy filter bpf map and remove (unpin) the bpffs file func (m PfMap) release() error { - if err := m.Close(); err != nil { + if err := m.dir.Close(); err != nil { return err } // nolint:revive // ignore "if-return: redundant if just return error" for clarity - if err := m.Unpin(); err != nil { + if err := m.dir.Unpin(); err != nil { + return err + } + + if err := m.rev.Close(); err != nil { return err } + // nolint:revive // ignore "if-return: redundant if just return error" for clarity + if err := m.rev.Unpin(); err != nil { + return err + } + + return nil +} + +func (m polMap) addCgroupIDsReverse(polID PolicyID, cgIDs []CgroupID) error { + for _, cgID := range cgIDs { + if err := addReverseMapping(m.Reverse, polID, cgID); err != nil { + return err + } + } + return nil +} + +func addReverseMapping(m *ebpf.Map, polID PolicyID, cgID CgroupID) error { + var id uint32 + err := m.Lookup(cgID, &id) + if err == nil { // inner map exists + inMap, err := ebpf.NewMapFromID(ebpf.MapID(id)) + if err != nil { + return fmt.Errorf("error opening inner map: %w", err) + } + defer inMap.Close() + + var zero uint8 + if err := inMap.Update(polID, zero, ebpf.UpdateAny); err != nil { + return fmt.Errorf("error updating inner map: %w", err) + } + + return nil + } + + // inner map does not exist + name := fmt.Sprintf("policy_reverse_%d_map", cgID) + innerSpec := &ebpf.MapSpec{ + Name: name, + Type: ebpf.Hash, + KeySize: uint32(unsafe.Sizeof(PolicyID(0))), + ValueSize: uint32(1), + MaxEntries: uint32(polMaxPolicies), + } + + inner, err := ebpf.NewMap(innerSpec) + if err != nil { + return fmt.Errorf("failed to create cgroup (id=%d) map: %w", cgID, err) + } + defer inner.Close() + + var zero uint8 + if err := inner.Update(polID, zero, ebpf.UpdateAny); err != nil { + return fmt.Errorf("error updating inner map: %w", err) + } + + if err := m.Update(cgID, uint32(inner.FD()), ebpf.UpdateNoExist); err != nil { + inner.Close() + return fmt.Errorf("failed to insert inner policy (id=%d) map: %w", polID, err) + } + return nil } @@ -95,25 +181,98 @@ func (m PfMap) newPolicyMap(polID PolicyID, cgIDs []CgroupID) (polMap, error) { } // update inner map with ids - ret := polMap{inner} + ret := polMap{ + Inner: inner, + Reverse: m.rev, + } if err := ret.addCgroupIDs(cgIDs); err != nil { - ret.Close() + ret.Inner.Close() return polMap{}, fmt.Errorf("failed to update policy (id=%d): %w", polID, err) } // update outer map // NB(kkourt): use UpdateNoExist because we expect only a single policy with a given id - if err := m.Update(polID, uint32(ret.FD()), ebpf.UpdateNoExist); err != nil { - ret.Close() + if err := m.dir.Update(polID, uint32(ret.Inner.FD()), ebpf.UpdateNoExist); err != nil { + ret.Inner.Close() return polMap{}, fmt.Errorf("failed to insert inner policy (id=%d) map: %w", polID, err) } + // update reverse map + for _, cgID := range cgIDs { + if err := addReverseMapping(m.rev, polID, cgID); err != nil { + return polMap{}, fmt.Errorf("failed to update reverse map: %w", err) + } + } + return ret, nil } -func (m PfMap) readAll() (map[PolicyID]map[CgroupID]struct{}, error) { +func getMapSize(m *ebpf.Map) (uint32, error) { + var key uint32 + var val uint8 + var mapSize uint32 + + inIter := m.Iterate() + for inIter.Next(&key, &val) { + mapSize++ + } + + if err := inIter.Err(); err != nil { + return 0, fmt.Errorf("error iterating inner map: %w", err) + } + + return mapSize, nil +} + +func (m PfMap) deletePolicyIdInReverse(polID PolicyID) error { + var key CgroupID + var id uint32 - readInner := func(id uint32) (map[CgroupID]struct{}, error) { + cgIDs := []CgroupID{} + iter := m.rev.Iterate() + for iter.Next(&key, &id) { + inMap, err := ebpf.NewMapFromID(ebpf.MapID(id)) + if err != nil { + return fmt.Errorf("error opening inner map: %w", err) + } + defer inMap.Close() + + // We don't know if this exists if this does not exist this + // will return an error, but this is fine. + inMap.Delete(polID) + + // now we need to check the size of the inner map + // if this is 0 we should also remove the outer entry + mapSize, err := getMapSize(inMap) + if err != nil { + return fmt.Errorf("error getting inner map size: %w", err) + } + if mapSize == 0 { + cgIDs = append(cgIDs, key) + } + } + + if err := iter.Err(); err != nil { + return fmt.Errorf("deletePolicyIdInReverse: error iterating reverse map: %w", err) + } + + // delete empty outer maps + for _, cgID := range cgIDs { + if err := m.rev.Delete(cgID); err != nil { + return fmt.Errorf("error deleting outer map entry: %w", err) + } + } + + return nil +} + +type PfMapDump struct { + Direct map[PolicyID]map[CgroupID]struct{} + Reverse map[CgroupID]map[PolicyID]struct{} +} + +func readAll[K PolicyID | CgroupID, V PolicyID | CgroupID](m *ebpf.Map) (map[K]map[V]struct{}, error) { + readInner := func(id uint32) (map[V]struct{}, error) { inMap, err := ebpf.NewMapFromID(ebpf.MapID(id)) if err != nil { return nil, fmt.Errorf("error opening inner map: %w", err) @@ -121,10 +280,10 @@ func (m PfMap) readAll() (map[PolicyID]map[CgroupID]struct{}, error) { defer inMap.Close() inIter := inMap.Iterate() - var key CgroupID + var key V var val uint8 - ret := map[CgroupID]struct{}{} + ret := map[V]struct{}{} for inIter.Next(&key, &val) { ret[key] = struct{}{} } @@ -137,17 +296,17 @@ func (m PfMap) readAll() (map[PolicyID]map[CgroupID]struct{}, error) { } - ret := make(map[PolicyID]map[CgroupID]struct{}) - var key PolicyID + ret := make(map[K]map[V]struct{}) + var key K var id uint32 iter := m.Iterate() for iter.Next(&key, &id) { - cgids, err := readInner(id) + policyids, err := readInner(id) if err != nil { return nil, err } - ret[key] = cgids + ret[key] = policyids } if err := iter.Err(); err != nil { @@ -157,9 +316,24 @@ func (m PfMap) readAll() (map[PolicyID]map[CgroupID]struct{}, error) { return ret, nil } +func (m PfMap) readAll() (PfMapDump, error) { + d, err := readAll[PolicyID, CgroupID](m.dir) + if err != nil { + return PfMapDump{}, fmt.Errorf("error reading direct map: %w", err) + } + + r, err := readAll[CgroupID, PolicyID](m.rev) + if err != nil { + return PfMapDump{}, fmt.Errorf("error reading reverse map: %w", err) + } + + return PfMapDump{Direct: d, Reverse: r}, nil +} + // polMap is a simple wrapper for ebpf.Map so that we can write methods for it type polMap struct { - *ebpf.Map + Inner *ebpf.Map + Reverse *ebpf.Map } type batchError struct { @@ -181,7 +355,7 @@ func (e *batchError) Unwrap() error { func (m polMap) addCgroupIDs(cgIDs []CgroupID) error { var zero uint8 for i, cgID := range cgIDs { - if err := m.Update(&cgID, zero, ebpf.UpdateAny); err != nil { + if err := m.Inner.Update(&cgID, zero, ebpf.UpdateAny); err != nil { return &batchError{ SuccCount: i, err: fmt.Errorf("failed to update policy map (cgroup id: %d): %w", cgID, err), @@ -192,23 +366,63 @@ func (m polMap) addCgroupIDs(cgIDs []CgroupID) error { return nil } -// addCgroupIDs delete cgroups ids from the policy map +// delCgroupIDs delete cgroups ids from the policy map // todo: use batch operations when supported -func (m polMap) delCgroupIDs(cgIDs []CgroupID) error { +func (m polMap) delCgroupIDs(polID PolicyID, cgIDs []CgroupID) error { + rmRevCgIDs := []CgroupID{} for i, cgID := range cgIDs { - if err := m.Delete(&cgID); err != nil { + if err := m.Inner.Delete(&cgID); err != nil { return &batchError{ SuccCount: i, err: fmt.Errorf("failed to delete items from policy map (cgroup id: %d): %w", cgID, err), } } + rmRevCgIDs = append(rmRevCgIDs, cgID) + } + + // update reverse map + for _, cgID := range rmRevCgIDs { + var id uint32 + if err := m.Reverse.Lookup(cgID, &id); err != nil { // inner map does not exists + continue + } + + inMap, err := ebpf.NewMapFromID(ebpf.MapID(id)) + if err != nil { + return fmt.Errorf("error opening inner map: %w", err) + } + defer inMap.Close() + + var zero uint8 + if err := inMap.Lookup(polID, &zero); err != nil { + continue + } + + // policy exists for that cgrpid so delete that + inMap.Delete(polID) + + // get the inner map size + sz, err := getMapSize(inMap) + if err != nil { + return fmt.Errorf("error getting inner map size: %w", err) + } + + // we can now delete that outter entry + if sz == 0 { + m.Reverse.Delete(cgID) + } } return nil } func OpenMap(fname string) (PfMap, error) { - m, err := ebpf.LoadPinnedMap(fname, &ebpf.LoadPinOptions{ + base := filepath.Base(fname) + if base != MapName { + return PfMap{}, fmt.Errorf("unexpected policy filter map name: %s", base) + } + + d, err := ebpf.LoadPinnedMap(fname, &ebpf.LoadPinOptions{ ReadOnly: true, }) @@ -216,17 +430,34 @@ func OpenMap(fname string) (PfMap, error) { return PfMap{}, err } - return PfMap{m}, err + dir := filepath.Dir(fname) + reverseMapPath := filepath.Join(dir, RevMapName) + r, err := ebpf.LoadPinnedMap(reverseMapPath, &ebpf.LoadPinOptions{ + ReadOnly: true, + }) + + if err != nil { + d.Close() + return PfMap{}, err + } + + return PfMap{dir: d, rev: r}, err } -func (m PfMap) Dump() (map[PolicyID]map[CgroupID]struct{}, error) { +func (m PfMap) Close() { + m.dir.Close() + m.rev.Close() +} + +func (m PfMap) Dump() (PfMapDump, error) { return m.readAll() } func (m PfMap) AddCgroup(polID PolicyID, cgID CgroupID) error { + // direct map update var innerID uint32 - if err := m.Lookup(&polID, &innerID); err != nil { + if err := m.dir.Lookup(&polID, &innerID); err != nil { return fmt.Errorf("failed to lookup policy id %d: %w", polID, err) } @@ -241,5 +472,10 @@ func (m PfMap) AddCgroup(polID PolicyID, cgID CgroupID) error { return fmt.Errorf("error updating inner map: %w", err) } + // reverse map update + if err := addReverseMapping(m.rev, polID, cgID); err != nil { + return fmt.Errorf("error updating reverse map: %w", err) + } + return nil } diff --git a/pkg/policyfilter/map_test.go b/pkg/policyfilter/map_test.go index 0b12ff3145b..f247f02e01a 100644 --- a/pkg/policyfilter/map_test.go +++ b/pkg/policyfilter/map_test.go @@ -19,9 +19,20 @@ func requirePfmEqualTo(t *testing.T, m PfMap, val map[uint64][]uint64) { } } + checkReverseVals := map[CgroupID]map[PolicyID]struct{}{} + for k, ids := range val { + for _, id := range ids { + if checkReverseVals[CgroupID(id)] == nil { + checkReverseVals[CgroupID(id)] = map[PolicyID]struct{}{} + } + checkReverseVals[CgroupID(id)][PolicyID(k)] = struct{}{} + } + } + mapVals, err := m.readAll() require.NoError(t, err) - require.EqualValues(t, checkVals, mapVals) + require.EqualValues(t, checkVals, mapVals.Direct) + require.EqualValues(t, checkReverseVals, mapVals.Reverse) } // TestPfMapOps tests some simple map operations @@ -42,9 +53,11 @@ func TestPfMapOps(t *testing.T) { err = pm1.addCgroupIDs([]CgroupID{30}) require.NoError(t, err) + err = addReverseMapping(pm1.Reverse, polID1, 30) + require.NoError(t, err) requirePfmEqualTo(t, pfm, map[uint64][]uint64{100: {10, 20, 30}}) - err = pm1.delCgroupIDs([]CgroupID{20, 10}) + err = pm1.delCgroupIDs(polID1, []CgroupID{20, 10}) require.NoError(t, err) requirePfmEqualTo(t, pfm, map[uint64][]uint64{100: {30}}) diff --git a/pkg/policyfilter/state.go b/pkg/policyfilter/state.go index a109f3d6678..120fa1d4ae0 100644 --- a/pkg/policyfilter/state.go +++ b/pkg/policyfilter/state.go @@ -75,6 +75,9 @@ const ( // should be large enough to accommodate the number of containers // running in a system. polMapSize = 32768 + + // same as POLICY_FILTER_MAX_POLICIES in policy_filter.h + polMaxPolicies = 128 ) type PolicyID uint32 @@ -493,15 +496,18 @@ func (m *state) DelPolicy(polID PolicyID) error { defer m.mu.Unlock() policy := m.delPolicy(polID) if policy != nil { - policy.polMap.Close() + policy.polMap.Inner.Close() } else { m.log.WithField("policy-id", polID).Warn("DelPolicy: policy internal map not found") } - if err := m.pfMap.Delete(polID); err != nil { + if err := m.pfMap.dir.Delete(polID); err != nil { m.log.WithField("policy-id", polID).Warn("DelPolicy: failed to remove policy from external map") } + // update reverse map + m.pfMap.deletePolicyIdInReverse(polID) + for i := range m.pods { pod := &m.pods[i] pod.delCachedPolicy(policy.id) @@ -649,6 +655,14 @@ func (m *state) addPodContainers(pod *podInfo, containerIDs []string, "pod-id": pod.id, "cgroup-ids": matchingCgIDs, }).Warn("failed to update policy map") + } else { + if err := pol.polMap.addCgroupIDsReverse(pol.id, matchingCgIDs); err != nil { + m.log.WithError(err).WithFields(logrus.Fields{ + "policy-id": pol.id, + "pod-id": pod.id, + "cgroup-ids": matchingCgIDs, + }).Warn("failed to update reverse policy map") + } } } } @@ -723,7 +737,7 @@ func (m *state) delPodCgroupIDsFromPolicyMaps(pod *podInfo, containers []contain // try to find containers in the pod matching this policy // this way, we only remove containers that are actually present in the policy cgroupIDs := pol.matchingContainersCgroupIDs(containers) - if err := pol.polMap.delCgroupIDs(cgroupIDs); err != nil { + if err := pol.polMap.delCgroupIDs(pol.id, cgroupIDs); err != nil { // NB: depending on the error, we might want to schedule some retries here m.log.WithError(err).WithFields(logrus.Fields{ "policy-id": pol.id, @@ -825,12 +839,22 @@ func (m *state) applyPodPolicyDiff(pod *podInfo, polDiff *policiesDiffRes) { "cgroup-ids": cgroupIDs, "reason": "labels change caused policy to match", }).Warn("failed to update policy map") + } else { + // update reverse map if addCgroupIDs succeeds + if err := addPol.polMap.addCgroupIDsReverse(addPol.id, cgroupIDs); err != nil { + m.log.WithError(err).WithFields(logrus.Fields{ + "policy-id": addPol.id, + "pod-id": pod.id, + "cgroup-ids": cgroupIDs, + "reason": "labels change caused policy to match", + }).Warn("failed to update reverse policy map") + } } } for _, delPol := range polDiff.deletedPolicies { cgroupIDs = delPol.matchingContainersCgroupIDs(pod.containers) - if err := delPol.polMap.delCgroupIDs(cgroupIDs); err != nil { + if err := delPol.polMap.delCgroupIDs(delPol.id, cgroupIDs); err != nil { m.log.WithError(err).WithFields(logrus.Fields{ "policy-id": delPol.id, "pod-id": pod.id,