From 257fb999c3f7c2ab88d42d624043c6d03b249138 Mon Sep 17 00:00:00 2001 From: rabbitstack Date: Sun, 1 Jun 2025 21:51:32 +0200 Subject: [PATCH] perf(rule-engine): More efficient filter indexing Move away from FNV hash-based filter indexing to two separate buckets - one for event types filters and one for category filters. --- pkg/event/category.go | 51 +++++++++++++++++ pkg/event/category_test.go | 34 ++++++++++++ pkg/filter/ql/literal.go | 23 ++++---- pkg/rules/engine.go | 110 +++++++++++++------------------------ pkg/rules/engine_test.go | 13 +---- 5 files changed, 139 insertions(+), 92 deletions(-) create mode 100644 pkg/event/category_test.go diff --git a/pkg/event/category.go b/pkg/event/category.go index d9d45e966..119dd55bd 100644 --- a/pkg/event/category.go +++ b/pkg/event/category.go @@ -19,6 +19,7 @@ package event import ( + "github.com/bits-and-blooms/bitset" "github.com/rabbitstack/fibratus/pkg/util/hashers" ) @@ -69,6 +70,56 @@ func (c Category) Hash() uint32 { return hashers.FnvUint32([]byte(c)) } +// CategoryMasks allows setting and checking the category bit mask. +type CategoryMasks struct { + bs bitset.BitSet +} + +// Set sets the category bit in the bit mask. +func (m *CategoryMasks) Set(c Category) { + m.bs.Set(uint(c.Index())) +} + +// Test checks if the given category bit is set. +func (m *CategoryMasks) Test(c Category) bool { + return m.bs.Test(uint(c.Index())) +} + +// MaxCategoryIndex designates the maximum category index. +const MaxCategoryIndex = 13 + +// Index returns a numerical category index. +func (c Category) Index() uint8 { + switch c { + case Registry: + return 1 + case File: + return 2 + case Net: + return 3 + case Process: + return 4 + case Thread: + return 5 + case Image: + return 6 + case Handle: + return 7 + case Driver: + return 8 + case Mem: + return 9 + case Object: + return 10 + case Threadpool: + return 11 + case Other: + return 12 + default: + return MaxCategoryIndex + } +} + // Categories returns all available categories. func Categories() []string { return []string{ diff --git a/pkg/event/category_test.go b/pkg/event/category_test.go new file mode 100644 index 000000000..6b6a6eedb --- /dev/null +++ b/pkg/event/category_test.go @@ -0,0 +1,34 @@ +/* + * Copyright 2021-present by Nedim Sabic Sabic + * https://www.fibratus.io + * All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package event + +import ( + "github.com/stretchr/testify/assert" + "testing" +) + +func TestCategoryMasks(t *testing.T) { + var masks CategoryMasks + masks.Set(File) + masks.Set(Process) + + assert.True(t, masks.Test(File)) + assert.True(t, masks.Test(Process)) + assert.False(t, masks.Test(Registry)) +} diff --git a/pkg/filter/ql/literal.go b/pkg/filter/ql/literal.go index 208d9268a..5e68e0346 100644 --- a/pkg/filter/ql/literal.go +++ b/pkg/filter/ql/literal.go @@ -21,7 +21,6 @@ package ql import ( "github.com/rabbitstack/fibratus/pkg/event" "github.com/rabbitstack/fibratus/pkg/filter/fields" - "github.com/rabbitstack/fibratus/pkg/util/hashers" "golang.org/x/sys/windows" "net" "reflect" @@ -280,12 +279,13 @@ type SequenceExpr struct { // Alias represents the sequence expression alias. Alias string - buckets map[uint32]bool - types []event.Type + emasks event.EventsetMasks + cmasks event.CategoryMasks + + types []event.Type } func (e *SequenceExpr) init() { - e.buckets = make(map[uint32]bool) e.types = make([]event.Type, 0) e.BoundFields = make([]*BoundFieldLiteral, 0) } @@ -338,12 +338,15 @@ func (e *SequenceExpr) walk() { // initialize event type/category buckets for every such field for name, values := range stringFields { - if name == fields.EvtName || name == fields.EvtCategory { - for _, v := range values { - e.buckets[hashers.FnvUint32([]byte(v))] = true - if etype := event.NameToType(v); etype.Exists() { - e.types = append(e.types, etype) + for _, v := range values { + switch name { + case fields.EvtName: + for _, typ := range event.NameToTypes(v) { + e.emasks.Set(typ) + e.types = append(e.types, typ) } + case fields.EvtCategory: + e.cmasks.Set(event.Category(v)) } } } @@ -354,7 +357,7 @@ func (e *SequenceExpr) walk() { // to be evaluated when the incoming event type or category pertains to the one // defined in the field literal. func (e *SequenceExpr) IsEvaluable(evt *event.Event) bool { - return e.buckets[evt.Type.Hash()] || e.buckets[evt.Category.Hash()] + return e.emasks.Test(evt.Type.GUID(), evt.Type.HookID()) || e.cmasks.Test(evt.Category) } // HasBoundFields determines if this sequence expression references any bound field. diff --git a/pkg/rules/engine.go b/pkg/rules/engine.go index e9b6cc294..a3803d301 100644 --- a/pkg/rules/engine.go +++ b/pkg/rules/engine.go @@ -27,7 +27,6 @@ import ( "github.com/rabbitstack/fibratus/pkg/filter/fields" "github.com/rabbitstack/fibratus/pkg/ps" "github.com/rabbitstack/fibratus/pkg/rules/action" - "github.com/rabbitstack/fibratus/pkg/util/hashers" log "github.com/sirupsen/logrus" "sync" "time" @@ -53,7 +52,7 @@ var ( // the collection of compiled filters that are derived // from the loaded ruleset. type Engine struct { - filters compiledFilters + filters *filterset config *config.Config psnap ps.Snapshotter @@ -65,8 +64,6 @@ type Engine struct { compiler *compiler - hashCache *hashCache - matchFunc RuleMatchFunc } @@ -74,73 +71,35 @@ type ruleMatch struct { ctx *config.ActionContext } -// hashCache caches the event type/category FNV hashes -type hashCache struct { - mu sync.RWMutex - types map[event.Type]uint32 - cats map[event.Category]uint32 - lookupCategory bool -} - -func newHashCache() *hashCache { - return &hashCache{types: make(map[event.Type]uint32), cats: make(map[event.Category]uint32)} -} - -func (c *hashCache) typeHash(e *event.Event) uint32 { - c.mu.RLock() - defer c.mu.RUnlock() - return c.types[e.Type] -} - -func (c *hashCache) categoryHash(e *event.Event) uint32 { - c.mu.RLock() - defer c.mu.RUnlock() - return c.cats[e.Category] -} - -func (c *hashCache) addTypeHash(e *event.Event) uint32 { - c.mu.Lock() - defer c.mu.Unlock() - h := e.Type.Hash() - c.types[e.Type] = h - return h -} - -func (c *hashCache) addCategoryHash(e *event.Event) uint32 { - c.mu.Lock() - defer c.mu.Unlock() - h := e.Category.Hash() - c.cats[e.Category] = h - return h -} - type compiledFilter struct { filter filter.Filter config *config.FilterConfig ss *sequenceState } -type compiledFilters map[uint32][]*compiledFilter +// filterset contains compiled filters indexed by event type and category. +type filterset struct { + types map[event.Type][]*compiledFilter + categories map[uint8][]*compiledFilter +} -// collect collects all compiled filters for a -// particular event type or category. If no filters -// are found, the event is not asserted against the -// ruleset. -func (filters compiledFilters) collect(hashCache *hashCache, e *event.Event) []*compiledFilter { - h := hashCache.typeHash(e) - if h == 0 { - h = hashCache.addTypeHash(e) +func newFilterset() *filterset { + fs := &filterset{ + types: make(map[event.Type][]*compiledFilter), + categories: make(map[uint8][]*compiledFilter), } + return fs +} - if !hashCache.lookupCategory { - return filters[h] - } +func (f *filterset) empty() bool { + return len(f.types) == 0 && len(f.categories) == 0 +} - c := hashCache.categoryHash(e) - if c == 0 { - c = hashCache.addCategoryHash(e) +func (f *filterset) collect(e *event.Event) []*compiledFilter { + if len(f.categories) == 0 { + return f.types[e.Type] } - return append(filters[h], filters[c]...) + return append(f.types[e.Type], f.categories[e.Category.Index()]...) } func newCompiledFilter(f filter.Filter, c *config.FilterConfig, ss *sequenceState) *compiledFilter { @@ -172,14 +131,13 @@ func (f *compiledFilter) run(e *event.Event) bool { // NewEngine builds a fresh rules engine instance. func NewEngine(psnap ps.Snapshotter, config *config.Config) *Engine { e := &Engine{ - filters: make(map[uint32][]*compiledFilter), + filters: newFilterset(), matches: make([]*ruleMatch, 0), sequences: make([]*sequenceState, 0), psnap: psnap, config: config, scavenger: time.NewTicker(sequenceGcInterval), compiler: newCompiler(psnap, config), - hashCache: newHashCache(), } go e.gcSequences() @@ -217,6 +175,7 @@ func (e *Engine) Compile() (*config.RulesCompileResult, error) { // for more convenient tracking e.sequences = append(e.sequences, ss) } + if !fltr.isScoped() { log.Warnf("%q rule doesn't have "+ "event type or event category condition! "+ @@ -227,18 +186,21 @@ func (e *Engine) Compile() (*config.RulesCompileResult, error) { c.Name) continue } + // traverse all event name or category fields and determine // the event type from the filter field name expression. - // We end up with a map of rules indexed by event name - // or event category hash + // We end up with a map of rules indexed by event type + // or event category for name, values := range f.GetStringFields() { for _, v := range values { - if name == fields.EvtName || name == fields.EvtCategory { - if name == fields.EvtCategory { - e.hashCache.lookupCategory = true + switch name { + case fields.EvtName: + for _, typ := range event.NameToTypes(v) { + e.filters.types[typ] = append(e.filters.types[typ], fltr) } - hash := hashers.FnvUint32([]byte(v)) - e.filters[hash] = append(e.filters[hash], fltr) + case fields.EvtCategory: + category := event.Category(v) + e.filters.categories[category.Index()] = append(e.filters.categories[category.Index()], fltr) } } } @@ -258,10 +220,10 @@ func (*Engine) CanEnqueue() bool { return true } // Filters can be simple direct-event matchers or sequence states that // track an ordered series of events over a short period of time. func (e *Engine) ProcessEvent(evt *event.Event) (bool, error) { - if len(e.filters) == 0 { + if e.filters.empty() { return true, nil } - var matches bool + if evt.IsTerminateProcess() { // expire all sequences if the // process referenced in any @@ -270,7 +232,10 @@ func (e *Engine) ProcessEvent(evt *event.Event) (bool, error) { seq.expire(evt) } } - filters := e.filters.collect(e.hashCache, evt) + + filters := e.filters.collect(evt) + + var matches bool for _, f := range filters { match := f.run(evt) if !match { @@ -293,6 +258,7 @@ func (e *Engine) ProcessEvent(evt *event.Event) (bool, error) { return true, nil } } + return matches, nil } diff --git a/pkg/rules/engine_test.go b/pkg/rules/engine_test.go index 6484fb19a..80c269d9b 100644 --- a/pkg/rules/engine_test.go +++ b/pkg/rules/engine_test.go @@ -141,7 +141,8 @@ func TestCompileIndexableFilters(t *testing.T) { compileRules(t, e) - assert.Len(t, e.filters, 3) + assert.Len(t, e.filters.types, 5) + assert.Len(t, e.filters.categories, 1) var tests = []struct { evt *event.Event @@ -156,17 +157,9 @@ func TestCompileIndexableFilters(t *testing.T) { for _, tt := range tests { t.Run(tt.evt.Type.String(), func(t *testing.T) { - assert.Len(t, e.filters.collect(e.hashCache, tt.evt), tt.wants) + assert.Len(t, e.filters.collect(tt.evt), tt.wants) }) } - - assert.Len(t, e.hashCache.types, 4) - - evt := &event.Event{Type: event.RecvTCPv4} - - h1, h2 := e.hashCache.typeHash(evt), e.hashCache.categoryHash(evt) - assert.Equal(t, uint32(0xfa4dab59), h1) - assert.Equal(t, uint32(0x811c9dc5), h2) } func TestRunSimpleRules(t *testing.T) {