Skip to content

Commit 5a11847

Browse files
authored
add LookupFile and FileMap expr helpers (#4372)
1 parent a8a75f2 commit 5a11847

11 files changed

Lines changed: 778 additions & 2 deletions

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ require (
6666
github.com/nxadm/tail v1.4.11
6767
github.com/oschwald/geoip2-golang v1.9.0
6868
github.com/oschwald/maxminddb-golang v1.12.0
69+
github.com/petar-dambovaliev/aho-corasick v0.0.0-20250424160509-463d218d4745
6970
github.com/prometheus/client_golang v1.23.2
7071
github.com/prometheus/client_model v0.6.2
7172
github.com/prometheus/common v0.66.1
@@ -198,7 +199,6 @@ require (
198199
github.com/opencontainers/go-digest v1.0.0 // indirect
199200
github.com/opencontainers/image-spec v1.1.1 // indirect
200201
github.com/pelletier/go-toml/v2 v2.2.4 // indirect
201-
github.com/petar-dambovaliev/aho-corasick v0.0.0-20250424160509-463d218d4745 // indirect
202202
github.com/pierrec/lz4/v4 v4.1.18 // indirect
203203
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
204204
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect

pkg/exprhelpers/expr_lib.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,20 @@ var exprFuncs = []exprCustomFunc{
118118
new(func(string, string) bool),
119119
},
120120
},
121+
{
122+
name: "FileMap",
123+
function: FileMap,
124+
signature: []any{
125+
new(func(string) []map[string]string),
126+
},
127+
},
128+
{
129+
name: "LookupFile",
130+
function: LookupFile,
131+
signature: []any{
132+
new(func(string, string) string),
133+
},
134+
},
121135
{
122136
name: "Upper",
123137
function: Upper,

pkg/exprhelpers/filemap.go

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
package exprhelpers
2+
3+
import (
4+
"encoding/json"
5+
"fmt"
6+
"regexp"
7+
"slices"
8+
"strings"
9+
10+
aho_corasick "github.com/petar-dambovaliev/aho-corasick"
11+
log "github.com/sirupsen/logrus"
12+
)
13+
14+
// dataFileMap holds pre-parsed JSON-lines map data, keyed by filename.
// Entries are added by fileMapInit and read by FileMap and LookupFile.
// The declaration itself leaves the map nil.
// NOTE(review): the allocation site is not visible in this file — confirm the
// map is allocated before the first data file is loaded.
15+
var dataFileMap map[string]*fileMapEntry
16+
17+
// validMapEntryTypes lists the recognized values for the mandatory "type" field in map data files.
// fileMapInit rejects any line whose "type" is not in this list and joins the
// list into the resulting error message.
18+
var validMapEntryTypes = []string{"equals", "contains", "regex"}
19+
20+
// fileMapEntry holds the parsed JSON-lines data and the pre-built match index.
21+
// The pattern field is always "pattern" and the value field is always "tag".
// One fileMapEntry exists per data file; it is stored in dataFileMap under its filename.
22+
type fileMapEntry struct {
23+
filename string // data file name, kept for log/error messages
24+
rows []map[string]string // one record per accepted JSON line, in the order fileMapInit appended them
25+
// index stays nil until buildIndex has run; LookupFile returns "" while it is nil.
index *matchIndex // built eagerly via FileInit, always uses "pattern" field
26+
}
27+
28+
// matchIndex holds the pre-built matching structures for a given pattern field.
// It is populated by fileMapEntry.buildIndex and consulted by LookupFile in
// the order equals, contains, regex. All row indexes refer to fileMapEntry.rows.
29+
type matchIndex struct {
30+
// O(1) map for "equals" entries (checked first).
// When the same pattern appears in several rows, buildIndex keeps the last one.
31+
equalsMap map[string]int // exact pattern value → row index
32+
33+
// Aho-Corasick automaton for "contains" entries (O(|haystack|) matching, checked second).
// Left nil by buildIndex when the file has no "contains" rows.
34+
acAutomaton *aho_corasick.AhoCorasick
35+
acPatternToRow []int // AC pattern index → row index in fileMapEntry.rows
36+
37+
// Pre-compiled regexps for "regex" entries (checked last).
// Rows whose pattern fails to compile are left out by buildIndex, so the two
// slices below always have the same length.
38+
regexPatterns []*regexp.Regexp
39+
regexToRow []int // regex slice index → row index in fileMapEntry.rows
40+
}
41+
42+
// fileMapInit parses a single JSON line and appends it to the fileMapEntry for the given filename.
43+
// Three fields are mandatory: "pattern", "tag", and "type" (one of: "equals", "contains", "regex").
44+
func fileMapInit(filename string, line string) error {
45+
var record map[string]string
46+
if err := json.Unmarshal([]byte(line), &record); err != nil {
47+
return fmt.Errorf("failed to parse JSON line in %s: %w", filename, err)
48+
}
49+
50+
if record["pattern"] == "" {
51+
return fmt.Errorf("missing mandatory 'pattern' field in %s: %s", filename, line)
52+
}
53+
54+
if record["tag"] == "" {
55+
return fmt.Errorf("missing mandatory 'tag' field in %s: %s", filename, line)
56+
}
57+
58+
entryType := record["type"]
59+
if entryType == "" {
60+
return fmt.Errorf("missing mandatory 'type' field in %s: %s", filename, line)
61+
}
62+
63+
if !slices.Contains(validMapEntryTypes, entryType) {
64+
return fmt.Errorf("unknown entry type '%s' in %s (supported: %s): %s",
65+
entryType, filename, strings.Join(validMapEntryTypes, ", "), line)
66+
}
67+
68+
if entryType == "regex" {
69+
if _, err := regexp.Compile(record["pattern"]); err != nil {
70+
log.Warningf("invalid regex pattern in %s: %s", filename, err)
71+
}
72+
}
73+
74+
if dataFileMap[filename] == nil {
75+
dataFileMap[filename] = &fileMapEntry{filename: filename}
76+
}
77+
78+
dataFileMap[filename].rows = append(dataFileMap[filename].rows, record)
79+
80+
return nil
81+
}
82+
83+
// buildIndex builds the matchIndex from the parsed rows.
84+
// Called once from FileInit after all lines are loaded.
85+
// Rows are partitioned by their "type" field (validated at load time):
86+
// - "equals" → inserted into equalsMap for O(1) lookup
87+
// - "contains" → fed to Aho-Corasick automaton builder
88+
// - "regex" → compiled to *regexp.Regexp
89+
func (e *fileMapEntry) buildIndex() {
90+
idx := &matchIndex{
91+
equalsMap: make(map[string]int),
92+
}
93+
94+
var acPatterns []string
95+
96+
for i, row := range e.rows {
97+
val := row["pattern"]
98+
99+
switch row["type"] {
100+
case "equals":
101+
if prev, exists := idx.equalsMap[val]; exists {
102+
log.Warningf("file '%s': duplicate equals pattern '%s' (row %d overrides row %d)", e.filename, val, i, prev)
103+
}
104+
105+
idx.equalsMap[val] = i
106+
case "regex":
107+
re, err := regexp.Compile(val)
108+
if err != nil {
109+
log.Errorf("file '%s': invalid regex pattern '%s' in row %d: %s", e.filename, val, i, err)
110+
continue
111+
}
112+
113+
idx.regexPatterns = append(idx.regexPatterns, re)
114+
idx.regexToRow = append(idx.regexToRow, i)
115+
case "contains":
116+
acPatterns = append(acPatterns, val)
117+
idx.acPatternToRow = append(idx.acPatternToRow, i)
118+
default:
119+
continue
120+
}
121+
}
122+
123+
log.Infof("file '%s': loaded %d equals, %d contains, %d regex patterns",
124+
e.filename, len(idx.equalsMap), len(acPatterns), len(idx.regexPatterns))
125+
126+
if len(acPatterns) > 0 {
127+
builder := aho_corasick.NewAhoCorasickBuilder(aho_corasick.Opts{
128+
AsciiCaseInsensitive: false,
129+
MatchOnlyWholeWords: false,
130+
MatchKind: aho_corasick.LeftMostFirstMatch,
131+
DFA: true,
132+
})
133+
134+
ac := builder.Build(acPatterns)
135+
idx.acAutomaton = &ac
136+
}
137+
138+
e.index = idx
139+
}
140+
141+
// FileMap returns the pre-parsed JSON-lines data for the given filename.
142+
// Each element is a map[string]string representing one JSON line.
143+
// func FileMap(filename string) []map[string]string
144+
func FileMap(params ...any) (any, error) {
145+
filename := params[0].(string)
146+
147+
entry, ok := dataFileMap[filename]
148+
if !ok {
149+
log.Errorf("file '%s' (type:map) not found in expr library", filename)
150+
return []map[string]string{}, nil
151+
}
152+
153+
return entry.rows, nil
154+
}
155+
156+
// LookupFile searches for the first entry in the map file whose "pattern" value
157+
// matches the haystack. Matching is done in priority order:
158+
// 1. "equals" entries via O(1) hash map lookup
159+
// 2. "contains" entries via Aho-Corasick substring matching
160+
// 3. "regex" entries via pre-compiled regexp
161+
//
162+
// Returns the corresponding "tag" value, or "" if no match.
163+
// func LookupFile(haystack string, filename string) string
164+
func LookupFile(params ...any) (any, error) {
165+
haystack := params[0].(string)
166+
filename := params[1].(string)
167+
168+
entry, ok := dataFileMap[filename]
169+
if !ok {
170+
log.Errorf("file '%s' (type:map) not found in expr library", filename)
171+
return "", nil
172+
}
173+
174+
idx := entry.index
175+
if idx == nil {
176+
return "", nil
177+
}
178+
179+
// Phase 1: Equals map (O(1) exact match)
180+
if rowIdx, ok := idx.equalsMap[haystack]; ok {
181+
return entry.rows[rowIdx]["tag"], nil
182+
}
183+
184+
// Phase 2: Aho-Corasick for "contains" entries
185+
if idx.acAutomaton != nil {
186+
iter := idx.acAutomaton.Iter(haystack)
187+
if match := iter.Next(); match != nil {
188+
rowIdx := idx.acPatternToRow[match.Pattern()]
189+
190+
return entry.rows[rowIdx]["tag"], nil
191+
}
192+
}
193+
194+
// Phase 3: Regex fallback
195+
for i, re := range idx.regexPatterns {
196+
if re.MatchString(haystack) {
197+
rowIdx := idx.regexToRow[i]
198+
199+
return entry.rows[rowIdx]["tag"], nil
200+
}
201+
}
202+
203+
return "", nil
204+
}

0 commit comments

Comments
 (0)