Skip to content

Commit 21f5ff5

Browse files
authored
Merge pull request #480 from buildkite/gordon/te-5578-auto-collect-git-metadata
[TE-5578] Auto-collect git metadata for plan command
2 parents cbc012a + 51cc9d9 commit 21f5ff5

8 files changed

Lines changed: 1099 additions & 41 deletions

File tree

README.md

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,47 @@ export BUILDKITE_TEST_ENGINE_SELECTION_STRATEGY=percent
7070
```
7171

7272
Command-line flags:
73+
```sh
74+
BKTEC_PREVIEW_SELECTION=true ./bktec plan --json --selection-strategy percent \
75+
--selection-param percent=40
76+
```
77+
78+
#### Automatic git metadata collection
79+
80+
When `--selection-strategy` is set, the `plan` command automatically collects
81+
git metadata from the current repository and sends it with the API request.
82+
This includes commit information (SHA, author, committer, message), diff data
83+
(files changed, numstat, full diff), and context fields (branch name, base
84+
branch, pipeline slug, build UUID).
85+
86+
The base branch for diff computation is resolved using a fallback chain:
87+
88+
1. Explicit override via `--metadata base_branch=<branch>`
89+
2. `BUILDKITE_PULL_REQUEST_BASE_BRANCH` (auto-set by Buildkite on PR builds)
90+
3. Auto-detection via `<remote>/HEAD`, then `<remote>/main`, then `<remote>/master`
91+
92+
Most users don't need to configure anything. Override `base_branch` only if
93+
your repository uses a non-standard default branch (for example, `develop` or `trunk`)
94+
and `<remote>/HEAD` isn't configured, or if the build targets a non-default branch and `BUILDKITE_PULL_REQUEST_BASE_BRANCH` isn't set.
95+
96+
The `--remote` flag (default `origin`) controls which git remote is used for
97+
base branch detection. You can also set `BUILDKITE_TEST_ENGINE_REMOTE`.
98+
99+
Auto-collected values are merged with any explicit `--metadata` flags you
100+
provide. Your explicit values always take precedence.
101+
102+
#### Manual metadata overrides
103+
104+
Use `--metadata key=value` to pass additional metadata or override
105+
auto-collected values. Use `--selection-param key=value` to pass strategy
106+
parameters. Both flags are repeatable. Values can be large and multiline.
107+
73108
```sh
74109
BKTEC_PREVIEW_SELECTION=true ./bktec plan --json --selection-strategy percent \
75110
--selection-param percent=40 \
76-
--metadata commit_message="fix flaky tests" \
77-
--metadata git_diff="$(git diff --no-color)"
111+
--metadata base_branch=develop
78112
```
79113

80-
Use repeated `--selection-param key=value` and `--metadata key=value` to pass multiple entries. Values can be large and multiline.
81114
`--selection-param` and `--metadata` are only supported as repeatable CLI flags.
82115

83116
### Preview: Commit Metadata Backfill

cli.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,20 @@ var remoteFlag = &cli.StringFlag{
390390
Category: "BACKFILL",
391391
Usage: "Git remote name for fetching missing commits and detecting default branch",
392392
Value: "origin",
393-
Sources: cli.EnvVars("BUILDKITE_TEST_ENGINE_BACKFILL_REMOTE"),
393+
Sources: cli.EnvVars("BUILDKITE_TEST_ENGINE_REMOTE", "BUILDKITE_TEST_ENGINE_BACKFILL_REMOTE"),
394+
Destination: &cfg.Remote,
395+
}
396+
397+
// planRemoteFlag is a selection-scoped variant of remoteFlag for the plan
398+
// command. It only reads BUILDKITE_TEST_ENGINE_REMOTE (not the backfill-
399+
// specific BUILDKITE_TEST_ENGINE_BACKFILL_REMOTE) to avoid the backfill
400+
// env var unexpectedly affecting plan behaviour.
401+
var planRemoteFlag = &cli.StringFlag{
402+
Name: "remote",
403+
Category: "PREVIEW: TEST SELECTION",
404+
Usage: "Git remote name for metadata auto-collection",
405+
Value: "origin",
406+
Sources: cli.EnvVars("BUILDKITE_TEST_ENGINE_REMOTE"),
394407
Destination: &cfg.Remote,
395408
}
396409

@@ -433,6 +446,7 @@ func previewSelectionFlags() []cli.Flag {
433446
selectionStrategyFlag,
434447
selectionParamFlag,
435448
metadataFlag,
449+
planRemoteFlag,
436450
}
437451
}
438452

docs/commit-metadata-backfill.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ This is useful when you want to generate and upload in separate steps or when re
8686
| `BUILDKITE_TEST_ENGINE_BASE_URL` | `--base-url` | `https://api.buildkite.com` | Buildkite API base URL |
8787
| `BUILDKITE_TEST_ENGINE_SKIP_DIFFS` | `--skip-diffs` | `false` | Omit full git diffs from the export |
8888
| `BUILDKITE_TEST_ENGINE_BACKFILL_DAYS` | `--days` | `90` | Number of days of commit history to export (1-90) |
89-
| `BUILDKITE_TEST_ENGINE_BACKFILL_REMOTE` | `--remote` | `origin` | Git remote name for fetching and branch detection |
89+
| `BUILDKITE_TEST_ENGINE_REMOTE` or `BUILDKITE_TEST_ENGINE_BACKFILL_REMOTE` | `--remote` | `origin` | Git remote name for fetching and branch detection |
9090
| `BUILDKITE_TEST_ENGINE_BACKFILL_CONCURRENCY` | `--concurrency` | `10` | Number of concurrent git operations for diff collection |
9191
| `BUILDKITE_TEST_ENGINE_DEBUG_ENABLED` | `--debug` | `false` | Enable debug output |
9292

internal/command/plan.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"github.com/buildkite/test-engine-client/internal/api"
1414
"github.com/buildkite/test-engine-client/internal/config"
1515
"github.com/buildkite/test-engine-client/internal/debug"
16+
"github.com/buildkite/test-engine-client/internal/git"
1617
"github.com/buildkite/test-engine-client/internal/plan"
1718
"github.com/buildkite/test-engine-client/internal/runner"
1819
"github.com/buildkite/test-engine-client/internal/version"
@@ -36,6 +37,11 @@ var (
3637
func Plan(ctx context.Context, cfg *config.Config, testFileList string, outputFormat PlanOutput, template string) error {
3738
fmt.Fprintln(os.Stderr, "+++ Buildkite Test Engine Client: bktec "+version.Version+"\n")
3839

40+
// Auto-collect git metadata when selection is active
41+
if cfg.SelectionStrategy != "" {
42+
autoCollectGitMetadata(ctx, cfg, &git.ExecGitRunner{})
43+
}
44+
3945
testRunner, err := runner.DetectRunner(cfg)
4046
if err != nil {
4147
return fmt.Errorf("unsupported value for BUILDKITE_TEST_ENGINE_TEST_RUNNER: %w", err)
@@ -147,6 +153,31 @@ func createTestPlan(ctx context.Context, cfg *config.Config, files []string, api
147153
return testPlan, nil
148154
}
149155

156+
// autoCollectGitMetadata collects git commit metadata and merges it into
157+
// cfg.Metadata. User-provided metadata values (from --metadata) take
158+
// precedence over auto-collected values.
159+
func autoCollectGitMetadata(ctx context.Context, cfg *config.Config, runner git.GitRunner) {
160+
// Check if we're in a git repo
161+
if _, err := runner.Output(ctx, "rev-parse", "--git-dir"); err != nil {
162+
fmt.Fprintln(os.Stderr, "Warning: not a git repository, skipping metadata auto-collection")
163+
return
164+
}
165+
166+
// Use user-provided base_branch from --metadata if present
167+
explicit := cfg.Metadata["base_branch"]
168+
remote := cfg.Remote
169+
baseBranch, err := git.ResolveBaseBranch(ctx, runner, explicit, remote)
170+
if err != nil {
171+
fmt.Fprintln(os.Stderr, "Warning: could not resolve base branch for diff metadata. "+
172+
"Set --metadata base_branch=<branch> if your repo uses a non-standard default branch.")
173+
} else {
174+
debug.Printf("auto-detected base branch: %s", baseBranch)
175+
}
176+
177+
autoMetadata := git.CollectPlanMetadata(ctx, runner, baseBranch)
178+
cfg.Metadata = git.MergeMetadata(cfg.Metadata, autoMetadata)
179+
}
180+
150181
func handleError(err error) error {
151182
if errors.Is(err, api.ErrRetryTimeout) {
152183
fmt.Fprintln(os.Stderr, "⚠️ Could not fetch or create plan from server, falling back to non-intelligent splitting. Your build may take longer than usual.")

internal/git/auto_metadata.go

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
package git
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"os"
7+
"strings"
8+
9+
"github.com/buildkite/test-engine-client/internal/debug"
10+
)
11+
12+
// ResolveBaseBranch determines the base branch ref to diff against.
13+
//
14+
// Resolution order:
15+
// 1. explicit (from --metadata base_branch=...) -- for repos with
16+
// non-standard default branches (not main/master), or PRs targeting
17+
// non-default branches outside Buildkite CI.
18+
// 2. $BUILDKITE_PULL_REQUEST_BASE_BRANCH -- auto-set by the Buildkite
19+
// agent on PR builds.
20+
// 3. DetectDefaultBranch() -- tries remote/HEAD, remote/main, remote/master.
21+
//
22+
// Most users should NOT need to set base_branch explicitly. Override is
23+
// only needed when:
24+
// - The repo uses a non-standard default branch (e.g. "develop", "trunk")
25+
// AND remote/HEAD is not configured
26+
// - The build targets a non-default branch (e.g. a PR into "release/v2")
27+
// AND $BUILDKITE_PULL_REQUEST_BASE_BRANCH is not set (non-Buildkite CI
28+
// or manual trigger)
29+
//
30+
// Each candidate is probed against the repository using
31+
// git rev-parse --verify. We try the candidate verbatim first, then fall
32+
// back to "<remote>/<candidate>". This handles every common shape without
33+
// heuristics: bare branch names ("main") resolve via the fallback to
34+
// "origin/main"; refs from a different remote ("upstream/main"), fully-
35+
// qualified refs ("refs/heads/release"), and values already including the
36+
// configured remote ("origin/main") all resolve on the first probe.
37+
// Without the verbatim probe, prefixing a qualified ref would rewrite it
38+
// into an invalid value like "origin/upstream/main" and silently drop the
39+
// explicit override.
40+
// Returns the resolved ref (e.g. "origin/main") or an error.
41+
func ResolveBaseBranch(ctx context.Context, runner GitRunner, explicit string, remote string) (string, error) {
42+
if remote == "" {
43+
return "", fmt.Errorf("remote must not be empty")
44+
}
45+
46+
type candidate struct {
47+
value string
48+
source string
49+
}
50+
candidates := []candidate{
51+
{value: explicit, source: "explicit --metadata base_branch"},
52+
{value: os.Getenv("BUILDKITE_PULL_REQUEST_BASE_BRANCH"), source: "BUILDKITE_PULL_REQUEST_BASE_BRANCH"},
53+
}
54+
55+
for _, c := range candidates {
56+
if c.value == "" {
57+
continue
58+
}
59+
60+
// Try the candidate verbatim first. This accepts qualified refs
61+
// from a non-default remote ("upstream/main"), fully-qualified
62+
// refs ("refs/heads/release"), and values already including the
63+
// configured remote ("origin/main") without rewriting them.
64+
if _, err := runner.Output(ctx, "rev-parse", "--verify", c.value); err == nil {
65+
debug.Printf("base branch resolved via %s: %s", c.source, c.value)
66+
return c.value, nil
67+
}
68+
69+
// Fall back to "<remote>/<candidate>" for bare branch names
70+
// ("main" -> "origin/main") and release-style names with a "/"
71+
// that are still relative to the configured remote ("release/v2"
72+
// -> "origin/release/v2").
73+
prefixed := remote + "/" + c.value
74+
if _, err := runner.Output(ctx, "rev-parse", "--verify", prefixed); err == nil {
75+
debug.Printf("base branch resolved via %s: %q -> %s", c.source, c.value, prefixed)
76+
return prefixed, nil
77+
}
78+
debug.Printf("base branch candidate %q from %s not found (also tried %q), trying next", c.value, c.source, prefixed)
79+
}
80+
ref, err := DetectDefaultBranch(ctx, runner, remote)
81+
if err == nil {
82+
debug.Printf("base branch resolved via DetectDefaultBranch: %s", ref)
83+
}
84+
return ref, err
85+
}
86+
87+
// CollectPlanMetadata collects git metadata for the current HEAD commit.
88+
// Returns a map of metadata keys to values. Skips keys that cannot be
89+
// collected (e.g. if not in a git repo). Does not error on git failures;
90+
// returns partial results with warnings logged via debug.Printf.
91+
func CollectPlanMetadata(ctx context.Context, runner GitRunner, baseBranch string) map[string]string {
92+
metadata := make(map[string]string)
93+
94+
// Phase 1: Commit metadata via git log -1 --format=...
95+
// Reuses MetadataFormat from metadata.go for consistency with backfill.
96+
collectCommitMetadata(ctx, runner, metadata)
97+
98+
// Phase 2: Diff fields against base branch (only if base branch is resolved)
99+
if baseBranch != "" {
100+
collectDiffMetadata(ctx, runner, baseBranch, metadata)
101+
}
102+
103+
// Phase 3: Context fields
104+
collectContextFields(ctx, runner, baseBranch, metadata)
105+
106+
return metadata
107+
}
108+
109+
// collectCommitMetadata extracts commit metadata for HEAD using a single
110+
// git log call with the same format as FetchBulkMetadata, parses it into
111+
// a CommitMetadata struct, and flattens it into the metadata map via ToMap.
112+
func collectCommitMetadata(ctx context.Context, runner GitRunner, metadata map[string]string) {
113+
output, err := runner.Output(ctx, "log", "-1", fmt.Sprintf("--format=%s", MetadataFormat))
114+
if err != nil {
115+
debug.Printf("Warning: git log failed, skipping commit metadata: %v", err)
116+
return
117+
}
118+
119+
// Real git log output ends in "\x1e\n" (git always appends a trailing
120+
// newline). TrimSpace must run first to strip the "\n", otherwise
121+
// TrimSuffix("\x1e") sees the "\n" at the end and no-ops, leaving the
122+
// record separator trapped inside the final field (the commit message).
123+
// TrimSpace does not strip "\x1e" because it is not Unicode whitespace.
124+
record := strings.TrimSuffix(strings.TrimSpace(output), recordSeparator)
125+
meta, ok := parseRecord(record)
126+
if !ok {
127+
debug.Printf("Warning: git log returned unparseable output; skipping commit metadata")
128+
return
129+
}
130+
131+
mergeNonEmpty(metadata, meta.ToMap())
132+
}
133+
134+
// collectDiffMetadata computes the merge-base between baseBranch and HEAD,
135+
// then runs diff commands using two-arg form (merge-base, HEAD). This is
136+
// equivalent to git diff baseBranch...HEAD but makes the fork-point
137+
// resolution explicit and uses the same two-arg diff form as the backfill
138+
// path.
139+
func collectDiffMetadata(ctx context.Context, runner GitRunner, baseBranch string, metadata map[string]string) {
140+
forkPoint, err := runner.Output(ctx, "merge-base", baseBranch, "HEAD")
141+
if err != nil {
142+
debug.Printf("Warning: git merge-base failed: %v", err)
143+
return
144+
}
145+
forkPoint = strings.TrimSpace(forkPoint)
146+
147+
diffs := runDiffCommands(ctx, runner, false, forkPoint, "HEAD")
148+
mergeNonEmpty(metadata, diffs.ToMap())
149+
}
150+
151+
// MergeMetadata merges auto-collected metadata into existing user-provided
// metadata and returns the merged map.
//
// User-provided keys take precedence: an auto-collected value only fills a
// key that is not already present in existing (a key present with an empty
// value still counts as present). Empty auto-collected values are always
// skipped, including when existing is nil.
//
// When existing is non-nil it is updated in place and returned. When
// existing is nil a new map is returned, so the result never aliases auto.
// If both arguments are nil the result is nil.
func MergeMetadata(existing, auto map[string]string) map[string]string {
	if existing == nil {
		if auto == nil {
			return nil
		}
		// Build a fresh map rather than returning auto itself, so the
		// empty-value filter below applies and the caller's map is not shared.
		existing = make(map[string]string, len(auto))
	}
	for k, v := range auto {
		if v == "" {
			continue
		}
		if _, exists := existing[k]; !exists {
			existing[k] = v
		}
	}
	return existing
}
169+
170+
// mergeNonEmpty writes every entry of src whose value is not the empty
// string into dst, overwriting any existing entry with the same key.
// Empty values are dropped so that keys such as "git_diff":"" never reach
// the API request; json.Marshal would otherwise emit them, as it does not
// omit empty strings inside a map.
func mergeNonEmpty(dst, src map[string]string) {
	for key := range src {
		val := src[key]
		if val == "" {
			continue
		}
		dst[key] = val
	}
}
180+
181+
// collectContextFields adds branch, base_branch, and Buildkite env var fields.
182+
func collectContextFields(ctx context.Context, runner GitRunner, baseBranch string, metadata map[string]string) {
183+
// branch: current branch name, falling back to BUILDKITE_BRANCH
184+
// for detached HEAD (common in CI where the agent checks out a commit SHA)
185+
if out, err := runner.Output(ctx, "branch", "--show-current"); err == nil {
186+
if branch := strings.TrimSpace(out); branch != "" {
187+
metadata["branch"] = branch
188+
debug.Printf("branch resolved via git branch --show-current: %s", branch)
189+
}
190+
} else {
191+
debug.Printf("Warning: git branch --show-current failed: %v", err)
192+
}
193+
if _, ok := metadata["branch"]; !ok {
194+
if branch := os.Getenv("BUILDKITE_BRANCH"); branch != "" {
195+
metadata["branch"] = branch
196+
debug.Printf("branch resolved via BUILDKITE_BRANCH env var: %s", branch)
197+
} else {
198+
debug.Printf("branch could not be determined (detached HEAD, no BUILDKITE_BRANCH)")
199+
}
200+
}
201+
202+
// base_branch: the resolved base ref (not a git command)
203+
if baseBranch != "" {
204+
metadata["base_branch"] = baseBranch
205+
}
206+
207+
// pipeline_slug from Buildkite env (omitted if not set)
208+
if slug := os.Getenv("BUILDKITE_PIPELINE_SLUG"); slug != "" {
209+
metadata["pipeline_slug"] = slug
210+
}
211+
212+
// build_uuid from Buildkite env (omitted if not set)
213+
if buildID := os.Getenv("BUILDKITE_BUILD_ID"); buildID != "" {
214+
metadata["build_uuid"] = buildID
215+
}
216+
}

0 commit comments

Comments
 (0)