Skip to content

Commit c6680a8

Browse files
sai-miduthuriclaudegemini-code-assist[bot]
authored
[change_lister] Validate inputs and handle rev-parse error (#478)
### Issue `change_lister` doesn't verify `remote` and `branch` inputs ### Fix - Add new constructor helper function to handle input validation - Handle the error from `git rev-parse --is-shallow-repository` instead of silently discarding it, so environment misconfigurations surface clearly. --------- Signed-off-by: Sai Miduthuri <sai.miduthuri@anyscale.com> Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent 2d4c509 commit c6680a8

6 files changed

Lines changed: 222 additions & 53 deletions

File tree

raycicmd/change_lister.go

Lines changed: 50 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,52 +3,74 @@ package raycicmd
33
import (
44
"fmt"
55
"os/exec"
6+
"regexp"
67
"strings"
78
)
89

10+
var (
11+
gitRefRe = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9._/-]*$`)
12+
commitHashRe = regexp.MustCompile(`^[0-9a-fA-F]{4,40}$`)
13+
)
14+
915
// ChangeLister lists changed files between two directory states.
1016
type ChangeLister interface {
1117
ListChangedFiles() ([]string, error)
1218
}
1319

14-
// GitChangeLister lists files changed by finding the merge-base (common ancestor)
20+
// gitChangeLister lists files changed by finding the merge-base (common ancestor)
1521
// between the base branch and the commit, then diffing against that. This
1622
// correctly shows only changes in a style similar to GitHub PR diffs, excluding
1723
// changes that happened on the base branch after the PR branch was created.
18-
type GitChangeLister struct {
24+
type gitChangeLister struct {
1925
// WorkDir is the directory to run git commands in. If empty, uses current
2026
// directory.
21-
WorkDir string
27+
workDir string
2228

2329
// Remote is the name of the git remote. If empty, defaults to "origin".
24-
Remote string
30+
remote string
2531

2632
// BaseBranch is the base branch to diff against (e.g., "main" or "master").
27-
BaseBranch string
33+
baseBranch string
2834

2935
// Commit is the commit to diff from the base branch.
30-
Commit string
36+
commit string
3137
}
3238

33-
func (g *GitChangeLister) remote() string {
34-
if g.Remote != "" {
35-
return g.Remote
39+
func newGitChangeLister(
40+
workDir, remote, baseBranch, commit string,
41+
) (*gitChangeLister, error) {
42+
if remote == "" {
43+
remote = "origin"
44+
}
45+
if !gitRefRe.MatchString(remote) {
46+
return nil, fmt.Errorf("invalid remote name: %q", remote)
47+
}
48+
if !gitRefRe.MatchString(baseBranch) {
49+
return nil, fmt.Errorf("invalid base branch name: %q", baseBranch)
3650
}
37-
return "origin"
51+
if !commitHashRe.MatchString(commit) {
52+
return nil, fmt.Errorf("invalid commit hash: %q", commit)
53+
}
54+
return &gitChangeLister{
55+
workDir: workDir,
56+
remote: remote,
57+
baseBranch: baseBranch,
58+
commit: commit,
59+
}, nil
3860
}
3961

40-
func (g *GitChangeLister) run(args ...string) ([]byte, error) {
62+
func (g *gitChangeLister) run(args ...string) ([]byte, error) {
4163
cmd := exec.Command("git", args...)
42-
if g.WorkDir != "" {
43-
cmd.Dir = g.WorkDir
64+
if g.workDir != "" {
65+
cmd.Dir = g.workDir
4466
}
4567
return cmd.Output()
4668
}
4769

48-
func (g *GitChangeLister) runNoOutput(args ...string) error {
70+
func (g *gitChangeLister) runNoOutput(args ...string) error {
4971
cmd := exec.Command("git", args...)
50-
if g.WorkDir != "" {
51-
cmd.Dir = g.WorkDir
72+
if g.workDir != "" {
73+
cmd.Dir = g.workDir
5274
}
5375
return cmd.Run()
5476
}
@@ -58,14 +80,17 @@ func (g *GitChangeLister) runNoOutput(args ...string) error {
5880
// It uses the merge-base of (remote/BaseBranch, Commit) so it only includes
5981
// changes introduced on the feature branch, matching GitHub PR diffs and
6082
// excluding new commits on the base branch.
61-
func (g *GitChangeLister) ListChangedFiles() ([]string, error) {
62-
remote := g.remote()
83+
func (g *gitChangeLister) ListChangedFiles() ([]string, error) {
84+
remote := g.remote
6385

6486
// Fetch the base branch with a refspec so the remote-tracking ref exists
6587
// for merge-base. Plain `git fetch origin <branch>` only updates FETCH_HEAD.
66-
remoteBranchRef := fmt.Sprintf("refs/remotes/%s/%s", remote, g.BaseBranch)
67-
refspec := fmt.Sprintf("+%s:%s", g.BaseBranch, remoteBranchRef)
68-
isShallowOut, _ := g.run("rev-parse", "--is-shallow-repository")
88+
remoteBranchRef := fmt.Sprintf("refs/remotes/%s/%s", remote, g.baseBranch)
89+
refspec := fmt.Sprintf("+%s:%s", g.baseBranch, remoteBranchRef)
90+
isShallowOut, err := g.run("rev-parse", "--is-shallow-repository")
91+
if err != nil {
92+
return nil, fmt.Errorf("git rev-parse --is-shallow-repository: %w", err)
93+
}
6994
if strings.TrimSpace(string(isShallowOut)) == "true" {
7095
if err := g.runNoOutput("fetch", "--unshallow", "-q", remote, refspec); err != nil {
7196
return nil, fmt.Errorf("git fetch --unshallow %s %s: %w", remote, refspec, err)
@@ -89,15 +114,15 @@ func (g *GitChangeLister) ListChangedFiles() ([]string, error) {
89114
// merge-base(remote/base, B) = A
90115
// diff(A, B) = [feature.go] <-- correct
91116
// diff(C, B) = [feature.go, other.go] <-- wrong
92-
remoteBranch := remote + "/" + g.BaseBranch
93-
mergeBaseOut, err := g.run("merge-base", remoteBranch, g.Commit)
117+
remoteBranch := remote + "/" + g.baseBranch
118+
mergeBaseOut, err := g.run("merge-base", remoteBranch, g.commit)
94119
if err != nil {
95-
return nil, fmt.Errorf("git merge-base %s %s: %w", remoteBranch, g.Commit, err)
120+
return nil, fmt.Errorf("git merge-base %s %s: %w", remoteBranch, g.commit, err)
96121
}
97122
mergeBase := strings.TrimSpace(string(mergeBaseOut))
98123

99124
// Diff only the PR changes: merge-base..Commit.
100-
diffRange := mergeBase + ".." + g.Commit
125+
diffRange := mergeBase + ".." + g.commit
101126
diffOut, err := g.run("diff", "--name-only", diffRange, "--")
102127
if err != nil {
103128
return nil, fmt.Errorf("git diff %s: %w", diffRange, err)

raycicmd/change_lister_test.go

Lines changed: 128 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,17 @@ func (h *gitTestHelper) initialCommit() {
9898
h.git("commit", "-m", "initial commit")
9999
}
100100

101+
func mustNewGitChangeLister(
102+
t *testing.T, workDir, remote, baseBranch, commit string,
103+
) *gitChangeLister {
104+
t.Helper()
105+
lister, err := newGitChangeLister(workDir, remote, baseBranch, commit)
106+
if err != nil {
107+
t.Fatalf("newGitChangeLister: %v", err)
108+
}
109+
return lister
110+
}
111+
101112
// TestListChangedFiles verifies basic changed file detection.
102113
//
103114
// Git history (time flows left to right):
@@ -121,7 +132,7 @@ func TestListChangedFiles(t *testing.T) {
121132
wantFiles := []string{"src/main.go", "src/util.go", "docs/readme.txt"}
122133
commit := h.commitFiles("add files", wantFiles...)
123134

124-
lister := &GitChangeLister{WorkDir: h.WorkDir, BaseBranch: "master", Commit: commit}
135+
lister := mustNewGitChangeLister(t, h.WorkDir, "", "master", commit)
125136
files, err := lister.ListChangedFiles()
126137
if err != nil {
127138
t.Fatalf("ListChangedFiles: %v", err)
@@ -175,7 +186,7 @@ func TestListChangedFiles_MergeBase(t *testing.T) {
175186

176187
// The diff should only show feature.go, not other.go,
177188
// because we diff against the merge-base (common ancestor).
178-
lister := &GitChangeLister{WorkDir: h.WorkDir, BaseBranch: "master", Commit: featureCommit}
189+
lister := mustNewGitChangeLister(t, h.WorkDir, "", "master", featureCommit)
179190
files, err := lister.ListChangedFiles()
180191
if err != nil {
181192
t.Fatalf("ListChangedFiles: %v", err)
@@ -203,7 +214,7 @@ func TestListChangedFiles_CustomRemote(t *testing.T) {
203214
commit := h.commitFiles("add feature", "feature.go")
204215

205216
// Use a lister with custom remote.
206-
lister := &GitChangeLister{WorkDir: h.WorkDir, Remote: "upstream", BaseBranch: "master", Commit: commit}
217+
lister := mustNewGitChangeLister(t, h.WorkDir, "upstream", "master", commit)
207218
files, err := lister.ListChangedFiles()
208219
if err != nil {
209220
t.Fatalf("ListChangedFiles: %v", err)
@@ -234,7 +245,7 @@ func TestListChangedFiles_MultipleCommits(t *testing.T) {
234245
h.commitFiles("first commit", "first.go")
235246
commit := h.commitFiles("second commit", "second.go")
236247

237-
lister := &GitChangeLister{WorkDir: h.WorkDir, BaseBranch: "master", Commit: commit}
248+
lister := mustNewGitChangeLister(t, h.WorkDir, "", "master", commit)
238249
files, err := lister.ListChangedFiles()
239250
if err != nil {
240251
t.Fatalf("ListChangedFiles: %v", err)
@@ -275,7 +286,7 @@ func TestListChangedFiles_ModifyExistingFile(t *testing.T) {
275286
h.git("commit", "-m", "modify readme")
276287
commit := h.head()
277288

278-
lister := &GitChangeLister{WorkDir: h.WorkDir, BaseBranch: "master", Commit: commit}
289+
lister := mustNewGitChangeLister(t, h.WorkDir, "", "master", commit)
279290
files, err := lister.ListChangedFiles()
280291
if err != nil {
281292
t.Fatalf("ListChangedFiles: %v", err)
@@ -312,7 +323,7 @@ func TestListChangedFiles_DeleteFile(t *testing.T) {
312323
h.git("commit", "-m", "delete file")
313324
commit := h.head()
314325

315-
lister := &GitChangeLister{WorkDir: h.WorkDir, BaseBranch: "master", Commit: commit}
326+
lister := mustNewGitChangeLister(t, h.WorkDir, "", "master", commit)
316327
files, err := lister.ListChangedFiles()
317328
if err != nil {
318329
t.Fatalf("ListChangedFiles: %v", err)
@@ -347,7 +358,7 @@ func TestListChangedFiles_RenameFile(t *testing.T) {
347358
h.git("commit", "-m", "rename file")
348359
commit := h.head()
349360

350-
lister := &GitChangeLister{WorkDir: h.WorkDir, BaseBranch: "master", Commit: commit}
361+
lister := mustNewGitChangeLister(t, h.WorkDir, "", "master", commit)
351362
files, err := lister.ListChangedFiles()
352363
if err != nil {
353364
t.Fatalf("ListChangedFiles: %v", err)
@@ -382,7 +393,7 @@ func TestListChangedFiles_NoChanges(t *testing.T) {
382393
// No changes, just branch off
383394
commit := h.head()
384395

385-
lister := &GitChangeLister{WorkDir: h.WorkDir, BaseBranch: "master", Commit: commit}
396+
lister := mustNewGitChangeLister(t, h.WorkDir, "", "master", commit)
386397
files, err := lister.ListChangedFiles()
387398
if err != nil {
388399
t.Fatalf("ListChangedFiles: %v", err)
@@ -425,7 +436,7 @@ func TestListChangedFiles_SameFileModifiedOnBothBranches(t *testing.T) {
425436
h.git("commit", "-m", "modify shared.go on master")
426437
h.git("push", "origin", "master")
427438

428-
lister := &GitChangeLister{WorkDir: h.WorkDir, BaseBranch: "master", Commit: featureCommit}
439+
lister := mustNewGitChangeLister(t, h.WorkDir, "", "master", featureCommit)
429440
files, err := lister.ListChangedFiles()
430441
if err != nil {
431442
t.Fatalf("ListChangedFiles: %v", err)
@@ -467,7 +478,7 @@ func TestListChangedFiles_ShallowClone(t *testing.T) {
467478
runGitCommand(t, shallowDir, "fetch", "--depth=1", "origin", "feature-branch")
468479
runGitCommand(t, shallowDir, "checkout", "-f", featureCommit)
469480

470-
lister := &GitChangeLister{WorkDir: shallowDir, BaseBranch: "master", Commit: featureCommit}
481+
lister := mustNewGitChangeLister(t, shallowDir, "", "master", featureCommit)
471482
files, err := lister.ListChangedFiles()
472483
if err != nil {
473484
t.Fatalf("ListChangedFiles() = %v, want nil", err)
@@ -520,7 +531,7 @@ func TestListChangedFiles_ShallowCloneNonDefaultBase(t *testing.T) {
520531
runGitCommand(t, shallowDir, "fetch", "--depth=1", "origin", "stacked-branch")
521532
runGitCommand(t, shallowDir, "checkout", "-f", stackedCommit)
522533

523-
lister := &GitChangeLister{WorkDir: shallowDir, BaseBranch: "base-branch", Commit: stackedCommit}
534+
lister := mustNewGitChangeLister(t, shallowDir, "", "base-branch", stackedCommit)
524535
files, err := lister.ListChangedFiles()
525536
if err != nil {
526537
t.Fatalf("ListChangedFiles() got %v, want nil", err)
@@ -550,7 +561,7 @@ func TestListChangedFiles_DeepDirectoryStructure(t *testing.T) {
550561
wantFiles := []string{"a/b/c/deep.go", "x/y/z/another.go"}
551562
commit := h.commitFiles("add nested files", wantFiles...)
552563

553-
lister := &GitChangeLister{WorkDir: h.WorkDir, BaseBranch: "master", Commit: commit}
564+
lister := mustNewGitChangeLister(t, h.WorkDir, "", "master", commit)
554565
files, err := lister.ListChangedFiles()
555566
if err != nil {
556567
t.Fatalf("ListChangedFiles: %v", err)
@@ -565,3 +576,108 @@ func TestListChangedFiles_DeepDirectoryStructure(t *testing.T) {
565576
}
566577
}
567578
}
579+
580+
func TestNewGitChangeLister(t *testing.T) {
581+
tests := []struct {
582+
name string
583+
remote string
584+
baseBranch string
585+
commit string
586+
wantErr bool
587+
wantRemote string
588+
}{
589+
{
590+
name: "valid inputs",
591+
remote: "origin",
592+
baseBranch: "main",
593+
commit: "abc123def456",
594+
wantRemote: "origin",
595+
},
596+
{
597+
name: "empty remote defaults to origin",
598+
remote: "",
599+
baseBranch: "main",
600+
commit: "abc123def456",
601+
wantRemote: "origin",
602+
},
603+
{
604+
name: "remote with dash prefix",
605+
remote: "--upload-pack=evil",
606+
baseBranch: "main",
607+
commit: "abc123def456",
608+
wantErr: true,
609+
},
610+
{
611+
name: "base branch with dash prefix",
612+
remote: "origin",
613+
baseBranch: "--upload-pack=evil",
614+
commit: "abc123def456",
615+
wantErr: true,
616+
},
617+
{
618+
name: "commit with non-hex characters",
619+
remote: "origin",
620+
baseBranch: "main",
621+
commit: "not-a-hex-hash",
622+
wantErr: true,
623+
},
624+
{
625+
name: "commit too short",
626+
remote: "origin",
627+
baseBranch: "main",
628+
commit: "abc",
629+
wantErr: true,
630+
},
631+
{
632+
name: "full 40-char commit hash",
633+
remote: "origin",
634+
baseBranch: "main",
635+
commit: "abc123def456abc123def456abc123def456abcd",
636+
wantRemote: "origin",
637+
},
638+
{
639+
name: "branch with slashes",
640+
remote: "origin",
641+
baseBranch: "feature/my-branch",
642+
commit: "abc123def456",
643+
wantRemote: "origin",
644+
},
645+
{
646+
name: "empty base branch",
647+
remote: "origin",
648+
baseBranch: "",
649+
commit: "abc123def456",
650+
wantErr: true,
651+
},
652+
{
653+
name: "empty commit",
654+
remote: "origin",
655+
baseBranch: "main",
656+
commit: "",
657+
wantErr: true,
658+
},
659+
}
660+
661+
for _, tc := range tests {
662+
t.Run(tc.name, func(t *testing.T) {
663+
lister, err := newGitChangeLister(
664+
"/tmp", tc.remote, tc.baseBranch, tc.commit,
665+
)
666+
if tc.wantErr {
667+
if err == nil {
668+
t.Errorf("newGitChangeLister() = nil, want error")
669+
}
670+
return
671+
}
672+
if err != nil {
673+
t.Fatalf("newGitChangeLister() = %v, want nil", err)
674+
}
675+
if lister.remote != tc.wantRemote {
676+
t.Errorf(
677+
"remote = %q, want %q",
678+
lister.remote, tc.wantRemote,
679+
)
680+
}
681+
})
682+
}
683+
}

raycicmd/main.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -197,10 +197,19 @@ func Main(args []string, envs Envs) error {
197197
return fmt.Errorf("make build info: %w", err)
198198
}
199199

200-
lister := &GitChangeLister{
201-
WorkDir: flags.RepoDir,
202-
BaseBranch: getEnv(envs, "BUILDKITE_PULL_REQUEST_BASE_BRANCH"),
203-
Commit: getEnv(envs, "BUILDKITE_COMMIT"),
200+
// baseBranch and commit are empty for non-PR Buildkite builds (e.g.,
201+
// branch pushes, scheduled builds). The lister is only used for PR
202+
// builds, where RunTagAnalysis diffs changed files against the base.
203+
var lister ChangeLister
204+
baseBranch := getEnv(envs, "BUILDKITE_PULL_REQUEST_BASE_BRANCH")
205+
commit := getEnv(envs, "BUILDKITE_COMMIT")
206+
if baseBranch != "" && commit != "" {
207+
lister, err = newGitChangeLister(
208+
flags.RepoDir, "origin", baseBranch, commit,
209+
)
210+
if err != nil {
211+
return fmt.Errorf("create change lister: %w", err)
212+
}
204213
}
205214
pipeline, err := makePipeline(&pipelineContext{
206215
repoDir: flags.RepoDir,

0 commit comments

Comments
 (0)