Skip to content

Commit 9342181

Browse files
check status of stuck jobs before starting new runner
1 parent 0c34ebf commit 9342181

3 files changed

Lines changed: 83 additions & 3 deletions

File tree

Database/DbContext.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,8 @@ public enum JobState
130130
Unknown = 0,
131131
Queued = 1,
132132
InProgress = 2,
133-
Completed = 3
133+
Completed = 3,
134+
Vanished = 4
134135
}
135136
public class RunnerLifecycle
136137
{

GitHubApi.cs

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,4 +156,66 @@ public static async Task<bool> RemoveRunnerFromRepo(string repoName, string orgG
156156
return response.IsSuccessStatusCode;
157157

158158
}
159-
}
159+
160+
/// <summary>
/// Looks up a single workflow job through the GitHub REST API.
/// </summary>
/// <param name="stuckJobGithubJobId">GitHub id of the job to look up.</param>
/// <param name="repoName">
/// Inserted verbatim into the request path after <c>repos/</c>.
/// NOTE(review): the jobs endpoint is repository scoped, so this has to be in
/// <c>owner/repo</c> form - confirm that every caller passes it that way.
/// </param>
/// <param name="orgGitHubToken">Token sent as the Bearer credential of the request.</param>
/// <returns>
/// The deserialized job, or <c>null</c> (after logging a warning) when GitHub
/// answers with a non-success status code.
/// </returns>
public static async Task<GitHubJob> GetJobInfo(long stuckJobGithubJobId,string repoName, string orgGitHubToken)
{
    using HttpClient client = new();
    client.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/vnd.github+json"));
    client.DefaultRequestHeaders.Add("X-GitHub-Api-Version", "2022-11-28");
    client.DefaultRequestHeaders.Authorization = AuthenticationHeaderValue.Parse($"Bearer {orgGitHubToken}");
    client.DefaultRequestHeaders.UserAgent.Add(new ProductInfoHeaderValue("hetzner-autoscale", "1"));

    // GitHub exposes job lookup only below /repos/{owner}/{repo}; there is no
    // /orgs/... variant of this endpoint, so the previous URL could never succeed.
    using HttpResponseMessage response = await client.GetAsync(
        $"https://api.github.com/repos/{repoName}/actions/jobs/{stuckJobGithubJobId}");

    if (!response.IsSuccessStatusCode)
    {
        Log.Warning($"Unable to get GH job info for {repoName}/{stuckJobGithubJobId}: [{response.StatusCode}] {response.ReasonPhrase}");
        return null;
    }

    string content = await response.Content.ReadAsStringAsync();
    return JsonSerializer.Deserialize<GitHubJob>(content);
}
181+
}
182+
183+
/// <summary>
/// A workflow job as returned by the GitHub REST API.
/// </summary>
/// <remarks>
/// GitHub sends snake_case keys while System.Text.Json matches property names
/// case-sensitively by default, so every property carries an explicit JSON name.
/// Without these mappings a default <c>JsonSerializer.Deserialize</c> call leaves
/// all properties at their default values.
/// </remarks>
public class GitHubJob
{
    // Job and run ids are 64-bit: real values exceed Int32.MaxValue.
    [System.Text.Json.Serialization.JsonPropertyName("id")]
    public long Id { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("run_id")]
    public long RunId { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("run_url")]
    public string RunUrl { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("run_attempt")]
    public int RunAttempt { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("node_id")]
    public string NodeId { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("head_sha")]
    public string HeadSha { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("url")]
    public string Url { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("html_url")]
    public string HtmlUrl { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("status")]
    public string Status { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("conclusion")]
    public string Conclusion { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("created_at")]
    public DateTime CreatedAt { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("started_at")]
    public DateTime StartedAt { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("completed_at")]
    public DateTime? CompletedAt { get; set; } // Nullable

    [System.Text.Json.Serialization.JsonPropertyName("name")]
    public string Name { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("steps")]
    public List<GitHubJobStep> Steps { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("check_run_url")]
    public string CheckRunUrl { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("labels")]
    public List<string> Labels { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("runner_id")]
    public int? RunnerId { get; set; } // Nullable

    [System.Text.Json.Serialization.JsonPropertyName("runner_name")]
    public string RunnerName { get; set; } // Nullable

    [System.Text.Json.Serialization.JsonPropertyName("runner_group_id")]
    public int? RunnerGroupId { get; set; } // Nullable

    [System.Text.Json.Serialization.JsonPropertyName("runner_group_name")]
    public string RunnerGroupName { get; set; } // Nullable

    [System.Text.Json.Serialization.JsonPropertyName("workflow_name")]
    public string WorkflowName { get; set; } // Nullable

    [System.Text.Json.Serialization.JsonPropertyName("head_branch")]
    public string HeadBranch { get; set; } // Nullable
}

/// <summary>
/// One step of a <see cref="GitHubJob"/>, mapped from the snake_case JSON
/// keys GitHub uses for the entries of a job's <c>steps</c> array.
/// </summary>
public class GitHubJobStep
{
    [System.Text.Json.Serialization.JsonPropertyName("status")]
    public string Status { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("conclusion")]
    public string Conclusion { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("name")]
    public string Name { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("number")]
    public int Number { get; set; }

    [System.Text.Json.Serialization.JsonPropertyName("started_at")]
    public DateTime? StartedAt { get; set; } // Nullable

    [System.Text.Json.Serialization.JsonPropertyName("completed_at")]
    public DateTime? CompletedAt { get; set; } // Nullable
}
221+

PoolManager.cs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,24 @@ private async Task CheckForStuckJobs(List<GithubTargetConfiguration> targetConfi
277277
int replacementsInQueue = _queues.CreateTasks.Count(x => x.IsStuckReplacement);
278278
if (replacementsInQueue > 25)
279279
{
280-
_logger.LogWarning($"Creating queue already has {replacementsInQueue} stuck jobs replacements. No adding more strain");
280+
_logger.LogWarning($"Creating queue already has {replacementsInQueue} stuck jobs replacements. Not adding more strain.");
281+
continue;
282+
}
283+
284+
// check job on github
285+
GitHubJob ghJob = await GitHubApi.GetJobInfo(stuckJob.GithubJobId, owner.Name, owner.GitHubToken);
286+
if (ghJob == null || ghJob.Status != "queued")
287+
{
288+
_logger.LogWarning($"job info for {stuckJob.JobId} not found or job not queued anymore.");
289+
290+
if (stuckJob.QueueTime + TimeSpan.FromHours(2) > DateTime.UtcNow)
291+
{
292+
_logger.LogWarning($"Marking stuck job {stuckJob.GithubJobId} vanished as it's no longer in the GitHub queued state for more than 2h.");
293+
stuckJob.State = JobState.Vanished;
294+
stuckJob.CompleteTime = DateTime.UtcNow;
295+
await db.SaveChangesAsync();
296+
}
297+
281298
continue;
282299
}
283300

0 commit comments

Comments
 (0)