Skip to content

Commit 0fffa89

Browse files
authored
Fix StateChangedAt mismatch during container recovery from storage (#14482)
* Use Docker's FinishedAt timestamp in Transition() to fix StateChangedAt mismatch during container recovery * Extract GetDockerFinishedAt() helper * Address Feedback * Fail if Docker event time is missing * Address copilot feedback * Use Docker stop event timestamp instead of InspectContainer() * Apply copilot feedback * PR Feedback
1 parent cc5f358 commit 0fffa89

6 files changed

Lines changed: 77 additions & 42 deletions

File tree

src/windows/wslcsession/ContainerEventTracker.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,13 +145,18 @@ void ContainerEventTracker::OnEvent(const std::string_view& event)
145145
}
146146
}
147147

148+
auto timeEntry = parsed.find("time");
149+
THROW_HR_IF_MSG(
150+
E_INVALIDARG, timeEntry == parsed.end(), "Failed to parse time from event: %.*hs", static_cast<int>(event.size()), event.data());
151+
std::uint64_t eventTime = timeEntry->get<std::uint64_t>();
152+
148153
std::lock_guard lock{m_lock};
149154

150155
for (const auto& e : m_callbacks)
151156
{
152157
if (e.ContainerId == containerId && (!e.ExecId.has_value() || e.ExecId == execId))
153158
{
154-
e.Callback(it->second, exitCode);
159+
e.Callback(it->second, exitCode, eventTime);
155160
}
156161
}
157162
}

src/windows/wslcsession/ContainerEventTracker.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ class ContainerEventTracker
5454
ContainerEventTracker* m_tracker = nullptr;
5555
};
5656

57-
using ContainerStateChangeCallback = std::function<void(ContainerEvent, std::optional<int>)>;
57+
using ContainerStateChangeCallback = std::function<void(ContainerEvent, std::optional<int>, std::uint64_t)>;
5858

5959
ContainerEventTracker(DockerHTTPClient& dockerClient, ULONG sessionId, IORelay& relay);
6060
~ContainerEventTracker();

src/windows/wslcsession/WSLCContainer.cpp

Lines changed: 58 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ WSLCContainerImpl::WSLCContainerImpl(
355355
m_eventTracker(EventTracker),
356356
m_ioRelay(Relay),
357357
m_containerEvents(EventTracker.RegisterContainerStateUpdates(
358-
m_id, std::bind(&WSLCContainerImpl::OnEvent, this, std::placeholders::_1, std::placeholders::_2))),
358+
m_id, std::bind(&WSLCContainerImpl::OnEvent, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3))),
359359
m_state(InitialState),
360360
m_initProcessFlags(InitProcessFlags),
361361
m_containerFlags(ContainerFlags)
@@ -547,8 +547,6 @@ void WSLCContainerImpl::Start(WSLCContainerStartFlags Flags, LPCSTR DetachKeys)
547547
m_initProcessControl = nullptr;
548548
});
549549

550-
m_stoppedNotifiedEvent.ResetEvent();
551-
552550
auto volumeCleanup = MountVolumes(m_mountedVolumes, m_virtualMachine);
553551

554552
auto portCleanup = wil::scope_exit_log(WI_DIAGNOSTICS_INFO, [this]() { UnmapPorts(); });
@@ -567,34 +565,34 @@ void WSLCContainerImpl::Start(WSLCContainerStartFlags Flags, LPCSTR DetachKeys)
567565
cleanup.release();
568566
}
569567

570-
void WSLCContainerImpl::OnEvent(ContainerEvent event, std::optional<int> exitCode)
568+
void WSLCContainerImpl::OnEvent(ContainerEvent event, std::optional<int> exitCode, std::uint64_t eventTime)
571569
{
572570
if (event == ContainerEvent::Stop)
573571
{
574-
m_stoppedNotifiedEvent.SetEvent();
575-
576572
THROW_HR_IF(E_UNEXPECTED, !exitCode.has_value());
577-
auto lock = m_lock.lock_exclusive();
578-
auto previousState = m_state;
579573

574+
// If a Stop() call is in progress, provide the timestamp via the promise
575+
// and let Stop() handle the state transition.
580576
{
581-
std::lock_guard processesLock{m_processesLock};
582-
583-
// Notify all processes that the container has exited.
584-
// N.B. The exec callback isn't always sent to execed processes, so do this to avoid 'stuck' processes.
585-
for (auto& process : m_processes)
577+
std::lock_guard stopLock{m_stopStateLock};
578+
if (m_stopState.has_value())
586579
{
587-
process->OnContainerReleased();
580+
m_stopState->set_value(eventTime);
581+
m_stopState.reset();
582+
return;
588583
}
589-
590-
m_processes.clear();
591584
}
592585

586+
auto lock = m_lock.lock_exclusive();
587+
auto previousState = m_state;
588+
589+
ReleaseProcesses();
590+
593591
// Don't run the deletion logic if the container is already in a stopped / deleted state.
594592
// This can happen if Delete() is called by the user.
595593
if (previousState == WslcContainerStateRunning)
596594
{
597-
Transition(WslcContainerStateExited);
595+
Transition(WslcContainerStateExited, eventTime);
598596

599597
ReleaseRuntimeResources();
600598

@@ -639,6 +637,22 @@ void WSLCContainerImpl::Stop(WSLCSignal Signal, LONG TimeoutSeconds)
639637
m_state);
640638
}
641639

640+
// Set up a waitable stop state so OnEvent() can pass the Docker timestamp
641+
// back to Stop() without needing to take m_lock.
642+
std::future<std::uint64_t> stopFuture;
643+
{
644+
std::lock_guard stopLock{m_stopStateLock};
645+
m_stopState.emplace();
646+
stopFuture = m_stopState->get_future();
647+
}
648+
649+
// Ensure m_stopState is cleared on all exit paths so OnEvent() doesn't
650+
// take the promise path after a failed Stop().
651+
auto resetStopState = wil::scope_exit([this]() {
652+
std::lock_guard stopLock{m_stopStateLock};
653+
m_stopState.reset();
654+
});
655+
642656
try
643657
{
644658
std::optional<WSLCSignal> SignalArg;
@@ -664,25 +678,24 @@ void WSLCContainerImpl::Stop(WSLCSignal Signal, LONG TimeoutSeconds)
664678
}
665679
}
666680

667-
Transition(WslcContainerStateExited);
681+
// Wait for the stop event to get the Docker timestamp.
682+
// Safe while holding m_lock since OnEvent() uses m_stopStateLock on this path.
683+
std::optional<std::uint64_t> stopTimestamp;
684+
if (stopFuture.wait_for(60s) == std::future_status::ready)
685+
{
686+
stopTimestamp = stopFuture.get();
687+
}
688+
689+
Transition(WslcContainerStateExited, stopTimestamp);
690+
691+
ReleaseProcesses();
668692

669693
ReleaseRuntimeResources();
670694

671695
if (WI_IsFlagSet(m_containerFlags, WSLCContainerFlagsRm))
672696
{
673697
DeleteExclusiveLockHeld(WSLCDeleteFlagsForce);
674698
}
675-
else
676-
{
677-
// Wait for the stop notification to arrive before returning.
678-
// This is required so that a caller that Stops() and immediately calls Start() again doesn't see the container
679-
// switch back to 'stopped' state due to the delayed event notification.
680-
681-
auto io = m_wslcSession.CreateIOContext();
682-
io.AddHandle(std::make_unique<EventHandle>(m_stoppedNotifiedEvent.get()), MultiHandleWait::CancelOnCompleted);
683-
684-
io.Run({60s});
685-
}
686699
}
687700

688701
void WSLCContainerImpl::Delete(WSLCDeleteFlags Flags)
@@ -1427,6 +1440,20 @@ void WSLCContainerImpl::UnmapPorts()
14271440
}
14281441
}
14291442

1443+
__requires_exclusive_lock_held(m_lock) void WSLCContainerImpl::ReleaseProcesses()
1444+
{
1445+
std::lock_guard processesLock{m_processesLock};
1446+
1447+
// Notify all processes that the container has exited.
1448+
// The exec callback isn't always sent to execed processes, so do this to avoid 'stuck' processes.
1449+
for (auto& process : m_processes)
1450+
{
1451+
process->OnContainerReleased();
1452+
}
1453+
1454+
m_processes.clear();
1455+
}
1456+
14301457
__requires_exclusive_lock_held(m_lock) void WSLCContainerImpl::ReleaseRuntimeResources()
14311458
{
14321459
WSL_LOG("ReleaseRuntimeResources", TraceLoggingValue(m_id.c_str(), "ID"));
@@ -1468,7 +1495,7 @@ __requires_exclusive_lock_held(m_lock) void WSLCContainerImpl::DisconnectComWrap
14681495
}
14691496
}
14701497

1471-
__requires_lock_held(m_lock) void WSLCContainerImpl::Transition(WSLCContainerState State) noexcept
1498+
__requires_lock_held(m_lock) void WSLCContainerImpl::Transition(WSLCContainerState State, std::optional<std::uint64_t> stateChangedAt) noexcept
14721499
{
14731500
// N.B. A deleted container cannot transition back to any other state.
14741501
WI_ASSERT(m_state != WslcContainerStateDeleted);
@@ -1480,7 +1507,7 @@ __requires_lock_held(m_lock) void WSLCContainerImpl::Transition(WSLCContainerSta
14801507
TraceLoggingValue(m_id.c_str(), "ID"));
14811508

14821509
m_state = State;
1483-
m_stateChangedAt = static_cast<std::uint64_t>(std::time(nullptr));
1510+
m_stateChangedAt = stateChangedAt.value_or(static_cast<std::uint64_t>(std::time(nullptr)));
14841511
}
14851512

14861513
WSLCContainer::WSLCContainer(WSLCContainerImpl* impl, std::function<void(const WSLCContainerImpl*)>&& OnDeleted) :

src/windows/wslcsession/WSLCContainer.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ class WSLCContainerImpl
9393
const std::string& Name() const noexcept;
9494
WSLCContainerState State() const noexcept;
9595

96-
__requires_lock_held(m_lock) void Transition(WSLCContainerState State) noexcept;
96+
__requires_lock_held(m_lock) void Transition(WSLCContainerState State, std::optional<std::uint64_t> stateChangedAt = std::nullopt) noexcept;
9797

9898
void OnProcessReleased(DockerExecProcessControl* process) noexcept;
9999

@@ -130,10 +130,11 @@ class WSLCContainerImpl
130130
__requires_exclusive_lock_held(m_lock) void DeleteExclusiveLockHeld(WSLCDeleteFlags Flags);
131131

132132
void AllocateBridgedModePorts();
133-
void OnEvent(ContainerEvent event, std::optional<int> exitCode);
133+
void OnEvent(ContainerEvent event, std::optional<int> exitCode, std::uint64_t eventTime);
134134
void WaitForContainerEvent();
135135
__requires_exclusive_lock_held(m_lock) void ReleaseResources();
136136
__requires_exclusive_lock_held(m_lock) void ReleaseRuntimeResources();
137+
__requires_exclusive_lock_held(m_lock) void ReleaseProcesses();
137138
__requires_exclusive_lock_held(m_lock) void DisconnectComWrapper();
138139

139140
std::unique_ptr<RelayedProcessIO> CreateRelayedProcessIO(wil::unique_handle&& stream, WSLCProcessFlags flags);
@@ -154,7 +155,8 @@ class WSLCContainerImpl
154155
__guarded_by(m_processesLock) Microsoft::WRL::ComPtr<IWSLCProcess> m_initProcess;
155156
__guarded_by(m_processesLock) DockerContainerProcessControl* m_initProcessControl = nullptr;
156157

157-
wil::unique_event m_stoppedNotifiedEvent{wil::EventOptions::ManualReset};
158+
std::mutex m_stopStateLock;
159+
std::optional<std::promise<std::uint64_t>> m_stopState;
158160
DockerHTTPClient& m_dockerClient;
159161
std::uint64_t m_stateChangedAt{static_cast<std::uint64_t>(std::time(nullptr))};
160162
std::uint64_t m_createdAt{static_cast<std::uint64_t>(std::time(nullptr))};

src/windows/wslcsession/WSLCProcessControl.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ DockerContainerProcessControl::DockerContainerProcessControl(WSLCContainerImpl&
4545
m_container(&Container),
4646
m_client(DockerClient),
4747
m_trackingReference(EventTracker.RegisterContainerStateUpdates(
48-
Container.ID(), std::bind(&DockerContainerProcessControl::OnEvent, this, std::placeholders::_1, std::placeholders::_2)))
48+
Container.ID(),
49+
std::bind(&DockerContainerProcessControl::OnEvent, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3)))
4950
{
5051
}
5152

@@ -69,7 +70,7 @@ void DockerContainerProcessControl::ResizeTty(ULONG Rows, ULONG Columns)
6970
m_client.ResizeContainerTty(m_container->ID(), Rows, Columns);
7071
}
7172

72-
void DockerContainerProcessControl::OnEvent(ContainerEvent Event, std::optional<int> ExitCode)
73+
void DockerContainerProcessControl::OnEvent(ContainerEvent Event, std::optional<int> ExitCode, std::uint64_t /*eventTime*/)
7374
{
7475
if (Event == ContainerEvent::Stop)
7576
{
@@ -118,7 +119,7 @@ DockerExecProcessControl::DockerExecProcessControl(
118119
m_id(Id),
119120
m_client(DockerClient),
120121
m_trackingReference(EventTracker.RegisterExecStateUpdates(
121-
Container.ID(), Id, std::bind(&DockerExecProcessControl::OnEvent, this, std::placeholders::_1, std::placeholders::_2)))
122+
Container.ID(), Id, std::bind(&DockerExecProcessControl::OnEvent, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3)))
122123
{
123124
}
124125

@@ -173,7 +174,7 @@ void DockerExecProcessControl::SetExitCode(int ExitCode)
173174
}
174175
}
175176

176-
void DockerExecProcessControl::OnEvent(ContainerEvent Event, std::optional<int> ExitCode)
177+
void DockerExecProcessControl::OnEvent(ContainerEvent Event, std::optional<int> ExitCode, std::uint64_t /*eventTime*/)
177178
{
178179
if (Event == ContainerEvent::ExecDied && !m_exitEvent.is_signaled())
179180
{

src/windows/wslcsession/WSLCProcessControl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class DockerContainerProcessControl : public WSLCProcessControl
5050
void OnContainerReleased() noexcept;
5151

5252
private:
53-
void OnEvent(ContainerEvent Event, std::optional<int> ExitCode);
53+
void OnEvent(ContainerEvent Event, std::optional<int> ExitCode, std::uint64_t eventTime);
5454

5555
std::mutex m_lock;
5656
DockerHTTPClient& m_client;
@@ -72,7 +72,7 @@ class DockerExecProcessControl : public WSLCProcessControl
7272
void SetExitCode(int ExitCode);
7373

7474
private:
75-
void OnEvent(ContainerEvent Event, std::optional<int> ExitCode);
75+
void OnEvent(ContainerEvent Event, std::optional<int> ExitCode, std::uint64_t eventTime);
7676

7777
mutable std::mutex m_lock;
7878
std::string m_id;

0 commit comments

Comments
 (0)