From 5d509181c6ef768aedfadd63d56b9b20c950fb5d Mon Sep 17 00:00:00 2001
From: Bhagirath Mehta <bhamehta@microsoft.com>
Date: Mon, 22 Jun 2026 17:10:42 -0500
Subject: [PATCH 1/7] Prevent event loss when a disk write fails during Flush()

OfflineStorageHandler::Flush() moved records out of the in-memory queue with
GetRecords() (which removes them) before handing them to the disk store via
StoreRecords(). If the disk write failed for some or all records (e.g. a SQLite
write error), those records had already been removed from memory and were never
re-queued, so events were silently lost -- only the smaller totalSaved count
hinted at it.

Reserve-then-confirm-delete instead:
- Reserve the records in memory (GetAndReserveRecords with a nominal lease) so
  the originals stay retrievable rather than being dropped outright.
- Persist them one at a time, recording exactly which ids succeeded and failed.
- DeleteRecords() only the persisted ids (now safely on disk).
- ReleaseRecords() the failed ids back into the in-memory queue so they are
  retried on a subsequent flush instead of being lost.

Added OfflineStorageHandlerFlushTests.FailedDiskWriteDuringFlushReturnsRecordsToMemory:
records that the SQLite store rejects (timestamp<=0) remain in memory after
Flush(). Verified the test fails against the previous GetRecords()-based Flush
(GetRecordCount()==0) and passes with this change.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 lib/offline/OfflineStorageHandler.cpp   | 57 ++++++++++++++-------
 tests/unittests/OfflineStorageTests.cpp | 66 +++++++++++++++++++++++++
 2 files changed, 106 insertions(+), 17 deletions(-)
diff --git a/lib/offline/OfflineStorageHandler.cpp b/lib/offline/OfflineStorageHandler.cpp
index 9049339c4..9b3169e7d 100644
--- a/lib/offline/OfflineStorageHandler.cpp
+++ b/lib/offline/OfflineStorageHandler.cpp
@@ -177,28 +177,51 @@ namespace MAT_NS_BEGIN {
         size_t dbSizeBeforeFlush = m_offlineStorageMemory->GetSize();
         if ((m_offlineStorageMemory) && (dbSizeBeforeFlush > 0) && (m_offlineStorageDisk))
         {
-            // This will block on and then take a lock for the duration of this move, and
-            // StoreRecord() will then block until the move completes.
-            auto records = m_offlineStorageMemory->GetRecords(false, EventLatency_Unspecified);
-            std::vector<StorageRecordId> ids;
-
-            // TODO: [MG] - consider running the batch in transaction
-            //            if (sqlite)
-            //                sqlite->Execute("BEGIN");
-
-            size_t totalSaved = m_offlineStorageDisk->StoreRecords(records);
-
-            // TODO: [MG] - consider running the batch in transaction
-            //            if (sqlite)
-            //                sqlite->Execute("END");
+            // Reserve the records in memory (with a nominal lease) instead of
+            // removing them outright. The lease duration is not time-critical
+            // here because the records are persisted and resolved synchronously
+            // under m_flushLock; reserving simply keeps the originals retrievable
+            // so any that fail to persist can be returned to the queue.
+            const unsigned reserveLeaseMs = 120000;
+            std::vector<StorageRecord> records;
+            auto consumer = [&records](StorageRecord&& record) -> bool {
+                records.push_back(std::move(record));
+                return true;
+            };
+            m_offlineStorageMemory->GetAndReserveRecords(consumer, reserveLeaseMs, EventLatency_Unspecified);
+
+            // Persist to disk one record at a time, tracking exactly which records
+            // were stored so we only drop the persisted ones from memory.
+            std::vector<StorageRecordId> persistedIds;
+            std::vector<StorageRecordId> failedIds;
+            persistedIds.reserve(records.size());
+            for (auto& record : records)
+            {
+                if (m_offlineStorageDisk->StoreRecord(record))
+                    persistedIds.push_back(record.id);
+                else
+                    failedIds.push_back(record.id);
+            }
 
-            // Delete records from reserved on flush
             HttpHeaders dummy;
             bool fromMemory = true;
-            m_offlineStorageMemory->DeleteRecords(ids, dummy, fromMemory);
+
+            // Persisted records are now safely on disk: drop them from memory.
+            if (!persistedIds.empty())
+                m_offlineStorageMemory->DeleteRecords(persistedIds, dummy, fromMemory);
+
+            // Records that failed to persist are returned to the in-memory queue
+            // so a partial or total disk write failure does not silently lose
+            // events; they are retried on a subsequent flush.
+            if (!failedIds.empty())
+            {
+                LOG_WARN("Flush: %zu of %zu records failed to persist to disk; returning them to the queue for retry",
+                    failedIds.size(), records.size());
+                m_offlineStorageMemory->ReleaseRecords(failedIds, false, dummy, fromMemory);
+            }
 
             // Notify event listener about the records cached
-            OnStorageRecordsSaved(totalSaved);
+            OnStorageRecordsSaved(persistedIds.size());
 
             if (m_offlineStorageMemory->GetSize() > dbSizeBeforeFlush)
             {
diff --git a/tests/unittests/OfflineStorageTests.cpp b/tests/unittests/OfflineStorageTests.cpp
index bbb8da8e0..36da3029b 100644
--- a/tests/unittests/OfflineStorageTests.cpp
+++ b/tests/unittests/OfflineStorageTests.cpp
@@ -2,7 +2,13 @@
 
 #include "common/Common.hpp"
 #include "common/MockIOfflineStorage.hpp"
+#include "common/MockIOfflineStorageObserver.hpp"
+#include "common/MockIRuntimeConfig.hpp"
+#include "offline/OfflineStorageHandler.hpp"
 #include "offline/StorageObserver.hpp"
+#include "NullObjects.hpp"
+
+#include <sstream>
 
 using namespace testing;
 using namespace MAT;
@@ -162,3 +168,63 @@ TEST_F(OfflineStorageTests, ReleaseRecordsIsForwarded)
         .WillOnce(Return());
     EXPECT_THAT(offlineStorage.releaseRecordsIncRetryCount(ctx), true);
 }
+
+namespace
+{
+    // Dispatcher that drops queued work, so flushes only run when invoked directly.
+    class NoopTaskDispatcher : public ITaskDispatcher
+    {
+    public:
+        void Join() override {}
+        void Queue(Task* task) override { UNREFERENCED_PARAMETER(task); }
+        bool Cancel(Task* task, uint64_t waitTime = 0) override
+        {
+            UNREFERENCED_PARAMETER(task);
+            UNREFERENCED_PARAMETER(waitTime);
+            return true;
+        }
+    };
+}
+
+// Regression test: when records pulled from the in-memory queue fail to persist
+// to disk during Flush(), they must be returned to the queue rather than lost.
+TEST(OfflineStorageHandlerFlushTests, FailedDiskWriteDuringFlushReturnsRecordsToMemory)
+{
+    NullLogManager logManager;
+    NiceMock<MockIRuntimeConfig> config;
+    NoopTaskDispatcher dispatcher;
+    NiceMock<MockIOfflineStorageObserver> observer;
+
+    ON_CALL(config, GetOfflineStorageMaximumSizeBytes()).WillByDefault(Return(32 * 4096));
+    ON_CALL(config, GetMaximumRetryCount()).WillByDefault(Return(5));
+
+    std::ostringstream dbPath;
+    dbPath << GetTempDirectory() << "FlushReserveTest.db";
+    std::remove(dbPath.str().c_str());
+    config[CFG_STR_CACHE_FILE_PATH] = dbPath.str();
+    config[CFG_INT_RAM_QUEUE_SIZE] = 1024 * 1024;  // enable the in-memory queue
+
+    OfflineStorageHandler handler(logManager, config, dispatcher);
+    handler.Initialize(observer);
+
+    // timestamp <= 0 is accepted by the in-memory queue but rejected by the
+    // SQLite disk store, simulating a disk write failure during flush.
+    const size_t kCount = 5;
+    for (size_t i = 0; i < kCount; i++)
+    {
+        StorageRecord r("flush-id-" + std::to_string(i), "tenant-token",
+            EventLatency_Normal, EventPersistence_Normal, /*timestamp*/ 0,
+            std::vector<uint8_t>{ 'x' });
+        handler.StoreRecord(r);
+    }
+    EXPECT_EQ(handler.GetRecordCount(), kCount);
+
+    handler.Flush();
+
+    // The disk rejected every record; with the fix they are returned to the
+    // in-memory queue rather than silently dropped.
+    EXPECT_EQ(handler.GetRecordCount(), kCount);
+
+    handler.Shutdown();
+    std::remove(dbPath.str().c_str());
+}

From 202e8c3d67e2a6894abe94038c8224a71fd7f7a1 Mon Sep 17 00:00:00 2001
From: Bhagirath Mehta <bhamehta@microsoft.com>
Date: Mon, 22 Jun 2026 17:44:00 -0500
Subject: [PATCH 2/7] Address Copilot round-1 comments

OfflineStorageTests.cpp: add explicit #include <cstdio> for std::remove (was
relying on transitive includes), and clean up the SQLite WAL companion files
(-wal/-shm/-journal) in addition to the main .db via a RemoveDbFiles() helper,
so the test does not leave temp files accumulating in the temp directory.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 tests/unittests/OfflineStorageTests.cpp | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/tests/unittests/OfflineStorageTests.cpp b/tests/unittests/OfflineStorageTests.cpp
index 36da3029b..bb412f2af 100644
--- a/tests/unittests/OfflineStorageTests.cpp
+++ b/tests/unittests/OfflineStorageTests.cpp
@@ -8,6 +8,7 @@
 #include "offline/StorageObserver.hpp"
 #include "NullObjects.hpp"
 
+#include <cstdio>
 #include <sstream>
 
 using namespace testing;
@@ -171,6 +172,16 @@ TEST_F(OfflineStorageTests, ReleaseRecordsIsForwarded)
 
 namespace
 {
+    // Remove a SQLite db file along with its WAL-mode companion files
+    // (-wal/-shm/-journal), which would otherwise accumulate in the temp dir.
+    void RemoveDbFiles(const std::string& path)
+    {
+        std::remove(path.c_str());
+        std::remove((path + "-wal").c_str());
+        std::remove((path + "-shm").c_str());
+        std::remove((path + "-journal").c_str());
+    }
+
     // Dispatcher that drops queued work, so flushes only run when invoked directly.
     class NoopTaskDispatcher : public ITaskDispatcher
     {
@@ -200,7 +211,7 @@ TEST(OfflineStorageHandlerFlushTests, FailedDiskWriteDuringFlushReturnsRecordsTo
 
     std::ostringstream dbPath;
     dbPath << GetTempDirectory() << "FlushReserveTest.db";
-    std::remove(dbPath.str().c_str());
+    RemoveDbFiles(dbPath.str());
     config[CFG_STR_CACHE_FILE_PATH] = dbPath.str();
     config[CFG_INT_RAM_QUEUE_SIZE] = 1024 * 1024;  // enable the in-memory queue
 
@@ -226,5 +237,5 @@ TEST(OfflineStorageHandlerFlushTests, FailedDiskWriteDuringFlushReturnsRecordsTo
     EXPECT_EQ(handler.GetRecordCount(), kCount);
 
     handler.Shutdown();
-    std::remove(dbPath.str().c_str());
+    RemoveDbFiles(dbPath.str());
 }

From d79d6cf7be30368adcea42037ff55a12ef234d01 Mon Sep 17 00:00:00 2001
From: Bhagirath Mehta <bhamehta@microsoft.com>
Date: Mon, 22 Jun 2026 18:00:48 -0500
Subject: [PATCH 3/7] Address Copilot round-2 comments

OfflineStorageHandler::Flush(): null-guard the m_offlineStorageMemory->GetSize()
read so Flush() is safe when the RAM queue is disabled (CFG_INT_RAM_QUEUE_SIZE==0)
and only disk storage exists.

OfflineStorageTests.cpp: use a unique per-run temp DB filename (append
getUtcSystemTimeMs()) to avoid cross-run/parallel interference; keep cleanup
best-effort.

Per-record StoreRecord() in Flush is kept deliberately and now documented: a
batched StoreRecords() only returns a count, so on a partial failure we could not
tell which records to delete vs. retry, and re-storing already-persisted records
would duplicate them (events table has no unique record_id constraint). The Room
batched-JNI efficiency trade-off is noted for a possible follow-up (a batch
disk-store API that reports per-record success).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 lib/offline/OfflineStorageHandler.cpp   | 8 ++++++--
 tests/unittests/OfflineStorageTests.cpp | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/lib/offline/OfflineStorageHandler.cpp b/lib/offline/OfflineStorageHandler.cpp
index 9b3169e7d..d68d42132 100644
--- a/lib/offline/OfflineStorageHandler.cpp
+++ b/lib/offline/OfflineStorageHandler.cpp
@@ -174,7 +174,7 @@ namespace MAT_NS_BEGIN {
         // than the handle gets replaced by nullptr in this DeferredCallbackHandle obj.
         m_flushHandle.Cancel();
 
-        size_t dbSizeBeforeFlush = m_offlineStorageMemory->GetSize();
+        size_t dbSizeBeforeFlush = (m_offlineStorageMemory != nullptr) ? m_offlineStorageMemory->GetSize() : 0;
         if ((m_offlineStorageMemory) && (dbSizeBeforeFlush > 0) && (m_offlineStorageDisk))
         {
             // Reserve the records in memory (with a nominal lease) instead of
@@ -191,7 +191,11 @@ namespace MAT_NS_BEGIN {
             m_offlineStorageMemory->GetAndReserveRecords(consumer, reserveLeaseMs, EventLatency_Unspecified);
 
             // Persist to disk one record at a time, tracking exactly which records
-            // were stored so we only drop the persisted ones from memory.
+            // were stored so we only drop the persisted ones from memory. This
+            // intentionally favors correctness over batching: a batched StoreRecords()
+            // only returns a count, so on a partial failure we could not tell which
+            // records to keep vs. retry, and re-storing already-saved records would
+            // duplicate them (the events table has no unique record_id constraint).
             std::vector<StorageRecordId> persistedIds;
             std::vector<StorageRecordId> failedIds;
             persistedIds.reserve(records.size());
diff --git a/tests/unittests/OfflineStorageTests.cpp b/tests/unittests/OfflineStorageTests.cpp
index bb412f2af..aa828dc6a 100644
--- a/tests/unittests/OfflineStorageTests.cpp
+++ b/tests/unittests/OfflineStorageTests.cpp
@@ -210,7 +210,7 @@ TEST(OfflineStorageHandlerFlushTests, FailedDiskWriteDuringFlushReturnsRecordsTo
     ON_CALL(config, GetMaximumRetryCount()).WillByDefault(Return(5));
 
     std::ostringstream dbPath;
-    dbPath << GetTempDirectory() << "FlushReserveTest.db";
+    dbPath << GetTempDirectory() << "FlushReserveTest-" << PAL::getUtcSystemTimeMs() << ".db";
     RemoveDbFiles(dbPath.str());
     config[CFG_STR_CACHE_FILE_PATH] = dbPath.str();
     config[CFG_INT_RAM_QUEUE_SIZE] = 1024 * 1024;  // enable the in-memory queue

From 342be951e1b7457fa853a198ed17fa5e016b00ea Mon Sep 17 00:00:00 2001
From: Bhagirath Mehta <bhamehta@microsoft.com>
Date: Mon, 22 Jun 2026 20:30:39 -0500
Subject: [PATCH 4/7] Address Copilot round-3 comments

OfflineStorage_SQLite::StoreRecord: return false when the INSERT execute() fails
(was ignoring the result and returning true even on a real write failure). The
Flush() reserve/confirm-delete logic in this PR relies on the disk backend
reporting per-record failure; without this, a failed sqlite3_step would still be
treated as persisted and dropped from memory. (This is the same one-line fix as
PR #1491, included here so this PR is correct on its own; trivial overlap that
resolves cleanly at merge.)

OfflineStorageTests.cpp NoopTaskDispatcher: own queued tasks and delete them on
Cancel()/Join()/destruction instead of dropping the Task* (PAL::scheduleTask
allocates with new and expects the dispatcher to take ownership), so the helper
cannot leak.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 lib/offline/OfflineStorage_SQLite.cpp   |  8 +++++++-
 tests/unittests/OfflineStorageTests.cpp | 24 ++++++++++++++++++++----
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/lib/offline/OfflineStorage_SQLite.cpp b/lib/offline/OfflineStorage_SQLite.cpp
index b9b2ed83d..b1c7c82b4 100644
--- a/lib/offline/OfflineStorage_SQLite.cpp
+++ b/lib/offline/OfflineStorage_SQLite.cpp
@@ -177,7 +177,13 @@ namespace MAT_NS_BEGIN {
                 return false;
             }
 #endif
-            SqliteStatement(*m_db, m_stmtInsertEvent_id_tenant_prio_ts_data).execute(record.id, record.tenantToken, static_cast<int>(record.latency), static_cast<int>(record.persistence), record.timestamp, record.blob);
+            if (!SqliteStatement(*m_db, m_stmtInsertEvent_id_tenant_prio_ts_data).execute(record.id, record.tenantToken, static_cast<int>(record.latency), static_cast<int>(record.persistence), record.timestamp, record.blob))
+            {
+                LOG_ERROR("Failed to store event %s:%s: database write failed",
+                    tenantTokenToId(record.tenantToken).c_str(), record.id.c_str());
+                m_observer->OnStorageFailed("Database write failed");
+                return false;
+            }
             m_DbSizeEstimate += record.id.size() + record.tenantToken.size() + record.blob.size();
         }
 
diff --git a/tests/unittests/OfflineStorageTests.cpp b/tests/unittests/OfflineStorageTests.cpp
index aa828dc6a..7c2950ca8 100644
--- a/tests/unittests/OfflineStorageTests.cpp
+++ b/tests/unittests/OfflineStorageTests.cpp
@@ -182,18 +182,34 @@ namespace
         std::remove((path + "-journal").c_str());
     }
 
-    // Dispatcher that drops queued work, so flushes only run when invoked directly.
+    // No-op dispatcher that owns queued tasks and frees them, so flushes only
+    // run when invoked directly and scheduled tasks (if any) are not leaked.
     class NoopTaskDispatcher : public ITaskDispatcher
     {
     public:
-        void Join() override {}
-        void Queue(Task* task) override { UNREFERENCED_PARAMETER(task); }
+        void Join() override { clear(); }
+        void Queue(Task* task) override { m_tasks.push_back(task); }
         bool Cancel(Task* task, uint64_t waitTime = 0) override
         {
-            UNREFERENCED_PARAMETER(task);
             UNREFERENCED_PARAMETER(waitTime);
+            auto it = std::find(m_tasks.begin(), m_tasks.end(), task);
+            if (it != m_tasks.end())
+            {
+                delete *it;
+                m_tasks.erase(it);
+            }
             return true;
         }
+        ~NoopTaskDispatcher() override { clear(); }
+
+    private:
+        void clear()
+        {
+            for (auto* t : m_tasks)
+                delete t;
+            m_tasks.clear();
+        }
+        std::vector<Task*> m_tasks;
     };
 }
 

From 9d1b92a315e505c1b81aa3d57119de5602ee2bd6 Mon Sep 17 00:00:00 2001
From: Bhagirath Mehta <bhamehta@microsoft.com>
Date: Mon, 22 Jun 2026 20:40:56 -0500
Subject: [PATCH 5/7] Address Copilot round-4 comment: don't carry reservation
 lease onto disk

Flush() reserves records with a non-zero lease, which stamps reservedUntil on the
records handed to the consumer. SQLite ignores reservedUntil on insert, but the
Room backend persists it, so freshly flushed records would be written as
"reserved" for ~2 minutes and become temporarily ineligible for upload selection.
Clear record.reservedUntil before the disk StoreRecord() so the in-memory lease
never leaks to durable storage.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 lib/offline/OfflineStorageHandler.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lib/offline/OfflineStorageHandler.cpp b/lib/offline/OfflineStorageHandler.cpp
index d68d42132..828488a6c 100644
--- a/lib/offline/OfflineStorageHandler.cpp
+++ b/lib/offline/OfflineStorageHandler.cpp
@@ -201,6 +201,11 @@ namespace MAT_NS_BEGIN {
             persistedIds.reserve(records.size());
             for (auto& record : records)
             {
+                // The in-memory reservation lease must not be carried onto disk:
+                // SQLite ignores reservedUntil, but the Room backend persists it,
+                // which would make freshly flushed records temporarily ineligible
+                // for upload selection.
+                record.reservedUntil = 0;
                 if (m_offlineStorageDisk->StoreRecord(record))
                     persistedIds.push_back(record.id);
                 else

From 123440ea084fd3c904da33a278d0268a11a0ddd2 Mon Sep 17 00:00:00 2001
From: Bhagirath Mehta <bhamehta@microsoft.com>
Date: Mon, 22 Jun 2026 20:49:57 -0500
Subject: [PATCH 6/7] Address Copilot round-5 comment: clarify test mechanism
 wording

The test comment said it simulates a "disk write failure"; in fact StoreRecord()
fails via the SQLite backend's input validation (timestamp <= 0), not a literal
disk I/O error. Reword to describe the actual mechanism while noting it exercises
the same Flush() failure-handling path (a failed record is returned to memory).

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 tests/unittests/OfflineStorageTests.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/unittests/OfflineStorageTests.cpp b/tests/unittests/OfflineStorageTests.cpp
index 7c2950ca8..1bc834755 100644
--- a/tests/unittests/OfflineStorageTests.cpp
+++ b/tests/unittests/OfflineStorageTests.cpp
@@ -234,8 +234,10 @@ TEST(OfflineStorageHandlerFlushTests, FailedDiskWriteDuringFlushReturnsRecordsTo
     OfflineStorageHandler handler(logManager, config, dispatcher);
     handler.Initialize(observer);
 
-    // timestamp <= 0 is accepted by the in-memory queue but rejected by the
-    // SQLite disk store, simulating a disk write failure during flush.
+    // A timestamp <= 0 is accepted by the in-memory queue but rejected by the
+    // SQLite disk store's input validation, so its StoreRecord() returns false.
+    // This drives the same Flush() failure-handling path as a disk write failure
+    // (a failed record must be returned to memory, not dropped).
     const size_t kCount = 5;
     for (size_t i = 0; i < kCount; i++)
     {

From ce957eeb3320556591a9fb6d744965cdbe3ac55d Mon Sep 17 00:00:00 2001
From: Bhagirath Mehta <bhamehta@microsoft.com>
Date: Mon, 22 Jun 2026 21:05:46 -0500
Subject: [PATCH 7/7] Address Copilot round-6: simplify Flush to
 drain-and-re-insert-failed

Replace the reserve/confirm-delete approach with the simpler drain-and-re-insert
variant Copilot originally suggested. Flush() now drains the in-memory queue into
a single local batch (GetRecords), persists each record, and re-inserts only the
records that fail to persist.

This keeps the data-loss fix (failed records are returned to memory, not dropped)
while resolving two issues with the reserve approach:
- Peak memory: it no longer holds two full copies of every record (reserved map
  + local vector) during the disk-write loop -- only a single copy is in flight.
- It no longer sets a reservation lease, so there is no reservedUntil to clear
  and no risk of the Room backend persisting a stale reservation.

Per-record StoreRecord() is still used deliberately (documented): a batched
StoreRecords() only returns a count, so on partial failure we could not tell which
records to re-queue, and re-storing already-saved records would duplicate them
(no unique record_id constraint). The Room per-record JNI cost remains a possible
follow-up that would need a public IOfflineStorage batch-with-results API.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 lib/offline/OfflineStorageHandler.cpp | 72 +++++++++++----------------
 1 file changed, 28 insertions(+), 44 deletions(-)

diff --git a/lib/offline/OfflineStorageHandler.cpp b/lib/offline/OfflineStorageHandler.cpp
index 828488a6c..90f9c4a27 100644
--- a/lib/offline/OfflineStorageHandler.cpp
+++ b/lib/offline/OfflineStorageHandler.cpp
@@ -177,60 +177,44 @@ namespace MAT_NS_BEGIN {
         size_t dbSizeBeforeFlush = (m_offlineStorageMemory != nullptr) ? m_offlineStorageMemory->GetSize() : 0;
         if ((m_offlineStorageMemory) && (dbSizeBeforeFlush > 0) && (m_offlineStorageDisk))
         {
-            // Reserve the records in memory (with a nominal lease) instead of
-            // removing them outright. The lease duration is not time-critical
-            // here because the records are persisted and resolved synchronously
-            // under m_flushLock; reserving simply keeps the originals retrievable
-            // so any that fail to persist can be returned to the queue.
-            const unsigned reserveLeaseMs = 120000;
-            std::vector<StorageRecord> records;
-            auto consumer = [&records](StorageRecord&& record) -> bool {
-                records.push_back(std::move(record));
-                return true;
-            };
-            m_offlineStorageMemory->GetAndReserveRecords(consumer, reserveLeaseMs, EventLatency_Unspecified);
-
-            // Persist to disk one record at a time, tracking exactly which records
-            // were stored so we only drop the persisted ones from memory. This
-            // intentionally favors correctness over batching: a batched StoreRecords()
-            // only returns a count, so on a partial failure we could not tell which
-            // records to keep vs. retry, and re-storing already-saved records would
-            // duplicate them (the events table has no unique record_id constraint).
-            std::vector<StorageRecordId> persistedIds;
-            std::vector<StorageRecordId> failedIds;
-            persistedIds.reserve(records.size());
+            // Drain the in-memory queue into a local batch. Records are removed
+            // from memory here; any that fail to persist below are re-inserted, so
+            // a disk write failure does not silently lose events. Draining (rather
+            // than reserving) keeps only a single copy of each record in flight and
+            // avoids stamping a reservation lease that the Room backend would
+            // persist to disk.
+            auto records = m_offlineStorageMemory->GetRecords(false, EventLatency_Unspecified);
+
+            // Persist one record at a time so we know exactly which succeeded. A
+            // batched StoreRecords() only returns a count, so on a partial failure
+            // we could not tell which records to re-queue, and re-storing
+            // already-saved records would duplicate them (the events table has no
+            // unique record_id constraint).
+            size_t totalSaved = 0;
+            size_t totalFailed = 0;
             for (auto& record : records)
             {
-                // The in-memory reservation lease must not be carried onto disk:
-                // SQLite ignores reservedUntil, but the Room backend persists it,
-                // which would make freshly flushed records temporarily ineligible
-                // for upload selection.
-                record.reservedUntil = 0;
                 if (m_offlineStorageDisk->StoreRecord(record))
-                    persistedIds.push_back(record.id);
+                {
+                    ++totalSaved;
+                }
                 else
-                    failedIds.push_back(record.id);
+                {
+                    // Return the record to the in-memory queue for retry on a
+                    // subsequent flush instead of dropping it.
+                    ++totalFailed;
+                    m_offlineStorageMemory->StoreRecord(record);
+                }
             }
 
-            HttpHeaders dummy;
-            bool fromMemory = true;
-
-            // Persisted records are now safely on disk: drop them from memory.
-            if (!persistedIds.empty())
-                m_offlineStorageMemory->DeleteRecords(persistedIds, dummy, fromMemory);
-
-            // Records that failed to persist are returned to the in-memory queue
-            // so a partial or total disk write failure does not silently lose
-            // events; they are retried on a subsequent flush.
-            if (!failedIds.empty())
+            if (totalFailed > 0)
             {
-                LOG_WARN("Flush: %zu of %zu records failed to persist to disk; returning them to the queue for retry",
-                    failedIds.size(), records.size());
-                m_offlineStorageMemory->ReleaseRecords(failedIds, false, dummy, fromMemory);
+                LOG_WARN("Flush: %zu of %zu records failed to persist to disk; returned to the queue for retry",
+                    totalFailed, records.size());
             }
 
             // Notify event listener about the records cached
-            OnStorageRecordsSaved(persistedIds.size());
+            OnStorageRecordsSaved(totalSaved);
 
             if (m_offlineStorageMemory->GetSize() > dbSizeBeforeFlush)
             {