Make ArrowWriter thread-safe

geeksville · geeksville · commit 7e007e7e2486 · 2024-07-11T11:48:53.000-07:00
diff --git a/meshtastic/slog/arrow.py b/meshtastic/slog/arrow.py
@@ -1,6 +1,7 @@
 """Utilities for Apache Arrow serialization."""
 
 import logging
+import threading
 import os
 from typing import Optional
 
@@ -22,23 +23,26 @@ def __init__(self, file_name: str):
         self.new_rows: list[dict] = []
         self.schema: Optional[pa.Schema] = None  # haven't yet learned the schema
         self.writer: Optional[pa.RecordBatchStreamWriter] = None
+        self._lock = threading.Condition()  # Ensure only one thread writes at a time
 
     def close(self):
         """Close the stream and writes the file as needed."""
-        self._write()
-        if self.writer:
-            self.writer.close()
-        self.sink.close()
+        with self._lock:
+            self._write()
+            if self.writer:
+                self.writer.close()
+            self.sink.close()
 
     def set_schema(self, schema: pa.Schema):
         """Set the schema for the file.
         Only needed for datasets where we can't learn it from the first record written.
 
         schema (pa.Schema): The schema to use.
         """
-        assert self.schema is None
-        self.schema = schema
-        self.writer = pa.ipc.new_stream(self.sink, schema)
+        with self._lock:
+            assert self.schema is None
+            self.schema = schema
+            self.writer = pa.ipc.new_stream(self.sink, schema)
 
     def _write(self):
         """Write the new rows to the file."""
@@ -56,9 +60,10 @@ def add_row(self, row_dict: dict):
         """Add a row to the arrow file.
         We will automatically learn the schema from the first row. But all rows must use that schema.
         """
-        self.new_rows.append(row_dict)
-        if len(self.new_rows) >= chunk_size:
-            self._write()
+        with self._lock:
+            self.new_rows.append(row_dict)
+            if len(self.new_rows) >= chunk_size:
+                self._write()
 
 
 class FeatherWriter(ArrowWriter):