@@ -346,9 +346,34 @@ void deeplake_sync_schemas_for_db(const std::string& db_name,
346346 * Sync tables for a specific database from pre-loaded catalog data via libpq.
347347 * Creates missing tables in the target database.
348348 */
/**
 * Split a comma-separated list of column names into individual names.
 *
 * The input uses a trailing-comma format ("col1,col2,"), so empty segments
 * (produced by a trailing, leading, or doubled comma) are skipped instead of
 * being returned as empty names. Names are returned verbatim and in input
 * order; no whitespace trimming is performed.
 *
 * @param column_names Comma-separated names, e.g. "col1,col2,".
 * @return The non-empty names in order of appearance; empty if there are none.
 */
std::vector<std::string> parse_column_names(const std::string& column_names)
{
    std::vector<std::string> result;
    std::string::size_type begin = 0;
    while (begin < column_names.size()) {
        // Locate the end of the current segment: the next comma, or the end
        // of the string when the final name is not comma-terminated.
        std::string::size_type end = column_names.find(',', begin);
        if (end == std::string::npos) {
            end = column_names.size();
        }
        // Zero-length segments carry no name; skip them.
        if (end > begin) {
            // Construct the substring directly in the vector (no temporary).
            result.emplace_back(column_names, begin, end - begin);
        }
        begin = end + 1;
    }
    return result;
}
372+
349373void deeplake_sync_tables_for_db (const std::string& db_name,
350374 const std::vector<pg::dl_catalog::table_meta>& tables,
351- const std::vector<pg::dl_catalog::column_meta>& columns)
375+ const std::vector<pg::dl_catalog::column_meta>& columns,
376+ const std::vector<pg::dl_catalog::index_meta>& indexes)
352377{
353378 for (const auto & meta : tables) {
354379 if (meta.state == " dropping" ) {
@@ -373,6 +398,24 @@ void deeplake_sync_tables_for_db(const std::string& db_name,
373398 continue ;
374399 }
375400
401+ // Find indexes for this table
402+ std::vector<pg::dl_catalog::index_meta> table_indexes;
403+ for (const auto & idx : indexes) {
404+ if (idx.table_id == meta.table_id ) {
405+ table_indexes.push_back (idx);
406+ }
407+ }
408+
409+ // Determine which columns are part of a primary key (inverted_index on non-nullable columns)
410+ // The primary key columns are stored as comma-separated names in column_names
411+ std::vector<std::string> pk_columns;
412+ for (const auto & idx : table_indexes) {
413+ if (idx.index_type == " inverted_index" ) {
414+ pk_columns = parse_column_names (idx.column_names );
415+ break ;
416+ }
417+ }
418+
376419 const char * qschema = quote_identifier (meta.schema_name .c_str ());
377420 const char * qtable = quote_identifier (meta.table_name .c_str ());
378421
@@ -390,6 +433,19 @@ void deeplake_sync_tables_for_db(const std::string& db_name,
390433 first = false ;
391434 appendStringInfo (&buf, " %s %s" , quote_identifier (col.column_name .c_str ()), col.pg_type .c_str ());
392435 }
436+
437+ // Add PRIMARY KEY table constraint if we have PK columns
438+ if (!pk_columns.empty ()) {
439+ appendStringInfoString (&buf, " , PRIMARY KEY (" );
440+ for (size_t i = 0 ; i < pk_columns.size (); ++i) {
441+ if (i > 0 ) {
442+ appendStringInfoString (&buf, " , " );
443+ }
444+ appendStringInfoString (&buf, quote_identifier (pk_columns[i].c_str ()));
445+ }
446+ appendStringInfoChar (&buf, ' )' );
447+ }
448+
393449 appendStringInfo (&buf, " ) USING deeplake" );
394450
395451 if (execute_via_libpq (db_name.c_str (), buf.data )) {
@@ -487,9 +543,10 @@ void sync_all_databases(
487543 }
488544
489545 auto [tables, columns] = pg::dl_catalog::load_tables_and_columns (root_path, db_name, creds);
490- deeplake_sync_tables_for_db (db_name, tables, columns);
491- elog (LOG, " pg_deeplake sync: synced %zu schemas, %zu tables for database '%s'" ,
492- schemas.size (), tables.size (), db_name.c_str ());
546+ auto indexes = pg::dl_catalog::load_indexes (root_path, db_name, creds);
547+ deeplake_sync_tables_for_db (db_name, tables, columns, indexes);
548+ elog (LOG, " pg_deeplake sync: synced %zu schemas, %zu tables, %zu indexes for database '%s'" ,
549+ schemas.size (), tables.size (), indexes.size (), db_name.c_str ());
493550 } catch (const std::exception& e) {
494551 elog (WARNING, " pg_deeplake sync: failed to sync database '%s': %s" , db_name.c_str (), e.what ());
495552 } catch (...) {
0 commit comments