Skip to content

Commit 23ab3a8

Browse files
committed
Fixed out of bounds fix after drop column.
1 parent 0487488 commit 23ab3a8

7 files changed

Lines changed: 53 additions & 30 deletions

File tree

cpp/deeplake_pg/column_statistics.cpp

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -218,20 +218,10 @@ bool inject_column_statistics(Relation rel, int16_t attnum)
218218
return false;
219219
}
220220

221-
// Get DeepLake column view - attnum is 1-based, column index is 0-based
222-
int32_t col_idx = attnum - 1;
223-
224-
// Skip dropped columns by finding the actual column index
225-
int32_t logical_idx = 0;
226-
for (int32_t i = 0; i < tupdesc->natts && logical_idx <= col_idx; ++i) {
227-
Form_pg_attribute a = TupleDescAttr(tupdesc, i);
228-
if (!a->attisdropped) {
229-
if (logical_idx == col_idx) {
230-
col_idx = i;
231-
break;
232-
}
233-
logical_idx++;
234-
}
221+
// Map PG attnum to logical column index (handles dropped columns correctly)
222+
const auto col_idx = table_data.logical_index_for_attnum(attnum);
223+
if (col_idx < 0) {
224+
return false;
235225
}
236226

237227
heimdall::column_view_ptr column_view;

cpp/deeplake_pg/deeplake_executor.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,10 @@ void analyze_plan(PlannedStmt* plan)
7777
if (attnum <= 0) { // Only positive attribute numbers are real columns
7878
continue;
7979
}
80-
auto col_idx = static_cast<int32_t>(attnum - 1);
80+
auto col_idx = table_data->logical_index_for_attnum(attnum);
81+
if (col_idx < 0) {
82+
continue; // Dropped column or out of range
83+
}
8184
if (!table_data->is_column_requested(col_idx)) {
8285
table_data->set_column_requested(col_idx, true);
8386
if (!table_data->column_has_streamer(col_idx) && table_data->can_stream_column(col_idx)) {

cpp/deeplake_pg/table_am.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -367,10 +367,13 @@ double deeplake_index_build_range_scan(Relation heap_rel,
367367
AttrNumber* indexkeys = index_info->ii_IndexAttrNumbers;
368368
const auto table_id = RelationGetRelid(heap_rel);
369369
auto& td = pg::table_storage::instance().get_table_data(table_id);
370+
// Map index key attnums to logical column indices
371+
std::vector<int32_t> key_logical_indices(nkeys, -1);
370372
for (int32_t i = 0; i < nkeys; ++i) {
371-
int32_t attnum = indexkeys[i] - 1;
372-
if (attnum >= 0 && !td.column_has_streamer(attnum) && td.can_stream_column(attnum)) {
373-
td.create_streamer(attnum, -1);
373+
auto logical = td.logical_index_for_attnum(indexkeys[i]);
374+
key_logical_indices[i] = logical;
375+
if (logical >= 0 && !td.column_has_streamer(logical) && td.can_stream_column(logical)) {
376+
td.create_streamer(logical, -1);
374377
}
375378
}
376379
std::vector<Datum> values(nkeys, 0);
@@ -382,12 +385,12 @@ double deeplake_index_build_range_scan(Relation heap_rel,
382385
auto [block_number, offset_number] = pg::utils::row_number_to_tid(row);
383386
ItemPointerSet(&tid, block_number, offset_number);
384387
for (int32_t i = 0; i < nkeys; ++i) {
385-
int32_t attnum = indexkeys[i] - 1;
386-
if (attnum < 0) [[unlikely]] {
388+
auto logical = key_logical_indices[i];
389+
if (logical < 0) [[unlikely]] {
387390
nulls[i] = true;
388391
values[i] = 0;
389392
} else [[likely]] {
390-
auto [value, null] = tscan.get_datum(attnum, row);
393+
auto [value, null] = tscan.get_datum(logical, row);
391394
values[i] = value;
392395
nulls[i] = null;
393396
}

cpp/deeplake_pg/table_data.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ struct table_data
6767
inline std::string get_atttypename(AttrNumber attr_num) const noexcept;
6868
inline bool is_column_dropped(AttrNumber attr_num) const noexcept;
6969
inline int32_t get_tupdesc_index(AttrNumber attr_num) const noexcept;
70+
inline int32_t logical_index_for_attnum(int32_t attnum) const noexcept;
7071
inline bool is_column_nullable(AttrNumber attr_num) const noexcept;
7172
inline bool is_column_indexed(AttrNumber attr_num) const noexcept;
7273
inline int32_t num_columns() const noexcept;
@@ -176,6 +177,7 @@ struct table_data
176177
icm::vector<bool> requested_columns_;
177178
icm::vector<Oid> base_typeids_; // Cached base type OIDs for performance
178179
icm::vector<int32_t> active_column_indices_; // Maps logical index to TupleDesc index (excludes dropped)
180+
icm::vector<int32_t> tupdesc_to_logical_; // Maps TupleDesc index to logical index (-1 if dropped)
179181
icm::string_map<> creds_;
180182
TupleDesc tuple_descriptor_;
181183
http::uri dataset_path_ = http::uri(std::string());

cpp/deeplake_pg/table_data_impl.hpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,12 @@ inline table_data::table_data(
6464
}
6565
}
6666

67+
// Build reverse mapping: TupleDesc index → logical index (-1 for dropped)
68+
tupdesc_to_logical_.resize(tuple_descriptor_->natts, -1);
69+
for (int32_t logical = 0; logical < static_cast<int32_t>(active_column_indices_.size()); ++logical) {
70+
tupdesc_to_logical_[active_column_indices_[logical]] = logical;
71+
}
72+
6773
const auto num_active = active_column_indices_.size();
6874
requested_columns_.resize(num_active, false);
6975
base_typeids_.resize(num_active);
@@ -272,6 +278,15 @@ inline int32_t table_data::get_tupdesc_index(AttrNumber attr_num) const noexcept
272278
return active_column_indices_[attr_num];
273279
}
274280

281+
inline int32_t table_data::logical_index_for_attnum(int32_t attnum) const noexcept
282+
{
283+
const auto tupdesc_idx = attnum - 1;
284+
if (tupdesc_idx < 0 || tupdesc_idx >= static_cast<int32_t>(tupdesc_to_logical_.size())) {
285+
return -1;
286+
}
287+
return tupdesc_to_logical_[tupdesc_idx];
288+
}
289+
275290
inline bool table_data::is_column_indexed(AttrNumber attr_num) const noexcept
276291
{
277292
return pg::pg_index::get_oid(table_name_, get_atttypename(attr_num)) != InvalidOid;
@@ -318,13 +333,14 @@ inline void table_data::add_insert_slots(int32_t nslots, TupleTableSlot** slots)
318333
for (int32_t i = 0; i < num_columns(); ++i) {
319334
auto& column_values = insert_rows_[get_atttypename(i)];
320335
const auto dt = get_column_view(i)->dtype();
336+
const auto slot_pos = get_tupdesc_index(i);
321337
for (int32_t k = 0; k < nslots; ++k) {
322338
auto slot = slots[k];
323339
nd::array val;
324-
if (slot->tts_isnull[i]) {
340+
if (slot->tts_isnull[slot_pos]) {
325341
val = (nd::dtype_is_numeric(dt) ? nd::adapt(0) : nd::none(dt, 0));
326342
} else {
327-
val = pg::utils::datum_to_nd(slot->tts_values[i], get_base_atttypid(i), get_atttypmod(i));
343+
val = pg::utils::datum_to_nd(slot->tts_values[slot_pos], get_base_atttypid(i), get_atttypmod(i));
328344
}
329345
column_values.push_back(std::move(val));
330346
}

cpp/deeplake_pg/table_scan_impl.hpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -158,19 +158,23 @@ inline std::pair<Datum, bool> table_scan::get_datum(int32_t column_number, int64
158158
inline void table_scan::convert_nd_to_pg(int64_t row_number, Datum* values, bool* nulls) const noexcept
159159
{
160160
for (auto col : null_columns_) {
161-
nulls[col] = true;
161+
const auto slot_pos = table_data_.get_tupdesc_index(col);
162+
nulls[slot_pos] = true;
162163
}
163164
for (auto col : scored_columns_) {
164-
nulls[col] = false;
165+
const auto slot_pos = table_data_.get_tupdesc_index(col);
166+
nulls[slot_pos] = false;
165167
}
166168
for (auto col : special_columns_) {
167-
values[col] = pg::utils::make_special_datum(table_id_, row_number, col, table_data_.get_base_atttypid(col));
168-
nulls[col] = false;
169+
const auto slot_pos = table_data_.get_tupdesc_index(col);
170+
values[slot_pos] = pg::utils::make_special_datum(table_id_, row_number, col, table_data_.get_base_atttypid(col));
171+
nulls[slot_pos] = false;
169172
}
170173
for (auto col : process_columns_) {
174+
const auto slot_pos = table_data_.get_tupdesc_index(col);
171175
auto [datum, is_null] = get_datum(col, row_number);
172-
values[col] = datum;
173-
nulls[col] = is_null;
176+
values[slot_pos] = datum;
177+
nulls[slot_pos] = is_null;
174178
}
175179
}
176180

cpp/deeplake_pg/table_storage.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -951,10 +951,15 @@ void table_storage::create_table(const std::string& table_name, Oid table_id, Tu
951951

952952
void table_storage::drop_table(const std::string& table_name)
953953
{
954-
pg::table_ddl_lock_guard ddl_lock;
954+
// Load metadata BEFORE acquiring the DDL lock.
955+
// force_load_table_metadata() may trigger CREATE TABLE (via SPI) for tables
956+
// in the S3 catalog that don't exist in pg_class yet. CREATE TABLE goes
957+
// through the table AM which also acquires the DDL lock — doing that while
958+
// we already hold it would self-deadlock (LWLocks are not recursive).
955959
if (!table_exists(table_name)) {
956960
force_load_table_metadata();
957961
}
962+
pg::table_ddl_lock_guard ddl_lock;
958963
if (table_exists(table_name)) {
959964
auto& table_data = get_table_data(table_name);
960965
auto creds = session_credentials::get_credentials();

0 commit comments

Comments
 (0)