Skip to content

Commit 5694d9f

Browse files
committed
Fix - use column index to store decoded data in RNTuple
This makes the decoded data easier to process
1 parent d805d8e commit 5694d9f

1 file changed

Lines changed: 19 additions & 49 deletions

File tree

modules/rntuple.mjs

Lines changed: 19 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -925,24 +925,23 @@ async function readHeaderFooter(tuple) {
925925
function readEntry(rntuple, fieldName, entryIndex) {
926926
const builder = rntuple.builder,
927927
field = builder.fieldDescriptors.find(f => f.fieldName === fieldName),
928-
fieldData = rntuple._clusterData[fieldName];
928+
columns = rntuple.fieldToColumns[fieldName];
929929

930930
if (!field)
931931
throw new Error(`No descriptor for field ${fieldName}`);
932-
if (!fieldData)
933-
throw new Error(`No data for field ${fieldName}`);
932+
if (!columns)
933+
throw new Error(`No columns field ${fieldName}`);
934934

935-
// Detect and decode string fields
936-
if (Array.isArray(fieldData) && fieldData.length === 2) {
937-
const [offsets, payload] = fieldData,
935+
if (field.typeName === 'std::string') {
936+
// string extracted from two columns
937+
const offsets = rntuple._clusterData[columns[0].index][0],
938+
payload = rntuple._clusterData[columns[1].index][0],
938939
start = entryIndex === 0 ? 0 : Number(offsets[entryIndex - 1]),
939-
end = Number(offsets[entryIndex]),
940-
decoded = payload.slice(start, end).join(''); // Convert to string
941-
return decoded;
940+
end = Number(offsets[entryIndex]);
941+
return payload.slice(start, end).join(''); // Convert to string
942942
}
943-
944-
// Fallback: primitive type (e.g. int, float)
945-
return fieldData[0][entryIndex];
943+
const values = rntuple._clusterData[columns[0].index];
944+
return values[0][entryIndex];
946945
}
947946

948947
/** @summary Return field name for specified branch index
@@ -1040,7 +1039,7 @@ function readNextCluster(rntuple, selector) {
10401039
});
10411040

10421041
return Promise.all(unzipPromises).then(unzipBlobs => {
1043-
rntuple._clusterData = {}; // store deserialized data per field
1042+
rntuple._clusterData = {}; // store deserialized data per column index
10441043

10451044
for (let i = 0; i < unzipBlobs.length; ++i) {
10461045
const blob = unzipBlobs[i];
@@ -1051,54 +1050,25 @@ function readNextCluster(rntuple, selector) {
10511050
page,
10521051
colDesc
10531052
} = pages[i],
1054-
field = builder.fieldDescriptors[colDesc.fieldId],
1055-
values = builder.deserializePage(blob, colDesc, page);
1053+
field = builder.fieldDescriptors[colDesc.fieldId],
1054+
values = builder.deserializePage(blob, colDesc, page);
10561055

10571056
// Support multiple representations (e.g., string fields with offsets + payload)
1058-
if (!rntuple._clusterData[field.fieldName])
1059-
rntuple._clusterData[field.fieldName] = [];
1060-
1061-
// splitting string fields into offset and payload components
1062-
if (field.typeName === 'std::string') {
1063-
if (
1064-
colDesc.coltype === ENTupleColumnType.kIndex64 ||
1065-
colDesc.coltype === ENTupleColumnType.kIndex32 ||
1066-
colDesc.coltype === ENTupleColumnType.kSplitIndex64 ||
1067-
colDesc.coltype === ENTupleColumnType.kSplitIndex32
1068-
) // Index64/Index32
1069-
rntuple._clusterData[field.fieldName][0] = values; // Offsets
1070-
else if (colDesc.coltype === ENTupleColumnType.kChar)
1071-
rntuple._clusterData[field.fieldName][1] = values; // Payload
1072-
else
1073-
throw new Error(`Unsupported column type for string field: ${colDesc.coltype}`);
1074-
} else
1075-
rntuple._clusterData[field.fieldName][0] = values;
1076-
}
1057+
if (!rntuple._clusterData[colDesc.index])
1058+
rntuple._clusterData[colDesc.index] = [];
10771059

1078-
// Ensure string fields have ending offset for proper reconstruction of the last entry
1079-
for (const fieldName of selectedFields) {
1080-
const field = builder.fieldDescriptors.find(f => f.fieldName === fieldName),
1081-
colData = rntuple._clusterData[fieldName];
1082-
if (field.typeName === 'std::string') {
1083-
if (!Array.isArray(colData) || colData.length !== 2)
1084-
throw new Error(`String field '${fieldName}' must have 2 columns`);
1085-
if (colData[0].length !== builder.clusterSummaries[clusterIndex].numEntries)
1086-
throw new Error(`Malformed string field '${fieldName}': missing final offset`);
1087-
}
1060+
rntuple._clusterData[colDesc.index].push(values);
10881061
}
10891062

10901063
const numEntries = clusterSummary.numEntries;
10911064
for (let i = 0; i < numEntries; ++i) {
10921065
for (let b = 0; b < selector.numBranches(); ++b) {
10931066
const fieldName = getSelectorFieldName(selector, b),
1094-
tgtName = selector.nameOfBranch(b),
1095-
values = rntuple._clusterData[fieldName];
1067+
tgtName = selector.nameOfBranch(b);
10961068

1097-
if (!values)
1098-
throw new Error(`Missing values for selected field: ${fieldName}`);
10991069
selector.tgtobj[tgtName] = readEntry(rntuple, fieldName, i);
11001070
}
1101-
selector.Process();
1071+
selector.Process(i);
11021072
}
11031073

11041074
selector.Terminate(true);

0 commit comments

Comments
 (0)