Skip to content

Commit 38859cc

Browse files
authored
[Issue #531] support configurable endianness (#534)
The endianness of Pixels writer is configured by column.chunk.little.endian=true/false. The endianness is then saved in the ColumnChunkIndex of each column chunk. The Pixels column readers will check the ColumnChunkIndex to use the right endianness. This is currently implemented in the Java codebase, and the C++ codebase should at least check and report errors for unsupported endianness.
1 parent 0686d09 commit 38859cc

59 files changed

Lines changed: 374 additions & 360 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

pixels-cache/src/main/java/io/pixelsdb/pixels/cache/PixelsPhysicalReader.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import io.pixelsdb.pixels.core.PixelsProto;
2626

2727
import java.io.IOException;
28+
import java.nio.ByteOrder;
2829

2930
/**
3031
* @author guodong
@@ -48,7 +49,7 @@ private PixelsProto.FileTail readFileTail()
4849
{
4950
long fileLen = physicalReader.getFileLength();
5051
physicalReader.seek(fileLen - Long.BYTES);
51-
long fileTailOffset = physicalReader.readLong();
52+
long fileTailOffset = physicalReader.readLong(ByteOrder.BIG_ENDIAN);
5253
int fileTailLength = (int) (fileLen - fileTailOffset - Long.BYTES);
5354
physicalReader.seek(fileTailOffset);
5455
byte[] fileTailBuffer = new byte[fileTailLength];

pixels-common/src/main/java/io/pixelsdb/pixels/common/physical/PhysicalReader.java

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.io.Closeable;
2323
import java.io.IOException;
2424
import java.nio.ByteBuffer;
25+
import java.nio.ByteOrder;
2526
import java.util.concurrent.CompletableFuture;
2627

2728
/**
@@ -33,8 +34,21 @@ public interface PhysicalReader extends Closeable
3334
{
3435
long getFileLength() throws IOException;
3536

37+
/**
38+
* Set the current offset to the desired value.
39+
* @param desired the desired offset
40+
* @throws IOException
41+
*/
3642
void seek(long desired) throws IOException;
3743

44+
/**
45+
* Read a byte buffer of the given length from the current offset. The byte order
46+
* of the byte buffer is default byte order of {@link ByteBuffer}. Please check and
47+
* reset it if necessary.
48+
* @param length the number of bytes to read
49+
* @return the byte buffer been read
50+
* @throws IOException
51+
*/
3852
ByteBuffer readFully(int length) throws IOException;
3953

4054
void readFully(byte[] buffer) throws IOException;
@@ -61,9 +75,21 @@ default CompletableFuture<ByteBuffer> readAsync(long offset, int length) throws
6175
throw new UnsupportedOperationException("asynchronous read is not supported for " + getStorageScheme().name());
6276
}
6377

64-
long readLong() throws IOException;
78+
/**
79+
* Read an eight-byte signed integer from the current offset using the specified byte order.
80+
* @param byteOrder the byte order
81+
* @return the integer been read
82+
* @throws IOException
83+
*/
84+
long readLong(ByteOrder byteOrder) throws IOException;
6585

66-
int readInt() throws IOException;
86+
/**
87+
* Read a four-byte signed integer from the current offset using the specified byte order.
88+
* @param byteOrder the byte order
89+
* @return the integer been read
90+
* @throws IOException
91+
*/
92+
int readInt(ByteOrder byteOrder) throws IOException;
6793

6894
void close() throws IOException;
6995

@@ -78,7 +104,7 @@ default CompletableFuture<ByteBuffer> readAsync(long offset, int length) throws
78104
/**
79105
* For a file or object in the storage, it may have one or more
80106
* blocks. Each block has its unique id. This method returns the
81-
* block id of the current block that is been reading.
107+
* block id of the current block that is being reading.
82108
*
83109
* For local fs, each file has only one block id, which is also
84110
* the file id.

pixels-common/src/main/resources/pixels.properties

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,21 +54,29 @@ cache.read.direct=false
5454

5555
###### storage engine settings ######
5656

57-
### pixels reader and writer settings ###
58-
# properties for pixels writer
57+
### pixels reader, writer, and compactor settings ###
58+
# the pixels stride (number of values in a pixel) for pixels writer
5959
pixel.stride=10000
60+
# the row group size in bytes for pixels writer
6061
row.group.size=268435456
61-
# The alignment is for SIMD and its unit is byte
62+
# the chunk alignment for pixels writer, it is for SIMD and its unit is byte
6263
column.chunk.alignment=32
64+
# whether column chunks are encoded in pixels writer
6365
column.chunk.encoding=true
66+
# the little-endian is used on the column chunks in pixels writer
67+
column.chunk.little.endian=true
68+
# the block size for block-wise storage systems such as HDFS
6469
block.size=2147483648
70+
# the number of replications of each block for block-wise storage systems such as HDFS
6571
block.replication=1
72+
# for block-wise storage systems, whether padding the leftover space in current block
6673
block.padding=true
67-
compression.block.size=1
74+
# the number of bytes to be compressed as a block using heavy compression algorithms
75+
compression.block.size=1048576
76+
# for pixels compactor how many row groups are compacted into one file
77+
compact.factor=32
6878
# row batch size for pixels record reader, default value is 10000
6979
row.batch.size=10000
70-
# how many row groups are compacted into one file
71-
compact.factor=32
7280

7381
### file storage and I/O ###
7482
# the scheme of the storage systems that are enabled, e.g., hdfs,file,s3,gcs,minio,redis

pixels-core/src/main/java/io/pixelsdb/pixels/core/PixelsReaderImpl.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import javax.annotation.concurrent.NotThreadSafe;
3939
import java.io.IOException;
4040
import java.nio.ByteBuffer;
41+
import java.nio.ByteOrder;
4142
import java.util.LinkedList;
4243
import java.util.List;
4344
import java.util.Random;
@@ -171,7 +172,7 @@ public PixelsReader build()
171172
// get FileTail
172173
long fileLen = fsReader.getFileLength();
173174
fsReader.seek(fileLen - Long.BYTES);
174-
long fileTailOffset = fsReader.readLong();
175+
long fileTailOffset = fsReader.readLong(ByteOrder.BIG_ENDIAN);
175176
int fileTailLength = (int) (fileLen - fileTailOffset - Long.BYTES);
176177
fsReader.seek(fileTailOffset);
177178
ByteBuffer fileTailBuffer = fsReader.readFully(fileTailLength);

pixels-core/src/main/java/io/pixelsdb/pixels/core/PixelsWriter.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,7 @@
2929
*
3030
* @author guodong
3131
*/
32-
public interface PixelsWriter
33-
extends Closeable
32+
public interface PixelsWriter extends Closeable
3433
{
3534
/**
3635
* Add row batch into the file that is not hash partitioned.

pixels-core/src/main/java/io/pixelsdb/pixels/core/PixelsWriterImpl.java

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import javax.annotation.concurrent.NotThreadSafe;
3939
import java.io.IOException;
4040
import java.nio.ByteBuffer;
41+
import java.nio.ByteOrder;
4142
import java.util.LinkedList;
4243
import java.util.List;
4344
import java.util.Optional;
@@ -68,6 +69,22 @@ public class PixelsWriterImpl implements PixelsWriter
6869
{
6970
private static final Logger LOGGER = LogManager.getLogger(PixelsWriterImpl.class);
7071

72+
static final ByteOrder WRITER_ENDIAN;
73+
74+
static
75+
{
76+
boolean littleEndian = Boolean.parseBoolean(
77+
ConfigFactory.Instance().getProperty("column.chunk.little.endian"));
78+
if (littleEndian)
79+
{
80+
WRITER_ENDIAN = ByteOrder.LITTLE_ENDIAN;
81+
}
82+
else
83+
{
84+
WRITER_ENDIAN = ByteOrder.BIG_ENDIAN;
85+
}
86+
}
87+
7188
private final TypeDescription schema;
7289
private final int pixelStride;
7390
private final int rowGroupSize;
@@ -143,7 +160,7 @@ private PixelsWriterImpl(
143160
fileColStatRecorders = new StatsRecorder[children.size()];
144161
for (int i = 0; i < children.size(); ++i)
145162
{
146-
columnWriters[i] = newColumnWriter(children.get(i), pixelStride, encoding);
163+
columnWriters[i] = newColumnWriter(children.get(i), pixelStride, encoding, WRITER_ENDIAN);
147164
fileColStatRecorders[i] = StatsRecorder.create(children.get(i));
148165
}
149166

@@ -610,7 +627,7 @@ private void writeRowGroup() throws IOException
610627
* We temporarily fix this problem by creating a new column writer for each row group.
611628
*/
612629
// writer.reset();
613-
columnWriters[i] = newColumnWriter(children.get(i), pixelStride, encoding);
630+
columnWriters[i] = newColumnWriter(children.get(i), pixelStride, encoding, WRITER_ENDIAN);
614631
}
615632

616633
// put curRowGroupIndex into rowGroupFooter

pixels-core/src/main/java/io/pixelsdb/pixels/core/compactor/PixelsCompactor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434

3535
import java.io.IOException;
3636
import java.nio.ByteBuffer;
37+
import java.nio.ByteOrder;
3738
import java.util.LinkedList;
3839
import java.util.List;
3940
import java.util.TimeZone;
@@ -250,7 +251,7 @@ public PixelsCompactor build()
250251
// get FileTail
251252
long fileLen = fsReader.getFileLength();
252253
fsReader.seek(fileLen - Long.BYTES);
253-
long fileTailOffset = fsReader.readLong();
254+
long fileTailOffset = fsReader.readLong(ByteOrder.BIG_ENDIAN);
254255
int fileTailLength = (int) (fileLen - fileTailOffset - Long.BYTES);
255256
fsReader.seek(fileTailOffset);
256257
byte[] fileTailBuffer = new byte[fileTailLength];

pixels-core/src/main/java/io/pixelsdb/pixels/core/encoding/Dictionary.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ public interface Dictionary
6767
* This method prepares a {@link VisitorContext} instance for each item (key) in this dictionary,
6868
* and calls {@link Visitor#visit(VisitorContext)} to visit the item.
6969
* <p>
70-
* The items <b>MUST</b> be visited in the ascending order of the item's key position, i.e., the encoded id of the item in this dictionary.
70+
* The items <b>MUST</b> be visited in the ascending order of the item's key position, i.e.,
71+
* the encoded id of the item in this dictionary.
7172
* The visitor can write (serialize) the dictionary item to an output stream.
7273
* </p>
7374
* @param visitor the visitor that is going to serialize the dictionary to an output stream.

pixels-core/src/main/java/io/pixelsdb/pixels/core/encoding/Encoder.java

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -49,38 +49,32 @@ public byte[] encode(char[] values, int offset, int length)
4949
throw new PixelsEncodingException("Encoding char values is not supported");
5050
}
5151

52-
public byte[] encode(byte[] values)
53-
throws IOException
52+
public byte[] encode(byte[] values) throws IOException
5453
{
5554
throw new PixelsEncodingException("Encoding byte values is not supported");
5655
}
5756

58-
public byte[] encode(byte[] values, int offset, int length)
59-
throws IOException
57+
public byte[] encode(byte[] values, int offset, int length) throws IOException
6058
{
6159
throw new PixelsEncodingException("Encoding byte values is not supported");
6260
}
6361

64-
public byte[] encode(long[] values)
65-
throws IOException
62+
public byte[] encode(long[] values) throws IOException
6663
{
6764
throw new PixelsEncodingException("Encoding long values is not supported");
6865
}
6966

70-
public byte[] encode(long[] values, int offset, int length)
71-
throws IOException
67+
public byte[] encode(long[] values, int offset, int length) throws IOException
7268
{
7369
throw new PixelsEncodingException("Encoding long values is not supported");
7470
}
7571

76-
public byte[] encode(int[] values)
77-
throws IOException
72+
public byte[] encode(int[] values) throws IOException
7873
{
7974
throw new PixelsEncodingException("Encoding int values is not supported");
8075
}
8176

82-
public byte[] encode(int[] values, int offset, int length)
83-
throws IOException
77+
public byte[] encode(int[] values, int offset, int length) throws IOException
8478
{
8579
throw new PixelsEncodingException("Encoding int values is not supported");
8680
}
@@ -100,6 +94,5 @@ public byte[] encode(double[] values)
10094
throw new PixelsEncodingException("Encoding double values is not supported");
10195
}
10296

103-
abstract public void close()
104-
throws IOException;
97+
abstract public void close() throws IOException;
10598
}

pixels-core/src/main/java/io/pixelsdb/pixels/core/encoding/IntDecoder.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,5 @@
2626
*/
2727
public abstract class IntDecoder extends Decoder
2828
{
29-
public abstract long next()
30-
throws IOException;
29+
public abstract long next() throws IOException;
3130
}

0 commit comments

Comments
 (0)