@@ -7118,7 +7118,7 @@ void GenerateM0(Gcm* gcm)
71187118
71197119#elif defined(GCM_TABLE_4BIT)
71207120
7121- #if !defined(BIG_ENDIAN_ORDER) && !defined( WC_16BIT_CPU)
7121+ #if !defined(WC_16BIT_CPU)
71227122static WC_INLINE void Shift4_M0(byte *r8, byte *z8)
71237123{
71247124 int i;
@@ -7130,7 +7130,7 @@ static WC_INLINE void Shift4_M0(byte *r8, byte *z8)
71307130
71317131void GenerateM0(Gcm* gcm)
71327132{
7133- #if !defined(BIG_ENDIAN_ORDER) && !defined( WC_16BIT_CPU)
7133+ #if !defined(WC_16BIT_CPU)
71347134 int i;
71357135#endif
71367136 byte (*m)[WC_AES_BLOCK_SIZE] = gcm->M0;
@@ -7188,7 +7188,7 @@ void GenerateM0(Gcm* gcm)
71887188 }
71897189#endif
71907190
7191- #if !defined(BIG_ENDIAN_ORDER) && !defined( WC_16BIT_CPU)
7191+ #if !defined(WC_16BIT_CPU)
71927192 for (i = 0; i < 16; i++) {
71937193 Shift4_M0(m[16+i], m[i]);
71947194 }
@@ -7830,13 +7830,25 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c,
78307830 *
78317831 * Second half is same values rotated by 4-bits.
78327832 */
7833- #if defined(BIG_ENDIAN_ORDER) || defined( WC_16BIT_CPU)
7833+ #if defined(WC_16BIT_CPU)
78347834static const byte R[16][2] = {
78357835 {0x00, 0x00}, {0x1c, 0x20}, {0x38, 0x40}, {0x24, 0x60},
78367836 {0x70, 0x80}, {0x6c, 0xa0}, {0x48, 0xc0}, {0x54, 0xe0},
78377837 {0xe1, 0x00}, {0xfd, 0x20}, {0xd9, 0x40}, {0xc5, 0x60},
78387838 {0x91, 0x80}, {0x8d, 0xa0}, {0xa9, 0xc0}, {0xb5, 0xe0},
78397839};
#elif defined(BIG_ENDIAN_ORDER)
/* GHASH 4-bit remainder table, big-endian layout.
 *
 * R[i] (i = 0..15) is the 16-bit reduction value XORed back into the top
 * of the accumulator after Z is shifted right by 4 bits, for a dropped
 * nibble value of i.  Entries are stored so a word-wide XOR of R[...]
 * into the most significant end of Z is byte-correct on a big-endian
 * CPU (compare the byte-swapped values in the little-endian table in
 * the #else branch).
 *
 * The second 16 entries are the first 16 rotated right by a further
 * 4 bits, used with the pre-rotated half of the M0 table (m[16..31]).
 */
static const word16 R[32] = {
    0x0000, 0x1c20, 0x3840, 0x2460,
    0x7080, 0x6ca0, 0x48c0, 0x54e0,
    0xe100, 0xfd20, 0xd940, 0xc560,
    0x9180, 0x8da0, 0xa9c0, 0xb5e0,

    0x0000, 0x01c2, 0x0384, 0x0246,
    0x0708, 0x06ca, 0x048c, 0x054e,
    0x0e10, 0x0fd2, 0x0d94, 0x0c56,
    0x0918, 0x08da, 0x0a9c, 0x0b5e,
};
78407852#else
78417853static const word16 R[32] = {
78427854 0x0000, 0x201c, 0x4038, 0x6024,
@@ -7861,7 +7873,7 @@ static const word16 R[32] = {
78617873 * m: 4-bit table
78627874 * [0..15] * H
78637875 */
7864- #if defined(BIG_ENDIAN_ORDER) || defined( WC_16BIT_CPU)
7876+ #if defined(WC_16BIT_CPU)
78657877static void GMULT(byte *x, byte m[16][WC_AES_BLOCK_SIZE])
78667878{
78677879 int i, j, n;
@@ -7892,6 +7904,71 @@ static void GMULT(byte *x, byte m[16][WC_AES_BLOCK_SIZE])
78927904
78937905 XMEMCPY(x, Z, WC_AES_BLOCK_SIZE);
78947906}
#elif defined(WC_32BIT_CPU) && defined(BIG_ENDIAN_ORDER)
/* GHASH multiply: x = x * H in GF(2^128), 4-bit table, big-endian 32-bit
 * word implementation.
 *
 * Processes one byte of x per iteration, least-significant byte first.
 * Each byte contributes two 4-bit table lookups: m[0..15] holds
 * nibble * H and m[16..31] holds the same products pre-rotated right by
 * 4 bits (see GenerateM0/Shift4_M0), so only one word-wide rotate of the
 * accumulator per byte is needed.  Bits shifted out of the bottom of the
 * accumulator are folded back in via the remainder table R.
 *
 * x: 16-byte block, updated in place.
 *    NOTE(review): x and the rows of m are accessed through word32*
 *    casts - assumes 32-bit alignment of these buffers (same assumption
 *    as the existing little-endian variant); confirm against callers.
 * m: 32-row 4-bit multiplication table built by GenerateM0().
 */
static WC_INLINE void GMULT(byte *x, byte m[32][WC_AES_BLOCK_SIZE])
{
    int i;
    word32 z8[4] = {0, 0, 0, 0};   /* accumulator Z, big-endian words */
    byte a;
    word32* x8 = (word32*)x;
    word32* m8;
    byte xi;

    for (i = 15; i > 0; i--) {
        xi = x[i];

        /* XOR in (msn * H) */
        m8 = (word32*)m[xi & 0xf];
        z8[0] ^= m8[0]; z8[1] ^= m8[1]; z8[2] ^= m8[2]; z8[3] ^= m8[3];

        /* Cache top byte for remainder calculations - lost in rotate. */
        a = (byte)(z8[3] & 0xff);

        /* Rotate Z by 8-bits */
        z8[3] = (z8[2] << 24) | (z8[3] >> 8);
        z8[2] = (z8[1] << 24) | (z8[2] >> 8);
        z8[1] = (z8[0] << 24) | (z8[1] >> 8);
        z8[0] >>= 8;

        /* XOR in (msn * remainder) [pre-rotated by 4 bits] */
        z8[0] ^= ((word32)R[16 + (a & 0xf)]) << 16;

        xi >>= 4;
        /* XOR in next significant nibble (XORed with H) * remainder */
        m8 = (word32*)m[xi];
        a ^= (byte)(m8[3] >> 12) & 0xf;
        a ^= (byte)((m8[3] << 4) & 0xf0);
        z8[0] ^= ((word32)R[a >> 4]) << 16;

        /* XOR in (next significant nibble * H) [pre-rotated by 4 bits] */
        m8 = (word32*)m[16 + xi];
        z8[0] ^= m8[0]; z8[1] ^= m8[1];
        z8[2] ^= m8[2]; z8[3] ^= m8[3];
    }

    /* Final byte: only a 4-bit rotate is needed, since the low nibble's
     * product is used un-rotated and the high nibble's product comes
     * from the un-shifted half of the table. */
    xi = x[0];

    /* XOR in most significant nibble * H */
    m8 = (word32*)m[xi & 0xf];
    z8[0] ^= m8[0]; z8[1] ^= m8[1]; z8[2] ^= m8[2]; z8[3] ^= m8[3];

    /* Cache top byte for remainder calculations - lost in rotate. */
    a = (byte)(z8[3] & 0x0f);

    z8[3] = (z8[2] << 28) | (z8[3] >> 4);
    z8[2] = (z8[1] << 28) | (z8[2] >> 4);
    z8[1] = (z8[0] << 28) | (z8[1] >> 4);
    z8[0] >>= 4;

    /* XOR in most significant nibble * remainder */
    z8[0] ^= ((word32)R[a]) << 16;
    /* XOR in next significant nibble * H */
    m8 = (word32*)m[xi >> 4];
    z8[0] ^= m8[0]; z8[1] ^= m8[1]; z8[2] ^= m8[2]; z8[3] ^= m8[3];

    /* Write back result. */
    x8[0] = z8[0]; x8[1] = z8[1]; x8[2] = z8[2]; x8[3] = z8[3];
}
78957972#elif defined(WC_32BIT_CPU)
78967973static WC_INLINE void GMULT(byte *x, byte m[32][WC_AES_BLOCK_SIZE])
78977974{
@@ -7966,6 +8043,70 @@ static WC_INLINE void GMULT(byte *x, byte m[32][WC_AES_BLOCK_SIZE])
79668043 /* Write back result. */
79678044 x8[0] = z8[0]; x8[1] = z8[1]; x8[2] = z8[2]; x8[3] = z8[3];
79688045}
#elif defined(WC_64BIT_CPU) && defined(BIG_ENDIAN_ORDER)
/* GHASH multiply: x = x * H in GF(2^128), 4-bit table, big-endian 64-bit
 * word implementation.
 *
 * Same algorithm as the 32-bit big-endian variant above, but with the
 * 128-bit accumulator held in two word64s: one byte of x per iteration,
 * least-significant byte first, two table lookups per byte (m[0..15] is
 * nibble * H, m[16..31] the same pre-rotated right by 4 bits), with the
 * bits shifted off the bottom folded back in through R.
 *
 * x: 16-byte block, updated in place.
 *    NOTE(review): x and the rows of m are accessed through word64*
 *    casts - assumes 64-bit alignment of these buffers (same assumption
 *    as the existing little-endian variant); confirm against callers.
 * m: 32-row 4-bit multiplication table built by GenerateM0().
 */
static WC_INLINE void GMULT(byte *x, byte m[32][WC_AES_BLOCK_SIZE])
{
    int i;
    word64 z8[2] = {0, 0};   /* accumulator Z, big-endian words */
    byte a;
    word64* x8 = (word64*)x;
    word64* m8;
    byte xi;

    for (i = 15; i > 0; i--) {
        xi = x[i];

        /* XOR in (msn * H) */
        m8 = (word64*)m[xi & 0xf];
        z8[0] ^= m8[0];
        z8[1] ^= m8[1];

        /* Cache top byte for remainder calculations - lost in rotate. */
        a = (byte)(z8[1] & 0xff);

        /* Rotate Z by 8-bits */
        z8[1] = (z8[0] << 56) | (z8[1] >> 8);
        z8[0] >>= 8;

        /* XOR in (next significant nibble * H) [pre-rotated by 4 bits] */
        m8 = (word64*)m[16 + (xi >> 4)];
        z8[0] ^= m8[0];
        z8[1] ^= m8[1];

        /* XOR in (msn * remainder) [pre-rotated by 4 bits] */
        z8[0] ^= ((word64)R[16 + (a & 0xf)]) << 48;
        /* XOR in next significant nibble (XORed with H) * remainder */
        m8 = (word64*)m[xi >> 4];
        a ^= (byte)(m8[1] >> 12) & 0xf;
        a ^= (byte)((m8[1] << 4) & 0xf0);
        z8[0] ^= ((word64)R[a >> 4]) << 48;
    }

    /* Final byte: only a 4-bit rotate is needed, since the low nibble's
     * product is used un-rotated and the high nibble's product comes
     * from the un-shifted half of the table. */
    xi = x[0];

    /* XOR in most significant nibble * H */
    m8 = (word64*)m[xi & 0xf];
    z8[0] ^= m8[0];
    z8[1] ^= m8[1];

    /* Cache top byte for remainder calculations - lost in rotate. */
    a = (byte)(z8[1] & 0x0f);

    /* Rotate z by 4-bits */
    z8[1] = (z8[0] << 60) | (z8[1] >> 4);
    z8[0] >>= 4;

    /* XOR in next significant nibble * H */
    m8 = (word64*)m[xi >> 4];
    z8[0] ^= m8[0];
    z8[1] ^= m8[1];
    /* XOR in most significant nibble * remainder */
    z8[0] ^= ((word64)R[a]) << 48;

    /* Write back result. */
    x8[0] = z8[0];
    x8[1] = z8[1];
}
79698110#else
79708111static WC_INLINE void GMULT(byte *x, byte m[32][WC_AES_BLOCK_SIZE])
79718112{
0 commit comments