Skip to content

Commit 1dfa4d1

Browse files
authored
Merge pull request #9488 from SparkiDev/aes_gcm_4bit_be
AES-GCM, 4-bit table, Big Endian: fast impl of GMULT
2 parents 003f238 + 697bc47 commit 1dfa4d1

3 files changed

Lines changed: 148 additions & 7 deletions

File tree

configure.ac

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3179,7 +3179,7 @@ then
31793179
ENABLED_AESGCM=no
31803180
fi
31813181

3182-
if test "$ENABLED_AESGCM" = "yes" && test "$ac_cv_c_bigendian" != "yes"
3182+
if test "$ENABLED_AESGCM" = "yes"
31833183
then
31843184
ENABLED_AESGCM="4bit"
31853185
fi

wolfcrypt/src/aes.c

Lines changed: 146 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7118,7 +7118,7 @@ void GenerateM0(Gcm* gcm)
71187118

71197119
#elif defined(GCM_TABLE_4BIT)
71207120

7121-
#if !defined(BIG_ENDIAN_ORDER) && !defined(WC_16BIT_CPU)
7121+
#if !defined(WC_16BIT_CPU)
71227122
static WC_INLINE void Shift4_M0(byte *r8, byte *z8)
71237123
{
71247124
int i;
@@ -7130,7 +7130,7 @@ static WC_INLINE void Shift4_M0(byte *r8, byte *z8)
71307130

71317131
void GenerateM0(Gcm* gcm)
71327132
{
7133-
#if !defined(BIG_ENDIAN_ORDER) && !defined(WC_16BIT_CPU)
7133+
#if !defined(WC_16BIT_CPU)
71347134
int i;
71357135
#endif
71367136
byte (*m)[WC_AES_BLOCK_SIZE] = gcm->M0;
@@ -7188,7 +7188,7 @@ void GenerateM0(Gcm* gcm)
71887188
}
71897189
#endif
71907190

7191-
#if !defined(BIG_ENDIAN_ORDER) && !defined(WC_16BIT_CPU)
7191+
#if !defined(WC_16BIT_CPU)
71927192
for (i = 0; i < 16; i++) {
71937193
Shift4_M0(m[16+i], m[i]);
71947194
}
@@ -7830,13 +7830,25 @@ void GHASH(Gcm* gcm, const byte* a, word32 aSz, const byte* c,
78307830
*
78317831
* Second half is same values rotated by 4-bits.
78327832
*/
7833-
#if defined(BIG_ENDIAN_ORDER) || defined(WC_16BIT_CPU)
7833+
#if defined(WC_16BIT_CPU)
78347834
static const byte R[16][2] = {
78357835
{0x00, 0x00}, {0x1c, 0x20}, {0x38, 0x40}, {0x24, 0x60},
78367836
{0x70, 0x80}, {0x6c, 0xa0}, {0x48, 0xc0}, {0x54, 0xe0},
78377837
{0xe1, 0x00}, {0xfd, 0x20}, {0xd9, 0x40}, {0xc5, 0x60},
78387838
{0x91, 0x80}, {0x8d, 0xa0}, {0xa9, 0xc0}, {0xb5, 0xe0},
78397839
};
7840+
#elif defined(BIG_ENDIAN_ORDER)
7841+
/* Remainder table for big-endian builds that use the 4-bit GMULT.
 *
 * R[0..15]  : 16-bit remainder value for each 4-bit index, stored with the
 *             high byte in the upper half of the word16.
 * R[16..31] : the same sixteen values shifted right by 4 bits, looked up
 *             with index (16 + nibble).
 */
static const word16 R[32] = {
    /* [0..15] */
    0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0,
    0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0,

    /* [16..31]: entries above, >> 4 */
    0x0000, 0x01c2, 0x0384, 0x0246, 0x0708, 0x06ca, 0x048c, 0x054e,
    0x0e10, 0x0fd2, 0x0d94, 0x0c56, 0x0918, 0x08da, 0x0a9c, 0x0b5e,
};
78407852
#else
78417853
static const word16 R[32] = {
78427854
0x0000, 0x201c, 0x4038, 0x6024,
@@ -7861,7 +7873,7 @@ static const word16 R[32] = {
78617873
* m: 4-bit table
78627874
* [0..15] * H
78637875
*/
7864-
#if defined(BIG_ENDIAN_ORDER) || defined(WC_16BIT_CPU)
7876+
#if defined(WC_16BIT_CPU)
78657877
static void GMULT(byte *x, byte m[16][WC_AES_BLOCK_SIZE])
78667878
{
78677879
int i, j, n;
@@ -7892,6 +7904,71 @@ static void GMULT(byte *x, byte m[16][WC_AES_BLOCK_SIZE])
78927904

78937905
XMEMCPY(x, Z, WC_AES_BLOCK_SIZE);
78947906
}
7907+
#elif defined(WC_32BIT_CPU) && defined(BIG_ENDIAN_ORDER)
7908+
/* GMULT for 32-bit big-endian CPUs using the 4-bit table.
 *
 * Computes x = x * H in place, consuming x from its last byte to its first
 * and accumulating into a 128-bit value Z held as four word32 (z8[0] holds
 * the first four bytes of the block, z8[3] the last four).
 *
 * x  [in,out] 16-byte block. The result is written back through a word32
 *             pointer, so x is assumed to be suitably aligned for word32
 *             access - NOTE(review): confirm against callers.
 * m  [in]     32-entry table: m[0..15] are the multiples of H by a 4-bit
 *             value, m[16..31] are the same entries pre-shifted by 4 bits.
 */
static WC_INLINE void GMULT(byte *x, byte m[32][WC_AES_BLOCK_SIZE])
{
    int i;
    word32 z8[4] = {0, 0, 0, 0};
    byte a;
    word32* x8 = (word32*)x;
    word32* m8;
    byte xi;

    /* Bytes 15..1: each iteration folds in both nibbles of x[i] and moves
     * Z along by a full byte. */
    for (i = 15; i > 0; i--) {
        xi = x[i];

        /* XOR in the table entry for the low nibble of x[i]. */
        m8 = (word32*)m[xi & 0xf];
        z8[0] ^= m8[0]; z8[1] ^= m8[1]; z8[2] ^= m8[2]; z8[3] ^= m8[3];

        /* Cache the last byte of Z (low byte of z8[3] on big-endian) - it
         * is shifted out below and is needed for the remainder lookups. */
        a = (byte)(z8[3] & 0xff);

        /* Shift Z right by 8 bits. */
        z8[3] = (z8[2] << 24) | (z8[3] >> 8);
        z8[2] = (z8[1] << 24) | (z8[2] >> 8);
        z8[1] = (z8[0] << 24) | (z8[1] >> 8);
        z8[0] >>= 8;

        /* XOR in remainder for the low nibble of the lost byte, taken from
         * the pre-shifted half of R and placed in the top 16 bits of Z. */
        z8[0] ^= ((word32)R[16 + (a & 0xf)]) << 16;

        xi >>= 4;
        /* The second nibble shifted out is the high nibble of the lost byte
         * XORed with the low nibble of the last byte of m[xi]. Only the
         * high nibble of 'a' is read from here on, so only that nibble is
         * updated. */
        m8 = (word32*)m[xi];
        a ^= (byte)((m8[3] << 4) & 0xf0);
        z8[0] ^= ((word32)R[a >> 4]) << 16;

        /* XOR in the pre-shifted table entry for the high nibble of x[i]. */
        m8 = (word32*)m[16 + xi];
        z8[0] ^= m8[0]; z8[1] ^= m8[1];
        z8[2] ^= m8[2]; z8[3] ^= m8[3];
    }

    /* Byte 0: same steps, but Z is only shifted by 4 bits in total. */
    xi = x[0];

    /* XOR in the table entry for the low nibble of x[0]. */
    m8 = (word32*)m[xi & 0xf];
    z8[0] ^= m8[0]; z8[1] ^= m8[1]; z8[2] ^= m8[2]; z8[3] ^= m8[3];

    /* Cache the low nibble of the last byte of Z - lost in the shift. */
    a = (byte)(z8[3] & 0x0f);

    /* Shift Z right by 4 bits. */
    z8[3] = (z8[2] << 28) | (z8[3] >> 4);
    z8[2] = (z8[1] << 28) | (z8[2] >> 4);
    z8[1] = (z8[0] << 28) | (z8[1] >> 4);
    z8[0] >>= 4;

    /* XOR in remainder for the lost nibble (unshifted half of R). */
    z8[0] ^= ((word32)R[a]) << 16;
    /* XOR in the table entry for the high nibble of x[0]. */
    m8 = (word32*)m[xi >> 4];
    z8[0] ^= m8[0]; z8[1] ^= m8[1]; z8[2] ^= m8[2]; z8[3] ^= m8[3];

    /* Write back result. */
    x8[0] = z8[0]; x8[1] = z8[1]; x8[2] = z8[2]; x8[3] = z8[3];
}
78957972
#elif defined(WC_32BIT_CPU)
78967973
static WC_INLINE void GMULT(byte *x, byte m[32][WC_AES_BLOCK_SIZE])
78977974
{
@@ -7966,6 +8043,70 @@ static WC_INLINE void GMULT(byte *x, byte m[32][WC_AES_BLOCK_SIZE])
79668043
/* Write back result. */
79678044
x8[0] = z8[0]; x8[1] = z8[1]; x8[2] = z8[2]; x8[3] = z8[3];
79688045
}
8046+
#elif defined(WC_64BIT_CPU) && defined(BIG_ENDIAN_ORDER)
8047+
/* GMULT for 64-bit big-endian CPUs using the 4-bit table.
 *
 * Computes x = x * H in place, consuming x from its last byte to its first
 * and accumulating into a 128-bit value Z held as two word64 (z8[0] holds
 * the first eight bytes of the block, z8[1] the last eight).
 *
 * x  [in,out] 16-byte block. The result is written back through a word64
 *             pointer, so x is assumed to be suitably aligned for word64
 *             access - NOTE(review): confirm against callers.
 * m  [in]     32-entry table: m[0..15] are the multiples of H by a 4-bit
 *             value, m[16..31] are the same entries pre-shifted by 4 bits.
 */
static WC_INLINE void GMULT(byte *x, byte m[32][WC_AES_BLOCK_SIZE])
{
    int i;
    word64 z8[2] = {0, 0};
    byte a;
    word64* x8 = (word64*)x;
    word64* m8;
    byte xi;

    /* Bytes 15..1: each iteration folds in both nibbles of x[i] and moves
     * Z along by a full byte. */
    for (i = 15; i > 0; i--) {
        xi = x[i];

        /* XOR in the table entry for the low nibble of x[i]. */
        m8 = (word64*)m[xi & 0xf];
        z8[0] ^= m8[0];
        z8[1] ^= m8[1];

        /* Cache the last byte of Z (low byte of z8[1] on big-endian) - it
         * is shifted out below and is needed for the remainder lookups. */
        a = (byte)(z8[1] & 0xff);

        /* Shift Z right by 8 bits. */
        z8[1] = (z8[0] << 56) | (z8[1] >> 8);
        z8[0] >>= 8;

        /* XOR in the pre-shifted table entry for the high nibble of x[i]. */
        m8 = (word64*)m[16 + (xi >> 4)];
        z8[0] ^= m8[0];
        z8[1] ^= m8[1];

        /* XOR in remainder for the low nibble of the lost byte, taken from
         * the pre-shifted half of R and placed in the top 16 bits of Z. */
        z8[0] ^= ((word64)R[16 + (a & 0xf)]) << 48;
        /* The second nibble shifted out is the high nibble of the lost byte
         * XORed with the low nibble of the last byte of m[xi >> 4]. Only
         * the high nibble of 'a' is read from here on, so only that nibble
         * is updated. */
        m8 = (word64*)m[xi >> 4];
        a ^= (byte)((m8[1] << 4) & 0xf0);
        z8[0] ^= ((word64)R[a >> 4]) << 48;
    }

    /* Byte 0: same steps, but Z is only shifted by 4 bits in total. */
    xi = x[0];

    /* XOR in the table entry for the low nibble of x[0]. */
    m8 = (word64*)m[xi & 0xf];
    z8[0] ^= m8[0];
    z8[1] ^= m8[1];

    /* Cache the low nibble of the last byte of Z - lost in the shift. */
    a = (byte)(z8[1] & 0x0f);

    /* Shift Z right by 4 bits. */
    z8[1] = (z8[0] << 60) | (z8[1] >> 4);
    z8[0] >>= 4;

    /* XOR in the table entry for the high nibble of x[0]. */
    m8 = (word64*)m[xi >> 4];
    z8[0] ^= m8[0];
    z8[1] ^= m8[1];
    /* XOR in remainder for the lost nibble (unshifted half of R). */
    z8[0] ^= ((word64)R[a]) << 48;

    /* Write back result. */
    x8[0] = z8[0];
    x8[1] = z8[1];
}
79698110
#else
79708111
static WC_INLINE void GMULT(byte *x, byte m[32][WC_AES_BLOCK_SIZE])
79718112
{

wolfssl/wolfcrypt/aes.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ typedef struct Gcm {
4747
/* key-based fast multiplication table. */
4848
ALIGN16 byte M0[256][16];
4949
#elif defined(GCM_TABLE_4BIT)
50-
#if defined(BIG_ENDIAN_ORDER) || defined(WC_16BIT_CPU)
50+
#if defined(WC_16BIT_CPU)
5151
ALIGN16 byte M0[16][16];
5252
#else
5353
ALIGN16 byte M0[32][16];

0 commit comments

Comments
 (0)