Skip to content

Commit 440fb70

Browse files
authored
Merge pull request #10023 from SparkiDev/asm_gen_fixes_2
ASM generation fixes
2 parents 3e820e5 + ec958de commit 440fb70

22 files changed

Lines changed: 493 additions & 467 deletions

wolfcrypt/src/aes_xts_asm.S

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1444,7 +1444,6 @@ AES_XTS_init_avx1:
14441444
.p2align 4
14451445
_AES_XTS_init_avx1:
14461446
#endif /* __APPLE__ */
1447-
movl %edx, %eax
14481447
vmovdqu (%rdi), %xmm0
14491448
# aes_enc_block
14501449
vpxor (%rsi), %xmm0, %xmm0
@@ -1466,13 +1465,13 @@ _AES_XTS_init_avx1:
14661465
vaesenc %xmm2, %xmm0, %xmm0
14671466
vmovdqu 144(%rsi), %xmm2
14681467
vaesenc %xmm2, %xmm0, %xmm0
1469-
cmpl $11, %eax
1468+
cmpl $11, %edx
14701469
vmovdqu 160(%rsi), %xmm2
14711470
jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last
14721471
vaesenc %xmm2, %xmm0, %xmm0
14731472
vmovdqu 176(%rsi), %xmm3
14741473
vaesenc %xmm3, %xmm0, %xmm0
1475-
cmpl $13, %eax
1474+
cmpl $13, %edx
14761475
vmovdqu 192(%rsi), %xmm2
14771476
jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last
14781477
vaesenc %xmm2, %xmm0, %xmm0

wolfcrypt/src/aes_xts_asm.asm

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1456,7 +1456,6 @@ _text ENDS
14561456
IFDEF HAVE_INTEL_AVX1
14571457
_text SEGMENT READONLY PARA
14581458
AES_XTS_init_avx1 PROC
1459-
mov eax, r8d
14601459
vmovdqu xmm0, OWORD PTR [rcx]
14611460
; aes_enc_block
14621461
vpxor xmm0, xmm0, [rdx]
@@ -1478,13 +1477,13 @@ AES_XTS_init_avx1 PROC
14781477
vaesenc xmm0, xmm0, xmm2
14791478
vmovdqu xmm2, OWORD PTR [rdx+144]
14801479
vaesenc xmm0, xmm0, xmm2
1481-
cmp eax, 11
1480+
cmp r8d, 11
14821481
vmovdqu xmm2, OWORD PTR [rdx+160]
14831482
jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last
14841483
vaesenc xmm0, xmm0, xmm2
14851484
vmovdqu xmm3, OWORD PTR [rdx+176]
14861485
vaesenc xmm0, xmm0, xmm3
1487-
cmp eax, 13
1486+
cmp r8d, 13
14881487
vmovdqu xmm2, OWORD PTR [rdx+192]
14891488
jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last
14901489
vaesenc xmm0, xmm0, xmm2

wolfcrypt/src/fe_x25519_asm.S

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12372,7 +12372,7 @@ _fe_cmov_table_avx2:
1237212372
pushq %r15
1237312373
pushq %rbx
1237412374
movq %rdx, %rcx
12375-
xor %rbx, %rbx
12375+
xorq %rbx, %rbx
1237612376
movsbq %cl, %rax
1237712377
cdq
1237812378
xorb %dl, %al

wolfcrypt/src/port/arm/armv8-32-aes-asm.S

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12555,7 +12555,7 @@ L_AES_CBC_encrypt_block_nr_256:
1255512555
eor r5, r5, r9
1255612556
eor r6, r6, r10
1255712557
eor r7, r7, r11
12558-
#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
12558+
#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
1255912559
pop {r1, r2, lr}
1256012560
ldr r3, [sp]
1256112561
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
@@ -13269,7 +13269,7 @@ L_AES_CBC_encrypt_block_nr_192:
1326913269
eor r5, r5, r9
1327013270
eor r6, r6, r10
1327113271
eor r7, r7, r11
13272-
#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
13272+
#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
1327313273
pop {r1, r2, lr}
1327413274
ldr r3, [sp]
1327513275
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
@@ -13983,7 +13983,7 @@ L_AES_CBC_encrypt_block_nr_128:
1398313983
eor r5, r5, r9
1398413984
eor r6, r6, r10
1398513985
eor r7, r7, r11
13986-
#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
13986+
#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
1398713987
pop {r1, r2, lr}
1398813988
ldr r3, [sp]
1398913989
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)

wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12930,7 +12930,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in,
1293012930
"eor r5, r5, r9\n\t"
1293112931
"eor r6, r6, r10\n\t"
1293212932
"eor r7, r7, r11\n\t"
12933-
#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
12933+
#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
1293412934
"pop {r1, %[len], lr}\n\t"
1293512935
"ldr %[ks], [sp]\n\t"
1293612936
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
@@ -13647,7 +13647,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in,
1364713647
"eor r5, r5, r9\n\t"
1364813648
"eor r6, r6, r10\n\t"
1364913649
"eor r7, r7, r11\n\t"
13650-
#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
13650+
#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
1365113651
"pop {r1, %[len], lr}\n\t"
1365213652
"ldr %[ks], [sp]\n\t"
1365313653
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)
@@ -14364,7 +14364,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in,
1436414364
"eor r5, r5, r9\n\t"
1436514365
"eor r6, r6, r10\n\t"
1436614366
"eor r7, r7, r11\n\t"
14367-
#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */
14367+
#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */
1436814368
"pop {r1, %[len], lr}\n\t"
1436914369
"ldr %[ks], [sp]\n\t"
1437014370
#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6)

wolfcrypt/src/port/arm/armv8-32-chacha-asm.S

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -89,14 +89,14 @@ wc_chacha_setkey:
8989
#endif /* BIG_ENDIAN_ORDER */
9090
stm r0!, {r4, r5, r12, lr}
9191
# Next 16 bytes of key.
92-
beq L_chacha_arm32_setkey_same_keyb_ytes
92+
beq L_chacha_arm32_setkey_same_key_bytes
9393
# Update key pointer for next 16 bytes.
9494
add r1, r1, r2
9595
ldr r4, [r1]
9696
ldr r5, [r1, #4]
9797
ldr r12, [r1, #8]
9898
ldr lr, [r1, #12]
99-
L_chacha_arm32_setkey_same_keyb_ytes:
99+
L_chacha_arm32_setkey_same_key_bytes:
100100
stm r0, {r4, r5, r12, lr}
101101
pop {r4, r5, pc}
102102
.size wc_chacha_setkey,.-wc_chacha_setkey
@@ -572,11 +572,11 @@ L_chacha_crypt_bytes_arm32_round_start_256:
572572
ror r11, r11, #16
573573
veor q7, q7, q4
574574
add r8, r8, r10
575-
vrev32.i16 q15, q15
575+
vrev32.16 q15, q15
576576
add r9, r9, r11
577-
vrev32.i16 q3, q3
577+
vrev32.16 q3, q3
578578
eor r4, r4, r8
579-
vrev32.i16 q7, q7
579+
vrev32.16 q7, q7
580580
eor r5, r5, r9
581581
# c += d; b ^= c; b <<<= 12;
582582
vadd.i32 q14, q14, q15
@@ -685,11 +685,11 @@ L_chacha_crypt_bytes_arm32_round_start_256:
685685
ror r10, r10, #16
686686
veor q7, q7, q4
687687
add r8, r8, r11
688-
vrev32.i16 q15, q15
688+
vrev32.16 q15, q15
689689
add r9, r9, r10
690-
vrev32.i16 q3, q3
690+
vrev32.16 q3, q3
691691
eor r5, r5, r8
692-
vrev32.i16 q7, q7
692+
vrev32.16 q7, q7
693693
eor r6, r6, r9
694694
# c += d; b ^= c; b <<<= 12;
695695
vadd.i32 q14, q14, q15
@@ -891,8 +891,8 @@ L_chacha_crypt_bytes_arm32_round_start_128:
891891
vadd.i32 q4, q4, q5
892892
veor q3, q3, q0
893893
veor q7, q7, q4
894-
vrev32.i16 q3, q3
895-
vrev32.i16 q7, q7
894+
vrev32.16 q3, q3
895+
vrev32.16 q7, q7
896896
# c += d; b ^= c; b <<<= 12;
897897
vadd.i32 q2, q2, q3
898898
vadd.i32 q6, q6, q7
@@ -932,8 +932,8 @@ L_chacha_crypt_bytes_arm32_round_start_128:
932932
vadd.i32 q4, q4, q5
933933
veor q3, q3, q0
934934
veor q7, q7, q4
935-
vrev32.i16 q3, q3
936-
vrev32.i16 q7, q7
935+
vrev32.16 q3, q3
936+
vrev32.16 q7, q7
937937
# c += d; b ^= c; b <<<= 12;
938938
vadd.i32 q2, q2, q3
939939
vadd.i32 q6, q6, q7
@@ -1178,13 +1178,13 @@ wc_chacha_setkey:
11781178
vldm r3, {q0}
11791179
vld1.8 {q1}, [r1]!
11801180
#ifdef BIG_ENDIAN_ORDER
1181-
vrev32.i16 q1, q1
1181+
vrev32.16 q1, q1
11821182
#endif /* BIG_ENDIAN_ORDER */
11831183
vstm r0!, {q0, q1}
11841184
beq L_chacha_setkey_arm32_done
11851185
vld1.8 {q1}, [r1]
11861186
#ifdef BIG_ENDIAN_ORDER
1187-
vrev32.i16 q1, q1
1187+
vrev32.16 q1, q1
11881188
#endif /* BIG_ENDIAN_ORDER */
11891189
L_chacha_setkey_arm32_done:
11901190
vstm r0, {q1}

wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -134,15 +134,15 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key,
134134
#endif /* BIG_ENDIAN_ORDER */
135135
"stm %[x]!, {r4, r5, r12, lr}\n\t"
136136
/* Next 16 bytes of key. */
137-
"beq L_chacha_arm32_setkey_same_keyb_ytes_%=\n\t"
137+
"beq L_chacha_arm32_setkey_same_key_bytes_%=\n\t"
138138
/* Update key pointer for next 16 bytes. */
139139
"add %[key], %[key], %[keySz]\n\t"
140140
"ldr r4, [%[key]]\n\t"
141141
"ldr r5, [%[key], #4]\n\t"
142142
"ldr r12, [%[key], #8]\n\t"
143143
"ldr lr, [%[key], #12]\n\t"
144144
"\n"
145-
"L_chacha_arm32_setkey_same_keyb_ytes_%=: \n\t"
145+
"L_chacha_arm32_setkey_same_key_bytes_%=: \n\t"
146146
"stm %[x], {r4, r5, r12, lr}\n\t"
147147
#ifndef WOLFSSL_NO_VAR_ASSIGN_REG
148148
: [x] "+r" (x), [key] "+r" (key), [keySz] "+r" (keySz),
@@ -693,11 +693,11 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
693693
"ror r11, r11, #16\n\t"
694694
"veor q7, q7, q4\n\t"
695695
"add r8, r8, r10\n\t"
696-
"vrev32.i16 q15, q15\n\t"
696+
"vrev32.16 q15, q15\n\t"
697697
"add r9, r9, r11\n\t"
698-
"vrev32.i16 q3, q3\n\t"
698+
"vrev32.16 q3, q3\n\t"
699699
"eor r4, r4, r8\n\t"
700-
"vrev32.i16 q7, q7\n\t"
700+
"vrev32.16 q7, q7\n\t"
701701
"eor r5, r5, r9\n\t"
702702
/* c += d; b ^= c; b <<<= 12; */
703703
"vadd.i32 q14, q14, q15\n\t"
@@ -806,11 +806,11 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
806806
"ror r10, r10, #16\n\t"
807807
"veor q7, q7, q4\n\t"
808808
"add r8, r8, r11\n\t"
809-
"vrev32.i16 q15, q15\n\t"
809+
"vrev32.16 q15, q15\n\t"
810810
"add r9, r9, r10\n\t"
811-
"vrev32.i16 q3, q3\n\t"
811+
"vrev32.16 q3, q3\n\t"
812812
"eor r5, r5, r8\n\t"
813-
"vrev32.i16 q7, q7\n\t"
813+
"vrev32.16 q7, q7\n\t"
814814
"eor r6, r6, r9\n\t"
815815
/* c += d; b ^= c; b <<<= 12; */
816816
"vadd.i32 q14, q14, q15\n\t"
@@ -1014,8 +1014,8 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
10141014
"vadd.i32 q4, q4, q5\n\t"
10151015
"veor q3, q3, q0\n\t"
10161016
"veor q7, q7, q4\n\t"
1017-
"vrev32.i16 q3, q3\n\t"
1018-
"vrev32.i16 q7, q7\n\t"
1017+
"vrev32.16 q3, q3\n\t"
1018+
"vrev32.16 q7, q7\n\t"
10191019
/* c += d; b ^= c; b <<<= 12; */
10201020
"vadd.i32 q2, q2, q3\n\t"
10211021
"vadd.i32 q6, q6, q7\n\t"
@@ -1055,8 +1055,8 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c,
10551055
"vadd.i32 q4, q4, q5\n\t"
10561056
"veor q3, q3, q0\n\t"
10571057
"veor q7, q7, q4\n\t"
1058-
"vrev32.i16 q3, q3\n\t"
1059-
"vrev32.i16 q7, q7\n\t"
1058+
"vrev32.16 q3, q3\n\t"
1059+
"vrev32.16 q7, q7\n\t"
10601060
/* c += d; b ^= c; b <<<= 12; */
10611061
"vadd.i32 q2, q2, q3\n\t"
10621062
"vadd.i32 q6, q6, q7\n\t"
@@ -1329,13 +1329,13 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key,
13291329
"vldm r3, {q0}\n\t"
13301330
"vld1.8 {q1}, [%[key]]!\n\t"
13311331
#ifdef BIG_ENDIAN_ORDER
1332-
"vrev32.i16 q1, q1\n\t"
1332+
"vrev32.16 q1, q1\n\t"
13331333
#endif /* BIG_ENDIAN_ORDER */
13341334
"vstm %[x]!, {q0-q1}\n\t"
13351335
"beq L_chacha_setkey_arm32_done_%=\n\t"
13361336
"vld1.8 {q1}, [%[key]]\n\t"
13371337
#ifdef BIG_ENDIAN_ORDER
1338-
"vrev32.i16 q1, q1\n\t"
1338+
"vrev32.16 q1, q1\n\t"
13391339
#endif /* BIG_ENDIAN_ORDER */
13401340
"\n"
13411341
"L_chacha_setkey_arm32_done_%=: \n\t"

0 commit comments

Comments
 (0)