/**
 * Optimized version of _update_crc32 that consumes the input in 16-byte
 * blocks using hand-written ARM assembly.
 *
 * Folds `len` bytes starting at `s` into the file-scope running value
 * `crc32val`, using the lookup table `crc32_tab` (indexed as 32-bit entries).
 * The result is the same as the byte-wise reference loop:
 *
 *     crc32val = crc32_tab[(crc32val ^ s[i]) & 0xFF] ^ (crc32val >> 8);
 *
 * Whole 16-byte blocks go through the assembly loop; any remaining
 * (len % 16) bytes, and the len < 16 case, are handled by the reference
 * loop so that the assembly never reads past the end of the buffer.
 *
 * Preconditions of the assembly path:
 *  - `s` should be 4-byte aligned: the data is fetched with LDM, which does
 *    not support unaligned addresses.
 *  - The core must run little-endian: bytes are extracted from each loaded
 *    word lowest byte first (ROR #8/#16/#24).
 *
 * @param s    input bytes
 * @param len  number of bytes to process
 */
static void _update_crc32_opt16(const unsigned char *s, unsigned int len)
{
    /* Bytes covered by complete 16-byte blocks. */
    const unsigned int block_bytes = len & ~15u;
    unsigned int i;

    /*
     * Register usage inside the asm:
     *   r0 -> s (post-incremented by LDM)
     *   r1 -> remaining byte count
     *   r2 -> crc32val
     *   r3 -> crc32_tab
     *   r4 -> curval[0]
     *   r5 -> (crc32val ^ s[i]) & 0xFF
     *   r6 -> crc32_tab[(crc32val ^ s[i]) & 0xFF]
     *   r7 -> curval[1]
     *   r8 -> curval[2]
     *   r9 -> curval[3]
     *
     * The loop is do-while shaped (decrement, then branch if non-zero), so
     * it must only be entered with a non-zero multiple of 16.
     */
    if (block_bytes != 0) {
        __asm__ volatile (
            /* Inputs are copied before r7-r9 are touched, so it is fine if
               the compiler placed an operand in one of those registers. */
            "mov r0, %1\n"
            "mov r1, %2\n"
            "mov r2, %3\n"
            "mov r3, %4\n"
            /* r7-r9 are saved by hand instead of being listed as clobbers. */
            "push {r7, r8, r9}\n"
            /* Numeric local label: stays unique if this code is inlined
               or duplicated by the compiler. */
            "1:\n"
            "ldm r0!, {r4, r7, r8, r9}\n"
            // curval[0]
            "eor r5, r2, r4\n"
            "uxtb r5, r5\n"
            "ldr r6, [r3, r5, lsl #2]\n"
            "eor r2, r6, r2, lsr #8\n"
            "eor r5, r2, r4, ror #8\n"
            "uxtb r5, r5\n"
            "ldr r6, [r3, r5, lsl #2]\n"
            "eor r2, r6, r2, lsr #8\n"
            "eor r5, r2, r4, ror #16\n"
            "uxtb r5, r5\n"
            "ldr r6, [r3, r5, lsl #2]\n"
            "eor r2, r6, r2, lsr #8\n"
            "eor r5, r2, r4, ror #24\n"
            "uxtb r5, r5\n"
            "ldr r6, [r3, r5, lsl #2]\n"
            "eor r2, r6, r2, lsr #8\n"
            // curval[1]
            "eor r5, r2, r7\n"
            "uxtb r5, r5\n"
            "ldr r6, [r3, r5, lsl #2]\n"
            "eor r2, r6, r2, lsr #8\n"
            "eor r5, r2, r7, ror #8\n"
            "uxtb r5, r5\n"
            "ldr r6, [r3, r5, lsl #2]\n"
            "eor r2, r6, r2, lsr #8\n"
            "eor r5, r2, r7, ror #16\n"
            "uxtb r5, r5\n"
            "ldr r6, [r3, r5, lsl #2]\n"
            "eor r2, r6, r2, lsr #8\n"
            "eor r5, r2, r7, ror #24\n"
            "uxtb r5, r5\n"
            "ldr r6, [r3, r5, lsl #2]\n"
            "eor r2, r6, r2, lsr #8\n"
            // curval[2]
            "eor r5, r2, r8\n"
            "uxtb r5, r5\n"
            "ldr r6, [r3, r5, lsl #2]\n"
            "eor r2, r6, r2, lsr #8\n"
            "eor r5, r2, r8, ror #8\n"
            "uxtb r5, r5\n"
            "ldr r6, [r3, r5, lsl #2]\n"
            "eor r2, r6, r2, lsr #8\n"
            "eor r5, r2, r8, ror #16\n"
            "uxtb r5, r5\n"
            "ldr r6, [r3, r5, lsl #2]\n"
            "eor r2, r6, r2, lsr #8\n"
            "eor r5, r2, r8, ror #24\n"
            "uxtb r5, r5\n"
            "ldr r6, [r3, r5, lsl #2]\n"
            "eor r2, r6, r2, lsr #8\n"
            // curval[3]
            "eor r5, r2, r9\n"
            "uxtb r5, r5\n"
            "ldr r6, [r3, r5, lsl #2]\n"
            "eor r2, r6, r2, lsr #8\n"
            "eor r5, r2, r9, ror #8\n"
            "uxtb r5, r5\n"
            "ldr r6, [r3, r5, lsl #2]\n"
            "eor r2, r6, r2, lsr #8\n"
            "eor r5, r2, r9, ror #16\n"
            "uxtb r5, r5\n"
            "ldr r6, [r3, r5, lsl #2]\n"
            "eor r2, r6, r2, lsr #8\n"
            "eor r5, r2, r9, ror #24\n"
            "uxtb r5, r5\n"
            "ldr r6, [r3, r5, lsl #2]\n"
            /* The counter update is deliberately placed between the last
               table load and the EOR that consumes it; that EOR does not
               set flags, so BNE still tests the SUBS result. */
            "subs r1, r1, #16\n"
            "eor r2, r6, r2, lsr #8\n"
            "bne 1b\n"
            "pop {r7, r8, r9}\n"
            "str r2, %0\n"
            : "=m" (crc32val)
            : "r" (s), "r" (block_bytes), "r" (crc32val), "r" (crc32_tab)
            /* "cc": SUBS modifies the flags.
               "memory": the asm reads *s and crc32_tab through pointers the
               compiler cannot see as operands. */
            : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "cc", "memory"
        );
    }

    /* Trailing bytes that do not fill a whole 16-byte block. */
    for (i = block_bytes; i < len; i++) {
        crc32val = crc32_tab[(crc32val ^ s[i]) & 0xFF] ^ ((crc32val >> 8) & 0x00FFFFFF);
    }
}