;; Phelix for powerpc.
;; This program is released to the public domain by The TOLIS Group, Inc.
;; Written by Andy Vaught, andy.vaught@gmail.com

;; The phelix context structure is always relative to r3, since a pointer
;; to the context is always the first argument to all of the subroutines.

;; This program makes a lot of use of the "red zone", 224 bytes below the
;; stack pointer (r1).  Unlike other processors, this memory is guaranteed
;; not to be modified by interrupts or other asynchronous events.

;; Byte offsets of the fields of the context structure addressed through r3.

;; Key states:
        .set    s_keysize, 0
        .set    s_macsize, 4
        .set    s_X_1_bump, 8

        .set    s_X0_0, 12
        .set    s_X0_1, 16
        .set    s_X0_2, 20
        .set    s_X0_3, 24
        .set    s_X0_4, 28
        .set    s_X0_5, 32
        .set    s_X0_6, 36
        .set    s_X0_7, 40

        .set    s_X1_0, 44
        .set    s_X1_1, 48
        .set    s_X1_2, 52
        .set    s_X1_3, 56
        .set    s_X1_4, 60
        .set    s_X1_5, 64
        .set    s_X1_6, 68
        .set    s_X1_7, 72

; Cipher states
        .set    s_OldZ0, 76
        .set    s_OldZ1, 80
        .set    s_OldZ2, 84
        .set    s_OldZ3, 88

        .set    s_Z0, 92
        .set    s_Z1, 96
        .set    s_Z2, 100
        .set    s_Z3, 104
        .set    s_Z4, 108

        .set    s_i, 112
        .set    s_aadlen_l, 116
        .set    s_aadlen_h, 120
        .set    s_msglen, 124
        .set    s_aadxor, 128

        .text
        .align  2

; Dispatch table into the encryption roundabout.  Callers add 4 * (i & 7)
; to the table address and branch there, so every entry has to remain a
; single four-byte instruction and the entries must stay in order.
encrypt_jump_table:
        b       encrypt_0
        b       encrypt_1
        b       encrypt_2
        b       encrypt_3
        b       encrypt_4
        b       encrypt_5
        b       encrypt_6
        b       encrypt_7

; key_init()-- Set up one key-mixing round and fall through to encrypt_0.
; In:   r3 = context, r10-r13 = the four words to mix (used as Z0-Z3),
;       LR = return address (CTR is set to 1, so encrypt_0 returns there).
; Out:  whatever encrypt_0 leaves in r10-r14.
; Sets: r4 = r5 = r1 - 128 (four zeroed scratch words, processed in place),
;       r6 = r7 = 0, r14 = (keysize >> 3) + 64, r15 = r23 = r31 = 0, CTR = 1.
; This must stay immediately in front of encrypt_0 (only comments may
; intervene) because it has no branch of its own.
key_init:
        li      r6, 0                   ; OldZ4_0 = 0, also the zero source below
        li      r7, 0
        addi    r4, r1, -128            ; input pointer
        addi    r5, r1, -128            ; output pointer, same scratch words
        stw     r6, -128(r1)
        stw     r6, -124(r1)
        stw     r6, -120(r1)
        stw     r6, -116(r1)
        li      r15, 0                  ; X0_0 = 0 for this round
        li      r23, 0                  ; X1_0 = 0 for this round
        lwz     r14, s_keysize(r3)
        srawi   r14, r14, 3             ; key_size / 8
        addi    r14, r14, 64
        li      r31, 1
        mtctr   r31                     ; exactly one word
        li      r31, 0                  ; i = 0

; encrypt()-- Unrolled phelix encryption/decryption.  On entry,
; CTR is the number of words to encrypt and LR is the address to return to.
; The entire cipher state is loaded into registers as follows:
;   r0   Used for the current input plaintext/ciphertext word
;   r2   Used for intermediate computations
;   r3   Points to cipher context structure (preserved for caller)
;   r4   Points to next plaintext word for encryption, ciphertext for decryption
;   r5   Points to next ciphertext word for encryption, plaintext for decryption
;
;   r6   OldZ4_0     r10  Z0     r15  X0_0     r23  X1_0
;   r7   OldZ4_1     r11  Z1     r16  X0_1     r24  X1_1
;   r8   OldZ4_2     r12  Z2     r17  X0_2     r25  X1_2
;   r9   OldZ4_3     r13  Z3     r18  X0_3     r26  X1_3
;                    r14  Z4     r19  X0_4     r27  X1_4
;                                r20  X0_5     r28  X1_5
;                                r21  X0_6     r29  X1_6
;                                r22  X0_7     r30  X1_7
;   r31  Used for the 'i' counter
;
; r3, r4 and r5 correspond to standard ppc calling conventions.
; On exit, r4 through r14 are updated, r15 through r30 are unchanged.
; LR contains the address to resume encryption at.
;
; The only 'unused' registers are r1, the stack pointer, which we can't
; use anyhow, and r3, which points to the context structure and is
; preserved for the benefit of callers.
;
; The r6-r9 registers are also co-opted to become the output word after they
; are used.  At the end of the round, they go back to their old purpose.
;
; This code is sort of like a traffic roundabout-- the overall
; structure is a loop, but has entries and exits at regular
; points along the circle.  Phelix itself was designed with superscalar
; execution in mind.  If you look closely, you'll see that we have pairs
; and triples of instructions that don't depend on the result of the prior
; instructions, allowing a *lot* of out-of-order execution.
; Rounds 0 and 1 of the encryption roundabout.
;
; Each round consumes one input word and produces one output word:
;   - the input word is loaded byte-reversed from (r4) into r0, r4 += 4;
;   - r10-r14 (Z0-Z4) are stirred with add/xor/rotate; the first half mixes
;     in X0_n, the second half mixes in the input word and X1_n + r31;
;   - the output word is (OldZ4 slot + r14) xor r0, stored byte-reversed
;     at (r5), r5 += 4;
;   - r31 is incremented and the OldZ4 slot is reloaded from r14;
;   - CTR is decremented; when it reaches zero the round returns through
;     LR, otherwise execution falls into the next round.
; The instruction order is interleaved by hand; do not reorder.

; encrypt_0 -- uses X0_0 (r15), X1_0 (r23), OldZ4 slot r6.
encrypt_0:
        lwbrx   r0, 0, r4               ; r0 = input word
        addi    r4, r4, 4
        add     r10, r10, r13
        rotlwi  r13, r13, 15
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r14, r14, r12
        add     r2, r13, r15            ; Round key X0_0, calculate it early
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        xor     r2, r0, r13             ; input word enters the state via r2
        rotlwi  r13, r13, 15
        add     r6, r6, r14             ; r6 = OldZ4_0 + Z4
        add     r10, r10, r2
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r6, r6, r0              ; r6 = output word
        add     r2, r23, r31            ; X1_0 + i
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        stwbrx  r6, 0, r5               ; emit output word
        addi    r5, r5, 4
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r2, r2, r13
        addi    r31, r31, 1             ; i++
        add     r14, r14, r12
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        mr      r6, r14                 ; r6 goes back to holding OldZ4_0
        bdzlr-                          ; return when CTR hits zero

; encrypt_1 -- uses X0_1 (r16), X1_1 (r24), OldZ4 slot r7.
encrypt_1:
        lwbrx   r0, 0, r4               ; r0 = input word
        addi    r4, r4, 4
        add     r10, r10, r13
        rotlwi  r13, r13, 15
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r14, r14, r12
        add     r2, r13, r16            ; Round key X0_1, calculate it early
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        xor     r2, r0, r13             ; input word enters the state via r2
        rotlwi  r13, r13, 15
        add     r7, r7, r14             ; r7 = OldZ4_1 + Z4
        add     r10, r10, r2
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r7, r7, r0              ; r7 = output word
        add     r2, r24, r31            ; X1_1 + i
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        stwbrx  r7, 0, r5               ; emit output word
        addi    r5, r5, 4
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r2, r2, r13
        addi    r31, r31, 1             ; i++
        add     r14, r14, r12
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        mr      r7, r14                 ; r7 goes back to holding OldZ4_1
        bdzlr-                          ; return when CTR hits zero
; Rounds 2 and 3 of the encryption roundabout.
;
; Each round loads one input word byte-reversed from (r4) into r0, stirs
; r10-r14 (Z0-Z4) with X0_n, the input word and X1_n + r31, stores
; (OldZ4 slot + r14) xor r0 byte-reversed at (r5), advances r4/r5 by 4,
; increments r31, reloads the OldZ4 slot from r14, and then either returns
; through LR (CTR reached zero) or falls into the next round.
; The instruction order is interleaved by hand; do not reorder.

; encrypt_2 -- uses X0_2 (r17), X1_2 (r25), OldZ4 slot r8.
encrypt_2:
        lwbrx   r0, 0, r4               ; r0 = input word
        addi    r4, r4, 4
        add     r10, r10, r13
        rotlwi  r13, r13, 15
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r14, r14, r12
        add     r2, r13, r17            ; Round key X0_2, calculate it early
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        xor     r2, r0, r13             ; input word enters the state via r2
        rotlwi  r13, r13, 15
        add     r8, r8, r14             ; r8 = OldZ4_2 + Z4
        add     r10, r10, r2
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r8, r8, r0              ; r8 = output word
        add     r2, r25, r31            ; X1_2 + i
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        stwbrx  r8, 0, r5               ; emit output word
        addi    r5, r5, 4
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r2, r2, r13
        addi    r31, r31, 1             ; i++
        add     r14, r14, r12
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        mr      r8, r14                 ; r8 goes back to holding OldZ4_2
        bdzlr-                          ; return when CTR hits zero

; encrypt_3 -- uses X0_3 (r18), X1_3 (r26), OldZ4 slot r9.
encrypt_3:
        lwbrx   r0, 0, r4               ; r0 = input word
        addi    r4, r4, 4
        add     r10, r10, r13
        rotlwi  r13, r13, 15
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r14, r14, r12
        add     r2, r13, r18            ; Round key X0_3, calculate it early
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        xor     r2, r0, r13             ; input word enters the state via r2
        rotlwi  r13, r13, 15
        add     r9, r9, r14             ; r9 = OldZ4_3 + Z4
        add     r10, r10, r2
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r9, r9, r0              ; r9 = output word
        add     r2, r26, r31            ; X1_3 + i
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        stwbrx  r9, 0, r5               ; emit output word
        addi    r5, r5, 4
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r2, r2, r13
        addi    r31, r31, 1             ; i++
        add     r14, r14, r12
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        mr      r9, r14                 ; r9 goes back to holding OldZ4_3
        bdzlr-                          ; return when CTR hits zero
; Rounds 4 and 5 of the encryption roundabout.
;
; Each round loads one input word byte-reversed from (r4) into r0, stirs
; r10-r14 (Z0-Z4) with X0_n, the input word and X1_n + r31, stores
; (OldZ4 slot + r14) xor r0 byte-reversed at (r5), advances r4/r5 by 4,
; increments r31, reloads the OldZ4 slot from r14, and then either returns
; through LR (CTR reached zero) or falls into the next round.
; The four OldZ4 slots r6-r9 are reused every four rounds, so round 4
; shares r6 with round 0 and round 5 shares r7 with round 1.
; The instruction order is interleaved by hand; do not reorder.

; encrypt_4 -- uses X0_4 (r19), X1_4 (r27), OldZ4 slot r6.
encrypt_4:
        lwbrx   r0, 0, r4               ; r0 = input word
        addi    r4, r4, 4
        add     r10, r10, r13
        rotlwi  r13, r13, 15
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r14, r14, r12
        add     r2, r13, r19            ; Round key X0_4, calculate it early
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        xor     r2, r0, r13             ; input word enters the state via r2
        rotlwi  r13, r13, 15
        add     r6, r6, r14             ; r6 = OldZ4_0 + Z4
        add     r10, r10, r2
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r6, r6, r0              ; r6 = output word
        add     r2, r27, r31            ; X1_4 + i
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        stwbrx  r6, 0, r5               ; emit output word
        addi    r5, r5, 4
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r2, r2, r13
        addi    r31, r31, 1             ; i++
        add     r14, r14, r12
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        mr      r6, r14                 ; r6 goes back to holding OldZ4_0
        bdzlr-                          ; return when CTR hits zero

; encrypt_5 -- uses X0_5 (r20), X1_5 (r28), OldZ4 slot r7.
encrypt_5:
        lwbrx   r0, 0, r4               ; r0 = input word
        addi    r4, r4, 4
        add     r10, r10, r13
        rotlwi  r13, r13, 15
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r14, r14, r12
        add     r2, r13, r20            ; Round key X0_5, calculate it early
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        xor     r2, r0, r13             ; input word enters the state via r2
        rotlwi  r13, r13, 15
        add     r7, r7, r14             ; r7 = OldZ4_1 + Z4
        add     r10, r10, r2
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r7, r7, r0              ; r7 = output word
        add     r2, r28, r31            ; X1_5 + i
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        stwbrx  r7, 0, r5               ; emit output word
        addi    r5, r5, 4
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r2, r2, r13
        addi    r31, r31, 1             ; i++
        add     r14, r14, r12
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        mr      r7, r14                 ; r7 goes back to holding OldZ4_1
        bdzlr-                          ; return when CTR hits zero
; Rounds 6 and 7 of the encryption roundabout.
;
; Each round loads one input word byte-reversed from (r4) into r0, stirs
; r10-r14 (Z0-Z4) with X0_n, the input word and X1_n + r31, stores
; (OldZ4 slot + r14) xor r0 byte-reversed at (r5), advances r4/r5 by 4,
; increments r31 and reloads the OldZ4 slot from r14.  Round 6 then
; returns through LR if CTR reached zero; round 7 instead branches back to
; encrypt_0 while CTR is non-zero and returns otherwise.
; The instruction order is interleaved by hand; do not reorder.

; encrypt_6 -- uses X0_6 (r21), X1_6 (r29), OldZ4 slot r8.
encrypt_6:
        lwbrx   r0, 0, r4
        addi    r4, r4, 4
        add     r10, r10, r13
        rotlwi  r13, r13, 15
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r14, r14, r12
        add     r2, r13, r21            ; Round key X0_6, calculate it early
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        xor     r2, r0, r13
        rotlwi  r13, r13, 15
        add     r8, r8, r14
        add     r10, r10, r2
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r8, r8, r0
        add     r2, r29, r31
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        stwbrx  r8, 0, r5
        addi    r5, r5, 4
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r2, r2, r13
        addi    r31, r31, 1
        add     r14, r14, r12
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        mr      r8, r14
        bdzlr-

; encrypt_7 -- uses X0_7 (r22), X1_7 (r30), OldZ4 slot r9.
encrypt_7:
        lwbrx   r0, 0, r4
        addi    r4, r4, 4
        add     r10, r10, r13
        rotlwi  r13, r13, 15
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r14, r14, r12
        add     r2, r13, r22            ; Round key X0_7, calculate it early
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        xor     r2, r0, r13
        rotlwi  r13, r13, 15
        add     r9, r9, r14
        add     r10, r10, r2
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r9, r9, r0
        add     r2, r30, r31
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        stwbrx  r9, 0, r5
        addi    r5, r5, 4
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r2, r2, r13
        addi    r31, r31, 1
        add     r14, r14, r12
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        mr      r9, r14
        bdnz+   encrypt_0               ; more words: go around again
        blr                             ; CTR reached zero: return via LR

; Dispatch table into the decryption roundabout, laid out exactly like
; encrypt_jump_table: one four-byte branch per round, in round order.
decrypt_jump_table:
        b       decrypt_0
        b       decrypt_1
        b       decrypt_2
        b       decrypt_3
        b       decrypt_4
        b       decrypt_5
        b       decrypt_6
        b       decrypt_7

; The decryption roundabout is much the same as encryption, except that it
; takes ciphertext and calculates plaintext.  This means that things
; are reordered a little bit in the middle of the loop.
;
; In each decrypt round the output word (OldZ4 slot + r14) xor r0 is
; computed first, and it is that output word -- not the input word r0 --
; that is xor-ed with r13 and added into r10.  Everything else (pointer
; advance, r31 increment, OldZ4 slot reload, CTR handling) matches the
; encrypt rounds.

; decrypt_0 -- uses X0_0 (r15), X1_0 (r23), OldZ4 slot r6.
decrypt_0:
        lwbrx   r0, 0, r4
        addi    r4, r4, 4
        add     r10, r10, r13
        rotlwi  r13, r13, 15
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r14, r14, r12
        add     r2, r13, r15            ; Round key X0_0, calculate it early
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        add     r6, r6, r14             ; Decrypt
        add     r11, r11, r14
        xor     r6, r6, r0
        rotlwi  r14, r14, 25
        xor     r2, r13, r6
        stwbrx  r6, 0, r5
        addi    r5, r5, 4
        add     r10, r10, r2
        rotlwi  r13, r13, 15
        add     r2, r23, r31
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r2, r2, r13
        addi    r31, r31, 1
        add     r14, r14, r12
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        mr      r6, r14
        bdzlr-

; decrypt_1 -- uses X0_1 (r16), X1_1 (r24), OldZ4 slot r7.
decrypt_1:
        lwbrx   r0, 0, r4
        addi    r4, r4, 4
        add     r10, r10, r13
        rotlwi  r13, r13, 15
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r14, r14, r12
        add     r2, r13, r16            ; Round key X0_1, calculate it early
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        add     r7, r7, r14             ; Decrypt
        add     r11, r11, r14
        xor     r7, r7, r0
        rotlwi  r14, r14, 25
        xor     r2, r13, r7
        stwbrx  r7, 0, r5
        addi    r5, r5, 4
        add     r10, r10, r2
        rotlwi  r13, r13, 15
        add     r2, r24, r31
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r2, r2, r13
        addi    r31, r31, 1
        add     r14, r14, r12
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        mr      r7, r14
        bdzlr-

; decrypt_2 -- uses X0_2 (r17), X1_2 (r25), OldZ4 slot r8.
decrypt_2:
        lwbrx   r0, 0, r4
        addi    r4, r4, 4
        add     r10, r10, r13
        rotlwi  r13, r13, 15
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r14, r14, r12
        add     r2, r13, r17            ; Round key X0_2, calculate it early
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        add     r8, r8, r14             ; Decrypt
        add     r11, r11, r14
        xor     r8, r8, r0
        rotlwi  r14, r14, 25
        xor     r2, r13, r8
        stwbrx  r8, 0, r5
        addi    r5, r5, 4
        add     r10, r10, r2
        rotlwi  r13, r13, 15
        add     r2, r25, r31
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r2, r2, r13
        addi    r31, r31, 1
        add     r14, r14, r12
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        mr      r8, r14
        bdzlr-

; decrypt_3 -- uses X0_3 (r18), X1_3 (r26), OldZ4 slot r9.
decrypt_3:
        lwbrx   r0, 0, r4
        addi    r4, r4, 4
        add     r10, r10, r13
        rotlwi  r13, r13, 15
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r14, r14, r12
        add     r2, r13, r18            ; Round key X0_3, calculate it early
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        add     r9, r9, r14             ; Decrypt
        add     r11, r11, r14
        xor     r9, r9, r0
        rotlwi  r14, r14, 25
        xor     r2, r13, r9
        stwbrx  r9, 0, r5
        addi    r5, r5, 4
        add     r10, r10, r2
        rotlwi  r13, r13, 15
        add     r2, r26, r31
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r2, r2, r13
        addi    r31, r31, 1
        add     r14, r14, r12
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        mr      r9, r14
        bdzlr-

; decrypt_4 -- uses X0_4 (r19), X1_4 (r27), OldZ4 slot r6.
decrypt_4:
        lwbrx   r0, 0, r4
        addi    r4, r4, 4
        add     r10, r10, r13
        rotlwi  r13, r13, 15
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r14, r14, r12
        add     r2, r13, r19            ; Round key X0_4, calculate it early
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        add     r6, r6, r14             ; Decrypt
        add     r11, r11, r14
        xor     r6, r6, r0
        rotlwi  r14, r14, 25
        xor     r2, r13, r6
        stwbrx  r6, 0, r5
        addi    r5, r5, 4
        add     r10, r10, r2
        rotlwi  r13, r13, 15
        add     r2, r27, r31
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r2, r2, r13
        addi    r31, r31, 1
        add     r14, r14, r12
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        mr      r6, r14
        bdzlr-

; decrypt_5 -- uses X0_5 (r20), X1_5 (r28), OldZ4 slot r7.
decrypt_5:
        lwbrx   r0, 0, r4
        addi    r4, r4, 4
        add     r10, r10, r13
        rotlwi  r13, r13, 15
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r14, r14, r12
        add     r2, r13, r20            ; Round key X0_5, calculate it early
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        add     r7, r7, r14             ; Decrypt
        add     r11, r11, r14
        xor     r7, r7, r0
        rotlwi  r14, r14, 25
        xor     r2, r13, r7
        stwbrx  r7, 0, r5
        addi    r5, r5, 4
        add     r10, r10, r2
        rotlwi  r13, r13, 15
        add     r2, r28, r31
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r2, r2, r13
        addi    r31, r31, 1
        add     r14, r14, r12
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        mr      r7, r14
        bdzlr-

; decrypt_6 -- uses X0_6 (r21), X1_6 (r29), OldZ4 slot r8.
decrypt_6:
        lwbrx   r0, 0, r4
        addi    r4, r4, 4
        add     r10, r10, r13
        rotlwi  r13, r13, 15
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r14, r14, r12
        add     r2, r13, r21            ; Round key X0_6, calculate it early
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        add     r8, r8, r14             ; Decrypt
        add     r11, r11, r14
        xor     r8, r8, r0
        rotlwi  r14, r14, 25
        xor     r2, r13, r8
        stwbrx  r8, 0, r5
        addi    r5, r5, 4
        add     r10, r10, r2
        rotlwi  r13, r13, 15
        add     r2, r29, r31
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r2, r2, r13
        addi    r31, r31, 1
        add     r14, r14, r12
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        mr      r8, r14
        bdzlr-

; decrypt_7 -- uses X0_7 (r22), X1_7 (r30), OldZ4 slot r9.  Like
; encrypt_7, it loops back to round 0 while CTR is non-zero.
decrypt_7:
        lwbrx   r0, 0, r4
        addi    r4, r4, 4
        add     r10, r10, r13
        rotlwi  r13, r13, 15
        add     r11, r11, r14
        rotlwi  r14, r14, 25
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r14, r14, r12
        add     r2, r13, r22            ; Round key X0_7, calculate it early
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        add     r9, r9, r14             ; Decrypt
        add     r11, r11, r14
        xor     r9, r9, r0
        rotlwi  r14, r14, 25
        xor     r2, r13, r9
        stwbrx  r9, 0, r5
        addi    r5, r5, 4
        add     r10, r10, r2
        rotlwi  r13, r13, 15
        add     r2, r30, r31
        xor     r12, r12, r10
        rotlwi  r10, r10, 9
        xor     r13, r13, r11
        rotlwi  r11, r11, 10
        add     r2, r2, r13
        addi    r31, r31, 1
        add     r14, r14, r12
        rotlwi  r12, r12, 17
        xor     r10, r10, r2
        rotlwi  r13, r13, 30
        xor     r11, r11, r14
        rotlwi  r14, r14, 13
        add     r12, r12, r10
        rotlwi  r10, r10, 20
        add     r13, r13, r11
        rotlwi  r11, r11, 11
        xor     r14, r14, r12
        rotlwi  r12, r12, 5
        mr      r9, r14
        bdnz+   decrypt_0               ; more words: go around again
        blr                             ; CTR reached zero: return via LR

; save_regs()-- Save registers that must be preserved in the red zone,
; load cipher state.
;
; Stores r13-r31 at -12(r1) down to -88(r1) and returns through LR.  It is
; called with bl, so the caller must already have saved its own return
; address (the entry points keep it at 8(r1)).
; NOTE(review): r26 is stored twice (at -64 and -68); the second store
; looks like a copy-and-paste slip.  It is harmless because restore_regs
; mirrors it, but the -68 slot is effectively wasted.
; NOTE(review): despite the header text, this routine does not load the
; cipher state; that is done separately by load_cipher_state.
save_regs:
        stw     r13, -12(r1)
        stw     r14, -16(r1)
        stw     r15, -20(r1)
        stw     r16, -24(r1)
        stw     r17, -28(r1)
        stw     r18, -32(r1)
        stw     r19, -36(r1)
        stw     r20, -40(r1)
        stw     r21, -44(r1)
        stw     r22, -48(r1)
        stw     r23, -52(r1)
        stw     r24, -56(r1)
        stw     r25, -60(r1)
        stw     r26, -64(r1)
        stw     r26, -68(r1)
        stw     r27, -72(r1)
        stw     r28, -76(r1)
        stw     r29, -80(r1)
        stw     r30, -84(r1)
        stw     r31, -88(r1)
        blr

; restore_regs()-- Restore registers from the red zone, and return
; to the caller of our caller.
;
; Reached with a plain branch (not bl).  Reloads r13-r31 from the slots
; written by save_regs, then reloads the return address that the entry
; point stored at 8(r1) and returns to it.  r2 is used as scratch.
restore_regs:
        lwz     r13, -12(r1)
        lwz     r14, -16(r1)
        lwz     r15, -20(r1)
        lwz     r16, -24(r1)
        lwz     r17, -28(r1)
        lwz     r18, -32(r1)
        lwz     r19, -36(r1)
        lwz     r20, -40(r1)
        lwz     r21, -44(r1)
        lwz     r22, -48(r1)
        lwz     r23, -52(r1)
        lwz     r24, -56(r1)
        lwz     r25, -60(r1)
        lwz     r26, -64(r1)
        lwz     r26, -68(r1)
        lwz     r27, -72(r1)
        lwz     r28, -76(r1)
        lwz     r29, -80(r1)
        lwz     r30, -84(r1)
        lwz     r31, -88(r1)
        lwz     r2, 8(r1)               ; return address saved by the entry point
        mtlr    r2
        blr

; load_cipher_state()-- Load cipher state from a context structure.
;
; In:  r3 = context.
; Out: r6-r9 = OldZ0-OldZ3, r10-r14 = Z0-Z4, r15-r22 = X0_0-X0_7,
;      r23-r30 = X1_0-X1_7, r31 = i.  No other registers are written.
load_cipher_state:
        lwz     r6, s_OldZ0(r3)
        lwz     r7, s_OldZ1(r3)
        lwz     r8, s_OldZ2(r3)
        lwz     r9, s_OldZ3(r3)

        lwz     r10, s_Z0(r3)
        lwz     r11, s_Z1(r3)
        lwz     r12, s_Z2(r3)
        lwz     r13, s_Z3(r3)
        lwz     r14, s_Z4(r3)

        lwz     r15, s_X0_0(r3)
        lwz     r16, s_X0_1(r3)
        lwz     r17, s_X0_2(r3)
        lwz     r18, s_X0_3(r3)
        lwz     r19, s_X0_4(r3)
        lwz     r20, s_X0_5(r3)
        lwz     r21, s_X0_6(r3)
        lwz     r22, s_X0_7(r3)

        lwz     r23, s_X1_0(r3)
        lwz     r24, s_X1_1(r3)
        lwz     r25, s_X1_2(r3)
        lwz     r26, s_X1_3(r3)
        lwz     r27, s_X1_4(r3)
        lwz     r28, s_X1_5(r3)
        lwz     r29, s_X1_6(r3)
        lwz     r30, s_X1_7(r3)

        lwz     r31, s_i(r3)
        blr

;; save_cipher_state()-- Save cipher state to a context structure.
save_cipher_state:
; Mirror image of load_cipher_state.
; In:  r3 = context; r6-r9 = OldZ0-OldZ3, r10-r14 = Z0-Z4,
;      r15-r22 = X0_0-X0_7, r23-r30 = X1_0-X1_7, r31 = i.
; Out: the corresponding context fields are overwritten.  No registers
;      are modified.
        stw     r6, s_OldZ0(r3)
        stw     r7, s_OldZ1(r3)
        stw     r8, s_OldZ2(r3)
        stw     r9, s_OldZ3(r3)

        stw     r10, s_Z0(r3)
        stw     r11, s_Z1(r3)
        stw     r12, s_Z2(r3)
        stw     r13, s_Z3(r3)
        stw     r14, s_Z4(r3)

        stw     r15, s_X0_0(r3)
        stw     r16, s_X0_1(r3)
        stw     r17, s_X0_2(r3)
        stw     r18, s_X0_3(r3)
        stw     r19, s_X0_4(r3)
        stw     r20, s_X0_5(r3)
        stw     r21, s_X0_6(r3)
        stw     r22, s_X0_7(r3)

        stw     r23, s_X1_0(r3)
        stw     r24, s_X1_1(r3)
        stw     r25, s_X1_2(r3)
        stw     r26, s_X1_3(r3)
        stw     r27, s_X1_4(r3)
        stw     r28, s_X1_5(r3)
        stw     r29, s_X1_6(r3)
        stw     r30, s_X1_7(r3)

        stw     r31, s_i(r3)
        blr

; PhelixSetupKey(ctx, keyptr, keysize, ivsize, macsize)-- Initialize a
; context structure given the key.
;
; In:   r3 = ctx, r4 = keyptr, r5 = keysize, r7 = macsize.  r6 (ivsize)
;       is not read.  keysize is treated as a bit count (it is rounded
;       up to 32-bit words and its low five bits select a mask).
; Out:  s_keysize, s_macsize, s_X_1_bump and s_X0_0..s_X0_7 are written.
; Uses: the red zone at -128(r1)..-97(r1) as an eight-word X[] staging
;       area, the register save area, and 8(r1) for the return address.
; NOTE(review): the key is fetched a whole word at a time, so up to three
; bytes past the end of a key that is not a multiple of 32 bits are read
; (and then masked off).  A keysize above 256 would overrun the staging
; area into the register save area; callers are presumably expected to
; respect that limit -- confirm.
        .globl  _PhelixSetupKey
_PhelixSetupKey:
        mflr    r2
        stw     r2, 8(r1)
        bl      save_regs

        stw     r5, s_keysize(r3)
        stw     r7, s_macsize(r3)

        mr      r8, r5
        srawi   r8, r8, 1               ; key_size / 2
        mr      r9, r7
        andi.   r9, r9, 0x7F
        slwi    r9, r9, 8
        add     r8, r8, r9
        stw     r8, s_X_1_bump(r3)      ; keysize/2 + 256 * (macsize & 0x7F)

        mr      r8, r5
        addi    r8, r8, 31
        srawi   r8, r8, 5               ; r8 is number of words in the key to copy
        addi    r9, r1, -128            ; r9 -> X[]

        ; An empty key must not enter the copy loop: with CTR = 0 the bdnz
        ; below would wrap around and run roughly 2^32 times.  Skipping to
        ; 1: leaves r9 at the start of X[], so all eight words get zeroed.
        cmpwi   r8, 0
        ble     1f
        mtctr   r8

0:      lwbrx   r2, 0, r4
        addi    r4, r4, 4
        stw     r2, 0(r9)
        addi    r9, r9, 4
        bdnz    0b

;; Zero out the top bits if the key is not a multiple of 32.
        mr      r8, r5
        andi.   r8, r8, 0x1f
        beq     1f

        li      r10, 1
        rotlw   r10, r10, r8
        subi    r10, r10, 1             ; r10 = (1 << (keysize & 31)) - 1
        and     r2, r2, r10             ; r2 still holds the last word copied
        stw     r2, -4(r9)

1:      ;; Zero out the rest of X out to eight words.
        sub     r8, r1, r9
        subi    r8, r8, 96
        srawi   r8, r8, 2               ; # of words to zero
        addi    r8, r8, 1               ; +1 because the bdz below pre-decrements
        mtctr   r8
        li      r10, 0
        bdz     3f

2:      stw     r10, 0(r9)
        addi    r9, r9, 4
        bdnz    2b

;; Move the X[] array into some registers, the upper X0/1.
3:      lwz     r19, -128(r1)
        lwz     r20, -124(r1)
        lwz     r21, -120(r1)
        lwz     r22, -116(r1)

        lwz     r27, -112(r1)
        lwz     r28, -108(r1)
        lwz     r29, -104(r1)
        lwz     r30, -100(r1)

        ; Four passes; each pass mixes the low half into the high half and
        ; then the high half into the low half.  key_init runs a single
        ; round on r10-r13 and leaves r9 and r19-r22/r27-r30 untouched.
        li      r9, 4

4:      mr      r10, r19
        mr      r11, r20
        mr      r12, r21
        mr      r13, r22
        bl      key_init
        xor     r27, r27, r10
        xor     r28, r28, r11
        xor     r29, r29, r12
        xor     r30, r30, r13

        mr      r10, r27
        mr      r11, r28
        mr      r12, r29
        mr      r13, r30
        bl      key_init
        xor     r19, r19, r10
        xor     r20, r20, r11
        xor     r21, r21, r12
        xor     r22, r22, r13

        li      r0, -1
        add.    r9, r9, r0
        bne     4b

; Store the X0 part of the key.
        stw     r19, s_X0_0(r3)
        stw     r20, s_X0_1(r3)
        stw     r21, s_X0_2(r3)
        stw     r22, s_X0_3(r3)
        stw     r27, s_X0_4(r3)
        stw     r28, s_X0_5(r3)
        stw     r29, s_X0_6(r3)
        stw     r30, s_X0_7(r3)
        b       restore_regs

; PhelixSetupNonce()-- Initialize the cipher state with a new nonce.
; r3 = pointer to context, r4 = pointer to 4-word nonce.  The idea here
; is to take the nonce and X0, and initialize the Z, oldZ and X1 parts
; of cipher state, along with msgLen, aadLen and aadXor.
;
; With N_n the n-th nonce word (loaded byte-reversed), for n = 0..3:
;       X1_n     = X0_(n+4) + N_n
;       X1_(n+4) = X0_n - N_n + n
;       Z_n      = X0_(n+3) xor N_n
; and Z4 = X0_7.  X_1_bump is then added to X1_1 and X1_5.
; Uses the red zone at -200(r1)..-169(r1) for eight zero words.
        .globl  _PhelixSetupNonce
_PhelixSetupNonce:
        mflr    r2
        stw     r2, 8(r1)
        bl      save_regs
        bl      load_cipher_state

        lwbrx   r2, 0, r4               ; N_0
        add     r23, r19, r2
        sub     r27, r15, r2
        addi    r27, r27, 0
        xor     r10, r18, r2
        addi    r4, r4, 4

        lwbrx   r2, 0, r4               ; N_1
        add     r24, r20, r2
        sub     r28, r16, r2
        addi    r28, r28, 1
        xor     r11, r19, r2
        addi    r4, r4, 4

        lwbrx   r2, 0, r4               ; N_2
        add     r25, r21, r2
        sub     r29, r17, r2
        addi    r29, r29, 2
        xor     r12, r20, r2
        addi    r4, r4, 4

        lwbrx   r2, 0, r4               ; N_3
        add     r26, r22, r2
        sub     r30, r18, r2
        addi    r30, r30, 3
        xor     r13, r21, r2

        lwz     r2, s_X_1_bump(r3)
        add     r24, r24, r2
        add     r28, r28, r2

        mr      r14, r22                ; Z4 = X0_7

        li      r2, 0
        stw     r2, s_aadlen_l(r3)
        stw     r2, s_aadlen_h(r3)
        stw     r2, s_msglen(r3)

;; Hash phase of the nonce setup.  Encrypt some zeroes in the red zone.
        la      r4, -200(r1)
        la      r5, -200(r1)            ; in place; the output is discarded
        li      r2, 0
        stw     r2, 0(r4)
        stw     r2, 4(r4)
        stw     r2, 8(r4)
        stw     r2, 12(r4)
        stw     r2, 16(r4)
        stw     r2, 20(r4)
        stw     r2, 24(r4)
        stw     r2, 28(r4)

        li      r31, 0                  ; i restarts at zero for a new nonce
        li      r2, 8
        mtctr   r2
        bl      encrypt_0               ; eight rounds, leaves i = 8

        lis     r2, 0xAADA
        ori     r2, r2, 0xADAA          ; r2 = 0xAADAADAA
        stw     r2, s_aadxor(r3)
        xor     r11, r11, r2

        bl      save_cipher_state
        b       restore_regs

; crypt-- Combined encryption/decryption loop.  r4 points to source
; bytes, r5 points to destination bytes, r6 is length and r7 points to
; the base of the correct jump table.
;
; Only the final call for a message may have a length that is not a
; multiple of four; a later call deliberately faults (see below).
; Red-zone use: -4(r1) jump table base, -100(r1) scratch word for a
; partial final word, -104(r1) saved destination pointer.
; NOTE(review): for decryption the pad bytes of a partial final word
; still enter the cipher state unmasked (the decrypt rounds feed their
; full output word back into the state), so a MAC computed after
; decrypting a message whose length is not a multiple of four may not
; match the one computed while encrypting it -- confirm against a
; reference implementation.
crypt:
        mflr    r2
        stw     r2, 8(r1)
        stw     r7, -4(r1)

        lwz     r2, s_msglen(r3)
        andi.   r0, r2, 0x03
        beq+    0f

; Dump core on the second sub-word call.
        li      r2, 0
        lwz     r2, 0(r2)               ; deliberate load from address zero

0:      add     r2, r2, r6
        stw     r2, s_msglen(r3)

        srawi   r6, r6, 2
        addi    r6, r6, 1               ; +1 because the bdz below pre-decrements
        mtctr   r6

        bl      save_regs
        bl      load_cipher_state

        lwz     r2, s_aadxor(r3)
        xor     r11, r11, r2
        li      r2, 0
        stw     r2, s_aadxor(r3)        ; applied once; later calls xor with zero

        bdz-    1f                      ; Branch if zero full words to encrypt

; Jump to the right element of the jump table where the real
; jump awaits.  PPC instructions are always four bytes long.
        lwz     r0, -4(r1)
        andi.   r2, r31, 0x07
        slwi    r2, r2, 2
        add     r2, r2, r0
        mtlr    r2
        blrl                            ; Do it!

;; Deal with a partial word
1:      lwz     r2, s_msglen(r3)
        andi.   r2, r2, 0x03            ; r2 = number of trailing bytes (0..3)
        beq     2f

        ; Build a zero-padded copy of the 1-3 trailing bytes.  Only the
        ; bytes that belong to the message are read, so nothing past the
        ; end of the source buffer is touched or fed into the cipher.
        li      r0, 0
        stw     r0, -100(r1)
        lbz     r0, 0(r4)
        stb     r0, -100(r1)
        cmpwi   r2, 1
        beq     5f
        lbz     r0, 1(r4)
        stb     r0, -99(r1)
        cmpwi   r2, 2
        beq     5f
        lbz     r0, 2(r4)
        stb     r0, -98(r1)

5:      stw     r5, -104(r1)            ; keep the real destination pointer
        la      r4, -100(r1)
        la      r5, -100(r1)            ; process the scratch word in place

        li      r2, 1
        mtctr   r2

        lwz     r0, -4(r1)
        andi.   r2, r31, 0x07
        slwi    r2, r2, 2
        add     r2, r2, r0
        mtlr    r2
        blrl                            ; Encrypt one word

        ; Copy out only as many bytes as the message actually has.
        lwz     r5, -104(r1)
        lwz     r2, s_msglen(r3)
        andi.   r2, r2, 0x03

        lbz     r0, -100(r1)
        stb     r0, 0(r5)
        cmpwi   r2, 1
        beq     2f

        lbz     r0, -99(r1)
        stb     r0, 1(r5)
        cmpwi   r2, 2
        beq     2f

        lbz     r0, -98(r1)
        stb     r0, 2(r5)

; All done.  Clean up.
2:      bl      save_cipher_state
        b       restore_regs

; PhelixEncryptBytes(ctx, pt, ct, len)-- Encrypt a block of bytes.
; ctx=r3, pt=r4, ct=r5, len=r6.
; Selects the encryption jump table and tail-branches into crypt.
        .globl  _PhelixEncryptBytes
_PhelixEncryptBytes:
        lis     r7, ha16(encrypt_jump_table)
        addi    r7, r7, lo16(encrypt_jump_table)
        b       crypt

; PhelixDecryptBytes(ctx, pt, ct, len)-- Decrypt a block of bytes.
; On entry ctx=r3, pt=r4 (destination), ct=r5 (source), len=r6.
; r4 and r5 are swapped so that crypt sees ct=r4 (source) and
; pt=r5 (destination); then the decryption jump table is selected.
        .globl  _PhelixDecryptBytes
_PhelixDecryptBytes:
        mr      r7, r4
        mr      r4, r5
        mr      r5, r7
        lis     r7, ha16(decrypt_jump_table)
        addi    r7, r7, lo16(decrypt_jump_table)
        b       crypt

; PhelixProcessAAD()-- Process Additional Authenticated Data (AAD).
; This amounts to encrypting the data and throwing away the ciphertext.
; Only the cipher state is updated.  The register assignments on entry are:
; r3 = Pointer to context, r4 = Pointer to AAD data, r5 = Length of data.
;
; Because of the structure of the encryption loop, this subroutine works
; by encrypting 128-byte blocks (32 words) of data at a time and storing
; (and abandoning) it in the red zone, from -216(r1) to -88(r1).
; The register save area is at -88(r1).
;
; The scratch block sits directly below the register save area so that
; it stays inside the 224-byte red zone.  -4(r1) holds the remaining
; byte count while r5 is borrowed as the output pointer.  As with crypt,
; only the last call may have a length that is not a multiple of four.
        .globl  _PhelixProcessAAD
_PhelixProcessAAD:
        mflr    r2
        stw     r2, 8(r1)

        lwz     r6, s_aadlen_l(r3)
        lwz     r7, s_aadlen_h(r3)
        andi.   r2, r6, 0x03
        beq+    0f

; Dump core on the second sub-word call.
        li      r2, 0
        lwz     r2, 0(r2)               ; deliberate load from address zero

0:      addc    r6, r6, r5              ; 64-bit running AAD length
        addze   r7, r7
        stw     r6, s_aadlen_l(r3)
        stw     r7, s_aadlen_h(r3)

        bl      save_regs
        bl      load_cipher_state

; Top of AAD loop
1:      cmplwi  r5, 4
        blt     0f                      ; Handle a final partial word

        cmplwi  r5, 128
        blt     2f

        addi    r5, r5, -128
        li      r2, 32
        b       3f

2:      srawi   r2, r5, 2               ; r2 = words to process
        andi.   r5, r5, 0x03            ; r5 = bytes left

3:      mtctr   r2
        stw     r5, -4(r1)              ; Save it, no registers available
        la      r5, -216(r1)            ; throw-away output block

        andi.   r2, r31, 0x07
        slwi    r2, r2, 2
        addis   r2, r2, ha16(encrypt_jump_table)
        addi    r2, r2, lo16(encrypt_jump_table)
        mtlr    r2
        blrl                            ; Encrypt

        lwz     r5, -4(r1)
        b       1b

        ; Fewer than four bytes remain: build a zero-padded word.
0:      cmplwi  r5, 0
        beq     2f

        li      r2, 0
        stw     r2, -216(r1)

        lbz     r2, 0(r4)
        stb     r2, -216(r1)
        cmplwi  r5, 1
        beq     1f

        lbz     r2, 1(r4)
        stb     r2, -215(r1)
        cmplwi  r5, 2
        beq     1f

        lbz     r2, 2(r4)
        stb     r2, -214(r1)

; Process the partial word
1:      la      r4, -216(r1)
        la      r5, -216(r1)
        li      r2, 1
        mtctr   r2

        andi.   r2, r31, 0x07
        slwi    r2, r2, 2
        addis   r2, r2, ha16(encrypt_jump_table)
        addi    r2, r2, lo16(encrypt_jump_table)
        mtlr    r2
        blrl                            ; Encrypt

; Clean up
2:      bl      save_cipher_state
        b       restore_regs

; PhelixFinalize()-- Calculate and store the MAC.  Registers are:
; r3 = Pointer to context, r4 = Pointer to the MAC buffer.
; The calculation of the MAC involves perturbing the cipher state,
; then repeatedly encrypting a constant that depends on the cipher state.
;
; Twelve words, each holding (msglen & 3), are encrypted in place in the
; red zone at -224(r1)..-177(r1); the last four output words, starting at
; -192(r1), supply the MAC.  ceil(macsize / 8) bytes are copied out, so
; macsize is treated as a bit count.  The cipher state is not written
; back to the context.  -4(r1) holds the MAC buffer pointer.
        .globl  _PhelixFinalize
_PhelixFinalize:
        mflr    r2
        stw     r2, 8(r1)
        stw     r4, -4(r1)

        bl      save_regs
        bl      load_cipher_state

        xoris   r10, r10, 0x912D
        xori    r10, r10, 0x94F1        ; Z0 ^= 0x912D94F1

        lwz     r2, s_aadxor(r3)
        xor     r11, r11, r2

        lwz     r2, s_aadlen_h(r3)
        xor     r12, r12, r2

        lwz     r2, s_aadlen_l(r3)
        xor     r14, r14, r2

        la      r4, -224(r1)
        la      r5, -224(r1)

        lwz     r2, s_msglen(r3)
        andi.   r2, r2, 0x03
        ; The rounds fetch their input with lwbrx, so the value has to sit
        ; in the first byte in memory to be read back as (msglen & 3).
        slwi    r2, r2, 24

        stw     r2, 0(r4)
        stw     r2, 4(r4)
        stw     r2, 8(r4)
        stw     r2, 12(r4)
        stw     r2, 16(r4)
        stw     r2, 20(r4)
        stw     r2, 24(r4)
        stw     r2, 28(r4)
        stw     r2, 32(r4)
        stw     r2, 36(r4)
        stw     r2, 40(r4)
        stw     r2, 44(r4)

        li      r2, 12
        mtctr   r2

        andi.   r2, r31, 0x07
        slwi    r2, r2, 2
        addis   r2, r2, ha16(encrypt_jump_table)
        addi    r2, r2, lo16(encrypt_jump_table)
        mtlr    r2
        blrl                            ; Encrypt

; Unpack the MAC bytewise, taking endianness into account.
        la      r4, -192(r1)            ; ninth output word onwards
        lwz     r5, -4(r1)

        lwz     r2, s_macsize(r3)
        addi    r2, r2, 7
        srawi   r2, r2, 3               ; bytes to emit
        addi    r2, r2, 1               ; +1 because every bdz pre-decrements
        mtctr   r2
        bdz     restore_regs

0:      lwbrx   r2, 0, r4
        stb     r2, 0(r5)               ; least significant byte first
        bdz     restore_regs

        srawi   r2, r2, 8
        stb     r2, 1(r5)
        bdz     restore_regs

        srawi   r2, r2, 8
        stb     r2, 2(r5)
        bdz     restore_regs

        srawi   r2, r2, 8
        stb     r2, 3(r5)
        bdz     restore_regs

        addi    r4, r4, 4
        addi    r5, r5, 4
        b       0b