csecp256k1

Haskell FFI bindings to bitcoin-core/secp256k1 (docs.ppad.tech/csecp256k1).
git clone git://git.ppad.tech/csecp256k1.git
Log | Files | Refs | README | LICENSE

field_10x26_arm.s (28449B)


      1 @ vim: set tabstop=8 softtabstop=8 shiftwidth=8 noexpandtab syntax=armasm:
      2 /***********************************************************************
      3  * Copyright (c) 2014 Wladimir J. van der Laan                         *
      4  * Distributed under the MIT software license, see the accompanying    *
      5  * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
      6  ***********************************************************************/
      7 /*
      8 ARM implementation of field_10x26 inner loops.
      9 
     10 Note:
     11 
     12 - To avoid unnecessary loads and make use of available registers, the
     13   'passes' are interleaved in pairs: each odd pass accumulates c' and d',
     14   which are then added to c and d respectively in the following even pass.
     15 
     16 */
     17 
     18 	.syntax unified				@ unified ARM/Thumb assembler syntax
     19 	@ eabi attributes - see readelf -A
     20 	.eabi_attribute 24, 1 @ Tag_ABI_align_needed = 8-byte
     21 	.eabi_attribute 25, 1 @ Tag_ABI_align_preserved = 8-byte, except leaf SP
     22 	.text
     23 
     24 	@ Field constants (R0/R1 are the multipliers used when folding d into c)
     25 	.set field_R0, 0x3d10			@ applied first: c += u * R0
     26 	.set field_R1, 0x400			@ applied after c >>= 26: c += u * R1
     27 	.set field_not_M, 0xfc000000	@ ~M = ~0x3ffffff; 'bic x, y, field_not_M' keeps the low 26 bits of y
     28 
     29 	.align	2
     30 	.global haskellsecp256k1_v0_1_0_fe_mul_inner
     31 	.type	haskellsecp256k1_v0_1_0_fe_mul_inner, %function
     32 	.hidden haskellsecp256k1_v0_1_0_fe_mul_inner
     33 	@ Computes the ten words r[0..9] from a[0..9] and b[0..9]. Arguments:
     34 	@  r0  r      Result pointer. Restrict: can overlap with a, not with b
     35 	@  r1  a      First input pointer (only loaded from)
     36 	@  r2  b      Second input pointer (only loaded from)
     37 	@ Stack (total 4+10*4 = 44; 48 bytes are reserved below)
     38 	@  sp + #0        saved 'r' pointer
     39 	@  sp + #4 + 4*X  t0,t1,t2,t3,t4,t5,t6,t7,u8,t9 (slot X holds the X-th name)
     40 haskellsecp256k1_v0_1_0_fe_mul_inner:
     41 	stmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r14}	@ save r4-r11 and lr
     42 	sub	sp, sp, #48			@ frame=44 + alignment
     43 	str     r0, [sp, #0]			@ save result address, we need it only at the end
     44 
     45 	/******************************************
     46 	 * Main computation code.
     47 	 ******************************************
     48 
     49 	Allocation:
     50 	    r0,r14,r7,r8   scratch
     51 	    r1       a (pointer)
     52 	    r2       b (pointer)
     53 	    r3:r4    c   (64-bit accumulator, low word first)
     54 	    r5:r6    d   (64-bit accumulator, low word first)
     55 	    r11:r12  c'
     56 	    r9:r10   d'
     57 
     58 	Note: do not write to r[] here, it may overlap with a[] (t0..t9 are staged on the stack instead)
     59 	*/
     60 
     61 	/* A - interleaved with B: d gets the a[i]*b[j] terms with i+j=9, d' those with i+j=10 */
     62 	ldr	r7, [r1, #0*4]			@ a[0]
     63 	ldr	r8, [r2, #9*4]			@ b[9]
     64 	ldr	r0, [r1, #1*4]			@ a[1]
     65 	umull	r5, r6, r7, r8			@ d = a[0] * b[9]
     66 	ldr	r14, [r2, #8*4]			@ b[8]
     67 	umull	r9, r10, r0, r8			@ d' = a[1] * b[9]
     68 	ldr	r7, [r1, #2*4]			@ a[2]
     69 	umlal	r5, r6, r0, r14			@ d += a[1] * b[8]
     70 	ldr	r8, [r2, #7*4] 			@ b[7]
     71 	umlal	r9, r10, r7, r14		@ d' += a[2] * b[8]
     72 	ldr	r0, [r1, #3*4]   		@ a[3]
     73 	umlal	r5, r6, r7, r8   		@ d += a[2] * b[7]
     74 	ldr	r14, [r2, #6*4]   		@ b[6]
     75 	umlal	r9, r10, r0, r8  		@ d' += a[3] * b[7]
     76 	ldr	r7, [r1, #4*4]   		@ a[4]
     77 	umlal	r5, r6, r0, r14   		@ d += a[3] * b[6]
     78 	ldr	r8, [r2, #5*4]   		@ b[5]
     79 	umlal	r9, r10, r7, r14  		@ d' += a[4] * b[6]
     80 	ldr	r0, [r1, #5*4]   		@ a[5]
     81 	umlal	r5, r6, r7, r8   		@ d += a[4] * b[5]
     82 	ldr	r14, [r2, #4*4]   		@ b[4]
     83 	umlal	r9, r10, r0, r8  		@ d' += a[5] * b[5]
     84 	ldr	r7, [r1, #6*4]   		@ a[6]
     85 	umlal	r5, r6, r0, r14   		@ d += a[5] * b[4]
     86 	ldr	r8, [r2, #3*4]   		@ b[3]
     87 	umlal	r9, r10, r7, r14  		@ d' += a[6] * b[4]
     88 	ldr	r0, [r1, #7*4]   		@ a[7]
     89 	umlal	r5, r6, r7, r8   		@ d += a[6] * b[3]
     90 	ldr	r14, [r2, #2*4]   		@ b[2]
     91 	umlal	r9, r10, r0, r8  		@ d' += a[7] * b[3]
     92 	ldr	r7, [r1, #8*4]   		@ a[8]
     93 	umlal	r5, r6, r0, r14   		@ d += a[7] * b[2]
     94 	ldr	r8, [r2, #1*4]   		@ b[1]
     95 	umlal	r9, r10, r7, r14  		@ d' += a[8] * b[2]
     96 	ldr	r0, [r1, #9*4]   		@ a[9]
     97 	umlal	r5, r6, r7, r8   		@ d += a[8] * b[1]
     98 	ldr	r14, [r2, #0*4]   		@ b[0]
     99 	umlal	r9, r10, r0, r8  		@ d' += a[9] * b[1]
    100 	ldr	r7, [r1, #0*4]   		@ a[0]
    101 	umlal	r5, r6, r0, r14   		@ d += a[9] * b[0]
    102 	@ r7 (a[0]) and r14 (b[0]) are consumed by the umull at the start of B
    103 
    104 	bic	r0, r5, field_not_M 		@ t9 = d & M
    105 	str     r0, [sp, #4 + 4*9]		@ stash t9 in its stack slot
    106 	mov	r5, r5, lsr #26     		@ d >>= 26 (64-bit shift across r5:r6)
    107 	orr	r5, r5, r6, asl #6		@ low 26 bits of d.hi become the top of d.lo
    108 	mov     r6, r6, lsr #26		@ d.hi >>= 26
    109 
    110 	/* B */
    111 	umull	r3, r4, r7, r14   		@ c = a[0] * b[0]
    112 	adds	r5, r5, r9       		@ d += d'
    113 	adc	r6, r6, r10			@ high words plus carry from adds
    114 
    115 	bic	r0, r5, field_not_M 		@ u0 = d & M
    116 	mov	r5, r5, lsr #26     		@ d >>= 26
    117 	orr	r5, r5, r6, asl #6
    118 	mov     r6, r6, lsr #26
    119 	movw    r14, field_R0			@ c += u0 * R0
    120 	umlal   r3, r4, r0, r14
    121 
    122 	bic	r14, r3, field_not_M 		@ t0 = c & M
    123 	str	r14, [sp, #4 + 0*4]
    124 	mov	r3, r3, lsr #26     		@ c >>= 26
    125 	orr	r3, r3, r4, asl #6
    126 	mov     r4, r4, lsr #26
    127 	mov     r14, field_R1			@ c += u0 * R1
    128 	umlal   r3, r4, r0, r14
    129 
    130 	/* C - interleaved with D: c gets i+j=1, c' gets i+j=2, d gets i+j=11, d' gets i+j=12 */
    131 	ldr	r7, [r1, #0*4]   		@ a[0]
    132 	ldr	r8, [r2, #2*4]   		@ b[2]
    133 	ldr	r14, [r2, #1*4]   		@ b[1]
    134 	umull	r11, r12, r7, r8   		@ c' = a[0] * b[2]
    135 	ldr	r0, [r1, #1*4]   		@ a[1]
    136 	umlal   r3, r4, r7, r14   		@ c += a[0] * b[1]
    137 	ldr	r8, [r2, #0*4]   		@ b[0]
    138 	umlal   r11, r12, r0, r14   		@ c' += a[1] * b[1]
    139 	ldr	r7, [r1, #2*4]   		@ a[2]
    140 	umlal   r3, r4, r0, r8   		@ c += a[1] * b[0]
    141 	ldr	r14, [r2, #9*4]   		@ b[9]
    142 	umlal   r11, r12, r7, r8   		@ c' += a[2] * b[0]
    143 	ldr	r0, [r1, #3*4]   		@ a[3]
    144 	umlal	r5, r6, r7, r14   		@ d += a[2] * b[9]
    145 	ldr	r8, [r2, #8*4]   		@ b[8]
    146 	umull	r9, r10, r0, r14   		@ d' = a[3] * b[9]
    147 	ldr	r7, [r1, #4*4]   		@ a[4]
    148 	umlal	r5, r6, r0, r8   		@ d += a[3] * b[8]
    149 	ldr	r14, [r2, #7*4]   		@ b[7]
    150 	umlal	r9, r10, r7, r8   		@ d' += a[4] * b[8]
    151 	ldr	r0, [r1, #5*4]   		@ a[5]
    152 	umlal	r5, r6, r7, r14   		@ d += a[4] * b[7]
    153 	ldr	r8, [r2, #6*4]   		@ b[6]
    154 	umlal	r9, r10, r0, r14   		@ d' += a[5] * b[7]
    155 	ldr	r7, [r1, #6*4]   		@ a[6]
    156 	umlal	r5, r6, r0, r8   		@ d += a[5] * b[6]
    157 	ldr	r14, [r2, #5*4]   		@ b[5]
    158 	umlal	r9, r10, r7, r8   		@ d' += a[6] * b[6]
    159 	ldr	r0, [r1, #7*4]   		@ a[7]
    160 	umlal	r5, r6, r7, r14   		@ d += a[6] * b[5]
    161 	ldr	r8, [r2, #4*4]   		@ b[4]
    162 	umlal	r9, r10, r0, r14   		@ d' += a[7] * b[5]
    163 	ldr	r7, [r1, #8*4]   		@ a[8]
    164 	umlal	r5, r6, r0, r8   		@ d += a[7] * b[4]
    165 	ldr	r14, [r2, #3*4]   		@ b[3]
    166 	umlal	r9, r10, r7, r8   		@ d' += a[8] * b[4]
    167 	ldr	r0, [r1, #9*4]   		@ a[9]
    168 	umlal	r5, r6, r7, r14   		@ d += a[8] * b[3]
    169 	ldr	r8, [r2, #2*4]   		@ b[2]
    170 	umlal	r9, r10, r0, r14   		@ d' += a[9] * b[3]
    171 	umlal	r5, r6, r0, r8   		@ d += a[9] * b[2]
    172 
    173 	bic	r0, r5, field_not_M 		@ u1 = d & M
    174 	mov	r5, r5, lsr #26     		@ d >>= 26
    175 	orr	r5, r5, r6, asl #6
    176 	mov     r6, r6, lsr #26
    177 	movw    r14, field_R0			@ c += u1 * R0
    178 	umlal   r3, r4, r0, r14
    179 
    180 	bic	r14, r3, field_not_M 		@ t1 = c & M
    181 	str	r14, [sp, #4 + 1*4]
    182 	mov	r3, r3, lsr #26     		@ c >>= 26
    183 	orr	r3, r3, r4, asl #6
    184 	mov     r4, r4, lsr #26
    185 	mov     r14, field_R1			@ c += u1 * R1
    186 	umlal   r3, r4, r0, r14
    187 
    188 	/* D */
    189 	adds	r3, r3, r11			@ c += c'
    190 	adc	r4, r4, r12
    191 	adds	r5, r5, r9			@ d += d'
    192 	adc	r6, r6, r10
    193 
    194 	bic	r0, r5, field_not_M 		@ u2 = d & M
    195 	mov	r5, r5, lsr #26     		@ d >>= 26
    196 	orr	r5, r5, r6, asl #6
    197 	mov     r6, r6, lsr #26
    198 	movw    r14, field_R0			@ c += u2 * R0
    199 	umlal   r3, r4, r0, r14
    200 
    201 	bic	r14, r3, field_not_M 		@ t2 = c & M
    202 	str	r14, [sp, #4 + 2*4]
    203 	mov	r3, r3, lsr #26     		@ c >>= 26
    204 	orr	r3, r3, r4, asl #6
    205 	mov     r4, r4, lsr #26
    206 	mov     r14, field_R1			@ c += u2 * R1
    207 	umlal   r3, r4, r0, r14
    208 
    209 	/* E - interleaved with F: c gets i+j=3, c' gets i+j=4, d gets i+j=13, d' gets i+j=14 */
    210 	ldr	r7, [r1, #0*4]   		@ a[0]
    211 	ldr	r8, [r2, #4*4]   		@ b[4]
    212 	umull	r11, r12, r7, r8   		@ c' = a[0] * b[4]
    213 	ldr	r8, [r2, #3*4]   		@ b[3]
    214 	umlal   r3, r4, r7, r8   		@ c += a[0] * b[3]
    215 	ldr	r7, [r1, #1*4]   		@ a[1]
    216 	umlal   r11, r12, r7, r8   		@ c' += a[1] * b[3]
    217 	ldr	r8, [r2, #2*4]   		@ b[2]
    218 	umlal   r3, r4, r7, r8   		@ c += a[1] * b[2]
    219 	ldr	r7, [r1, #2*4]   		@ a[2]
    220 	umlal   r11, r12, r7, r8   		@ c' += a[2] * b[2]
    221 	ldr	r8, [r2, #1*4]   		@ b[1]
    222 	umlal   r3, r4, r7, r8   		@ c += a[2] * b[1]
    223 	ldr	r7, [r1, #3*4]   		@ a[3]
    224 	umlal   r11, r12, r7, r8   		@ c' += a[3] * b[1]
    225 	ldr	r8, [r2, #0*4]   		@ b[0]
    226 	umlal   r3, r4, r7, r8   		@ c += a[3] * b[0]
    227 	ldr	r7, [r1, #4*4]   		@ a[4]
    228 	umlal   r11, r12, r7, r8   		@ c' += a[4] * b[0]
    229 	ldr	r8, [r2, #9*4]   		@ b[9]
    230 	umlal	r5, r6, r7, r8   		@ d += a[4] * b[9]
    231 	ldr	r7, [r1, #5*4]   		@ a[5]
    232 	umull	r9, r10, r7, r8   		@ d' = a[5] * b[9]
    233 	ldr	r8, [r2, #8*4]   		@ b[8]
    234 	umlal	r5, r6, r7, r8   		@ d += a[5] * b[8]
    235 	ldr	r7, [r1, #6*4]   		@ a[6]
    236 	umlal	r9, r10, r7, r8   		@ d' += a[6] * b[8]
    237 	ldr	r8, [r2, #7*4]   		@ b[7]
    238 	umlal	r5, r6, r7, r8   		@ d += a[6] * b[7]
    239 	ldr	r7, [r1, #7*4]   		@ a[7]
    240 	umlal	r9, r10, r7, r8   		@ d' += a[7] * b[7]
    241 	ldr	r8, [r2, #6*4]   		@ b[6]
    242 	umlal	r5, r6, r7, r8   		@ d += a[7] * b[6]
    243 	ldr	r7, [r1, #8*4]   		@ a[8]
    244 	umlal	r9, r10, r7, r8   		@ d' += a[8] * b[6]
    245 	ldr	r8, [r2, #5*4]   		@ b[5]
    246 	umlal	r5, r6, r7, r8   		@ d += a[8] * b[5]
    247 	ldr	r7, [r1, #9*4]   		@ a[9]
    248 	umlal	r9, r10, r7, r8   		@ d' += a[9] * b[5]
    249 	ldr	r8, [r2, #4*4]   		@ b[4]
    250 	umlal	r5, r6, r7, r8   		@ d += a[9] * b[4]
    251 
    252 	bic	r0, r5, field_not_M 		@ u3 = d & M
    253 	mov	r5, r5, lsr #26     		@ d >>= 26
    254 	orr	r5, r5, r6, asl #6
    255 	mov     r6, r6, lsr #26
    256 	movw    r14, field_R0			@ c += u3 * R0
    257 	umlal   r3, r4, r0, r14
    258 
    259 	bic	r14, r3, field_not_M 		@ t3 = c & M
    260 	str	r14, [sp, #4 + 3*4]
    261 	mov	r3, r3, lsr #26     		@ c >>= 26
    262 	orr	r3, r3, r4, asl #6
    263 	mov     r4, r4, lsr #26
    264 	mov     r14, field_R1			@ c += u3 * R1
    265 	umlal   r3, r4, r0, r14
    266 
    267 	/* F */
    268 	adds	r3, r3, r11			@ c += c'
    269 	adc	r4, r4, r12
    270 	adds	r5, r5, r9			@ d += d'
    271 	adc	r6, r6, r10
    272 
    273 	bic	r0, r5, field_not_M 		@ u4 = d & M
    274 	mov	r5, r5, lsr #26     		@ d >>= 26
    275 	orr	r5, r5, r6, asl #6
    276 	mov     r6, r6, lsr #26
    277 	movw    r14, field_R0			@ c += u4 * R0
    278 	umlal   r3, r4, r0, r14
    279 
    280 	bic	r14, r3, field_not_M 		@ t4 = c & M
    281 	str	r14, [sp, #4 + 4*4]
    282 	mov	r3, r3, lsr #26     		@ c >>= 26
    283 	orr	r3, r3, r4, asl #6
    284 	mov     r4, r4, lsr #26
    285 	mov     r14, field_R1			@ c += u4 * R1
    286 	umlal   r3, r4, r0, r14
    287 
    288 	/* G - interleaved with H: c gets i+j=5, c' gets i+j=6, d gets i+j=15, d' gets i+j=16 */
    289 	ldr	r7, [r1, #0*4]   		@ a[0]
    290 	ldr	r8, [r2, #6*4]   		@ b[6]
    291 	ldr	r14, [r2, #5*4]   		@ b[5]
    292 	umull	r11, r12, r7, r8   		@ c' = a[0] * b[6]
    293 	ldr	r0, [r1, #1*4]   		@ a[1]
    294 	umlal   r3, r4, r7, r14   		@ c += a[0] * b[5]
    295 	ldr	r8, [r2, #4*4]   		@ b[4]
    296 	umlal   r11, r12, r0, r14   		@ c' += a[1] * b[5]
    297 	ldr	r7, [r1, #2*4]   		@ a[2]
    298 	umlal   r3, r4, r0, r8   		@ c += a[1] * b[4]
    299 	ldr	r14, [r2, #3*4]   		@ b[3]
    300 	umlal   r11, r12, r7, r8   		@ c' += a[2] * b[4]
    301 	ldr	r0, [r1, #3*4]   		@ a[3]
    302 	umlal   r3, r4, r7, r14   		@ c += a[2] * b[3]
    303 	ldr	r8, [r2, #2*4]   		@ b[2]
    304 	umlal   r11, r12, r0, r14   		@ c' += a[3] * b[3]
    305 	ldr	r7, [r1, #4*4]   		@ a[4]
    306 	umlal   r3, r4, r0, r8   		@ c += a[3] * b[2]
    307 	ldr	r14, [r2, #1*4]   		@ b[1]
    308 	umlal   r11, r12, r7, r8   		@ c' += a[4] * b[2]
    309 	ldr	r0, [r1, #5*4]   		@ a[5]
    310 	umlal   r3, r4, r7, r14   		@ c += a[4] * b[1]
    311 	ldr	r8, [r2, #0*4]   		@ b[0]
    312 	umlal   r11, r12, r0, r14   		@ c' += a[5] * b[1]
    313 	ldr	r7, [r1, #6*4]   		@ a[6]
    314 	umlal   r3, r4, r0, r8   		@ c += a[5] * b[0]
    315 	ldr	r14, [r2, #9*4]   		@ b[9]
    316 	umlal   r11, r12, r7, r8   		@ c' += a[6] * b[0]
    317 	ldr	r0, [r1, #7*4]   		@ a[7]
    318 	umlal	r5, r6, r7, r14   		@ d += a[6] * b[9]
    319 	ldr	r8, [r2, #8*4]   		@ b[8]
    320 	umull	r9, r10, r0, r14   		@ d' = a[7] * b[9]
    321 	ldr	r7, [r1, #8*4]   		@ a[8]
    322 	umlal	r5, r6, r0, r8   		@ d += a[7] * b[8]
    323 	ldr	r14, [r2, #7*4]   		@ b[7]
    324 	umlal	r9, r10, r7, r8   		@ d' += a[8] * b[8]
    325 	ldr	r0, [r1, #9*4]   		@ a[9]
    326 	umlal	r5, r6, r7, r14   		@ d += a[8] * b[7]
    327 	ldr	r8, [r2, #6*4]   		@ b[6]
    328 	umlal	r9, r10, r0, r14   		@ d' += a[9] * b[7]
    329 	umlal	r5, r6, r0, r8   		@ d += a[9] * b[6]
    330 
    331 	bic	r0, r5, field_not_M 		@ u5 = d & M
    332 	mov	r5, r5, lsr #26     		@ d >>= 26
    333 	orr	r5, r5, r6, asl #6
    334 	mov     r6, r6, lsr #26
    335 	movw    r14, field_R0			@ c += u5 * R0
    336 	umlal   r3, r4, r0, r14
    337 
    338 	bic	r14, r3, field_not_M 		@ t5 = c & M
    339 	str	r14, [sp, #4 + 5*4]
    340 	mov	r3, r3, lsr #26     		@ c >>= 26
    341 	orr	r3, r3, r4, asl #6
    342 	mov     r4, r4, lsr #26
    343 	mov     r14, field_R1			@ c += u5 * R1
    344 	umlal   r3, r4, r0, r14
    345 
    346 	/* H */
    347 	adds	r3, r3, r11			@ c += c'
    348 	adc	r4, r4, r12
    349 	adds	r5, r5, r9			@ d += d'
    350 	adc	r6, r6, r10
    351 
    352 	bic	r0, r5, field_not_M 		@ u6 = d & M
    353 	mov	r5, r5, lsr #26     		@ d >>= 26
    354 	orr	r5, r5, r6, asl #6
    355 	mov     r6, r6, lsr #26
    356 	movw    r14, field_R0			@ c += u6 * R0
    357 	umlal   r3, r4, r0, r14
    358 
    359 	bic	r14, r3, field_not_M 		@ t6 = c & M
    360 	str	r14, [sp, #4 + 6*4]
    361 	mov	r3, r3, lsr #26     		@ c >>= 26
    362 	orr	r3, r3, r4, asl #6
    363 	mov     r4, r4, lsr #26
    364 	mov     r14, field_R1			@ c += u6 * R1
    365 	umlal   r3, r4, r0, r14
    366 
    367 	/* I - interleaved with J: c gets i+j=7, c' gets i+j=8, d gets i+j=17, d' = a[9]*b[9] */
    368 	ldr	r8, [r2, #8*4]   		@ b[8]
    369 	ldr	r7, [r1, #0*4]   		@ a[0]
    370 	ldr	r14, [r2, #7*4]   		@ b[7]
    371 	umull   r11, r12, r7, r8   		@ c' = a[0] * b[8]
    372 	ldr	r0, [r1, #1*4]   		@ a[1]
    373 	umlal   r3, r4, r7, r14   		@ c += a[0] * b[7]
    374 	ldr	r8, [r2, #6*4]   		@ b[6]
    375 	umlal   r11, r12, r0, r14   		@ c' += a[1] * b[7]
    376 	ldr	r7, [r1, #2*4]   		@ a[2]
    377 	umlal   r3, r4, r0, r8   		@ c += a[1] * b[6]
    378 	ldr	r14, [r2, #5*4]   		@ b[5]
    379 	umlal   r11, r12, r7, r8   		@ c' += a[2] * b[6]
    380 	ldr	r0, [r1, #3*4]   		@ a[3]
    381 	umlal   r3, r4, r7, r14   		@ c += a[2] * b[5]
    382 	ldr	r8, [r2, #4*4]   		@ b[4]
    383 	umlal   r11, r12, r0, r14   		@ c' += a[3] * b[5]
    384 	ldr	r7, [r1, #4*4]   		@ a[4]
    385 	umlal   r3, r4, r0, r8   		@ c += a[3] * b[4]
    386 	ldr	r14, [r2, #3*4]   		@ b[3]
    387 	umlal   r11, r12, r7, r8   		@ c' += a[4] * b[4]
    388 	ldr	r0, [r1, #5*4]   		@ a[5]
    389 	umlal   r3, r4, r7, r14   		@ c += a[4] * b[3]
    390 	ldr	r8, [r2, #2*4]   		@ b[2]
    391 	umlal   r11, r12, r0, r14   		@ c' += a[5] * b[3]
    392 	ldr	r7, [r1, #6*4]   		@ a[6]
    393 	umlal   r3, r4, r0, r8   		@ c += a[5] * b[2]
    394 	ldr	r14, [r2, #1*4]   		@ b[1]
    395 	umlal   r11, r12, r7, r8   		@ c' += a[6] * b[2]
    396 	ldr	r0, [r1, #7*4]   		@ a[7]
    397 	umlal   r3, r4, r7, r14   		@ c += a[6] * b[1]
    398 	ldr	r8, [r2, #0*4]   		@ b[0]
    399 	umlal   r11, r12, r0, r14   		@ c' += a[7] * b[1]
    400 	ldr	r7, [r1, #8*4]   		@ a[8]
    401 	umlal   r3, r4, r0, r8   		@ c += a[7] * b[0]
    402 	ldr	r14, [r2, #9*4]   		@ b[9]
    403 	umlal   r11, r12, r7, r8   		@ c' += a[8] * b[0]
    404 	ldr	r0, [r1, #9*4]   		@ a[9]
    405 	umlal	r5, r6, r7, r14   		@ d += a[8] * b[9]
    406 	ldr	r8, [r2, #8*4]   		@ b[8]
    407 	umull	r9, r10, r0, r14  		@ d' = a[9] * b[9]
    408 	umlal	r5, r6, r0, r8   		@ d += a[9] * b[8]
    409 
    410 	bic	r0, r5, field_not_M 		@ u7 = d & M
    411 	mov	r5, r5, lsr #26     		@ d >>= 26
    412 	orr	r5, r5, r6, asl #6
    413 	mov     r6, r6, lsr #26
    414 	movw    r14, field_R0			@ c += u7 * R0
    415 	umlal   r3, r4, r0, r14
    416 
    417 	bic	r14, r3, field_not_M 		@ t7 = c & M
    418 	str	r14, [sp, #4 + 7*4]
    419 	mov	r3, r3, lsr #26     		@ c >>= 26
    420 	orr	r3, r3, r4, asl #6
    421 	mov     r4, r4, lsr #26
    422 	mov     r14, field_R1			@ c += u7 * R1
    423 	umlal   r3, r4, r0, r14
    424 
    425 	/* J */
    426 	adds	r3, r3, r11			@ c += c'
    427 	adc	r4, r4, r12
    428 	adds	r5, r5, r9			@ d += d'
    429 	adc	r6, r6, r10
    430 
    431 	bic	r0, r5, field_not_M 		@ u8 = d & M
    432 	str	r0, [sp, #4 + 8*4]		@ keep u8: its R1 product is added during write-back
    433 	mov	r5, r5, lsr #26     		@ d >>= 26
    434 	orr	r5, r5, r6, asl #6
    435 	mov     r6, r6, lsr #26
    436 	movw    r14, field_R0			@ c += u8 * R0
    437 	umlal   r3, r4, r0, r14
    438 
    439 	/******************************************
    440 	 * compute and write back result
    441 	 ******************************************
    442 	Allocation:
    443 	    r0    r
    444 	    r3:r4 c
    445 	    r5:r6 d
    446 	    r7    t0   (after the second ldmia below)
    447 	    r8    t1   (after the second ldmia below)
    448 	    r9    t2   (after the second ldmia below)
    449 	    r11   u8
    450 	    r12   t9
    451 	    r1,r2,r10,r14 scratch
    452 
    453 	Note: do not read from a[] after here, it may overlap with r[]
    454 	*/
    455 	ldr	r0, [sp, #0]			@ r0 = result pointer saved in the prologue
    456 	add	r1, sp, #4 + 3*4		@ r[3..7] = t3..7, r11=u8, r12=t9
    457 	ldmia	r1, {r2,r7,r8,r9,r10,r11,r12}	@ r2,r7,r8,r9,r10 = t3..t7
    458 	add	r1, r0, #3*4			@ r1 = &r[3]
    459 	stmia	r1, {r2,r7,r8,r9,r10}		@ first stores to r[]: all loads from a[] and b[] are done
    460 
    461 	bic	r2, r3, field_not_M 		@ r[8] = c & M
    462 	str	r2, [r0, #8*4]
    463 	mov	r3, r3, lsr #26     		@ c >>= 26
    464 	orr	r3, r3, r4, asl #6
    465 	mov     r4, r4, lsr #26
    466 	mov     r14, field_R1			@ c += u8 * R1
    467 	umlal   r3, r4, r11, r14
    468 	movw    r14, field_R0			@ c += d * R0
    469 	umlal   r3, r4, r5, r14			@ only d.lo (r5) is multiplied
    470 	adds	r3, r3, r12			@ c += t9
    471 	adc	r4, r4, #0			@ carry into c.hi
    472 
    473 	add	r1, sp, #4 + 0*4		@ r7,r8,r9 = t0,t1,t2
    474 	ldmia	r1, {r7,r8,r9}
    475 
    476 	ubfx	r2, r3, #0, #22     		@ r[9] = c & (M >> 4)
    477 	str	r2, [r0, #9*4]
    478 	mov	r3, r3, lsr #22     		@ c >>= 22
    479 	orr	r3, r3, r4, asl #10
    480 	mov     r4, r4, lsr #22
    481 	movw    r14, field_R1 << 4   		@ c += d * (R1 << 4)
    482 	umlal   r3, r4, r5, r14
    483 
    484 	movw    r14, field_R0 >> 4   		@ d = c * (R0 >> 4) + t0 (64x64 multiply+add)
    485 	umull	r5, r6, r3, r14			@ d = c.lo * (R0 >> 4)
    486 	adds	r5, r5, r7	    		@ d.lo += t0
    487 	mla	r6, r14, r4, r6			@ d.hi += c.hi * (R0 >> 4); mla leaves the carry flag untouched
    488 	adc	r6, r6, 0	     		@ d.hi += carry (from the adds above)
    489 
    490 	bic	r2, r5, field_not_M 		@ r[0] = d & M
    491 	str	r2, [r0, #0*4]
    492 
    493 	mov	r5, r5, lsr #26     		@ d >>= 26
    494 	orr	r5, r5, r6, asl #6
    495 	mov     r6, r6, lsr #26
    496 	
    497 	movw    r14, field_R1 >> 4   		@ d += c * (R1 >> 4) + t1 (64x64 multiply+add)
    498 	umull	r1, r2, r3, r14       		@ tmp = c.lo * (R1 >> 4)
    499 	adds	r5, r5, r8	    		@ d.lo += t1
    500 	adc	r6, r6, #0	    		@ d.hi += carry
    501 	adds	r5, r5, r1	    		@ d.lo += tmp.lo
    502 	mla	r2, r14, r4, r2      		@ tmp.hi += c.hi * (R1 >> 4)
    503 	adc	r6, r6, r2	   		@ d.hi += carry + tmp.hi
    504 
    505 	bic	r2, r5, field_not_M 		@ r[1] = d & M
    506 	str	r2, [r0, #1*4]
    507 	mov	r5, r5, lsr #26     		@ d >>= 26 (ignore hi)
    508 	orr	r5, r5, r6, asl #6
    509 
    510 	add	r5, r5, r9	  		@ d += t2
    511 	str	r5, [r0, #2*4]      		@ r[2] = d (stored without masking)
    512 
    513 	add	sp, sp, #48			@ release the frame
    514 	ldmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}	@ restore r4-r11 and return (saved lr -> pc)
    515 	.size	haskellsecp256k1_v0_1_0_fe_mul_inner, .-haskellsecp256k1_v0_1_0_fe_mul_inner
    516 
    517 	.align	2
    518 	.global haskellsecp256k1_v0_1_0_fe_sqr_inner
    519 	.type	haskellsecp256k1_v0_1_0_fe_sqr_inner, %function
    520 	.hidden haskellsecp256k1_v0_1_0_fe_sqr_inner
    521 	@ Computes the ten words r[0..9] from the products a[i]*a[j] of a[0..9]. Arguments:
    522 	@  r0  r	 Result pointer. Can overlap with a
    523 	@  r1  a	 Input pointer (only loaded from)
    524 	@ Stack (total 4+10*4 = 44; 48 bytes are reserved below)
    525 	@  sp + #0        saved 'r' pointer
    526 	@  sp + #4 + 4*X  t0,t1,t2,t3,t4,t5,t6,t7,u8,t9 (slot X holds the X-th name)
    527 haskellsecp256k1_v0_1_0_fe_sqr_inner:
    528 	stmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r14}	@ save r4-r11 and lr
    529 	sub	sp, sp, #48			@ frame=44 + alignment
    530 	str     r0, [sp, #0]			@ save result address, we need it only at the end
    531 	/******************************************
    532 	 * Main computation code.
    533 	 ******************************************
    534 
    535 	Allocation:
    536 	    r0,r14,r2,r7,r8   scratch
    537 	    r1       a (pointer)
    538 	    r3:r4    c   (64-bit accumulator, low word first)
    539 	    r5:r6    d   (64-bit accumulator, low word first)
    540 	    r11:r12  c'
    541 	    r9:r10   d'
    542 
    543 	Note: do not write to r[] here, it may overlap with a[] (t0..t9 are staged on the stack instead)
    544 	*/
    545 	/* A interleaved with B: c = a[0]^2, d gets the i+j=9 terms, d' the i+j=10 terms (cross terms doubled) */
    546 	ldr	r0, [r1, #1*4]			@ a[1] (doubled below)
    547 	ldr	r7, [r1, #0*4]			@ a[0]
    548 	mov	r0, r0, asl #1			@ r0 = a[1]*2
    549 	ldr	r14, [r1, #9*4]			@ a[9]
    550 	umull	r3, r4, r7, r7			@ c = a[0] * a[0]
    551 	ldr	r8, [r1, #8*4]			@ a[8]
    552 	mov	r7, r7, asl #1			@ r7 = a[0]*2
    553 	umull	r5, r6, r7, r14			@ d = a[0]*2 * a[9]
    554 	ldr	r7, [r1, #2*4]			@ a[2] (doubled below)
    555 	umull	r9, r10, r0, r14		@ d' = a[1]*2 * a[9]
    556 	ldr	r14, [r1, #7*4]			@ a[7]
    557 	umlal	r5, r6, r0, r8			@ d += a[1]*2 * a[8]
    558 	mov	r7, r7, asl #1			@ r7 = a[2]*2
    559 	ldr	r0, [r1, #3*4]			@ a[3] (doubled below)
    560 	umlal	r9, r10, r7, r8			@ d' += a[2]*2 * a[8]
    561 	ldr	r8, [r1, #6*4]			@ a[6]
    562 	umlal	r5, r6, r7, r14			@ d += a[2]*2 * a[7]
    563 	mov	r0, r0, asl #1			@ r0 = a[3]*2
    564 	ldr	r7, [r1, #4*4]			@ a[4] (doubled below)
    565 	umlal	r9, r10, r0, r14		@ d' += a[3]*2 * a[7]
    566 	ldr	r14, [r1, #5*4]			@ a[5]
    567 	mov	r7, r7, asl #1			@ r7 = a[4]*2
    568 	umlal	r5, r6, r0, r8			@ d += a[3]*2 * a[6]
    569 	umlal	r9, r10, r7, r8			@ d' += a[4]*2 * a[6]
    570 	umlal	r5, r6, r7, r14			@ d += a[4]*2 * a[5]
    571 	umlal	r9, r10, r14, r14		@ d' += a[5] * a[5]
    572 
    573 	bic	r0, r5, field_not_M 		@ t9 = d & M
    574 	str     r0, [sp, #4 + 9*4]		@ stash t9 in its stack slot
    575 	mov	r5, r5, lsr #26     		@ d >>= 26 (64-bit shift across r5:r6)
    576 	orr	r5, r5, r6, asl #6		@ low 26 bits of d.hi become the top of d.lo
    577 	mov     r6, r6, lsr #26		@ d.hi >>= 26
    578 
    579 	/* B */
    580 	adds	r5, r5, r9			@ d += d'
    581 	adc	r6, r6, r10			@ high words plus carry from adds
    582 
    583 	bic	r0, r5, field_not_M 		@ u0 = d & M
    584 	mov	r5, r5, lsr #26     		@ d >>= 26
    585 	orr	r5, r5, r6, asl #6
    586 	mov     r6, r6, lsr #26
    587 	movw    r14, field_R0			@ c += u0 * R0
    588 	umlal   r3, r4, r0, r14
    589 	bic	r14, r3, field_not_M 		@ t0 = c & M
    590 	str	r14, [sp, #4 + 0*4]
    591 	mov	r3, r3, lsr #26     		@ c >>= 26
    592 	orr	r3, r3, r4, asl #6
    593 	mov     r4, r4, lsr #26
    594 	mov     r14, field_R1			@ c += u0 * R1
    595 	umlal   r3, r4, r0, r14
    596 
    597 	/* C interleaved with D: c gets i+j=1, c' gets i+j=2, d gets i+j=11, d' gets i+j=12 */
    598 	ldr	r0, [r1, #0*4]			@ a[0] (doubled below)
    599 	ldr	r14, [r1, #1*4]			@ a[1]
    600 	mov	r0, r0, asl #1			@ r0 = a[0]*2
    601 	ldr	r8, [r1, #2*4]			@ a[2]
    602 	umlal	r3, r4, r0, r14			@ c += a[0]*2 * a[1]
    603 	mov	r7, r8, asl #1                  @ a[2]*2
    604 	umull	r11, r12, r14, r14		@ c' = a[1] * a[1]
    605 	ldr	r14, [r1, #9*4]			@ a[9]
    606 	umlal	r11, r12, r0, r8		@ c' += a[0]*2 * a[2]
    607 	ldr	r0, [r1, #3*4]			@ a[3] (doubled below)
    608 	ldr	r8, [r1, #8*4]			@ a[8]
    609 	umlal	r5, r6, r7, r14			@ d += a[2]*2 * a[9]
    610 	mov	r0, r0, asl #1			@ r0 = a[3]*2
    611 	ldr	r7, [r1, #4*4]			@ a[4] (doubled below)
    612 	umull	r9, r10, r0, r14		@ d' = a[3]*2 * a[9]
    613 	ldr	r14, [r1, #7*4]			@ a[7]
    614 	umlal	r5, r6, r0, r8			@ d += a[3]*2 * a[8]
    615 	mov	r7, r7, asl #1			@ r7 = a[4]*2
    616 	ldr	r0, [r1, #5*4]			@ a[5] (doubled below)
    617 	umlal	r9, r10, r7, r8			@ d' += a[4]*2 * a[8]
    618 	ldr	r8, [r1, #6*4]			@ a[6]
    619 	mov	r0, r0, asl #1			@ r0 = a[5]*2
    620 	umlal	r5, r6, r7, r14			@ d += a[4]*2 * a[7]
    621 	umlal	r9, r10, r0, r14		@ d' += a[5]*2 * a[7]
    622 	umlal	r5, r6, r0, r8			@ d += a[5]*2 * a[6]
    623 	umlal	r9, r10, r8, r8			@ d' += a[6] * a[6]
    624 
    625 	bic	r0, r5, field_not_M 		@ u1 = d & M
    626 	mov	r5, r5, lsr #26     		@ d >>= 26
    627 	orr	r5, r5, r6, asl #6
    628 	mov     r6, r6, lsr #26
    629 	movw    r14, field_R0			@ c += u1 * R0
    630 	umlal   r3, r4, r0, r14
    631 	bic	r14, r3, field_not_M 		@ t1 = c & M
    632 	str	r14, [sp, #4 + 1*4]
    633 	mov	r3, r3, lsr #26     		@ c >>= 26
    634 	orr	r3, r3, r4, asl #6
    635 	mov     r4, r4, lsr #26
    636 	mov     r14, field_R1			@ c += u1 * R1
    637 	umlal   r3, r4, r0, r14
    638 
    639 	/* D */
    640 	adds	r3, r3, r11			@ c += c'
    641 	adc	r4, r4, r12
    642 	adds	r5, r5, r9			@ d += d'
    643 	adc	r6, r6, r10
    644 
    645 	bic	r0, r5, field_not_M 		@ u2 = d & M
    646 	mov	r5, r5, lsr #26     		@ d >>= 26
    647 	orr	r5, r5, r6, asl #6
    648 	mov     r6, r6, lsr #26
    649 	movw    r14, field_R0			@ c += u2 * R0
    650 	umlal   r3, r4, r0, r14
    651 	bic	r14, r3, field_not_M 		@ t2 = c & M
    652 	str	r14, [sp, #4 + 2*4]
    653 	mov	r3, r3, lsr #26     		@ c >>= 26
    654 	orr	r3, r3, r4, asl #6
    655 	mov     r4, r4, lsr #26
    656 	mov     r14, field_R1			@ c += u2 * R1
    657 	umlal   r3, r4, r0, r14
    658 
    659 	/* E interleaved with F: c gets i+j=3, c' gets i+j=4, d gets i+j=13, d' gets i+j=14 */
    660 	ldr	r7, [r1, #0*4]			@ a[0] (doubled below)
    661 	ldr	r0, [r1, #1*4]			@ a[1] (doubled below)
    662 	ldr	r14, [r1, #2*4]			@ a[2]
    663 	mov	r7, r7, asl #1			@ r7 = a[0]*2
    664 	ldr	r8, [r1, #3*4]			@ a[3]
    665 	ldr	r2, [r1, #4*4]			@ a[4]
    666 	umlal	r3, r4, r7, r8			@ c += a[0]*2 * a[3]
    667 	mov	r0, r0, asl #1			@ r0 = a[1]*2
    668 	umull	r11, r12, r7, r2		@ c' = a[0]*2 * a[4]
    669 	mov	r2, r2, asl #1			@ a[4]*2
    670 	umlal	r11, r12, r0, r8		@ c' += a[1]*2 * a[3]
    671 	ldr	r8, [r1, #9*4]			@ a[9]
    672 	umlal	r3, r4, r0, r14			@ c += a[1]*2 * a[2]
    673 	ldr	r0, [r1, #5*4]			@ a[5] (doubled below)
    674 	umlal	r11, r12, r14, r14		@ c' += a[2] * a[2]
    675 	ldr	r14, [r1, #8*4]			@ a[8]
    676 	mov	r0, r0, asl #1			@ r0 = a[5]*2
    677 	umlal	r5, r6, r2, r8			@ d += a[4]*2 * a[9]
    678 	ldr	r7, [r1, #6*4]			@ a[6] (doubled below)
    679 	umull	r9, r10, r0, r8			@ d' = a[5]*2 * a[9]
    680 	mov	r7, r7, asl #1			@ r7 = a[6]*2
    681 	ldr	r8, [r1, #7*4]			@ a[7]
    682 	umlal	r5, r6, r0, r14			@ d += a[5]*2 * a[8]
    683 	umlal	r9, r10, r7, r14		@ d' += a[6]*2 * a[8]
    684 	umlal	r5, r6, r7, r8			@ d += a[6]*2 * a[7]
    685 	umlal	r9, r10, r8, r8			@ d' += a[7] * a[7]
    686 
    687 	bic	r0, r5, field_not_M 		@ u3 = d & M
    688 	mov	r5, r5, lsr #26     		@ d >>= 26
    689 	orr	r5, r5, r6, asl #6
    690 	mov     r6, r6, lsr #26
    691 	movw    r14, field_R0			@ c += u3 * R0
    692 	umlal   r3, r4, r0, r14
    693 	bic	r14, r3, field_not_M 		@ t3 = c & M
    694 	str	r14, [sp, #4 + 3*4]
    695 	mov	r3, r3, lsr #26     		@ c >>= 26
    696 	orr	r3, r3, r4, asl #6
    697 	mov     r4, r4, lsr #26
    698 	mov     r14, field_R1			@ c += u3 * R1
    699 	umlal   r3, r4, r0, r14
    700 
    701 	/* F */
    702 	adds	r3, r3, r11			@ c += c'
    703 	adc	r4, r4, r12
    704 	adds	r5, r5, r9			@ d += d'
    705 	adc	r6, r6, r10
    706 
    707 	bic	r0, r5, field_not_M 		@ u4 = d & M
    708 	mov	r5, r5, lsr #26     		@ d >>= 26
    709 	orr	r5, r5, r6, asl #6
    710 	mov     r6, r6, lsr #26
    711 	movw    r14, field_R0			@ c += u4 * R0
    712 	umlal   r3, r4, r0, r14
    713 	bic	r14, r3, field_not_M 		@ t4 = c & M
    714 	str	r14, [sp, #4 + 4*4]
    715 	mov	r3, r3, lsr #26     		@ c >>= 26
    716 	orr	r3, r3, r4, asl #6
    717 	mov     r4, r4, lsr #26
    718 	mov     r14, field_R1			@ c += u4 * R1
    719 	umlal   r3, r4, r0, r14
    720 
    721 	/* G interleaved with H: c gets i+j=5, c' gets i+j=6, d gets i+j=15, d' gets i+j=16 */
    722 	ldr	r7, [r1, #0*4]			@ a[0] (doubled below)
    723 	ldr	r0, [r1, #1*4]			@ a[1] (doubled below)
    724 	mov	r7, r7, asl #1			@ r7 = a[0]*2
    725 	ldr	r8, [r1, #5*4]			@ a[5]
    726 	ldr	r2, [r1, #6*4]			@ a[6]
    727 	umlal	r3, r4, r7, r8			@ c += a[0]*2 * a[5]
    728 	ldr	r14, [r1, #4*4]			@ a[4]
    729 	mov	r0, r0, asl #1			@ r0 = a[1]*2
    730 	umull	r11, r12, r7, r2		@ c' = a[0]*2 * a[6]
    731 	ldr	r7, [r1, #2*4]			@ a[2] (doubled below)
    732 	umlal	r11, r12, r0, r8		@ c' += a[1]*2 * a[5]
    733 	mov	r7, r7, asl #1			@ r7 = a[2]*2
    734 	ldr	r8, [r1, #3*4]			@ a[3]
    735 	umlal	r3, r4, r0, r14			@ c += a[1]*2 * a[4]
    736 	mov	r0, r2, asl #1			@ a[6]*2
    737 	umlal	r11, r12, r7, r14		@ c' += a[2]*2 * a[4]
    738 	ldr	r14, [r1, #9*4]			@ a[9]
    739 	umlal	r3, r4, r7, r8			@ c += a[2]*2 * a[3]
    740 	ldr	r7, [r1, #7*4]			@ a[7] (doubled below)
    741 	umlal	r11, r12, r8, r8		@ c' += a[3] * a[3]
    742 	mov	r7, r7, asl #1			@ r7 = a[7]*2
    743 	ldr	r8, [r1, #8*4]			@ a[8]
    744 	umlal	r5, r6, r0, r14			@ d += a[6]*2 * a[9]
    745 	umull	r9, r10, r7, r14		@ d' = a[7]*2 * a[9]
    746 	umlal	r5, r6, r7, r8			@ d += a[7]*2 * a[8]
    747 	umlal	r9, r10, r8, r8			@ d' += a[8] * a[8]
    748 
    749 	bic	r0, r5, field_not_M 		@ u5 = d & M
    750 	mov	r5, r5, lsr #26     		@ d >>= 26
    751 	orr	r5, r5, r6, asl #6
    752 	mov     r6, r6, lsr #26
    753 	movw    r14, field_R0			@ c += u5 * R0
    754 	umlal   r3, r4, r0, r14
    755 	bic	r14, r3, field_not_M 		@ t5 = c & M
    756 	str	r14, [sp, #4 + 5*4]
    757 	mov	r3, r3, lsr #26     		@ c >>= 26
    758 	orr	r3, r3, r4, asl #6
    759 	mov     r4, r4, lsr #26
    760 	mov     r14, field_R1			@ c += u5 * R1
    761 	umlal   r3, r4, r0, r14
    762 
    763 	/* H */
    764 	adds	r3, r3, r11			@ c += c'
    765 	adc	r4, r4, r12
    766 	adds	r5, r5, r9			@ d += d'
    767 	adc	r6, r6, r10
    768 
    769 	bic	r0, r5, field_not_M 		@ u6 = d & M
    770 	mov	r5, r5, lsr #26     		@ d >>= 26
    771 	orr	r5, r5, r6, asl #6
    772 	mov     r6, r6, lsr #26
    773 	movw    r14, field_R0			@ c += u6 * R0
    774 	umlal   r3, r4, r0, r14
    775 	bic	r14, r3, field_not_M 		@ t6 = c & M
    776 	str	r14, [sp, #4 + 6*4]
    777 	mov	r3, r3, lsr #26     		@ c >>= 26
    778 	orr	r3, r3, r4, asl #6
    779 	mov     r4, r4, lsr #26
    780 	mov     r14, field_R1			@ c += u6 * R1
    781 	umlal   r3, r4, r0, r14
    782 
    783 	/* I interleaved with J: c gets i+j=7, c' gets i+j=8, d gets i+j=17; a[9]^2 is added to d in J */
    784 	ldr	r7, [r1, #0*4]			@ a[0] (doubled below)
    785 	ldr	r0, [r1, #1*4]			@ a[1] (doubled below)
    786 	mov	r7, r7, asl #1			@ r7 = a[0]*2
    787 	ldr	r8, [r1, #7*4]			@ a[7]
    788 	ldr	r2, [r1, #8*4]			@ a[8]
    789 	umlal	r3, r4, r7, r8			@ c += a[0]*2 * a[7]
    790 	ldr	r14, [r1, #6*4]			@ a[6]
    791 	mov	r0, r0, asl #1			@ r0 = a[1]*2
    792 	umull	r11, r12, r7, r2		@ c' = a[0]*2 * a[8]
    793 	ldr	r7, [r1, #2*4]			@ a[2] (doubled below)
    794 	umlal	r11, r12, r0, r8		@ c' += a[1]*2 * a[7]
    795 	ldr	r8, [r1, #5*4]			@ a[5]
    796 	umlal	r3, r4, r0, r14			@ c += a[1]*2 * a[6]
    797 	ldr	r0, [r1, #3*4]			@ a[3] (doubled below)
    798 	mov	r7, r7, asl #1			@ r7 = a[2]*2
    799 	umlal	r11, r12, r7, r14		@ c' += a[2]*2 * a[6]
    800 	ldr	r14, [r1, #4*4]			@ a[4]
    801 	mov	r0, r0, asl #1			@ r0 = a[3]*2
    802 	umlal	r3, r4, r7, r8			@ c += a[2]*2 * a[5]
    803 	mov	r2, r2, asl #1			@ a[8]*2
    804 	umlal	r11, r12, r0, r8		@ c' += a[3]*2 * a[5]
    805 	umlal	r3, r4, r0, r14			@ c += a[3]*2 * a[4]
    806 	umlal	r11, r12, r14, r14		@ c' += a[4] * a[4]
    807 	ldr	r8, [r1, #9*4]			@ a[9]
    808 	umlal	r5, r6, r2, r8			@ d += a[8]*2 * a[9]
    809 	@ r8 (a[9]) is squared into d at the start of J; it is not overwritten in between
    810 
    811 	bic	r0, r5, field_not_M 		@ u7 = d & M
    812 	mov	r5, r5, lsr #26     		@ d >>= 26
    813 	orr	r5, r5, r6, asl #6
    814 	mov     r6, r6, lsr #26
    815 	movw    r14, field_R0			@ c += u7 * R0
    816 	umlal   r3, r4, r0, r14
    817 	bic	r14, r3, field_not_M 		@ t7 = c & M
    818 	str	r14, [sp, #4 + 7*4]
    819 	mov	r3, r3, lsr #26     		@ c >>= 26
    820 	orr	r3, r3, r4, asl #6
    821 	mov     r4, r4, lsr #26
    822 	mov     r14, field_R1			@ c += u7 * R1
    823 	umlal   r3, r4, r0, r14
    824 
    825 	/* J */
    826 	adds	r3, r3, r11			@ c += c'
    827 	adc	r4, r4, r12
    828 	umlal	r5, r6, r8, r8			@ d += a[9] * a[9]
    829 
    830 	bic	r0, r5, field_not_M 		@ u8 = d & M
    831 	str	r0, [sp, #4 + 8*4]		@ keep u8: its R1 product is added during write-back
    832 	mov	r5, r5, lsr #26     		@ d >>= 26
    833 	orr	r5, r5, r6, asl #6
    834 	mov     r6, r6, lsr #26
    835 	movw    r14, field_R0			@ c += u8 * R0
    836 	umlal   r3, r4, r0, r14
    837 
    838 	/******************************************
    839 	 * compute and write back result
    840 	 ******************************************
    841 	Allocation:
    842 	    r0    r
    843 	    r3:r4 c
    844 	    r5:r6 d
    845 	    r7    t0   (after the second ldmia below)
    846 	    r8    t1   (after the second ldmia below)
    847 	    r9    t2   (after the second ldmia below)
    848 	    r11   u8
    849 	    r12   t9
    850 	    r1,r2,r10,r14 scratch
    851 
    852 	Note: do not read from a[] after here, it may overlap with r[]
    853 	*/
    854 	ldr	r0, [sp, #0]			@ r0 = result pointer saved in the prologue
    855 	add	r1, sp, #4 + 3*4		@ r[3..7] = t3..7, r11=u8, r12=t9
    856 	ldmia	r1, {r2,r7,r8,r9,r10,r11,r12}	@ r2,r7,r8,r9,r10 = t3..t7
    857 	add	r1, r0, #3*4			@ r1 = &r[3]
    858 	stmia	r1, {r2,r7,r8,r9,r10}		@ first stores to r[]: all loads from a[] are done
    859 
    860 	bic	r2, r3, field_not_M 		@ r[8] = c & M
    861 	str	r2, [r0, #8*4]
    862 	mov	r3, r3, lsr #26     		@ c >>= 26
    863 	orr	r3, r3, r4, asl #6
    864 	mov     r4, r4, lsr #26
    865 	mov     r14, field_R1			@ c += u8 * R1
    866 	umlal   r3, r4, r11, r14
    867 	movw    r14, field_R0			@ c += d * R0
    868 	umlal   r3, r4, r5, r14			@ only d.lo (r5) is multiplied
    869 	adds	r3, r3, r12			@ c += t9
    870 	adc	r4, r4, #0			@ carry into c.hi
    871 
    872 	add	r1, sp, #4 + 0*4		@ r7,r8,r9 = t0,t1,t2
    873 	ldmia	r1, {r7,r8,r9}
    874 
    875 	ubfx	r2, r3, #0, #22     		@ r[9] = c & (M >> 4)
    876 	str	r2, [r0, #9*4]
    877 	mov	r3, r3, lsr #22     		@ c >>= 22
    878 	orr	r3, r3, r4, asl #10
    879 	mov     r4, r4, lsr #22
    880 	movw    r14, field_R1 << 4   		@ c += d * (R1 << 4)
    881 	umlal   r3, r4, r5, r14
    882 
    883 	movw    r14, field_R0 >> 4   		@ d = c * (R0 >> 4) + t0 (64x64 multiply+add)
    884 	umull	r5, r6, r3, r14			@ d = c.lo * (R0 >> 4)
    885 	adds	r5, r5, r7	    		@ d.lo += t0
    886 	mla	r6, r14, r4, r6			@ d.hi += c.hi * (R0 >> 4); mla leaves the carry flag untouched
    887 	adc	r6, r6, 0	     		@ d.hi += carry (from the adds above)
    888 
    889 	bic	r2, r5, field_not_M 		@ r[0] = d & M
    890 	str	r2, [r0, #0*4]
    891 
    892 	mov	r5, r5, lsr #26     		@ d >>= 26
    893 	orr	r5, r5, r6, asl #6
    894 	mov     r6, r6, lsr #26
    895 	
    896 	movw    r14, field_R1 >> 4   		@ d += c * (R1 >> 4) + t1 (64x64 multiply+add)
    897 	umull	r1, r2, r3, r14       		@ tmp = c.lo * (R1 >> 4)
    898 	adds	r5, r5, r8	    		@ d.lo += t1
    899 	adc	r6, r6, #0	    		@ d.hi += carry
    900 	adds	r5, r5, r1	    		@ d.lo += tmp.lo
    901 	mla	r2, r14, r4, r2      		@ tmp.hi += c.hi * (R1 >> 4)
    902 	adc	r6, r6, r2	   		@ d.hi += carry + tmp.hi
    903 
    904 	bic	r2, r5, field_not_M 		@ r[1] = d & M
    905 	str	r2, [r0, #1*4]
    906 	mov	r5, r5, lsr #26     		@ d >>= 26 (ignore hi)
    907 	orr	r5, r5, r6, asl #6
    908 
    909 	add	r5, r5, r9	  		@ d += t2
    910 	str	r5, [r0, #2*4]      		@ r[2] = d (stored without masking)
    911 
    912 	add	sp, sp, #48			@ release the frame
    913 	ldmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}	@ restore r4-r11 and return (saved lr -> pc)
    914 	.size	haskellsecp256k1_v0_1_0_fe_sqr_inner, .-haskellsecp256k1_v0_1_0_fe_sqr_inner
    915 
    916 	.section .note.GNU-stack,"",%progbits	@ empty marker section: tells GNU linkers this object needs no executable stack