/*
 * CLEAR_FLAGS(empty_reg)
 *
 * Expands to the asm text `xorq <empty_reg>, <empty_reg>`. XOR-ing a register
 * with itself zeroes that register and, per the x86-64 ISA, leaves CF and OF
 * cleared.
 *
 *   empty_reg  string literal naming a 64-bit register whose contents may be
 *              destroyed (it is pasted into the instruction text twice).
 *
 * NOTE(review): presumably meant to run ahead of the adcxq/adoxq carry chains
 * in this file, which read CF/OF without initialising them - confirm at the
 * call sites.
 */
13#define CLEAR_FLAGS(empty_reg) \
14 "xorq " empty_reg ", " empty_reg " \n\t"
/*
 * LOAD_FIELD_ELEMENT(a, lolo, lohi, hilo, hihi)
 *
 * Expands to four `movq` loads that copy the 64-bit words at byte offsets
 * 0, 8, 16 and 24 from the address in register `a` into the registers
 * `lolo`, `lohi`, `hilo` and `hihi` respectively. Every argument is a string
 * literal that is pasted into the instruction text.
 *
 * The loads run in ascending offset order, so a destination that names the
 * same register as `a` would replace the base address before the remaining
 * loads execute.
 */
20#define LOAD_FIELD_ELEMENT(a, lolo, lohi, hilo, hihi) \
21 "movq 0(" a "), " lolo " \n\t" /* lolo = word at a+0 */ \
22 "movq 8(" a "), " lohi " \n\t" /* lohi = word at a+8 */ \
23 "movq 16(" a "), " hilo " \n\t" /* hilo = word at a+16 */ \
24 "movq 24(" a "), " hihi " \n\t" /* hihi = word at a+24 */
/*
 * STORE_FIELD_ELEMENT(r, lolo, lohi, hilo, hihi)
 *
 * Expands to four `movq` stores that write the registers `lolo`, `lohi`,
 * `hilo` and `hihi` to byte offsets 0, 8, 16 and 24 from the address in
 * register `r`. Every argument is a string literal that is pasted into the
 * instruction text. This is the store-side mirror of LOAD_FIELD_ELEMENT.
 */
31#define STORE_FIELD_ELEMENT(r, lolo, lohi, hilo, hihi) \
32 "movq " lolo ", 0(" r ") \n\t" /* word at r+0 = lolo */ \
33 "movq " lohi ", 8(" r ") \n\t" /* word at r+8 = lohi */ \
34 "movq " hilo ", 16(" r ") \n\t" /* word at r+16 = hilo */ \
35 "movq " hihi ", 24(" r ") \n\t" /* word at r+24 = hihi */
37#if !defined(__ADX__) || defined(DISABLE_ADX)
43 "addq 0(" b "), %%r12 \n\t" /* r12 += word at b+0; starts a carry chain in CF. NOTE(review): the #define line that opens this macro body is not visible in this excerpt */ \
44 "adcq 8(" b "), %%r13 \n\t" /* r13 += word at b+8 + CF */ \
45 "adcq 16(" b "), %%r14 \n\t" /* r14 += word at b+16 + CF */ \
46 "adcq 24(" b "), %%r15 \n\t" /* r15 += word at b+24 + CF; the carry out of the top word stays in CF */
53 "subq 0(" b "), %%r12 \n\t" /* r12 -= word at b+0; starts a borrow chain in CF. NOTE(review): the #define line that opens this macro body is not visible in this excerpt */ \
54 "sbbq 8(" b "), %%r13 \n\t" /* r13 -= word at b+8 + CF */ \
55 "sbbq 16(" b "), %%r14 \n\t" /* r14 -= word at b+16 + CF */ \
56 "sbbq 24(" b "), %%r15 \n\t" /* r15 -= word at b+24 + CF; the borrow out of the top word stays in CF */
/*
 * ADD_REDUCE(b, modulus_0, modulus_1, modulus_2, modulus_3)   (add/adc variant)
 *
 * Operates on the four-word value held in r12 (lowest word) .. r15:
 *   1. adds the four 64-bit words at b+0 .. b+24 with carry propagation;
 *   2. saves that sum in r8..r11;
 *   3. adds the operands modulus_0..modulus_3 to r12..r15 with carry
 *      propagation;
 *   4. if step 3 produced no carry out of r15, restores the saved sum.
 *
 * All arguments are string literals pasted into the instruction text.
 * Overwrites r8..r11 and the flags in addition to r12..r15.
 *
 * NOTE(review): the sibling REDUCE_FIELD_ELEMENT names the same operands
 * `neg_modulus_*`, so callers presumably pass the two's-complement negation
 * of the modulus, which would make step 3 a subtraction - confirm at the
 * call sites.
 */
63#define ADD_REDUCE(b, modulus_0, modulus_1, modulus_2, modulus_3) \
64 "addq 0(" b "), %%r12 \n\t" /* step 1: r12..r15 += words at b+0..b+24, carry rippling through CF */ \
65 "adcq 8(" b "), %%r13 \n\t" \
66 "adcq 16(" b "), %%r14 \n\t" \
67 "adcq 24(" b "), %%r15 \n\t" \
68 "movq %%r12, %%r8 \n\t" /* step 2: keep a copy of the sum in r8..r11 (movq leaves the flags alone) */ \
69 "movq %%r13, %%r9 \n\t" \
70 "movq %%r14, %%r10 \n\t" \
71 "movq %%r15, %%r11 \n\t" \
72 "addq " modulus_0 ", %%r12 \n\t" /* step 3: r12..r15 += modulus_0..3, carry rippling through CF */ \
73 "adcq " modulus_1 ", %%r13 \n\t" \
74 "adcq " modulus_2 ", %%r14 \n\t" \
75 "adcq " modulus_3 ", %%r15 \n\t" \
76 "cmovncq %%r8, %%r12 \n\t" /* step 4: CF == 0 after step 3 -> put the saved sum back */ \
77 "cmovncq %%r9, %%r13 \n\t" \
78 "cmovncq %%r10, %%r14 \n\t" \
79 "cmovncq %%r11, %%r15 \n\t"
/*
 * REDUCE_FIELD_ELEMENT(neg_modulus_0, neg_modulus_1, neg_modulus_2, neg_modulus_3)
 * (add/adc variant)
 *
 * Operates on the four-word value held in r12 (lowest word) .. r15:
 *   1. saves the value in r8..r11;
 *   2. adds the operands neg_modulus_0..3 to r12..r15 with carry propagation;
 *   3. if that addition produced no carry out of r15, restores the saved
 *      value.
 *
 * All arguments are string literals pasted into the instruction text.
 * Overwrites r8..r11 and the flags in addition to r12..r15.
 *
 * NOTE(review): judging by the parameter names the operands are the negated
 * modulus, so the macro would keep `value - modulus` only when that
 * subtraction does not underflow - confirm at the call sites.
 */
87#define REDUCE_FIELD_ELEMENT(neg_modulus_0, neg_modulus_1, neg_modulus_2, neg_modulus_3) \
89 "movq %%r12, %%r8 \n\t" /* step 1: keep a copy of the input in r8..r11 */ \
90 "movq %%r13, %%r9 \n\t" \
91 "movq %%r14, %%r10 \n\t" \
92 "movq %%r15, %%r11 \n\t" \
93 "addq " neg_modulus_0 ", %%r12 \n\t" /* step 2: r12..r15 += neg_modulus_0..3, carry rippling through CF */ \
94 "adcq " neg_modulus_1 ", %%r13 \n\t" \
95 "adcq " neg_modulus_2 ", %%r14 \n\t" \
96 "adcq " neg_modulus_3 ", %%r15 \n\t" \
100 "cmovncq %%r8, %%r12 \n\t" /* step 3: CF == 0 after step 2 -> put the saved input back */ \
101 "cmovncq %%r9, %%r13 \n\t" \
102 "cmovncq %%r10, %%r14 \n\t" \
103 "cmovncq %%r11, %%r15 \n\t"
110 "movq 0(" a "), %%rdx \n\t" /* rdx = a[0], the implicit multiplicand of mulxq (a[i] = 64-bit word at byte offset 8*i from `a`). NOTE(review): the #define line that opens this macro body is not visible in this excerpt */ \
112 "xorq %%r8, %%r8 \n\t" /* zeroes r8 and clears CF/OF */ \
114 "mulxq 8(" a "), %%r9, %%r10 \n\t" /* r10:r9 = a[0] * a[1] (mulxq writes the low half to the first destination, the high half to the second) */ \
115 "mulxq 16(" a "), %%r8, %%r15 \n\t" /* r15:r8 = a[0] * a[2] */ \
116 "mulxq 24(" a "), %%r11, %%r12 \n\t" /* r12:r11 = a[0] * a[3] */ \
120 "addq %%r8, %%r10 \n\t" /* r10 += low(a[0]*a[2]) */ \
121 "adcq %%r15, %%r11 \n\t" /* r11 += high(a[0]*a[2]) + CF */ \
122 "movq 8(" a "), %%rdx \n\t" /* rdx = a[1] */ \
123 "mulxq 16(" a "), %%r8, %%r15 \n\t" /* r15:r8 = a[1] * a[2] */ \
124 "mulxq 24(" a "), %%rdi, %%rcx \n\t" /* rcx:rdi = a[1] * a[3] */ \
125 "movq 24(" a "), %%rdx \n\t" /* rdx = a[3] */ \
126 "mulxq 16(" a "), %%r13, %%r14 \n\t" /* r14:r13 = a[3] * a[2] */ \
127 "adcq %%rdi, %%r12 \n\t" /* r12 += low(a[1]*a[3]) + CF; the carry from the adcq above survives because movq/mulxq leave the flags untouched */ \
128 "adcq %%rcx, %%r13 \n\t" /* r13 += high(a[1]*a[3]) + CF */ \
129 "adcq $0, %%r14 \n\t" /* r14 += CF */ \
130 "addq %%r8, %%r11 \n\t" /* r11 += low(a[1]*a[2]) */ \
131 "adcq %%r15, %%r12 \n\t" /* r12 += high(a[1]*a[2]) + CF */ \
132 "adcq $0, %%r13 \n\t" /* r13 += CF */ \
135 "addq %%r9, %%r9 \n\t" /* double the cross-product sum: each of r9..r14 is added to itself, the carry rippling upward */ \
136 "adcq %%r10, %%r10 \n\t" \
137 "adcq %%r11, %%r11 \n\t" \
138 "adcq %%r12, %%r12 \n\t" \
139 "adcq %%r13, %%r13 \n\t" \
140 "adcq %%r14, %%r14 \n\t" \
143 "movq 0(" a "), %%rdx \n\t" /* rdx = a[0] */ \
144 "mulxq %%rdx, %%r8, %%rcx \n\t" /* rcx:r8 = a[0]^2 */ \
145 "movq 16(" a "), %%rdx \n\t" /* rdx = a[2] */ \
146 "mulxq %%rdx, %%rdx, %%rdi \n\t" /* rdi:rdx = a[2]^2 */ \
148 "addq %%rdx, %%r12 \n\t" /* r12 += low(a[2]^2) */ \
149 "adcq %%rdi, %%r13 \n\t" /* r13 += high(a[2]^2) + CF */ \
150 "adcq $0, %%r14 \n\t" /* r14 += CF */ \
151 "addq %%rcx, %%r9 \n\t" /* r9 += high(a[0]^2) */ \
152 "movq 24(" a "), %%rdx \n\t" /* rdx = a[3] */ \
153 "mulxq %%rdx, %%rcx, %%r15 \n\t" /* r15:rcx = a[3]^2 */ \
154 "movq 8(" a "), %%rdx \n\t" /* rdx = a[1] */ \
155 "mulxq %%rdx, %%rdi, %%rdx \n\t" /* rdx:rdi = a[1]^2 */ \
156 "adcq %%rdi, %%r10 \n\t" /* r10 += low(a[1]^2) + CF (carry from the addq into r9) */ \
157 "adcq %%rdx, %%r11 \n\t" /* r11 += high(a[1]^2) + CF */ \
158 "adcq $0, %%r12 \n\t" /* r12 += CF */ \
159 "addq %%rcx, %%r14 \n\t" /* r14 += low(a[3]^2) */ \
160 "adcq $0, %%r15 \n\t" /* r15 += CF */ \
163 "movq %%r8, %%rdx \n\t" /* reduction step on word r8: rdx = r8 */ \
164 "mulxq %[r_inv], %%rdx, %%rdi \n\t" /* rdx = low 64 bits of r8 * r_inv; the high half written to rdi is overwritten by the next mulxq */ \
165 "mulxq %[modulus_0], %%rdi, %%rcx \n\t" /* rcx:rdi = rdx * modulus_0; the rest of this step adds rdx * modulus_0..3 into r8..r12 and ripples the carries upward */ \
166 "addq %%rdi, %%r8 \n\t" /* NOTE(review): presumably leaves r8 == 0 by construction of r_inv; r8 is reused as scratch below - confirm */ \
167 "adcq %%rcx, %%r9 \n\t" \
168 "mulxq %[modulus_1], %%rdi, %%rcx \n\t" \
169 "adcq %%rcx, %%r10 \n\t" \
170 "adcq $0, %%r11 \n\t" \
172 "addq %%rdi, %%r9 \n\t" \
173 "mulxq %[modulus_2], %%rdi, %%rcx \n\t" \
174 "mulxq %[modulus_3], %%r8, %%rdx \n\t" \
175 "adcq %%rdi, %%r10 \n\t" \
176 "adcq %%rcx, %%r11 \n\t" \
177 "adcq %%rdx, %%r12 \n\t" \
178 "adcq $0, %%r13 \n\t" \
179 "addq %%r8, %%r11 \n\t" \
180 "adcq $0, %%r12 \n\t" \
183 "movq %%r9, %%rdx \n\t" /* reduction step on word r9: rdx = r9 */ \
184 "mulxq %[r_inv], %%rdx, %%rdi \n\t" /* rdx = low 64 bits of r9 * r_inv */ \
185 "mulxq %[modulus_0], %%rdi, %%rcx \n\t" /* the rest of this step adds rdx * modulus_0..3 into r9..r13 */ \
186 "addq %%rdi, %%r9 \n\t" \
187 "adcq %%rcx, %%r10 \n\t" \
188 "mulxq %[modulus_1], %%rdi, %%rcx \n\t" \
189 "adcq %%rcx, %%r11 \n\t" \
190 "adcq $0, %%r12 \n\t" \
191 "addq %%rdi, %%r10 \n\t" \
192 "mulxq %[modulus_2], %%rdi, %%rcx \n\t" \
193 "mulxq %[modulus_3], %%r8, %%r9 \n\t" \
194 "adcq %%rdi, %%r11 \n\t" \
195 "adcq %%rcx, %%r12 \n\t" \
196 "adcq %%r9, %%r13 \n\t" \
197 "adcq $0, %%r14 \n\t" \
198 "addq %%r8, %%r12 \n\t" \
199 "adcq $0, %%r13 \n\t" \
202 "movq %%r10, %%rdx \n\t" /* reduction step on word r10: rdx = r10 */ \
203 "mulxq %[r_inv], %%rdx, %%rdi \n\t" /* rdx = low 64 bits of r10 * r_inv */ \
204 "mulxq %[modulus_0], %%rdi, %%rcx \n\t" /* the rest of this step adds rdx * modulus_0..3 into r10..r14 */ \
205 "addq %%rdi, %%r10 \n\t" \
206 "adcq %%rcx, %%r11 \n\t" \
207 "mulxq %[modulus_1], %%rdi, %%rcx \n\t" \
208 "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
209 "mulxq %[modulus_3], %%r10, %%rdx \n\t" \
210 "adcq %%rcx, %%r12 \n\t" \
211 "adcq %%r9, %%r13 \n\t" \
212 "adcq %%rdx, %%r14 \n\t" \
213 "adcq $0, %%r15 \n\t" \
214 "addq %%rdi, %%r11 \n\t" \
215 "adcq %%r8, %%r12 \n\t" \
216 "adcq %%r10, %%r13 \n\t" \
217 "adcq $0, %%r14 \n\t" \
220 "movq %%r11, %%rdx \n\t" /* reduction step on word r11: rdx = r11 */ \
221 "mulxq %[r_inv], %%rdx, %%rdi \n\t" /* rdx = low 64 bits of r11 * r_inv */ \
222 "mulxq %[modulus_0], %%rdi, %%rcx \n\t" /* the rest of this step adds rdx * modulus_0..3 into r11..r15 */ \
223 "mulxq %[modulus_1], %%r8, %%r9 \n\t" \
224 "addq %%rdi, %%r11 \n\t" \
225 "adcq %%r8, %%r12 \n\t" \
226 "adcq %%r9, %%r13 \n\t" \
227 "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
228 "mulxq %[modulus_3], %%r10, %%r11 \n\t" \
229 "adcq %%r9, %%r14 \n\t" \
230 "adcq %%r11, %%r15 \n\t" \
231 "addq %%rcx, %%r12 \n\t" \
232 "adcq %%r8, %%r13 \n\t" \
233 "adcq %%r10, %%r14 \n\t" \
234 "adcq $0, %%r15 \n\t" /* NOTE(review): r12..r15 presumably hold the result at this point - confirm against the caller */
/*
 * MUL(a1, a2, a3, a4, b)   (add/adc variant)
 *
 * Four rounds, one per operand a1..a4. Each round:
 *   - loads the operand into rdx and uses mulxq to multiply it by the four
 *     64-bit words b[0..3] (b[i] = word at byte offset 8*i from the address
 *     in register `b`), adding the products into a running accumulator;
 *   - multiplies the lowest accumulator word by %[r_inv] (keeping the low
 *     64 bits) and adds that factor times %[modulus_0..3] into the
 *     accumulator.
 *
 * a1..a4 and b are string literals pasted into the instruction text; r_inv
 * and modulus_0..3 are named operands of the enclosing asm statement.
 * Registers written: rdx, rdi, r8..r15, plus the flags.
 *
 * NOTE(review): this has the shape of a word-by-word Montgomery
 * multiplication, with a1..a4 presumably being the four words of the first
 * factor and the result left in r12..r15 - confirm against the callers.
 */
241#define MUL(a1, a2, a3, a4, b) \
242 "movq " a1 ", %%rdx \n\t" /* round 1: rdx = a1, the implicit multiplicand of mulxq */ \
243 "xorq %%r8, %%r8 \n\t" /* zeroes r8 and clears CF/OF */ \
245 "mulxq 8(" b "), %%r8, %%r9 \n\t" /* r9:r8 = a1 * b[1] (low half in the first destination, high half in the second) */ \
246 "mulxq 24(" b "), %%rdi, %%r12 \n\t" /* r12:rdi = a1 * b[3] */ \
247 "mulxq 0(" b "), %%r13, %%r14 \n\t" /* r14:r13 = a1 * b[0] */ \
248 "mulxq 16(" b "), %%r15, %%r10 \n\t" /* r10:r15 = a1 * b[2] */ \
252 "movq %%r13, %%rdx \n\t" /* rdx = r13, the lowest accumulator word */ \
253 "mulxq %[r_inv], %%rdx, %%r11 \n\t" /* rdx = low 64 bits of r13 * r_inv; the high half written to r11 is overwritten below */ \
256 "addq %%r8, %%r14 \n\t" /* fold the remaining partial products into r14, r15, r10, r12 */ \
257 "adcq %%r9, %%r15 \n\t" \
258 "adcq %%rdi, %%r10 \n\t" \
259 "adcq $0, %%r12 \n\t" \
262 "mulxq %[modulus_0], %%r8, %%r9 \n\t" /* add rdx * modulus_0..3 into r13, r14, r15, r10, r12 */ \
263 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
264 "addq %%r8, %%r13 \n\t" \
265 "adcq %%rdi, %%r14 \n\t" \
266 "adcq %%r11, %%r15 \n\t" \
267 "adcq $0, %%r10 \n\t" \
268 "adcq $0, %%r12 \n\t" \
269 "addq %%r9, %%r14 \n\t" \
270 "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
271 "mulxq %[modulus_3], %%rdi, %%r11 \n\t" \
272 "adcq %%r8, %%r15 \n\t" \
273 "adcq %%rdi, %%r10 \n\t" \
274 "adcq %%r11, %%r12 \n\t" \
275 "addq %%r9, %%r10 \n\t" \
276 "adcq $0, %%r12 \n\t" \
285 "movq " a2 ", %%rdx \n\t" /* round 2: rdx = a2; add a2 * b[0..3] into r14, r15, r10, r12 with r13 as the new top word */ \
286 "mulxq 0(" b "), %%r8, %%r9 \n\t" \
287 "mulxq 8(" b "), %%rdi, %%r11 \n\t" \
288 "addq %%r8, %%r14 \n\t" \
289 "adcq %%rdi, %%r15 \n\t" \
290 "adcq %%r11, %%r10 \n\t" \
291 "adcq $0, %%r12 \n\t" \
292 "addq %%r9, %%r15 \n\t" \
294 "mulxq 16(" b "), %%r8, %%r9 \n\t" \
295 "mulxq 24(" b "), %%rdi, %%r13 \n\t" \
296 "adcq %%r8, %%r10 \n\t" \
297 "adcq %%rdi, %%r12 \n\t" \
298 "adcq $0, %%r13 \n\t" \
299 "addq %%r9, %%r12 \n\t" \
300 "adcq $0, %%r13 \n\t" \
303 "movq %%r14, %%rdx \n\t" /* rdx = r14, now the lowest accumulator word */ \
304 "mulxq %[r_inv], %%rdx, %%r8 \n\t" /* rdx = low 64 bits of r14 * r_inv */ \
305 "mulxq %[modulus_0], %%r8, %%r9 \n\t" /* add rdx * modulus_0..3 into r14, r15, r10, r12, r13 */ \
306 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
307 "addq %%r8, %%r14 \n\t" \
308 "adcq %%rdi, %%r15 \n\t" \
309 "adcq %%r11, %%r10 \n\t" \
310 "adcq $0, %%r12 \n\t" \
311 "adcq $0, %%r13 \n\t" \
312 "addq %%r9, %%r15 \n\t" \
313 "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
314 "mulxq %[modulus_3], %%rdi, %%r11 \n\t" \
315 "adcq %%r8, %%r10 \n\t" \
316 "adcq %%r9, %%r12 \n\t" \
317 "adcq %%r11, %%r13 \n\t" \
318 "addq %%rdi, %%r12 \n\t" \
319 "adcq $0, %%r13 \n\t" \
322 "movq " a3 ", %%rdx \n\t" /* round 3: rdx = a3; add a3 * b[0..3] into r15, r10, r12, r13 with r14 as the new top word */ \
323 "mulxq 0(" b "), %%r8, %%r9 \n\t" \
324 "mulxq 8(" b "), %%rdi, %%r11 \n\t" \
325 "addq %%r8, %%r15 \n\t" \
326 "adcq %%r9, %%r10 \n\t" \
327 "adcq %%r11, %%r12 \n\t" \
328 "adcq $0, %%r13 \n\t" \
329 "addq %%rdi, %%r10 \n\t" \
330 "mulxq 16(" b "), %%r8, %%r9 \n\t" \
331 "mulxq 24(" b "), %%rdi, %%r14 \n\t" \
332 "adcq %%r8, %%r12 \n\t" \
333 "adcq %%r9, %%r13 \n\t" \
334 "adcq $0, %%r14 \n\t" \
335 "addq %%rdi, %%r13 \n\t" \
336 "adcq $0, %%r14 \n\t" \
339 "movq %%r15, %%rdx \n\t" /* rdx = r15, now the lowest accumulator word */ \
340 "mulxq %[r_inv], %%rdx, %%r8 \n\t" /* rdx = low 64 bits of r15 * r_inv */ \
341 "mulxq %[modulus_0], %%r8, %%r9 \n\t" /* add rdx * modulus_0..3 into r15, r10, r12, r13, r14 */ \
342 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
343 "addq %%r8, %%r15 \n\t" \
344 "adcq %%r9, %%r10 \n\t" \
345 "adcq %%r11, %%r12 \n\t" \
346 "adcq $0, %%r13 \n\t" \
347 "adcq $0, %%r14 \n\t" \
348 "addq %%rdi, %%r10 \n\t" \
349 "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
350 "mulxq %[modulus_3], %%rdi, %%r11 \n\t" \
351 "adcq %%r8, %%r12 \n\t" \
352 "adcq %%r9, %%r13 \n\t" \
353 "adcq %%r11, %%r14 \n\t" \
354 "addq %%rdi, %%r13 \n\t" \
355 "adcq $0, %%r14 \n\t" \
358 "movq " a4 ", %%rdx \n\t" /* round 4: rdx = a4; add a4 * b[0..3] into r10, r12, r13, r14 with r15 as the new top word */ \
359 "mulxq 0(" b "), %%r8, %%r9 \n\t" \
360 "mulxq 8(" b "), %%rdi, %%r11 \n\t" \
361 "addq %%r8, %%r10 \n\t" \
362 "adcq %%r9, %%r12 \n\t" \
363 "adcq %%r11, %%r13 \n\t" \
364 "adcq $0, %%r14 \n\t" \
365 "addq %%rdi, %%r12 \n\t" \
367 "mulxq 16(" b "), %%r8, %%r9 \n\t" \
368 "mulxq 24(" b "), %%rdi, %%r15 \n\t" \
369 "adcq %%r8, %%r13 \n\t" \
370 "adcq %%r9, %%r14 \n\t" \
371 "adcq $0, %%r15 \n\t" \
372 "addq %%rdi, %%r14 \n\t" \
373 "adcq $0, %%r15 \n\t" \
376 "movq %%r10, %%rdx \n\t" /* rdx = r10, now the lowest accumulator word */ \
377 "mulxq %[r_inv], %%rdx, %%r8 \n\t" /* rdx = low 64 bits of r10 * r_inv */ \
378 "mulxq %[modulus_0], %%r8, %%r9 \n\t" /* add rdx * modulus_0..3 into r10, r12, r13, r14, r15 */ \
379 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
380 "addq %%r8, %%r10 \n\t" \
381 "adcq %%r9, %%r12 \n\t" \
382 "adcq %%r11, %%r13 \n\t" \
383 "adcq $0, %%r14 \n\t" \
384 "adcq $0, %%r15 \n\t" \
385 "addq %%rdi, %%r12 \n\t" \
387 "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
388 "mulxq %[modulus_3], %%rdi, %%rdx \n\t" \
389 "adcq %%r8, %%r13 \n\t" \
390 "adcq %%r9, %%r14 \n\t" \
391 "adcq %%rdx, %%r15 \n\t" \
392 "addq %%rdi, %%r14 \n\t" \
393 "adcq $0, %%r15 \n\t" /* NOTE(review): r12..r15 presumably hold the result at this point - confirm against the caller */
/*
 * MUL_256(a, b, r)   (add/adc variant)
 *
 * Multiplies the four-word value at `a` by the four-word value at `b` and
 * stores the low four 64-bit words of the product at `r` (i.e. the product
 * modulo 2^256). x[i] below means the 64-bit word at byte offset 8*i from
 * the address in register x.
 *
 *   a, b, r  string literals naming registers that hold the three addresses;
 *            they are pasted into the instruction text.
 *
 * Result words live in r13 (word 0), r14, r15, rax (word 3) until the final
 * stores. Registers written: rax, rdx, rsi, rdi, r8, r9, r10, r12..r15,
 * plus the flags. Partial-product halves that belong to word 4 or above
 * (e.g. r12, and r9 from the truncated products) are computed by mulxq but
 * never added.
 *
 * Fix: the final addition of low(a[3]*b[0]) used `adcq`, which also added
 * the carry OUT of word 3 left by the preceding `addq %rdi, %rax` back into
 * word 3. That carry belongs to word 4 and must be dropped, so the last
 * addition is now a plain `addq`. The listing-number prefixes that were
 * fused onto each line are also removed because they are not valid C.
 */
#define MUL_256(a, b, r) \
    "movq 0(" a "), %%rdx \n\t" /* rdx = a[0], the implicit multiplicand of mulxq */ \
    "mulxq 8(" b "), %%r8, %%r9 \n\t" /* r9:r8 = a[0] * b[1] (low half first, high half second) */ \
    "mulxq 24(" b "), %%rdi, %%r12 \n\t" /* r12:rdi = a[0] * b[3]; only the low half is used */ \
    "mulxq 0(" b "), %%r13, %%r14 \n\t" /* r14:r13 = a[0] * b[0] -> words 0 and 1 */ \
    "mulxq 16(" b "), %%r15, %%rax \n\t" /* rax:r15 = a[0] * b[2] -> words 2 and 3 */ \
    "xorq %%r10, %%r10 \n\t" /* r10 = 0, used as a zero addend */ \
    "addq %%r8, %%r14 \n\t" /* word 1 += low(a[0]*b[1]) */ \
    "adcq %%r9, %%r15 \n\t" /* word 2 += high(a[0]*b[1]) + CF */ \
    "adcq %%r10, %%rax \n\t" /* word 3 += CF */ \
    "addq %%rdi, %%rax \n\t" /* word 3 += low(a[0]*b[3]); carry out is beyond 256 bits */ \
    "movq 8(" a "), %%rdx \n\t" /* rdx = a[1] */ \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" /* r9:r8 = a[1] * b[0] */ \
    "mulxq 8(" b "), %%rdi, %%rsi \n\t" /* rsi:rdi = a[1] * b[1] */ \
    "addq %%r8, %%r14 \n\t" /* word 1 += low(a[1]*b[0]) */ \
    "adcq %%r9, %%r15 \n\t" /* word 2 += high(a[1]*b[0]) + CF */ \
    "adcq %%rsi, %%rax \n\t" /* word 3 += high(a[1]*b[1]) + CF */ \
    "addq %%rdi, %%r15 \n\t" /* word 2 += low(a[1]*b[1]) */ \
    "mulxq 16(" b "), %%r8, %%r9 \n\t" /* r9:r8 = a[1] * b[2]; only the low half is used */ \
    "adcq %%r8, %%rax \n\t" /* word 3 += low(a[1]*b[2]) + CF (carry from word 2) */ \
    "movq 16(" a "), %%rdx \n\t" /* rdx = a[2] */ \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" /* r9:r8 = a[2] * b[0] */ \
    "mulxq 8(" b "), %%rdi, %%rsi \n\t" /* rsi:rdi = a[2] * b[1]; only the low half is used */ \
    "addq %%r8, %%r15 \n\t" /* word 2 += low(a[2]*b[0]) */ \
    "adcq %%r9, %%rax \n\t" /* word 3 += high(a[2]*b[0]) + CF */ \
    "addq %%rdi, %%rax \n\t" /* word 3 += low(a[2]*b[1]); carry out is beyond 256 bits */ \
    "movq 24(" a "), %%rdx \n\t" /* rdx = a[3] */ \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" /* r9:r8 = a[3] * b[0]; only the low half is used */ \
    "addq %%r8, %%rax \n\t" /* word 3 += low(a[3]*b[0]); plain add so the dropped carry above is not re-added */ \
    "movq %%r13, 0(" r ") \n\t" /* r[0] = word 0 */ \
    "movq %%r14, 8(" r ") \n\t" /* r[1] = word 1 */ \
    "movq %%r15, 16(" r ") \n\t" /* r[2] = word 2 */ \
    "movq %%rax, 24(" r ") \n\t" /* r[3] = word 3 */
455 "adcxq 0(" b "), %%r12 \n\t" /* r12 += word at b+0 + CF; adcxq reads and writes only CF, and this body does not initialise CF itself. NOTE(review): the #define line that opens this macro body is not visible in this excerpt */ \
456 "adcxq 8(" b "), %%r13 \n\t" /* r13 += word at b+8 + CF */ \
457 "adcxq 16(" b "), %%r14 \n\t" /* r14 += word at b+16 + CF */ \
458 "adcxq 24(" b "), %%r15 \n\t" /* r15 += word at b+24 + CF; the carry out of the top word stays in CF */
465 "subq 0(" b "), %%r12 \n\t" /* r12 -= word at b+0; starts a borrow chain in CF. NOTE(review): the #define line that opens this macro body is not visible in this excerpt */ \
466 "sbbq 8(" b "), %%r13 \n\t" /* r13 -= word at b+8 + CF */ \
467 "sbbq 16(" b "), %%r14 \n\t" /* r14 -= word at b+16 + CF */ \
468 "sbbq 24(" b "), %%r15 \n\t" /* r15 -= word at b+24 + CF; the borrow out of the top word stays in CF */
/*
 * ADD_REDUCE(b, modulus_0, modulus_1, modulus_2, modulus_3)   (adcx/adox variant)
 *
 * Operates on the four-word value held in r12 (lowest word) .. r15 and
 * interleaves two independent carry chains, word by word:
 *   - adcxq adds the word at b+8*i, carrying through CF only;
 *   - the sum so far is copied to r8..r11 (movq leaves the flags alone);
 *   - adoxq adds the operand modulus_i, carrying through OF only.
 * Finally, if OF is clear after the last adoxq, cmovnoq restores the saved
 * sum from r8..r11.
 *
 * Neither CF nor OF is initialised inside the macro, so both chains start
 * from whatever the flags hold on entry.
 *
 * All arguments are string literals pasted into the instruction text.
 * Overwrites r8..r11, CF and OF in addition to r12..r15.
 *
 * NOTE(review): the sibling REDUCE_FIELD_ELEMENT names the same operands
 * `neg_modulus_*`, so callers presumably pass the negated modulus - confirm
 * at the call sites.
 */
474#define ADD_REDUCE(b, modulus_0, modulus_1, modulus_2, modulus_3) \
475 "adcxq 0(" b "), %%r12 \n\t" /* r12 += word at b+0 + CF */ \
476 "movq %%r12, %%r8 \n\t" /* r8 = word 0 of the plain sum */ \
477 "adoxq " modulus_0 ", %%r12 \n\t" /* r12 += modulus_0 + OF */ \
478 "adcxq 8(" b "), %%r13 \n\t" /* r13 += word at b+8 + CF */ \
479 "movq %%r13, %%r9 \n\t" /* r9 = word 1 of the plain sum */ \
480 "adoxq " modulus_1 ", %%r13 \n\t" /* r13 += modulus_1 + OF */ \
481 "adcxq 16(" b "), %%r14 \n\t" /* r14 += word at b+16 + CF */ \
482 "movq %%r14, %%r10 \n\t" /* r10 = word 2 of the plain sum */ \
483 "adoxq " modulus_2 ", %%r14 \n\t" /* r14 += modulus_2 + OF */ \
484 "adcxq 24(" b "), %%r15 \n\t" /* r15 += word at b+24 + CF */ \
485 "movq %%r15, %%r11 \n\t" /* r11 = word 3 of the plain sum */ \
486 "adoxq " modulus_3 ", %%r15 \n\t" /* r15 += modulus_3 + OF */ \
487 "cmovnoq %%r8, %%r12 \n\t" /* OF == 0 after the adox chain -> put the saved sum back */ \
488 "cmovnoq %%r9, %%r13 \n\t" \
489 "cmovnoq %%r10, %%r14 \n\t" \
490 "cmovnoq %%r11, %%r15 \n\t"
/*
 * REDUCE_FIELD_ELEMENT(neg_modulus_0, neg_modulus_1, neg_modulus_2, neg_modulus_3)
 * (adcx/adox variant)
 *
 * Operates on the four-word value held in r12 (lowest word) .. r15:
 *   1. saves the value in r8..r11;
 *   2. adds the operands neg_modulus_0..3 to r12..r15 with adoxq, which
 *      carries through OF only;
 *   3. if OF is clear after the last adoxq, restores the saved value.
 *
 * OF is not initialised inside the macro, so the adoxq chain starts from
 * whatever OF holds on entry.
 *
 * All arguments are string literals pasted into the instruction text.
 * Overwrites r8..r11 and OF in addition to r12..r15.
 *
 * NOTE(review): judging by the parameter names the operands are the negated
 * modulus - confirm at the call sites.
 */
497#define REDUCE_FIELD_ELEMENT(neg_modulus_0, neg_modulus_1, neg_modulus_2, neg_modulus_3) \
499 "movq %%r12, %%r8 \n\t" /* step 1: keep a copy of the input in r8..r11 */ \
500 "movq %%r13, %%r9 \n\t" \
501 "movq %%r14, %%r10 \n\t" \
502 "movq %%r15, %%r11 \n\t" \
508 "adoxq " neg_modulus_0 ", %%r12 \n\t" /* step 2: r12..r15 += neg_modulus_0..3, carry rippling through OF */ \
509 "adoxq " neg_modulus_1 ", %%r13 \n\t" \
510 "adoxq " neg_modulus_2 ", %%r14 \n\t" \
511 "adoxq " neg_modulus_3 ", %%r15 \n\t" \
515 "cmovnoq %%r8, %%r12 \n\t" /* step 3: OF == 0 after step 2 -> put the saved input back */ \
516 "cmovnoq %%r9, %%r13 \n\t" \
517 "cmovnoq %%r10, %%r14 \n\t" \
518 "cmovnoq %%r11, %%r15 \n\t"
526 "movq 0(" a "), %%rdx \n\t" /* rdx = a[0], the implicit multiplicand of mulxq (a[i] = 64-bit word at byte offset 8*i from `a`). NOTE(review): the #define line that opens this macro body is not visible in this excerpt */ \
528 "xorq %%r8, %%r8 \n\t" /* zeroes r8 and clears CF and OF, the carry flags used by adcxq and adoxq respectively */ \
530 "mulxq 8(" a "), %%r9, %%r10 \n\t" /* r10:r9 = a[0] * a[1] (mulxq writes the low half to the first destination, the high half to the second) */ \
531 "mulxq 16(" a "), %%r8, %%r15 \n\t" /* r15:r8 = a[0] * a[2] */ \
532 "mulxq 24(" a "), %%r11, %%r12 \n\t" /* r12:r11 = a[0] * a[3] */ \
536 "adoxq %%r8, %%r10 \n\t" /* r10 += low(a[0]*a[2]) on the OF chain */ \
537 "adcxq %%r15, %%r11 \n\t" /* r11 += high(a[0]*a[2]) on the CF chain */ \
538 "movq 8(" a "), %%rdx \n\t" /* rdx = a[1] */ \
539 "mulxq 16(" a "), %%r8, %%r15 \n\t" /* r15:r8 = a[1] * a[2] */ \
540 "mulxq 24(" a "), %%rdi, %%rcx \n\t" /* rcx:rdi = a[1] * a[3] */ \
541 "movq 24(" a "), %%rdx \n\t" /* rdx = a[3] */ \
542 "mulxq 16(" a "), %%r13, %%r14 \n\t" /* r14:r13 = a[3] * a[2] */ \
543 "adoxq %%r8, %%r11 \n\t" /* the cross products are folded into r11..r14, alternating between the OF and CF chains */ \
544 "adcxq %%rdi, %%r12 \n\t" \
545 "adoxq %%r15, %%r12 \n\t" \
546 "adcxq %%rcx, %%r13 \n\t" \
547 "adoxq %[zero_reference], %%r13 \n\t" /* adding %[zero_reference] folds in the pending carry. NOTE(review): presumably an operand that holds 0 - confirm */ \
548 "adcxq %[zero_reference], %%r14 \n\t" \
549 "adoxq %[zero_reference], %%r14 \n\t" \
552 "adoxq %%r9, %%r9 \n\t" /* double the cross-product sum: r9, r10, r11 are added to themselves on the OF chain and r12, r13, r14 on the CF chain */ \
553 "adcxq %%r12, %%r12 \n\t" \
554 "adoxq %%r10, %%r10 \n\t" \
555 "adcxq %%r13, %%r13 \n\t" \
556 "adoxq %%r11, %%r11 \n\t" \
557 "adcxq %%r14, %%r14 \n\t" \
560 "movq 0(" a "), %%rdx \n\t" /* rdx = a[0] */ \
561 "mulxq %%rdx, %%r8, %%rcx \n\t" /* rcx:r8 = a[0]^2 */ \
562 "movq 16(" a "), %%rdx \n\t" /* rdx = a[2] */ \
563 "mulxq %%rdx, %%rdx, %%rdi \n\t" /* rdi:rdx = a[2]^2 */ \
565 "adcxq %%rcx, %%r9 \n\t" /* r9 += high(a[0]^2) on the CF chain */ \
566 "adoxq %%rdx, %%r12 \n\t" /* r12 += low(a[2]^2) on the OF chain */ \
567 "adoxq %%rdi, %%r13 \n\t" /* r13 += high(a[2]^2) on the OF chain */ \
568 "movq 24(" a "), %%rdx \n\t" /* rdx = a[3] */ \
569 "mulxq %%rdx, %%rcx, %%r15 \n\t" /* r15:rcx = a[3]^2 */ \
570 "movq 8(" a "), %%rdx \n\t" /* rdx = a[1] */ \
571 "mulxq %%rdx, %%rdi, %%rdx \n\t" /* rdx:rdi = a[1]^2 */ \
572 "adcxq %%rdi, %%r10 \n\t" /* r10 += low(a[1]^2) on the CF chain */ \
573 "adcxq %%rdx, %%r11 \n\t" /* r11 += high(a[1]^2) on the CF chain */ \
574 "adoxq %%rcx, %%r14 \n\t" /* r14 += low(a[3]^2) on the OF chain */ \
575 "adoxq %[zero_reference], %%r15 \n\t" /* r15 += pending OF carry */ \
578 "movq %%r8, %%rdx \n\t" /* reduction step on word r8: rdx = r8 */ \
579 "mulxq %[r_inv], %%rdx, %%rdi \n\t" /* rdx = low 64 bits of r8 * r_inv; the high half written to rdi is overwritten by the next mulxq */ \
580 "mulxq %[modulus_0], %%rdi, %%rcx \n\t" /* the rest of this step adds rdx * modulus_0..3 into the accumulator starting at r8 */ \
581 "adoxq %%rdi, %%r8 \n\t" /* NOTE(review): presumably leaves r8 == 0 by construction of r_inv; r8 is reused as scratch on the next line - confirm */ \
582 "mulxq %[modulus_3], %%r8, %%rdi \n\t" \
583 "adcxq %%rdi, %%r12 \n\t" \
584 "adoxq %%rcx, %%r9 \n\t" \
585 "adcxq %[zero_reference], %%r13 \n\t" \
586 "adcxq %[zero_reference], %%r14 \n\t" \
587 "mulxq %[modulus_1], %%rdi, %%rcx \n\t" \
588 "adcxq %[zero_reference], %%r15 \n\t" \
589 "adoxq %%rcx, %%r10 \n\t" \
590 "adcxq %%rdi, %%r9 \n\t" \
591 "adoxq %%r8, %%r11 \n\t" \
592 "mulxq %[modulus_2], %%rdi, %%rcx \n\t" \
593 "adcxq %%rdi, %%r10 \n\t" \
594 "adcxq %%rcx, %%r11 \n\t" \
597 "movq %%r9, %%rdx \n\t" /* reduction step on word r9: rdx = r9 */ \
598 "mulxq %[r_inv], %%rdx, %%rdi \n\t" /* rdx = low 64 bits of r9 * r_inv */ \
599 "mulxq %[modulus_2], %%rdi, %%rcx \n\t" /* the rest of this step adds rdx * modulus_0..3 into the accumulator starting at r9 */ \
600 "adoxq %%rcx, %%r12 \n\t" \
601 "mulxq %[modulus_3], %%r8, %%rcx \n\t" \
602 "adcxq %%r8, %%r12 \n\t" \
603 "adoxq %%rcx, %%r13 \n\t" \
604 "adcxq %[zero_reference], %%r13 \n\t" \
605 "adoxq %[zero_reference], %%r14 \n\t" \
606 "adcxq %[zero_reference], %%r14 \n\t" \
607 "adoxq %[zero_reference], %%r15 \n\t" \
608 "adcxq %[zero_reference], %%r15 \n\t" \
609 "mulxq %[modulus_0], %%r8, %%rcx \n\t" \
610 "adcxq %%r8, %%r9 \n\t" \
611 "adoxq %%rcx, %%r10 \n\t" \
612 "mulxq %[modulus_1], %%r8, %%rcx \n\t" \
613 "adcxq %%r8, %%r10 \n\t" \
614 "adoxq %%rcx, %%r11 \n\t" \
615 "adcxq %%rdi, %%r11 \n\t" \
618 "movq %%r10, %%rdx \n\t" /* reduction step on word r10: rdx = r10 */ \
619 "mulxq %[r_inv], %%rdx, %%rdi \n\t" /* rdx = low 64 bits of r10 * r_inv */ \
620 "mulxq %[modulus_1], %%rdi, %%rcx \n\t" /* the rest of this step adds rdx * modulus_0..3 into the accumulator starting at r10 */ \
621 "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
622 "adoxq %%rcx, %%r12 \n\t" \
623 "adcxq %%r8, %%r12 \n\t" \
624 "adoxq %%r9, %%r13 \n\t" \
625 "mulxq %[modulus_3], %%r8, %%r9 \n\t" \
626 "adcxq %%r8, %%r13 \n\t" \
627 "adoxq %%r9, %%r14 \n\t" \
628 "adcxq %[zero_reference], %%r14 \n\t" \
629 "adoxq %[zero_reference], %%r15 \n\t" \
630 "adcxq %[zero_reference], %%r15 \n\t" \
631 "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
632 "adcxq %%r8, %%r10 \n\t" \
633 "adoxq %%r9, %%r11 \n\t" \
634 "adcxq %%rdi, %%r11 \n\t" \
635 "adoxq %[zero_reference], %%r12 \n\t" \
636 "adoxq %[zero_reference], %%r13 \n\t" \
639 "movq %%r11, %%rdx \n\t" /* reduction step on word r11: rdx = r11 */ \
640 "mulxq %[r_inv], %%rdx, %%rdi \n\t" /* rdx = low 64 bits of r11 * r_inv */ \
641 "mulxq %[modulus_0], %%rdi, %%rcx \n\t" /* the rest of this step adds rdx * modulus_0..3 into the accumulator starting at r11 */ \
642 "mulxq %[modulus_1], %%r8, %%r9 \n\t" \
643 "adoxq %%rdi, %%r11 \n\t" \
644 "adcxq %%r8, %%r12 \n\t" \
645 "adoxq %%rcx, %%r12 \n\t" \
646 "adcxq %%r9, %%r13 \n\t" \
647 "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
648 "mulxq %[modulus_3], %%r10, %%r11 \n\t" \
649 "adoxq %%r8, %%r13 \n\t" \
650 "adcxq %%r10, %%r14 \n\t" \
651 "adoxq %%r9, %%r14 \n\t" \
652 "adcxq %%r11, %%r15 \n\t" \
653 "adoxq %[zero_reference], %%r15 \n\t" /* NOTE(review): r12..r15 presumably hold the result at this point - confirm against the caller */
/*
 * MUL(a1, a2, a3, a4, b)   (adcx/adox variant)
 *
 * Four rounds, one per operand a1..a4. Each round:
 *   - loads the operand into rdx and uses mulxq to multiply it by the four
 *     64-bit words b[0..3] (b[i] = word at byte offset 8*i from the address
 *     in register `b`), adding the products into a running accumulator;
 *   - multiplies the lowest accumulator word by %[r_inv] (keeping the low
 *     64 bits) and adds that factor times %[modulus_0..3] into the
 *     accumulator.
 * Additions alternate between adcxq (carry in CF) and adoxq (carry in OF).
 * The flags are cleared once, by the xorq at the top; after that both carry
 * chains run on through all four rounds.
 *
 * a1..a4 and b are string literals pasted into the instruction text; r_inv,
 * modulus_0..3 and zero_reference are named operands of the enclosing asm
 * statement. Registers written: rdx, rdi, r8..r15, plus CF and OF.
 *
 * NOTE(review): this has the shape of a word-by-word Montgomery
 * multiplication, with a1..a4 presumably being the four words of the first
 * factor, %[zero_reference] an operand holding 0, and the result left in
 * r12..r15 - confirm against the callers.
 */
659#define MUL(a1, a2, a3, a4, b) \
660 "movq " a1 ", %%rdx \n\t" /* round 1: rdx = a1, the implicit multiplicand of mulxq */ \
661 "xorq %%r8, %%r8 \n\t" /* zeroes r8 and clears CF and OF */ \
663 "mulxq 0(" b "), %%r13, %%r14 \n\t" /* r14:r13 = a1 * b[0] (low half in the first destination, high half in the second) */ \
664 "mulxq 8(" b "), %%r8, %%r9 \n\t" /* r9:r8 = a1 * b[1] */ \
665 "mulxq 16(" b "), %%r15, %%r10 \n\t" /* r10:r15 = a1 * b[2] */ \
666 "mulxq 24(" b "), %%rdi, %%r12 \n\t" /* r12:rdi = a1 * b[3] */ \
670 "movq %%r13, %%rdx \n\t" /* rdx = r13, the lowest accumulator word */ \
671 "mulxq %[r_inv], %%rdx, %%r11 \n\t" /* rdx = low 64 bits of r13 * r_inv; the high half written to r11 is overwritten below */ \
674 "adcxq %%r8, %%r14 \n\t" /* fold the remaining partial products into r14, r15, r10 */ \
675 "adoxq %%rdi, %%r10 \n\t" \
676 "adcxq %%r9, %%r15 \n\t" \
679 "mulxq %[modulus_3], %%rdi, %%r11 \n\t" /* add rdx * modulus_0..3 into the accumulator starting at r13 */ \
680 "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
681 "adcxq %%rdi, %%r10 \n\t" \
682 "adoxq %%r11, %%r12 \n\t" \
683 "adcxq %[zero_reference], %%r12 \n\t" \
684 "adoxq %%r8, %%r13 \n\t" \
685 "adcxq %%r9, %%r14 \n\t" \
686 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
687 "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
688 "adoxq %%rdi, %%r14 \n\t" \
689 "adcxq %%r11, %%r15 \n\t" \
690 "adoxq %%r8, %%r15 \n\t" \
691 "adcxq %%r9, %%r10 \n\t" \
700 "movq " a2 ", %%rdx \n\t" /* round 2: rdx = a2; add a2 * b[0..3] into r14, r15, r10, r12 with r13 as the new top word */ \
701 "mulxq 16(" b "), %%r8, %%r9 \n\t" \
702 "mulxq 24(" b "), %%rdi, %%r13 \n\t" \
703 "adoxq %%r8, %%r10 \n\t" \
704 "adcxq %%rdi, %%r12 \n\t" \
705 "adoxq %%r9, %%r12 \n\t" \
706 "adcxq %[zero_reference], %%r13 \n\t" \
707 "adoxq %[zero_reference], %%r13 \n\t" \
708 "mulxq 0(" b "), %%r8, %%r9 \n\t" \
709 "mulxq 8(" b "), %%rdi, %%r11 \n\t" \
710 "adcxq %%r8, %%r14 \n\t" \
711 "adoxq %%r9, %%r15 \n\t" \
712 "adcxq %%rdi, %%r15 \n\t" \
713 "adoxq %%r11, %%r10 \n\t" \
716 "movq %%r14, %%rdx \n\t" /* rdx = r14, now the lowest accumulator word */ \
717 "mulxq %[r_inv], %%rdx, %%r8 \n\t" /* rdx = low 64 bits of r14 * r_inv */ \
718 "mulxq %[modulus_2], %%r8, %%r9 \n\t" /* add rdx * modulus_0..3 into the accumulator starting at r14 */ \
719 "mulxq %[modulus_3], %%rdi, %%r11 \n\t" \
720 "adcxq %%r8, %%r10 \n\t" \
721 "adoxq %%r9, %%r12 \n\t" \
722 "adcxq %%rdi, %%r12 \n\t" \
723 "adoxq %%r11, %%r13 \n\t" \
724 "adcxq %[zero_reference], %%r13 \n\t" \
725 "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
726 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
727 "adoxq %%r8, %%r14 \n\t" \
728 "adcxq %%rdi, %%r15 \n\t" \
729 "adoxq %%r9, %%r15 \n\t" \
730 "adcxq %%r11, %%r10 \n\t" \
733 "movq " a3 ", %%rdx \n\t" /* round 3: rdx = a3; add a3 * b[0..3] into r15, r10, r12, r13 with r14 as the new top word */ \
734 "mulxq 8(" b "), %%rdi, %%r11 \n\t" \
735 "mulxq 16(" b "), %%r8, %%r9 \n\t" \
736 "adoxq %%rdi, %%r10 \n\t" \
737 "adcxq %%r11, %%r12 \n\t" \
738 "adoxq %%r8, %%r12 \n\t" \
739 "adcxq %%r9, %%r13 \n\t" \
740 "mulxq 24(" b "), %%rdi, %%r14 \n\t" \
741 "mulxq 0(" b "), %%r8, %%r9 \n\t" \
742 "adoxq %%rdi, %%r13 \n\t" \
743 "adcxq %[zero_reference], %%r14 \n\t" \
744 "adoxq %[zero_reference], %%r14 \n\t" \
745 "adcxq %%r8, %%r15 \n\t" \
746 "adoxq %%r9, %%r10 \n\t" \
749 "movq %%r15, %%rdx \n\t" /* rdx = r15, now the lowest accumulator word */ \
750 "mulxq %[r_inv], %%rdx, %%r8 \n\t" /* rdx = low 64 bits of r15 * r_inv */ \
751 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" /* add rdx * modulus_0..3 into the accumulator starting at r15 */ \
752 "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
753 "adcxq %%rdi, %%r10 \n\t" \
754 "adoxq %%r11, %%r12 \n\t" \
755 "adcxq %%r8, %%r12 \n\t" \
756 "adoxq %%r9, %%r13 \n\t" \
757 "mulxq %[modulus_3], %%rdi, %%r11 \n\t" \
758 "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
759 "adcxq %%rdi, %%r13 \n\t" \
760 "adoxq %%r11, %%r14 \n\t" \
761 "adcxq %[zero_reference], %%r14 \n\t" \
762 "adoxq %%r8, %%r15 \n\t" \
763 "adcxq %%r9, %%r10 \n\t" \
766 "movq " a4 ", %%rdx \n\t" /* round 4: rdx = a4; add a4 * b[0..3] into r10, r12, r13, r14 with r15 as the new top word */ \
767 "mulxq 0(" b "), %%r8, %%r9 \n\t" \
768 "mulxq 8(" b "), %%rdi, %%r11 \n\t" \
769 "adoxq %%r8, %%r10 \n\t" \
770 "adcxq %%r9, %%r12 \n\t" \
771 "adoxq %%rdi, %%r12 \n\t" \
772 "adcxq %%r11, %%r13 \n\t" \
774 "mulxq 16(" b "), %%r8, %%r9 \n\t" \
775 "mulxq 24(" b "), %%rdi, %%r15 \n\t" \
776 "adoxq %%r8, %%r13 \n\t" \
777 "adcxq %%r9, %%r14 \n\t" \
778 "adoxq %%rdi, %%r14 \n\t" \
779 "adcxq %[zero_reference], %%r15 \n\t" \
780 "adoxq %[zero_reference], %%r15 \n\t" \
783 "movq %%r10, %%rdx \n\t" /* rdx = r10, now the lowest accumulator word */ \
784 "mulxq %[r_inv], %%rdx, %%r8 \n\t" /* rdx = low 64 bits of r10 * r_inv */ \
785 "mulxq %[modulus_0], %%r8, %%r9 \n\t" /* add rdx * modulus_0..3 into the accumulator starting at r10 */ \
786 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
787 "adoxq %%r8, %%r10 \n\t" \
788 "adcxq %%r9, %%r12 \n\t" \
789 "adoxq %%rdi, %%r12 \n\t" \
790 "adcxq %%r11, %%r13 \n\t" \
792 "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
793 "mulxq %[modulus_3], %%rdi, %%rdx \n\t" \
794 "adoxq %%r8, %%r13 \n\t" \
795 "adcxq %%r9, %%r14 \n\t" \
796 "adoxq %%rdi, %%r14 \n\t" \
797 "adcxq %%rdx, %%r15 \n\t" \
798 "adoxq %[zero_reference], %%r15 \n\t" /* NOTE(review): r12..r15 presumably hold the result at this point - confirm against the caller */
/*
 * MUL_FOO(a1, a2, a3, a4, b)   (adcx/adox variant)
 *
 * Four rounds, one per operand a1..a4. Each round:
 *   - loads the operand into rdx and uses mulxq to multiply it by the four
 *     64-bit words b[0..3] (b[i] = word at byte offset 8*i from the address
 *     in register `b`), adding the products into a running accumulator;
 *   - multiplies the lowest accumulator word by %[r_inv] (keeping the low
 *     64 bits) and adds that factor times %[modulus_0..3] into the
 *     accumulator.
 * Additions alternate between adcxq (carry in CF) and adoxq (carry in OF).
 * The flags are cleared once, by the xorq at the top; after that both carry
 * chains run on through all four rounds.
 *
 * a1..a4 and b are string literals pasted into the instruction text; r_inv,
 * modulus_0..3 and zero_reference are named operands of the enclosing asm
 * statement. Registers written: rdx, rdi, r8..r15, plus CF and OF.
 *
 * NOTE(review): the instruction sequence appears to be identical, line for
 * line, to the adcx/adox MUL macro in this file - confirm whether both
 * macros are still needed. As with MUL, a1..a4 are presumably the four
 * words of the first factor, %[zero_reference] an operand holding 0, and
 * the result is presumably left in r12..r15.
 */
804#define MUL_FOO(a1, a2, a3, a4, b) \
805 "movq " a1 ", %%rdx \n\t" /* round 1: rdx = a1, the implicit multiplicand of mulxq */ \
806 "xorq %%r8, %%r8 \n\t" /* zeroes r8 and clears CF and OF */ \
808 "mulxq 0(" b "), %%r13, %%r14 \n\t" /* r14:r13 = a1 * b[0] (low half in the first destination, high half in the second) */ \
809 "mulxq 8(" b "), %%r8, %%r9 \n\t" /* r9:r8 = a1 * b[1] */ \
810 "mulxq 16(" b "), %%r15, %%r10 \n\t" /* r10:r15 = a1 * b[2] */ \
811 "mulxq 24(" b "), %%rdi, %%r12 \n\t" /* r12:rdi = a1 * b[3] */ \
815 "movq %%r13, %%rdx \n\t" /* rdx = r13, the lowest accumulator word */ \
816 "mulxq %[r_inv], %%rdx, %%r11 \n\t" /* rdx = low 64 bits of r13 * r_inv; the high half written to r11 is overwritten below */ \
819 "adcxq %%r8, %%r14 \n\t" /* fold the remaining partial products into r14, r15, r10 */ \
820 "adoxq %%rdi, %%r10 \n\t" \
821 "adcxq %%r9, %%r15 \n\t" \
824 "mulxq %[modulus_3], %%rdi, %%r11 \n\t" /* add rdx * modulus_0..3 into the accumulator starting at r13 */ \
825 "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
826 "adcxq %%rdi, %%r10 \n\t" \
827 "adoxq %%r11, %%r12 \n\t" \
828 "adcxq %[zero_reference], %%r12 \n\t" \
829 "adoxq %%r8, %%r13 \n\t" \
830 "adcxq %%r9, %%r14 \n\t" \
831 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
832 "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
833 "adoxq %%rdi, %%r14 \n\t" \
834 "adcxq %%r11, %%r15 \n\t" \
835 "adoxq %%r8, %%r15 \n\t" \
836 "adcxq %%r9, %%r10 \n\t" \
845 "movq " a2 ", %%rdx \n\t" /* round 2: rdx = a2; add a2 * b[0..3] into r14, r15, r10, r12 with r13 as the new top word */ \
846 "mulxq 16(" b "), %%r8, %%r9 \n\t" \
847 "mulxq 24(" b "), %%rdi, %%r13 \n\t" \
848 "adoxq %%r8, %%r10 \n\t" \
849 "adcxq %%rdi, %%r12 \n\t" \
850 "adoxq %%r9, %%r12 \n\t" \
851 "adcxq %[zero_reference], %%r13 \n\t" \
852 "adoxq %[zero_reference], %%r13 \n\t" \
853 "mulxq 0(" b "), %%r8, %%r9 \n\t" \
854 "mulxq 8(" b "), %%rdi, %%r11 \n\t" \
855 "adcxq %%r8, %%r14 \n\t" \
856 "adoxq %%r9, %%r15 \n\t" \
857 "adcxq %%rdi, %%r15 \n\t" \
858 "adoxq %%r11, %%r10 \n\t" \
861 "movq %%r14, %%rdx \n\t" /* rdx = r14, now the lowest accumulator word */ \
862 "mulxq %[r_inv], %%rdx, %%r8 \n\t" /* rdx = low 64 bits of r14 * r_inv */ \
863 "mulxq %[modulus_2], %%r8, %%r9 \n\t" /* add rdx * modulus_0..3 into the accumulator starting at r14 */ \
864 "mulxq %[modulus_3], %%rdi, %%r11 \n\t" \
865 "adcxq %%r8, %%r10 \n\t" \
866 "adoxq %%r9, %%r12 \n\t" \
867 "adcxq %%rdi, %%r12 \n\t" \
868 "adoxq %%r11, %%r13 \n\t" \
869 "adcxq %[zero_reference], %%r13 \n\t" \
870 "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
871 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
872 "adoxq %%r8, %%r14 \n\t" \
873 "adcxq %%rdi, %%r15 \n\t" \
874 "adoxq %%r9, %%r15 \n\t" \
875 "adcxq %%r11, %%r10 \n\t" \
878 "movq " a3 ", %%rdx \n\t" /* round 3: rdx = a3; add a3 * b[0..3] into r15, r10, r12, r13 with r14 as the new top word */ \
879 "mulxq 8(" b "), %%rdi, %%r11 \n\t" \
880 "mulxq 16(" b "), %%r8, %%r9 \n\t" \
881 "adoxq %%rdi, %%r10 \n\t" \
882 "adcxq %%r11, %%r12 \n\t" \
883 "adoxq %%r8, %%r12 \n\t" \
884 "adcxq %%r9, %%r13 \n\t" \
885 "mulxq 24(" b "), %%rdi, %%r14 \n\t" \
886 "mulxq 0(" b "), %%r8, %%r9 \n\t" \
887 "adoxq %%rdi, %%r13 \n\t" \
888 "adcxq %[zero_reference], %%r14 \n\t" \
889 "adoxq %[zero_reference], %%r14 \n\t" \
890 "adcxq %%r8, %%r15 \n\t" \
891 "adoxq %%r9, %%r10 \n\t" \
894 "movq %%r15, %%rdx \n\t" /* rdx = r15, now the lowest accumulator word */ \
895 "mulxq %[r_inv], %%rdx, %%r8 \n\t" /* rdx = low 64 bits of r15 * r_inv */ \
896 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" /* add rdx * modulus_0..3 into the accumulator starting at r15 */ \
897 "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
898 "adcxq %%rdi, %%r10 \n\t" \
899 "adoxq %%r11, %%r12 \n\t" \
900 "adcxq %%r8, %%r12 \n\t" \
901 "adoxq %%r9, %%r13 \n\t" \
902 "mulxq %[modulus_3], %%rdi, %%r11 \n\t" \
903 "mulxq %[modulus_0], %%r8, %%r9 \n\t" \
904 "adcxq %%rdi, %%r13 \n\t" \
905 "adoxq %%r11, %%r14 \n\t" \
906 "adcxq %[zero_reference], %%r14 \n\t" \
907 "adoxq %%r8, %%r15 \n\t" \
908 "adcxq %%r9, %%r10 \n\t" \
911 "movq " a4 ", %%rdx \n\t" /* round 4: rdx = a4; add a4 * b[0..3] into r10, r12, r13, r14 with r15 as the new top word */ \
912 "mulxq 0(" b "), %%r8, %%r9 \n\t" \
913 "mulxq 8(" b "), %%rdi, %%r11 \n\t" \
914 "adoxq %%r8, %%r10 \n\t" \
915 "adcxq %%r9, %%r12 \n\t" \
916 "adoxq %%rdi, %%r12 \n\t" \
917 "adcxq %%r11, %%r13 \n\t" \
919 "mulxq 16(" b "), %%r8, %%r9 \n\t" \
920 "mulxq 24(" b "), %%rdi, %%r15 \n\t" \
921 "adoxq %%r8, %%r13 \n\t" \
922 "adcxq %%r9, %%r14 \n\t" \
923 "adoxq %%rdi, %%r14 \n\t" \
924 "adcxq %[zero_reference], %%r15 \n\t" \
925 "adoxq %[zero_reference], %%r15 \n\t" \
928 "movq %%r10, %%rdx \n\t" /* rdx = r10, now the lowest accumulator word */ \
929 "mulxq %[r_inv], %%rdx, %%r8 \n\t" /* rdx = low 64 bits of r10 * r_inv */ \
930 "mulxq %[modulus_0], %%r8, %%r9 \n\t" /* add rdx * modulus_0..3 into the accumulator starting at r10 */ \
931 "mulxq %[modulus_1], %%rdi, %%r11 \n\t" \
932 "adoxq %%r8, %%r10 \n\t" \
933 "adcxq %%r9, %%r12 \n\t" \
934 "adoxq %%rdi, %%r12 \n\t" \
935 "adcxq %%r11, %%r13 \n\t" \
937 "mulxq %[modulus_2], %%r8, %%r9 \n\t" \
938 "mulxq %[modulus_3], %%rdi, %%rdx \n\t" \
939 "adoxq %%r8, %%r13 \n\t" \
940 "adcxq %%r9, %%r14 \n\t" \
941 "adoxq %%rdi, %%r14 \n\t" \
942 "adcxq %%rdx, %%r15 \n\t" \
943 "adoxq %[zero_reference], %%r15 \n\t" /* NOTE(review): r12..r15 presumably hold the result at this point - confirm against the caller */
/*
 * MUL_256(a, b, r)   (adcx/adox variant)
 *
 * Multiplies the four-word value at `a` by the four-word value at `b` and
 * stores the low four 64-bit words of the product at `r` (i.e. the product
 * modulo 2^256). x[i] below means the 64-bit word at byte offset 8*i from
 * the address in register x.
 *
 *   a, b, r  string literals naming registers that hold the three addresses;
 *            they are pasted into the instruction text.
 *
 * Result words live in r13 (word 0), r14, r15, rax (word 3) until the final
 * stores. adcxq carries through CF only and adoxq through OF only, giving
 * two independent carry chains per round. Registers written: rax, rdx, rsi,
 * rdi, r8, r9, r10, r12..r15, plus the flags. Partial-product halves that
 * belong to word 4 or above are computed by mulxq but never added.
 *
 * Fix: CF and OF used to be cleared only once, so a carry out of word 3
 * (rax) at the end of one round was picked up by the first adcxq/adoxq of
 * the next round and added to a LOWER word. Those carries lie beyond bit
 * 255 and must be dropped, so the flags are now re-cleared (xorq on the
 * already-zero r10) at the start of rounds 2, 3 and 4. The listing-number
 * prefixes that were fused onto each line are also removed because they are
 * not valid C.
 */
#define MUL_256(a, b, r) \
    "movq 0(" a "), %%rdx \n\t" /* round 1: rdx = a[0], the implicit multiplicand of mulxq */ \
    "mulxq 8(" b "), %%r8, %%r9 \n\t" /* r9:r8 = a[0] * b[1] (low half first, high half second) */ \
    "mulxq 24(" b "), %%rdi, %%r12 \n\t" /* r12:rdi = a[0] * b[3]; only the low half is used */ \
    "mulxq 0(" b "), %%r13, %%r14 \n\t" /* r14:r13 = a[0] * b[0] -> words 0 and 1 */ \
    "mulxq 16(" b "), %%r15, %%rax \n\t" /* rax:r15 = a[0] * b[2] -> words 2 and 3 */ \
    "xorq %%r10, %%r10 \n\t" /* r10 = 0 (zero addend); clears CF and OF */ \
    "adcxq %%r8, %%r14 \n\t" /* word 1 += low(a[0]*b[1]) */ \
    "adoxq %%rdi, %%rax \n\t" /* word 3 += low(a[0]*b[3]) */ \
    "adcxq %%r9, %%r15 \n\t" /* word 2 += high(a[0]*b[1]) + CF */ \
    "adcxq %%r10, %%rax \n\t" /* word 3 += CF */ \
    "xorq %%r10, %%r10 \n\t" /* round 2: drop the carries out of word 3 (CF = OF = 0) */ \
    "movq 8(" a "), %%rdx \n\t" /* rdx = a[1] */ \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" /* r9:r8 = a[1] * b[0] */ \
    "mulxq 8(" b "), %%rdi, %%rsi \n\t" /* rsi:rdi = a[1] * b[1] */ \
    "adcxq %%r8, %%r14 \n\t" /* word 1 += low(a[1]*b[0]) */ \
    "adoxq %%r9, %%r15 \n\t" /* word 2 += high(a[1]*b[0]) */ \
    "adcxq %%rdi, %%r15 \n\t" /* word 2 += low(a[1]*b[1]) + CF */ \
    "adoxq %%rsi, %%rax \n\t" /* word 3 += high(a[1]*b[1]) + OF */ \
    "mulxq 16(" b "), %%r8, %%r9 \n\t" /* r9:r8 = a[1] * b[2]; only the low half is used */ \
    "adcxq %%r8, %%rax \n\t" /* word 3 += low(a[1]*b[2]) + CF */ \
    "xorq %%r10, %%r10 \n\t" /* round 3: drop the carries out of word 3 (CF = OF = 0) */ \
    "movq 16(" a "), %%rdx \n\t" /* rdx = a[2] */ \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" /* r9:r8 = a[2] * b[0] */ \
    "mulxq 8(" b "), %%rdi, %%rsi \n\t" /* rsi:rdi = a[2] * b[1]; only the low half is used */ \
    "adcxq %%r8, %%r15 \n\t" /* word 2 += low(a[2]*b[0]) */ \
    "adoxq %%r9, %%rax \n\t" /* word 3 += high(a[2]*b[0]) */ \
    "adcxq %%rdi, %%rax \n\t" /* word 3 += low(a[2]*b[1]) + CF */ \
    "xorq %%r10, %%r10 \n\t" /* round 4: drop the carry out of word 3 (CF = OF = 0) */ \
    "movq 24(" a "), %%rdx \n\t" /* rdx = a[3] */ \
    "mulxq 0(" b "), %%r8, %%r9 \n\t" /* r9:r8 = a[3] * b[0]; only the low half is used */ \
    "adcxq %%r8, %%rax \n\t" /* word 3 += low(a[3]*b[0]) */ \
    "movq %%r13, 0(" r ") \n\t" /* r[0] = word 0 */ \
    "movq %%r14, 8(" r ") \n\t" /* r[1] = word 1 */ \
    "movq %%r15, 16(" r ") \n\t" /* r[2] = word 2 */ \
    "movq %%rax, 24(" r ") \n\t" /* r[3] = word 3 */