Barretenberg
The ZK-SNARK library at the core of Aztec
Loading...
Searching...
No Matches
fr.bench.cpp
Go to the documentation of this file.
1// === AUDIT STATUS ===
2// internal: { status: not started, auditors: [], date: YYYY-MM-DD }
3// external_1: { status: not started, auditors: [], date: YYYY-MM-DD }
4// external_2: { status: not started, auditors: [], date: YYYY-MM-DD }
5// =====================
6
7#include "fr.hpp"
8#include <benchmark/benchmark.h>
9
10using namespace benchmark;
11
12/*
13ADX asm
14------------------------------------------------------------
15Benchmark Time CPU Iterations
16------------------------------------------------------------
17sqr_assign_bench 58897282 ns 59659091 ns 11
18sqr_bench 80042911 ns 79861111 ns 9
19unary_minus_bench 33735725 ns 34375000 ns 20
20mul_assign_bench 61236664 ns 61079545 ns 11
21mul_bench 81490829 ns 80357143 ns 7
22add_bench 31052175 ns 31250000 ns 24
23sub_bench 33843720 ns 33593750 ns 20
24invert_bench 5008074600 ns 5000000000 ns 1
25pow_bench 6020118300 ns 6015625000 ns 1
26field_bench 916661900 ns 921875000 ns 1
27-------------------------------------------------------------------------------------
28Benchmark Time CPU Iterations
29-------------------------------------------------------------------------------------
30pippenger_bench/8192 16347884 ns 16047297 ns 37
31pippenger_bench/16384 29240762 ns 29296875 ns 24
32pippenger_bench/32768 57061918 ns 55397727 ns 11
33pippenger_bench/65536 106352929 ns 104910714 ns 7
34pippenger_bench/131072 205578500 ns 203125000 ns 4
35pippenger_bench/262144 352079750 ns 351562500 ns 2
36pippenger_bench/524288 691483800 ns 687500000 ns 1
37pippenger_bench/1048576 1333215400 ns 1250000000 ns 1
38unsafe pippenger. 1048576 points. clock cycles = 1898152911
39unsafe pippenger clock cycles per mul = 1810
40unsafe_pippenger_bench/1048576 898753700 ns 875000000 ns 1
41*/
42/*
43Generic
44------------------------------------------------------------
45Benchmark Time CPU Iterations
46------------------------------------------------------------
47sqr_assign_bench 109600860 ns 109375000 ns 5
48sqr_bench 112329383 ns 111979167 ns 6
49unary_minus_bench 29771167 ns 29296875 ns 24
50mul_assign_bench 111395033 ns 111979167 ns 6
51mul_bench 109264250 ns 109375000 ns 6
52add_bench 29478508 ns 29947917 ns 24
53sub_bench 32852481 ns 32738095 ns 21
54invert_bench 10354704900 ns 10328125000 ns 1
55pow_bench 12036579200 ns 12031250000 ns 1
56field_bench 1557337500 ns 1562500000 ns 1
57-------------------------------------------------------------------------------------
58Benchmark Time CPU Iterations
59-------------------------------------------------------------------------------------
60pippenger_bench/8192 27198528 ns 25862069 ns 29
61pippenger_bench/16384 51719409 ns 48295455 ns 11
62pippenger_bench/32768 87673922 ns 86805556 ns 9
63pippenger_bench/65536 169227125 ns 160156250 ns 4
64pippenger_bench/131072 322899500 ns 328125000 ns 2
65pippenger_bench/262144 615274500 ns 546875000 ns 1
66pippenger_bench/524288 1119308100 ns 1078125000 ns 1
67pippenger_bench/1048576 2145468700 ns 2078125000 ns 1
68unsafe pippenger. 1048576 points. clock cycles = 3626871186
69unsafe pippenger clock cycles per mul = 3458
70unsafe_pippenger_bench/1048576 1717275300 ns 1640625000 ns 1
71*/
72using namespace bb;
73
74void field_mixed_add(const fr& x1, const fr& y1, const fr& z1, const fr& x2, const fr& y2, fr& x3, fr& y3, fr& z3)
75{
76 fr T0;
77 fr T1;
78 fr T2;
79 fr T3;
80
81 // 3 sqr // 90 cycles
82 // 1 self sqr // 30 cycles
83 // 2 * // 72 cycles
84 // 5 *= // 180 cycles
85 // 1 - // 6 cycles
86 // 5 -= // 36 cycles
87 // 2 + // 12 cycles
88 // 6 += // 36 cycles
89 // 22 + 340 + 100 = 462 cycles
90 T0 = z1.sqr();
91 T1 = x2 * T0;
92 T1 -= x1;
93 T2 = z1 * T0;
94 T2 *= y2;
95 T2 -= y1;
96 z3 = z1 + T1;
97 T2 += T2;
98 T3 = T1.sqr();
99 T0 += T3;
100 z3.self_sqr();
101 z3 -= T0;
102 T3 += T3;
103 T3 += T3;
104 T1 *= T3;
105 T3 *= x1;
106 T0 = T3 + T3;
107 T0 += T1;
108 x3 = T2.sqr();
109 x3 -= T0;
110 T3 -= x3;
111 T1 *= y1;
112 T1 += T1;
113 T3 *= T2;
114 y3 = T3 - T1;
115}
116
117uint64_t rdtsc()
118{
119#ifdef __aarch64__
120 uint64_t pmccntr;
121 __asm__ __volatile__("mrs %0, pmccntr_el0" : "=r"(pmccntr));
122 return pmccntr;
123#elif __x86_64__
124 unsigned int lo = 0;
125 unsigned int hi = 0;
126 __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
127 return (static_cast<uint64_t>(hi) << 32) | lo;
128#else
129 return 0;
130#endif
131}
132
133constexpr size_t NUM_POINTS = 1 << 24;
134constexpr size_t NUM_INVERSIONS = 1 << 20;
137
141const auto init = []() {
142 fr seed_x = fr::random_element();
143 fr seed_y = fr::random_element();
144 fr seed_z = fr::random_element();
145 accx = seed_x;
146 accy = seed_y;
147 accz = seed_z;
148 for (size_t i = 0; i < NUM_POINTS; ++i) {
149 oldx.emplace_back(accx);
150 oldy.emplace_back(accy);
151
152 accx = accx * seed_x;
153 accy = accy * seed_y;
154 accz = accz * seed_z;
155 }
156 return 1;
157}();
158
160{
161 fr acc = x;
162 for (size_t i = 0; i < NUM_POINTS; ++i) {
163 acc.self_sqr();
164 }
165 return acc;
166}
167void sqr_assign_bench(State& state) noexcept
168{
169 uint64_t clocks = 0;
170 uint64_t count = 0;
171 for (auto _ : state) {
172 uint64_t before = rdtsc();
173 DoNotOptimize(sqr_assign_impl(accx));
174 clocks += (rdtsc() - before);
175 ++count;
176 }
177 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
178 std::cout << "sqr_assign clocks per operation = " << average << std::endl;
179}
181
182fr sqr_impl(const fr& x)
183{
184 fr acc = x;
185 for (size_t i = 0; i < NUM_POINTS; ++i) {
186 acc = acc.sqr();
187 }
188 return acc;
189}
190void sqr_bench(State& state) noexcept
191{
192 uint64_t clocks = 0;
193 uint64_t count = 0;
194 for (auto _ : state) {
195 uint64_t before = rdtsc();
196 DoNotOptimize(sqr_impl(accx));
197 clocks += (rdtsc() - before);
198 ++count;
199 }
200 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
201 std::cout << "sqr clocks per operation = " << average << std::endl;
202}
204
206{
207 fr acc = x;
208 for (size_t i = 0; i < NUM_POINTS; ++i) {
209 acc = -acc;
210 }
211 return acc;
212}
213void unary_minus_bench(State& state) noexcept
214{
215 uint64_t clocks = 0;
216 uint64_t count = 0;
217 for (auto _ : state) {
218 uint64_t before = rdtsc();
219 DoNotOptimize(unary_minus_impl(accx));
220 clocks += (rdtsc() - before);
221 ++count;
222 }
223 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
224 std::cout << "unary minus clocks per operation = " << average << std::endl;
225}
227
228fr mul_assign_impl(const fr& x, const fr& y)
229{
230 fr acc = x;
231 for (size_t i = 0; i < NUM_POINTS; ++i) {
232 acc *= y;
233 }
234 return acc;
235}
236void mul_assign_bench(State& state) noexcept
237{
238 uint64_t clocks = 0;
239 uint64_t count = 0;
240 for (auto _ : state) {
241 uint64_t before = rdtsc();
242 DoNotOptimize(mul_assign_impl(accx, accy));
243 clocks += (rdtsc() - before);
244 ++count;
245 }
246 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
247 std::cout << "mul assign clocks per operation = " << average << std::endl;
248}
250
251fr mul_impl(const fr& x, const fr& y)
252{
253 fr acc = x;
254 for (size_t i = 0; i < NUM_POINTS; ++i) {
255 acc = acc * y;
256 }
257 return acc;
258}
259
260void mul_bench(State& state) noexcept
261{
262 uint64_t clocks = 0;
263 uint64_t count = 0;
264 for (auto _ : state) {
265 uint64_t before = rdtsc();
266 DoNotOptimize(mul_impl(accx, accy));
267 clocks += (rdtsc() - before);
268 ++count;
269 }
270 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
271 std::cout << "mul clocks per operation = " << average << std::endl;
272}
274
275fr self_add_impl(const fr& x, fr& y)
276{
277 fr acc = x;
278 for (size_t i = 0; i < NUM_POINTS; ++i) {
279 acc += y;
280 }
281 return acc;
282}
283
284void self_add_bench(State& state) noexcept
285{
286 uint64_t clocks = 0;
287 uint64_t count = 0;
288 for (auto _ : state) {
289 uint64_t before = rdtsc();
290 DoNotOptimize(self_add_impl(accx, accy));
291 clocks += (rdtsc() - before);
292 ++count;
293 }
294 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
295 std::cout << "self add clocks per operation = " << average << std::endl;
296}
298
299fr add_impl(const fr& x, fr& y)
300{
301 fr acc = x;
302 for (size_t i = 0; i < NUM_POINTS; ++i) {
303 acc = acc + y;
304 }
305 return acc;
306}
307
308void add_bench(State& state) noexcept
309{
310 uint64_t clocks = 0;
311 uint64_t count = 0;
312 for (auto _ : state) {
313 uint64_t before = rdtsc();
314 DoNotOptimize(add_impl(accx, accy));
315 clocks += (rdtsc() - before);
316 ++count;
317 }
318 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
319 std::cout << "add clocks per operation = " << average << std::endl;
320}
322
323fr sub_impl(const fr& x, fr& y)
324{
325 fr acc = x;
326 for (size_t i = 0; i < NUM_POINTS; ++i) {
327 acc = acc - y;
328 }
329 return acc;
330}
331
332void sub_bench(State& state) noexcept
333{
334 uint64_t clocks = 0;
335 uint64_t count = 0;
336 for (auto _ : state) {
337 uint64_t before = rdtsc();
338 DoNotOptimize(sub_impl(accx, accy));
339 clocks += (rdtsc() - before);
340 ++count;
341 }
342 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
343 std::cout << "sub clocks per operation = " << average << std::endl;
344}
346
347fr addaddmul_impl(const fr& x, const fr& y, const fr& z)
348{
349 fr acc = x;
350 for (size_t i = 0; i < NUM_POINTS; ++i) {
351 acc *= y;
352 acc += z;
353 acc += y;
354 }
355 return acc;
356}
357
358void addaddmul_bench(State& state) noexcept
359{
360 uint64_t clocks = 0;
361 uint64_t count = 0;
362 for (auto _ : state) {
363 uint64_t before = rdtsc();
364 DoNotOptimize(addaddmul_impl(accx, accy, accz));
365 clocks += (rdtsc() - before);
366 ++count;
367 }
368 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
369 std::cout << "field clocks per call = " << average << std::endl;
370}
372
373fr subaddmul_impl(const fr& x, const fr& y, const fr& z)
374{
375 fr acc = x;
376 for (size_t i = 0; i < NUM_POINTS; ++i) {
377 acc *= y;
378 acc -= z;
379 acc += y;
380 }
381 return acc;
382}
383
384void subaddmul_bench(State& state) noexcept
385{
386 uint64_t clocks = 0;
387 uint64_t count = 0;
388 for (auto _ : state) {
389 uint64_t before = rdtsc();
390 DoNotOptimize(subaddmul_impl(accx, accy, accz));
391 clocks += (rdtsc() - before);
392 ++count;
393 }
394 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
395 std::cout << "field clocks per call = " << average << std::endl;
396}
398
399void field_bench(State& state) noexcept
400{
401 uint64_t clocks = 0;
402 uint64_t count = 0;
403 for (auto _ : state) {
404 uint64_t before = rdtsc();
405 fr x = accx;
406 fr y = accy;
407 fr z = accz;
408 for (size_t i = 0; i < NUM_POINTS; ++i) {
409 field_mixed_add(x, y, z, oldx[i], oldy[i], x, y, z);
410 }
411 DoNotOptimize(z);
412 clocks += (rdtsc() - before);
413 ++count;
414 }
415 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
416 std::cout << "field clocks per call = " << average << std::endl;
417}
419
420void invert_bench(State& state) noexcept
421{
422 for (auto _ : state) {
423 fr x = accx;
424 for (size_t i = 0; i < NUM_INVERSIONS; ++i) {
425 x = x.invert();
426 }
427 DoNotOptimize(x);
428 }
429}
431
432void pow_bench(State& state) noexcept
433{
434 for (auto _ : state) {
435 constexpr uint256_t exponent = fr::modulus - uint256_t(2);
436 fr x = accx;
437 for (size_t i = 0; i < NUM_INVERSIONS; ++i) {
438 x = x.pow(exponent);
439 }
440 DoNotOptimize(x);
441 }
442}
444
445void hash_bench(State& state) noexcept
446{
447 for (auto _ : state) {
448 state.PauseTiming();
450 state.ResumeTiming();
451 DoNotOptimize(std::hash<fr>{}(a));
452 }
453}
455
456// NOLINTNEXTLINE macro invokation triggers style guideline errors from googletest code
FF a
fr add_impl(const fr &x, fr &y)
Definition fr.bench.cpp:299
std::vector< bb::fr > oldx
Definition fr.bench.cpp:135
void sqr_bench(State &state) noexcept
Definition fr.bench.cpp:190
void addaddmul_bench(State &state) noexcept
Definition fr.bench.cpp:358
fr accy
Definition fr.bench.cpp:139
void field_bench(State &state) noexcept
Definition fr.bench.cpp:399
std::vector< bb::fr > oldy
Definition fr.bench.cpp:136
fr sqr_impl(const fr &x)
Definition fr.bench.cpp:182
void sub_bench(State &state) noexcept
Definition fr.bench.cpp:332
void self_add_bench(State &state) noexcept
Definition fr.bench.cpp:284
fr unary_minus_impl(const fr &x)
Definition fr.bench.cpp:205
fr accz
Definition fr.bench.cpp:140
void invert_bench(State &state) noexcept
Definition fr.bench.cpp:420
const auto init
Definition fr.bench.cpp:141
uint64_t rdtsc()
Definition fr.bench.cpp:117
void sqr_assign_bench(State &state) noexcept
Definition fr.bench.cpp:167
BENCHMARK_MAIN()
fr subaddmul_impl(const fr &x, const fr &y, const fr &z)
Definition fr.bench.cpp:373
fr mul_impl(const fr &x, const fr &y)
Definition fr.bench.cpp:251
void add_bench(State &state) noexcept
Definition fr.bench.cpp:308
fr addaddmul_impl(const fr &x, const fr &y, const fr &z)
Definition fr.bench.cpp:347
void field_mixed_add(const fr &x1, const fr &y1, const fr &z1, const fr &x2, const fr &y2, fr &x3, fr &y3, fr &z3)
Definition fr.bench.cpp:74
fr sub_impl(const fr &x, fr &y)
Definition fr.bench.cpp:323
fr mul_assign_impl(const fr &x, const fr &y)
Definition fr.bench.cpp:228
void mul_bench(State &state) noexcept
Definition fr.bench.cpp:260
void unary_minus_bench(State &state) noexcept
Definition fr.bench.cpp:213
constexpr size_t NUM_INVERSIONS
Definition fr.bench.cpp:134
void mul_assign_bench(State &state) noexcept
Definition fr.bench.cpp:236
fr accx
Definition fr.bench.cpp:138
fr self_add_impl(const fr &x, fr &y)
Definition fr.bench.cpp:275
constexpr size_t NUM_POINTS
Definition fr.bench.cpp:133
fr sqr_assign_impl(const fr &x)
Definition fr.bench.cpp:159
void pow_bench(State &state) noexcept
Definition fr.bench.cpp:432
void subaddmul_bench(State &state) noexcept
Definition fr.bench.cpp:384
void hash_bench(State &state) noexcept
Definition fr.bench.cpp:445
Entry point for Barretenberg command-line interface.
BENCHMARK(vector_of_evaluations) -> DenseRange(15, 21) ->Unit(kMillisecond) ->Iterations(1)
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Definition tuple.hpp:13
static constexpr uint256_t modulus
BB_INLINE constexpr field pow(const uint256_t &exponent) const noexcept
BB_INLINE constexpr void self_sqr() &noexcept
constexpr field invert() const noexcept
static field random_element(numeric::RNG *engine=nullptr) noexcept
BB_INLINE constexpr field sqr() const noexcept