Barretenberg
The ZK-SNARK library at the core of Aztec
Loading...
Searching...
No Matches
blake3-impl.hpp
Go to the documentation of this file.
1// === AUDIT STATUS ===
2// internal: { status: not started, auditors: [], date: YYYY-MM-DD }
3// external_1: { status: not started, auditors: [], date: YYYY-MM-DD }
4// external_2: { status: not started, auditors: [], date: YYYY-MM-DD }
5// =====================
6
7#pragma once
8/*
9 BLAKE3 reference source code package - C implementations
10
11 Intellectual property:
12
13 The Rust code is copyright Jack O'Connor, 2019-2020.
14 The C code is copyright Samuel Neves and Jack O'Connor, 2019-2020.
15 The assembly code is copyright Samuel Neves, 2019-2020.
16
17 This work is released into the public domain with CC0 1.0. Alternatively, it is licensed under the Apache
18 License 2.0.
19
20 - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
21 - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
22
23 More information about the BLAKE3 hash function can be found at
24 https://github.com/BLAKE3-team/BLAKE3.
25*/
26
27#ifndef BLAKE3_IMPL_H
28#define BLAKE3_IMPL_H
29
30#include <assert.h>
31#include <stdbool.h>
32#include <stddef.h>
33#include <stdint.h>
34#include <string.h>
35
36#include "blake3s.hpp"
37
38namespace blake3_full {
39
40// This C implementation tries to support recent versions of GCC, Clang, and
41// MSVC.
42#if defined(_MSC_VER)
43#define INLINE static __forceinline
44#else
45#define INLINE static inline __attribute__((always_inline))
46#endif
47
48#if defined(__x86_64__) || defined(_M_X64)
49#define IS_X86
50#define IS_X86_64
51#endif
52
53#if defined(__i386__) || defined(_M_IX86)
54#define IS_X86
55#define IS_X86_32
56#endif
57
58#if defined(IS_X86)
59#if defined(_MSC_VER)
60#include <intrin.h>
61#endif
62#include <immintrin.h>
63#endif
64
65// #if defined(IS_X86)
66// #define MAX_SIMD_DEGREE 16
67// #elif defined(BLAKE3_USE_NEON)
68// #define MAX_SIMD_DEGREE 4
69// #else
70#define MAX_SIMD_DEGREE 1
71// #endif
72
73// There are some places where we want a static size that's equal to the
74// MAX_SIMD_DEGREE, but also at least 2.
75#define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
76
77// The dynamically detected SIMD degree of the current platform.
78/*
79 * Commenting out unnecessary parts as we currently don't need SIMD for
80 * different hardware. To be revisited later.
81 *
82 */
84{
85 return 1;
86 // #if defined(IS_X86)
87 // const enum cpu_feature features = get_cpu_features();
88 // MAYBE_UNUSED(features);
89 // #if !defined(BLAKE3_NO_AVX512)
90 // if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
91 // return 16;
92 // }
93 // #endif
94 // #if !defined(BLAKE3_NO_AVX2)
95 // if (features & AVX2) {
96 // return 8;
97 // }
98 // #endif
99 // #if !defined(BLAKE3_NO_SSE41)
100 // if (features & SSE41) {
101 // return 4;
102 // }
103 // #endif
104 // #if !defined(BLAKE3_NO_SSE2)
105 // if (features & SSE2) {
106 // return 4;
107 // }
108 // #endif
109 // #endif
110 // #if defined(BLAKE3_USE_NEON)
111 // return 4;
112 // #endif
113 // return 1;
114}
115
116/*----------------------------------------------------------------
117 *
118 * Commenting out as we currently don't need SIMD for different hardware.
119 * To be revisited later.
120 *
121
122enum cpu_feature get_cpu_features() {
123 if (g_cpu_features != UNDEFINED) {
124 return g_cpu_features;
125 } else {
126#if defined(IS_X86)
127 uint32_t regs[4] = {0};
128 uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3];
129 (void)edx;
130 enum cpu_feature features = 0;
131 cpuid(regs, 0);
132 const int max_id = *eax;
133 cpuid(regs, 1);
134#if defined(__amd64__) || defined(_M_X64)
135 features |= SSE2;
136#else
137 if (*edx & (1UL << 26))
138 features |= SSE2;
139#endif
140 if (*ecx & (1UL << 0))
141 features |= SSSE3;
142 if (*ecx & (1UL << 19))
143 features |= SSE41;
144
145 if (*ecx & (1UL << 27)) { // OSXSAVE
146 const uint64_t mask = xgetbv();
147 if ((mask & 6) == 6) { // SSE and AVX states
148 if (*ecx & (1UL << 28))
149 features |= AVX;
150 if (max_id >= 7) {
151 cpuidex(regs, 7, 0);
152 if (*ebx & (1UL << 5))
153 features |= AVX2;
154 if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmm
155 if (*ebx & (1UL << 31))
156 features |= AVX512VL;
157 if (*ebx & (1UL << 16))
158 features |= AVX512F;
159 }
160 }
161 }
162 }
163 g_cpu_features = features;
164 return features;
165#else
166 // How to detect NEON?
167 return 0;
168#endif
169 }
170}
171----------------------------------------------------------------*/
172
/*
 * Returns the zero-based position of the most significant set bit of x
 * (0 for the lowest bit, 63 for the highest).
 *
 * The caller must pass a nonzero x: the compiler-intrinsic paths below give no
 * meaningful answer for zero.
 */
static unsigned int highest_one(uint64_t x)
{
#if defined(__GNUC__) || defined(__clang__)
    /* For a leading-zero count in [0, 63], XOR with 63 equals 63 - count. */
    return (uint32_t)63 ^ (uint32_t)__builtin_clzll(x);
#elif defined(_MSC_VER) && defined(IS_X86_64)
    unsigned long bit_index;
    _BitScanReverse64(&bit_index, x);
    return (unsigned int)bit_index;
#elif defined(_MSC_VER) && defined(IS_X86_32)
    /* No 64-bit scan on 32-bit targets: scan the upper half first, then the lower half. */
    unsigned long bit_index;
    const uint64_t upper_half = x >> 32;
    if (upper_half != 0) {
        _BitScanReverse(&bit_index, (unsigned long)upper_half);
        return (unsigned int)(32 + bit_index);
    }
    _BitScanReverse(&bit_index, (unsigned long)x);
    return (unsigned int)bit_index;
#else
    /* Portable binary search: halve the window (32, 16, 8, 4, 2, 1) and keep
     * whichever half still contains a set bit. */
    unsigned int position = 0;
    for (unsigned int width = 32; width != 0; width >>= 1) {
        if ((x >> width) != 0) {
            x >>= width;
            position += width;
        }
    }
    return position;
#endif
}
221
222// Count the number of 1 bits.
223INLINE unsigned int popcnt(uint64_t x)
224{
225#if defined(__GNUC__) || defined(__clang__)
226 return uint32_t(__builtin_popcountll(x));
227#else
228 unsigned int count = 0;
229 while (x != 0) {
230 count += 1;
231 x &= x - 1;
232 }
233 return count;
234#endif
235}
236
237// Right rotates 32 bit inputs
238INLINE uint32_t rotr32(uint32_t w, uint32_t c)
239{
240 return (w >> c) | (w << (32 - c));
241}
242
243// Largest power of two less than or equal to x. As a special case, returns 1
244// when x is 0.
246{
247 return 1ULL << highest_one(x | 1);
248}
249
250INLINE uint32_t counter_low(uint64_t counter)
251{
252 return (uint32_t)counter;
253}
254
255INLINE uint32_t counter_high(uint64_t counter)
256{
257 return (uint32_t)(counter >> 32);
258}
259
260INLINE uint32_t load32(const void* src)
261{
262 const uint8_t* p = (const uint8_t*)src;
263 return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) | ((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24);
264}
265
266INLINE void load_key_words(const uint8_t key[BLAKE3_KEY_LEN], uint32_t key_words[8])
267{
268 key_words[0] = load32(&key[0 * 4]);
269 key_words[1] = load32(&key[1 * 4]);
270 key_words[2] = load32(&key[2 * 4]);
271 key_words[3] = load32(&key[3 * 4]);
272 key_words[4] = load32(&key[4 * 4]);
273 key_words[5] = load32(&key[5 * 4]);
274 key_words[6] = load32(&key[6 * 4]);
275 key_words[7] = load32(&key[7 * 4]);
276}
277
278INLINE void store32(void* dst, uint32_t w)
279{
280 uint8_t* p = (uint8_t*)dst;
281 p[0] = (uint8_t)(w >> 0);
282 p[1] = (uint8_t)(w >> 8);
283 p[2] = (uint8_t)(w >> 16);
284 p[3] = (uint8_t)(w >> 24);
285}
286
287INLINE void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8])
288{
289 store32(&bytes_out[0 * 4], cv_words[0]);
290 store32(&bytes_out[1 * 4], cv_words[1]);
291 store32(&bytes_out[2 * 4], cv_words[2]);
292 store32(&bytes_out[3 * 4], cv_words[3]);
293 store32(&bytes_out[4 * 4], cv_words[4]);
294 store32(&bytes_out[5 * 4], cv_words[5]);
295 store32(&bytes_out[6 * 4], cv_words[6]);
296 store32(&bytes_out[7 * 4], cv_words[7]);
297}
298
299} // namespace blake3_full
300
301#endif /* BLAKE3_IMPL_H */
#define INLINE
INLINE void store32(void *dst, uint32_t w)
INLINE unsigned int popcnt(uint64_t x)
size_t blake3_simd_degree(void)
INLINE uint64_t round_down_to_power_of_2(uint64_t x)
INLINE uint32_t load32(const void *src)
INLINE void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8])
INLINE uint32_t counter_high(uint64_t counter)
INLINE uint32_t counter_low(uint64_t counter)
INLINE uint32_t rotr32(uint32_t w, uint32_t c)
INLINE void load_key_words(const uint8_t key[BLAKE3_KEY_LEN], uint32_t key_words[8])