// Barretenberg: src/barretenberg/ecc/groups/element_impl.hpp Source File

// === AUDIT STATUS ===

// internal:    { status: not started, auditors: [], date: YYYY-MM-DD }

// external_1:  { status: not started, auditors: [], date: YYYY-MM-DD }

// external_2:  { status: not started, auditors: [], date: YYYY-MM-DD }

// =====================


#pragma once

#include "barretenberg/common/assert.hpp"

#include "barretenberg/common/op_count.hpp"

#include "barretenberg/common/thread.hpp"

#include "barretenberg/ecc/groups/element.hpp"

#include "element.hpp"

#include <cstdint>


// NOLINTBEGIN(readability-implicit-bool-conversion, cppcoreguidelines-avoid-c-arrays)

namespace bb::group_elements {

template <class Fq, class Fr, class T>


constexpr element<Fq, Fr, T>::element(const Fq& a, const Fq& b, const Fq& c) noexcept

    : x(a)

    , y(b)

    , z(c)

{}


template <class Fq, class Fr, class T>


constexpr element<Fq, Fr, T>::element(const element& other) noexcept

    : x(other.x)

    , y(other.y)

    , z(other.z)

{}


template <class Fq, class Fr, class T>


constexpr element<Fq, Fr, T>::element(element&& other) noexcept

    : x(other.x)

    , y(other.y)

    , z(other.z)

{}


template <class Fq, class Fr, class T>

constexpr element<Fq, Fr, T>::element(const affine_element<Fq, Fr, T>& other) noexcept

    : x(other.x)

    , y(other.y)

    , z(Fq::one())

{}


/**
 * @brief Copy assignment.
 *
 * @return reference to this element after it has taken over the coordinates of `other`
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T>& element<Fq, Fr, T>::operator=(const element& other) noexcept
{
    // Self-assignment needs no work.
    if (this != &other) {
        x = other.x;
        y = other.y;
        z = other.z;
    }
    return *this;
}


/**
 * @brief Move assignment; implemented as a member-wise copy of the three coordinates.
 *
 * @return reference to this element
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T>& element<Fq, Fr, T>::operator=(element&& other) noexcept
{
    this->x = other.x;
    this->y = other.y;
    this->z = other.z;
    return *this;
}


/**
 * @brief Convert to affine coordinates: (x / z^2, y / z^3).
 *
 * The point at infinity is mapped to an affine element with zeroed coordinates on which
 * self_set_infinity() has been called.
 */
template <class Fq, class Fr, class T> constexpr element<Fq, Fr, T>::operator affine_element<Fq, Fr, T>() const noexcept
{
    if (is_point_at_infinity()) {
        affine_element<Fq, Fr, T> affine_infinity;
        affine_infinity.x = Fq(0);
        affine_infinity.y = Fq(0);
        affine_infinity.self_set_infinity();
        return affine_infinity;
    }

    // One inversion, then the powers z^-2 and z^-3 are derived from it.
    const Fq inv_z = z.invert();
    const Fq inv_z2 = inv_z.sqr();
    const Fq inv_z3 = inv_z2 * inv_z;
    affine_element<Fq, Fr, T> affine_point(x * inv_z2, y * inv_z3);
    return affine_point;
}


/**
 * @brief Double this point in place.
 *
 * Returns immediately, leaving the point unchanged, when the point is flagged as infinity
 * (x equal to the modulus for large moduli, most-significant-bit flag of x otherwise).
 *
 * With S = x*y^2 and M = 3*x^2 (+ a*z^4 when the curve has an `a` coefficient) the result is
 *   x3 = M^2 - 8*S
 *   y3 = M*(4*S - x3) - 8*y^4
 *   z3 = 2*y*z
 * The statements below update x, y, z in place, so their order matters.
 */
template <class Fq, class Fr, class T> constexpr void element<Fq, Fr, T>::self_dbl() noexcept
{
    if constexpr (Fq::modulus.data[3] >= 0x4000000000000000ULL) {
        if (is_point_at_infinity()) {
            return;
        }
    } else {
        if (x.is_msb_set_word()) {
            return;
        }
    }

    // T0 = x*x
    Fq T0 = x.sqr();

    // T1 = y*y
    Fq T1 = y.sqr();

    // T2 = T1*T1 = y*y*y*y
    Fq T2 = T1.sqr();

    // T1 = T1 + x = x + y*y
    T1 += x;

    // T1 = T1 * T1 = (x + y*y)^2 = x*x + 2*x*y*y + y*y*y*y
    T1.self_sqr();

    // T3 = T0 + T2 = x*x + y*y*y*y
    Fq T3 = T0 + T2;

    // T1 = T1 - T3 = 2*x*y*y = 2*S
    T1 -= T3;

    // T1 = 2T1 = 4*S
    T1 += T1;

    // T3 = 3T0 = 3*x*x (plus a*z^4 when the curve has a non-zero `a`) = M
    T3 = T0 + T0;
    T3 += T0;
    if constexpr (T::has_a) {
        // uses the old z; z is only overwritten below
        T3 += (T::a * z.sqr().sqr());
    }

    // z2 = 2*y*z
    z += z;
    z *= y;

    // T0 = 2T1 = 8*S
    T0 = T1 + T1;

    // x2 = T3*T3 = M*M
    x = T3.sqr();

    // x2 = x2 - 2T1 = M*M - 8*S
    x -= T0;

    // T2 = 8T2 = 8*y*y*y*y
    T2 += T2;
    T2 += T2;
    T2 += T2;

    // y2 = T1 - x2 = 4*S - x2
    y = T1 - x;

    // y2 = y2 * T3 - T2 = M*(4*S - x2) - 8*y*y*y*y
    y *= T3;
    y -= T2;
}


/**
 * @brief Return the double of this point without modifying it.
 */
template <class Fq, class Fr, class T> constexpr element<Fq, Fr, T> element<Fq, Fr, T>::dbl() const noexcept
{
    // Work on a copy and reuse the in-place doubling.
    element doubled = *this;
    doubled.self_dbl();
    return doubled;
}


/**
 * @brief In-place mixed addition of an affine point, with the y-term of `other` conditionally negated.
 *
 * @param other     affine point (implicit z = 1) to combine with this point
 * @param predicate forwarded to conditional_negate_affine / self_conditional_negate; a non-zero value
 *                  negates the y contribution of `other`, i.e. presumably turns the addition into a
 *                  subtraction (confirm against Fq::self_conditional_negate)
 *
 * Edge cases handled before the main formula:
 *  - this point is infinity: the result is the (conditionally negated) `other` with z = 1;
 *  - small-modulus encoding only: `other` is infinity and this is not -> this point is left unchanged.
 * Inside the formula, H == 0 means both points share an x-coordinate: the point is doubled when the
 * y-terms also match and set to infinity otherwise.
 *
 * With H = x2*z1^2 - x1 and R = 2*(y2*z1^3 - y1):
 *   x3 = R^2 - 4*H^3 - 8*H^2*x1
 *   y3 = R*(4*H^2*x1 - x3) - 8*H^3*y1
 *   z3 = (z1 + H)^2 - z1^2 - H^2
 */
template <class Fq, class Fr, class T>
constexpr void element<Fq, Fr, T>::self_mixed_add_or_sub(const affine_element<Fq, Fr, T>& other,
                                                         const uint64_t predicate) noexcept
{
    if constexpr (Fq::modulus.data[3] >= 0x4000000000000000ULL) {
        if (is_point_at_infinity()) {
            // Overwrite (x, y) through an affine view of this object, then set z = 1.
            // NOTE(review): the cast relies on element's leading members matching affine_element's layout — confirm.
            conditional_negate_affine(other, *(affine_element<Fq, Fr, T>*)this, predicate); // NOLINT
            z = Fq::one();
            return;
        }
    } else {
        // In this encoding infinity is flagged by the most significant bit of x.
        const bool edge_case_trigger = x.is_msb_set() || other.x.is_msb_set();
        if (edge_case_trigger) {
            if (x.is_msb_set()) {
                conditional_negate_affine(other, *(affine_element<Fq, Fr, T>*)this, predicate); // NOLINT
                z = Fq::one();
            }
            // Otherwise only `other` is infinity and this point is already the result.
            return;
        }
    }

    // T0 = z1.z1
    Fq T0 = z.sqr();

    // T1 = x2.t0 - x1 = x2.z1.z1 - x1 = H
    Fq T1 = other.x * T0;
    T1 -= x;

    // T2 = T0.z1 = z1.z1.z1
    // T2 = T2.y2 - y1 = y2.z1.z1.z1 - y1 (y2 term conditionally negated by `predicate`)
    Fq T2 = z * T0;
    T2 *= other.y;
    T2.self_conditional_negate(predicate);
    T2 -= y;

    // H == 0: the two points have the same x-coordinate (rare, hence the branch hint)
    if (__builtin_expect(T1.is_zero(), 0)) {
        if (T2.is_zero()) {
            // y2 equals y1, x2 equals x1, double x1
            self_dbl();
            return;
        }
        // same x, different y: the points cancel
        self_set_infinity();
        return;
    }

    // T2 = 2T2 = 2(y2.z1.z1.z1 - y1) = R
    // z3 = z1 + H (first step; squared and corrected below)
    T2 += T2;
    z += T1;

    // T3 = T1*T1 = HH
    Fq T3 = T1.sqr();

    // T0 = z1z1 + HH (the amount subtracted from z3 below)
    T0 += T3;

    // z3 = (z1 + H)*(z1 + H) - z1z1 - HH
    z.self_sqr();
    z -= T0;

    // T3 = 4HH
    T3 += T3;
    T3 += T3;

    // T1 = T1*T3 = 4HHH
    T1 *= T3;

    // T3 = T3 * x1 = 4HH*x1
    T3 *= x;

    // T0 = 2T3
    T0 = T3 + T3;

    // T0 = T0 + T1 = 2(4HH*x1) + 4HHH
    T0 += T1;
    // x3 = R*R
    x = T2.sqr();

    // x3 = x3 - T0 = R*R - 8HH*x1 -4HHH
    x -= T0;

    // T3 = T3 - x3 = 4HH*x1 - x3
    T3 -= x;

    // T1 = 2*T1*y1 = 8HHH*y1
    T1 *= y;
    T1 += T1;

    // T3 = T2 * T3 = R*(4HH*x1 - x3)
    T3 *= T2;

    // y3 = T3 - T1
    y = T3 - T1;
}


/**
 * @brief In-place mixed addition of an affine point (implicit z = 1).
 *
 * Edge cases handled before the main formula:
 *  - this point is infinity: the result is `other` with z = 1;
 *  - small-modulus encoding only: `other` is infinity and this is not -> this point is left unchanged.
 * Inside the formula, H == 0 means both points share an x-coordinate: the point is doubled when the
 * y-terms also match and set to infinity otherwise.
 *
 * With H = x2*z1^2 - x1 and R = 2*(y2*z1^3 - y1):
 *   x3 = R^2 - 4*H^3 - 8*H^2*x1
 *   y3 = R*(4*H^2*x1 - x3) - 8*H^3*y1
 *   z3 = (z1 + H)^2 - z1^2 - H^2
 *
 * @return a copy of this element after the addition (the return type is a value, not a reference)
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator+=(const affine_element<Fq, Fr, T>& other) noexcept
{
    if constexpr (Fq::modulus.data[3] >= 0x4000000000000000ULL) {
        if (is_point_at_infinity()) {
            *this = { other.x, other.y, Fq::one() };
            return *this;
        }
    } else {
        // In this encoding infinity is flagged by the most significant bit of x.
        const bool edge_case_trigger = x.is_msb_set() || other.x.is_msb_set();
        if (edge_case_trigger) {
            if (x.is_msb_set()) {
                *this = { other.x, other.y, Fq::one() };
            }
            // Otherwise only `other` is infinity and this point is already the result.
            return *this;
        }
    }

    // T0 = z1.z1
    Fq T0 = z.sqr();

    // T1 = x2.t0 - x1 = x2.z1.z1 - x1 = H
    Fq T1 = other.x * T0;
    T1 -= x;

    // T2 = T0.z1 = z1.z1.z1
    // T2 = T2.y2 - y1 = y2.z1.z1.z1 - y1
    Fq T2 = z * T0;
    T2 *= other.y;
    T2 -= y;

    // H == 0: the two points have the same x-coordinate (rare, hence the branch hint)
    if (__builtin_expect(T1.is_zero(), 0)) {
        if (T2.is_zero()) {
            // same point: double it
            self_dbl();
            return *this;
        }
        // same x, different y: the points cancel
        self_set_infinity();
        return *this;
    }

    // T2 = 2T2 = 2(y2.z1.z1.z1 - y1) = R
    // z3 = z1 + H (first step; squared and corrected below)
    T2 += T2;
    z += T1;

    // T3 = T1*T1 = HH
    Fq T3 = T1.sqr();

    // T0 = z1z1 + HH (the amount subtracted from z3 below)
    T0 += T3;

    // z3 = (z1 + H)*(z1 + H) - z1z1 - HH
    z.self_sqr();
    z -= T0;

    // T3 = 4HH
    T3 += T3;
    T3 += T3;

    // T1 = T1*T3 = 4HHH
    T1 *= T3;

    // T3 = T3 * x1 = 4HH*x1
    T3 *= x;

    // T0 = 2T3
    T0 = T3 + T3;

    // T0 = T0 + T1 = 2(4HH*x1) + 4HHH
    T0 += T1;
    // x3 = R*R
    x = T2.sqr();

    // x3 = x3 - T0 = R*R - 8HH*x1 -4HHH
    x -= T0;

    // T3 = T3 - x3 = 4HH*x1 - x3
    T3 -= x;

    // T1 = 2*T1*y1 = 8HHH*y1
    T1 *= y;
    T1 += T1;

    // T3 = T2 * T3 = R*(4HH*x1 - x3)
    T3 *= T2;

    // y3 = T3 - T1
    y = T3 - T1;
    return *this;
}


/**
 * @brief Sum of this point and an affine point; neither operand is modified.
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator+(const affine_element<Fq, Fr, T>& other) const noexcept
{
    // Copy, then delegate to the in-place mixed addition.
    element sum = *this;
    sum += other;
    return sum;
}


/**
 * @brief In-place subtraction of an affine point, performed as addition of (x, -y).
 *
 * @return a copy of this element after the subtraction
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator-=(const affine_element<Fq, Fr, T>& other) noexcept
{
    const affine_element<Fq, Fr, T> negated_other{ other.x, -other.y };
    return (*this += negated_other);
}


/**
 * @brief Difference of this point and an affine point; neither operand is modified.
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator-(const affine_element<Fq, Fr, T>& other) const noexcept
{
    // Copy, then delegate to the in-place subtraction.
    element difference = *this;
    difference -= other;
    return difference;
}


/**
 * @brief In-place addition of another projective point.
 *
 * Infinity handling (same logic for both infinity encodings, only the detection differs):
 *  - only this point is infinity  -> this becomes a copy of `other`;
 *  - only `other` is infinity     -> this point is returned unchanged;
 *  - both are infinity            -> this is (re)set to infinity.
 * Inside the formula, H == 0 means both points share an affine x-coordinate: the point is doubled
 * when F is also zero and set to infinity otherwise.
 *
 * With U1 = x1*z2^2, U2 = x2*z1^2, S1 = y1*z2^3, S2 = y2*z1^3, H = U2 - U1, I = (2H)^2, J = H*I,
 * R = 2*(S2 - S1) and V = U1*I:
 *   x3 = R^2 - J - 2*V
 *   y3 = R*(V - x3) - 2*S1*J
 *   z3 = ((z1 + z2)^2 - z1^2 - z2^2) * H
 *
 * @return a copy of this element after the addition (the return type is a value, not a reference)
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator+=(const element& other) noexcept
{
    if constexpr (Fq::modulus.data[3] >= 0x4000000000000000ULL) {
        bool p1_zero = is_point_at_infinity();
        bool p2_zero = other.is_point_at_infinity();
        // infinity operands are expected to be rare
        if (__builtin_expect((p1_zero || p2_zero), 0)) {
            if (p1_zero && !p2_zero) {
                *this = other;
                return *this;
            }
            if (p2_zero && !p1_zero) {
                return *this;
            }
            // both operands are infinity
            self_set_infinity();
            return *this;
        }
    } else {
        // In this encoding infinity is flagged by the most significant bit of x.
        bool p1_zero = x.is_msb_set();
        bool p2_zero = other.x.is_msb_set();
        if (__builtin_expect((p1_zero || p2_zero), 0)) {
            if (p1_zero && !p2_zero) {
                *this = other;
                return *this;
            }
            if (p2_zero && !p1_zero) {
                return *this;
            }
            // both operands are infinity
            self_set_infinity();
            return *this;
        }
    }
    // Z1Z1 = z1^2, Z2Z2 = z2^2
    Fq Z1Z1(z.sqr());
    Fq Z2Z2(other.z.sqr());
    // S2 = y2*z1^3, U2 = x2*z1^2
    Fq S2(Z1Z1 * z);
    Fq U2(Z1Z1 * other.x);
    S2 *= other.y;
    // U1 = x1*z2^2, S1 = y1*z2^3
    Fq U1(Z2Z2 * x);
    Fq S1(Z2Z2 * other.z);
    S1 *= y;

    // F = S2 - S1 (doubled further down to become R)
    Fq F(S2 - S1);

    // H = U2 - U1
    Fq H(U2 - U1);

    // H == 0: both points have the same affine x-coordinate
    if (__builtin_expect(H.is_zero(), 0)) {
        if (F.is_zero()) {
            // same point: double it
            self_dbl();
            return *this;
        }
        // same x, different y: the points cancel
        self_set_infinity();
        return *this;
    }

    // F = R = 2*(S2 - S1)
    F += F;

    // I = (2H)^2
    Fq I(H + H);
    I.self_sqr();

    // J = H*I
    Fq J(H * I);

    // U1 = V = U1*I
    U1 *= I;

    // U2 = 2V + J
    U2 = U1 + U1;
    U2 += J;

    // x3 = R^2 - J - 2V
    x = F.sqr();

    x -= U2;

    // J = 2*S1*J
    J *= S1;
    J += J;

    // y3 = R*(V - x3) - 2*S1*J
    y = U1 - x;

    y *= F;

    y -= J;

    // z3 = ((z1 + z2)^2 - Z1Z1 - Z2Z2) * H
    z += other.z;

    Z1Z1 += Z2Z2;

    z.self_sqr();
    z -= Z1Z1;
    z *= H;
    return *this;
}


/**
 * @brief Sum of two projective points; neither operand is modified.
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator+(const element& other) const noexcept
{
    // Copy, then delegate to the in-place addition.
    element sum = *this;
    sum += other;
    return sum;
}


/**
 * @brief In-place subtraction of a projective point, performed as addition of (x, -y, z).
 *
 * @return a copy of this element after the subtraction
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator-=(const element& other) noexcept
{
    const element negated_other{ other.x, -other.y, other.z };
    return (*this += negated_other);
}


/**
 * @brief Difference of two projective points; neither operand is modified.
 */
template <class Fq, class Fr, class T>
constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator-(const element& other) const noexcept
{
    // Copy, then delegate to the in-place subtraction.
    element difference = *this;
    difference -= other;
    return difference;
}


/**
 * @brief Negation: same x and z, y replaced by -y.
 */
template <class Fq, class Fr, class T> constexpr element<Fq, Fr, T> element<Fq, Fr, T>::operator-() const noexcept
{
    return element(x, -y, z);
}


/**
 * @brief Scalar multiplication.
 *
 * Dispatches at compile time on T::USE_ENDOMORPHISM to the endomorphism-based routine or to the
 * plain double-and-add routine.
 */
template <class Fq, class Fr, class T>
element<Fq, Fr, T> element<Fq, Fr, T>::operator*(const Fr& exponent) const noexcept
{
    if constexpr (T::USE_ENDOMORPHISM) {
        return mul_with_endomorphism(exponent);
    } else {
        return mul_without_endomorphism(exponent);
    }
}


/**
 * @brief In-place scalar multiplication.
 *
 * @return a copy of this element after it has been replaced by (*this) * exponent
 */
template <class Fq, class Fr, class T> element<Fq, Fr, T> element<Fq, Fr, T>::operator*=(const Fr& exponent) noexcept
{
    const element product = this->operator*(exponent);
    *this = product;
    return *this;
}


/**
 * @brief Return an equivalent representation of this point obtained by a round trip through
 *        affine coordinates (conversion to affine_element, then back to element).
 */
template <class Fq, class Fr, class T> constexpr element<Fq, Fr, T> element<Fq, Fr, T>::normalize() const noexcept
{
    const affine_element<Fq, Fr, T> affine_form = *this;
    return element(affine_form);
}


/**
 * @brief Produce a point at infinity: a value-initialized element on which the infinity flag is set.
 */
template <class Fq, class Fr, class T> element<Fq, Fr, T> element<Fq, Fr, T>::infinity()
{
    element<Fq, Fr, T> point_at_infinity{};
    point_at_infinity.self_set_infinity();
    return point_at_infinity;
}


/**
 * @brief Return a copy of this element that has been marked as the point at infinity.
 *
 * The element itself is not modified.
 */
template <class Fq, class Fr, class T> constexpr element<Fq, Fr, T> element<Fq, Fr, T>::set_infinity() const noexcept
{
    element marked = *this;
    marked.self_set_infinity();
    return marked;
}


/**
 * @brief Mark this element as the point at infinity, in place.
 *
 * Two encodings exist, chosen at compile time from the top limb of the field modulus:
 *  - top limb >= 2^62: x is overwritten with the modulus itself; y and z are left as they are;
 *  - otherwise: all three coordinates are zeroed and the most significant bit of x is set.
 */
template <class Fq, class Fr, class T> constexpr void element<Fq, Fr, T>::self_set_infinity() noexcept
{
    if constexpr (Fq::modulus.data[3] >= 0x4000000000000000ULL) {
        // x == modulus is the marker for infinity; copy the modulus limb by limb.
        for (size_t limb = 0; limb < 4; ++limb) {
            x.data[limb] = Fq::modulus.data[limb];
        }
    } else {
        x = Fq::zero();
        y = Fq::zero();
        z = Fq::zero();
        x.self_set_msb();
    }
}


/**
 * @brief Test whether this element is marked as the point at infinity.
 *
 * Mirrors the two encodings written by self_set_infinity(): x equal to the modulus when the top
 * limb of the modulus is >= 2^62, the most significant bit of x otherwise.
 */
template <class Fq, class Fr, class T> constexpr bool element<Fq, Fr, T>::is_point_at_infinity() const noexcept
{
    if constexpr (Fq::modulus.data[3] >= 0x4000000000000000ULL) {
        // XOR each limb of x with the matching modulus limb; every difference is zero iff x == modulus.
        const auto diff0 = x.data[0] ^ Fq::modulus.data[0];
        const auto diff1 = x.data[1] ^ Fq::modulus.data[1];
        const auto diff2 = x.data[2] ^ Fq::modulus.data[2];
        const auto diff3 = x.data[3] ^ Fq::modulus.data[3];
        return (diff0 | diff1 | diff2 | diff3) == 0;
    } else {
        return x.is_msb_set();
    }
}


/**
 * @brief Check the projective curve equation y^2 = x^3 + a*x*z^4 + b*z^6 (the `a` term only when
 *        the curve declares one).
 *
 * @return true for the point at infinity and for points satisfying the equation; false otherwise,
 *         including any non-infinity point with z == 0
 */
template <class Fq, class Fr, class T> constexpr bool element<Fq, Fr, T>::on_curve() const noexcept
{
    if (is_point_at_infinity()) {
        return true;
    }
    // Infinity is encoded in x, not as z == 0, so a zero z on a non-infinity point is invalid.
    if (z.is_zero()) {
        return false;
    }

    const Fq z2 = z.sqr();
    const Fq z4 = z2.sqr();

    // Everything on the right-hand side except x^3.
    Fq rhs_tail = z4 * z2 * T::b;
    if constexpr (T::has_a) {
        rhs_tail += (x * T::a) * z4;
    }

    const Fq rhs = x.sqr() * x + rhs_tail;
    const Fq lhs = y.sqr();
    return (rhs == lhs);
}


/**
 * @brief Equality of two projective points as curve points (independent of the z scaling).
 *
 * @return false if either operand is not on the curve or exactly one is infinity; true if both are
 *         infinity; otherwise the result of comparing the cross-multiplied coordinates
 */
template <class Fq, class Fr, class T>
constexpr bool element<Fq, Fr, T>::operator==(const element& other) const noexcept
{
    // Points that are not on the curve never compare equal to anything.
    if (!on_curve() || !other.on_curve()) {
        return false;
    }

    const bool this_is_infinity = is_point_at_infinity();
    const bool other_is_infinity = other.is_point_at_infinity();
    // Exactly one operand at infinity: cannot be equal.
    if (this_is_infinity != other_is_infinity) {
        return false;
    }
    // Both at infinity: equal regardless of the stored coordinates.
    if (this_is_infinity) {
        return true;
    }

    // Compare x1*z2^2 with x2*z1^2 and y1*z2^3 with y2*z1^3, which avoids any inversion.
    const Fq this_zz = z.sqr();
    const Fq this_zzz = this_zz * z;
    const Fq other_zz = other.z.sqr();
    const Fq other_zzz = other_zz * other.z;

    const bool same_x = (x * other_zz) == (other.x * this_zz);
    const bool same_y = (y * other_zzz) == (other.y * this_zzz);
    return same_x && same_y;
}


/**
 * @brief Produce a random curve point using the supplied RNG.
 *
 * When the curve supports hashing to the curve, random (x, y) coordinates are drawn with
 * random_coordinates_on_curve() and then rescaled by a random z (x *= z^2, y *= z^3). Otherwise
 * the point (T::one_x, T::one_y, 1) is multiplied by a random scalar.
 *
 * @param engine random number generator forwarded to the field-level random_element calls
 */
template <class Fq, class Fr, class T>
element<Fq, Fr, T> element<Fq, Fr, T>::random_element(numeric::RNG* engine) noexcept
{
    if constexpr (T::can_hash_to_curve) {
        element point = random_coordinates_on_curve(engine);
        point.z = Fq::random_element(engine);
        const Fq z_squared = point.z.sqr();
        const Fq z_cubed = z_squared * point.z;
        point.x *= z_squared;
        point.y *= z_cubed;
        return point;
    } else {
        const Fr random_scalar = Fr::random_element(engine);
        const element base_point{ T::one_x, T::one_y, Fq::one() };
        return base_point * random_scalar;
    }
}


/**
 * @brief Scalar multiplication by plain left-to-right double-and-add.
 *
 * @param scalar multiplier; a zero scalar yields the point at infinity
 * @return scalar * (*this)
 */
template <class Fq, class Fr, class T>
element<Fq, Fr, T> element<Fq, Fr, T>::mul_without_endomorphism(const Fr& scalar) const noexcept
{
    const uint256_t scalar_bits(scalar);
    if (scalar_bits == 0) {
        return element::infinity();
    }

    // The top set bit is consumed by starting the running result at *this.
    element running(*this);

    // Walk the remaining bits from just below the top set bit down to bit 0.
    // This is simpler and doublings of infinity should be fast. We should think if we want to defend against the
    // timing leak here (if used with ECDSA it can sometimes lead to private key compromise)
    uint64_t bit = scalar_bits.get_msb();
    while (bit > 0) {
        --bit;
        running.self_dbl();
        if (scalar_bits.get_bit(bit)) {
            running += *this;
        }
    }
    return running;
}


namespace detail {
// The pair of two-limb scalars obtained from Fr::split_into_endomorphism_scalars; `first` feeds the
// even WNAF slots and `second` the odd (endomorphism) slots of EndomorphismWnaf below.
using EndoScalars = std::pair<std::array<uint64_t, 2>, std::array<uint64_t, 2>>;

/**
 * @brief WNAF encoding of a pair of endomorphism scalars, interleaved in a single table.
 *
 * @tparam Element    not used by the struct body
 * @tparam NUM_ROUNDS number of WNAF entries per scalar; the table holds 2 * NUM_ROUNDS entries
 */
template <typename Element, std::size_t NUM_ROUNDS> struct EndomorphismWnaf {
    // Window width, in bits, handed to wnaf::fixed_wnaf.
    static constexpr size_t NUM_WNAF_BITS = 4;
    // Interleaved WNAF entries: the first scalar is written starting at slot 0, the second at slot 1.
    std::array<uint64_t, NUM_ROUNDS * 2> table;
    // Skew flags reported by wnaf::fixed_wnaf for the first and second scalar respectively
    // (they indicate whether the original scalar is even or odd).
    bool skew = false;
    bool endo_skew = false;

    EndomorphismWnaf(const EndoScalars& scalars)
    {
        // Arguments 0 and 2 are passed unchanged for both scalars; the second call starts one slot later.
        wnaf::fixed_wnaf(scalars.first.data(), table.data(), skew, 0, 2, NUM_WNAF_BITS);
        wnaf::fixed_wnaf(scalars.second.data(), table.data() + 1, endo_skew, 0, 2, NUM_WNAF_BITS);
    }
};

} // namespace detail


/**
 * @brief Scalar multiplication using the curve endomorphism and a fixed 4-bit WNAF.
 *
 * The scalar is split into two endomorphism scalars, both are WNAF-encoded into one interleaved
 * table, and the result is accumulated from a table of the odd multiples P, 3P, ..., 15P. Odd table
 * slots belong to the endomorphism scalar: their x-coordinate is multiplied by the cube root of
 * unity and their sign handling is flipped.
 *
 * @param scalar multiplier in Montgomery form
 * @return scalar * (*this); infinity if this point is infinity or the scalar is zero
 */
template <class Fq, class Fr, class T>
element<Fq, Fr, T> element<Fq, Fr, T>::mul_with_endomorphism(const Fr& scalar) const noexcept
{
    // Multiplying infinity always gives infinity.
    if (is_point_at_infinity()) {
        return element::infinity();
    }
    constexpr size_t NUM_ROUNDS = 32;
    static constexpr size_t LOOKUP_SIZE = 8;

    const Fr raw_scalar = scalar.from_montgomery_form();
    if (raw_scalar.is_zero()) {
        return element::infinity();
    }

    // odd_multiples[k] = (2k + 1) * P, built by repeatedly adding 2P.
    std::array<element, LOOKUP_SIZE> odd_multiples;
    const element twice = dbl();
    odd_multiples[0] = element(*this);
    for (size_t k = 1; k < LOOKUP_SIZE; ++k) {
        odd_multiples[k] = odd_multiples[k - 1] + twice;
    }

    detail::EndoScalars split_scalars = Fr::split_into_endomorphism_scalars(raw_scalar);
    detail::EndomorphismWnaf<element, NUM_ROUNDS> wnaf{ split_scalars };

    // Start the running sum at infinity.
    element running{ T::one_x, T::one_y, Fq::one() };
    running.self_set_infinity();
    const Fq beta = Fq::cube_root_of_unity();

    constexpr size_t NUM_ENTRIES = 2 * NUM_ROUNDS;
    for (size_t i = 0; i < NUM_ENTRIES; ++i) {
        const uint64_t entry = wnaf.table[i];
        const uint64_t table_index = entry & 0x0fffffffU;
        const bool negative = static_cast<bool>((entry >> 31) & 1);
        const bool endo_slot = ((i & 1) == 1);

        element term = odd_multiples[static_cast<size_t>(table_index)];
        term.y.self_conditional_negate(negative ^ endo_slot);
        if (endo_slot) {
            term.x *= beta;
        }
        running += term;

        // After each pair of entries (except the final pair) shift the sum up by one 4-bit window.
        const bool final_entry = (i == NUM_ENTRIES - 1);
        if (endo_slot && !final_entry) {
            for (size_t j = 0; j < 4; ++j) {
                running.self_dbl();
            }
        }
    }

    // Skew corrections for the two scalars.
    if (wnaf.skew) {
        running += -odd_multiples[0];
    }
    if (wnaf.endo_skew) {
        running += element{ odd_multiples[0].x * beta, odd_multiples[0].y, odd_multiples[0].z };
    }

    return running;
}


/**
 * @brief Pairwise affine addition of two equally sized groups: results[i] = first_group[i] + second_group[i].
 *
 * All slope denominators (x2 - x1) of a chunk are inverted together with a single field inversion
 * (batch inversion). The chunk routine contains no special handling for pairs whose x-coordinates
 * are equal, nor for points at infinity.
 *
 * An empty input is a no-op. Earlier revisions formed `&span[0]` / `&vector[0]` even for empty
 * ranges, which is undefined behaviour; `.data()` is used instead.
 *
 * @param first_group  left operands; copied into `results` before the addition
 * @param second_group right operands; read only. Must have the same size as `first_group`
 * @param results      output; its first first_group.size() entries are overwritten, so it must hold
 *                     at least that many
 */
template <class Fq, class Fr, class T>
void element<Fq, Fr, T>::batch_affine_add(const std::span<affine_element<Fq, Fr, T>>& first_group,
                                          const std::span<affine_element<Fq, Fr, T>>& second_group,
                                          const std::span<affine_element<Fq, Fr, T>>& results) noexcept
{
    typedef affine_element<Fq, Fr, T> affine_element;
    const size_t num_points = first_group.size();
    BB_ASSERT_EQ(second_group.size(), first_group.size());

    // Nothing to add; also avoids touching element 0 of empty ranges below.
    if (num_points == 0) {
        return;
    }

    // Space for temporary values
    std::vector<Fq> scratch_space(num_points);

    // Seed the output with the left operands; the chunk routine then accumulates into it.
    parallel_for_heuristic(
        num_points, [&](size_t i) { results[i] = first_group[i]; }, thread_heuristics::FF_COPY_COST * 2);

    // TODO(#826): Same code as in batch mul
    //  we can mutate rhs but NOT lhs!
    //  output is stored in rhs
    const auto batch_affine_add_chunked =
        [](const affine_element* lhs, affine_element* rhs, const size_t point_count, Fq* personal_scratch_space) {
            Fq batch_inversion_accumulator = Fq::one();

            // Forward pass: stash x1 + x2, turn rhs into the differences and build the running
            // product of all (x2 - x1).
            for (size_t i = 0; i < point_count; i += 1) {
                personal_scratch_space[i] = lhs[i].x + rhs[i].x; // x2 + x1
                rhs[i].x -= lhs[i].x;                            // x2 - x1
                rhs[i].y -= lhs[i].y;                            // y2 - y1
                rhs[i].y *= batch_inversion_accumulator;         // (y2 - y1)*accumulator_old
                batch_inversion_accumulator *= (rhs[i].x);
            }
            batch_inversion_accumulator = batch_inversion_accumulator.invert();

            // Backward pass; the unsigned index wraps past zero, which ends the loop.
            for (size_t i = (point_count)-1; i < point_count; i -= 1) {
                rhs[i].y *= batch_inversion_accumulator; // rhs[i].y is now lambda = (y2 - y1)/(x2 - x1)
                batch_inversion_accumulator *= rhs[i].x; // update accumulator
                rhs[i].x = rhs[i].y.sqr();
                rhs[i].x = rhs[i].x - (personal_scratch_space[i]); // x3 = lambda_squared - x2 - x1
                personal_scratch_space[i] = lhs[i].x - rhs[i].x;
                personal_scratch_space[i] *= rhs[i].y;
                rhs[i].y = personal_scratch_space[i] - lhs[i].y; // y3 = lambda*(lhs.x - x3) - lhs.y
            }
        };

    // Split the work into chunks; each chunk uses its own slice of the scratch space.
    const auto batch_affine_add_internal = [&](const affine_element* lhs, affine_element* rhs) {
        parallel_for_heuristic(
            num_points,
            [&](size_t start, size_t end, BB_UNUSED size_t chunk_index) {
                batch_affine_add_chunked(lhs + start, rhs + start, end - start, scratch_space.data() + start);
            },
            thread_heuristics::FF_ADDITION_COST * 6 + thread_heuristics::FF_MULTIPLICATION_COST * 6);
    };
    batch_affine_add_internal(second_group.data(), results.data());
}


/**
 * @brief Multiply every point of `points` by the same scalar, working in affine coordinates with
 *        batched inversions, the curve endomorphism and a fixed 4-bit WNAF.
 *
 * Special cases handled up front:
 *  - empty input: an empty vector is returned (earlier revisions went on to form `&vector[0]` on
 *    empty vectors, which is undefined behaviour);
 *  - scalar == -1: every point is negated;
 *  - scalar == 0: every result is the point at infinity.
 * Input points at infinity are replaced by affine_element::one() during the computation and their
 * results are set to infinity at the end.
 *
 * @param points input points; not modified
 * @param scalar common multiplier, in Montgomery form
 * @return vector of points.size() affine results, results[i] = scalar * points[i]
 */
template <class Fq, class Fr, class T>
std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomorphism(
    const std::span<const affine_element<Fq, Fr, T>>& points, const Fr& scalar) noexcept
{
    PROFILE_THIS();
    typedef affine_element<Fq, Fr, T> affine_element;
    const size_t num_points = points.size();

    // An empty batch has an empty result; this also keeps the code below from touching element 0
    // of empty vectors.
    if (num_points == 0) {
        return {};
    }

    // Space for temporary values
    std::vector<Fq> scratch_space(num_points);

    // TODO(#826): Same code as in batch add
    //  we can mutate rhs but NOT lhs!
    //  output is stored in rhs
    const auto batch_affine_add_chunked =
        [](const affine_element* lhs, affine_element* rhs, const size_t point_count, Fq* personal_scratch_space) {
            Fq batch_inversion_accumulator = Fq::one();

            // Forward pass: stash x1 + x2, turn rhs into the differences and build the running
            // product of all (x2 - x1).
            for (size_t i = 0; i < point_count; i += 1) {
                personal_scratch_space[i] = lhs[i].x + rhs[i].x; // x2 + x1
                rhs[i].x -= lhs[i].x;                            // x2 - x1
                rhs[i].y -= lhs[i].y;                            // y2 - y1
                rhs[i].y *= batch_inversion_accumulator;         // (y2 - y1)*accumulator_old
                batch_inversion_accumulator *= (rhs[i].x);
            }
            batch_inversion_accumulator = batch_inversion_accumulator.invert();

            // Backward pass; the unsigned index wraps past zero, which ends the loop.
            for (size_t i = (point_count)-1; i < point_count; i -= 1) {
                rhs[i].y *= batch_inversion_accumulator; // rhs[i].y is now lambda = (y2 - y1)/(x2 - x1)
                batch_inversion_accumulator *= rhs[i].x; // update accumulator
                rhs[i].x = rhs[i].y.sqr();
                rhs[i].x = rhs[i].x - (personal_scratch_space[i]); // x3 = lambda_squared - x2 - x1
                personal_scratch_space[i] = lhs[i].x - rhs[i].x;
                personal_scratch_space[i] *= rhs[i].y;
                rhs[i].y = personal_scratch_space[i] - lhs[i].y; // y3 = lambda*(lhs.x - x3) - lhs.y
            }
        };

    // Runs the chunked addition over all points, each chunk using its own slice of the scratch space.
    const auto batch_affine_add_internal =
        [num_points, &scratch_space, &batch_affine_add_chunked](const affine_element* lhs, affine_element* rhs) {
            parallel_for_heuristic(
                num_points,
                [&](size_t start, size_t end, BB_UNUSED size_t chunk_index) {
                    batch_affine_add_chunked(lhs + start, rhs + start, end - start, scratch_space.data() + start);
                },
                thread_heuristics::FF_ADDITION_COST * 6 + thread_heuristics::FF_MULTIPLICATION_COST * 6);
        };

    // In-place affine doubling of a chunk, with one shared inversion of all the 2*y denominators.
    const auto batch_affine_double_chunked =
        [](affine_element* lhs, const size_t point_count, Fq* personal_scratch_space) {
            Fq batch_inversion_accumulator = Fq::one();

            for (size_t i = 0; i < point_count; i += 1) {

                // 3 * x^2, pre-multiplied by the running product
                personal_scratch_space[i] = lhs[i].x.sqr();
                personal_scratch_space[i] =
                    personal_scratch_space[i] + personal_scratch_space[i] + personal_scratch_space[i];

                personal_scratch_space[i] *= batch_inversion_accumulator;

                batch_inversion_accumulator *= (lhs[i].y + lhs[i].y);
            }
            batch_inversion_accumulator = batch_inversion_accumulator.invert();

            Fq temp;
            // Backward pass; the unsigned index wraps past zero, which ends the loop.
            for (size_t i = (point_count)-1; i < point_count; i -= 1) {

                // scratch[i] is now lambda = 3*x^2 / (2*y)
                personal_scratch_space[i] *= batch_inversion_accumulator;
                batch_inversion_accumulator *= (lhs[i].y + lhs[i].y);

                temp = lhs[i].x;
                lhs[i].x = personal_scratch_space[i].sqr() - (lhs[i].x + lhs[i].x);
                lhs[i].y = personal_scratch_space[i] * (temp - lhs[i].x) - lhs[i].y;
            }
        };
    const auto batch_affine_double = [num_points, &scratch_space, &batch_affine_double_chunked](affine_element* lhs) {
        parallel_for_heuristic(
            num_points,
            [&](size_t start, size_t end, BB_UNUSED size_t chunk_index) {
                batch_affine_double_chunked(lhs + start, end - start, scratch_space.data() + start);
            },
            thread_heuristics::FF_ADDITION_COST * 7 + thread_heuristics::FF_MULTIPLICATION_COST * 6);
    };

    // We compute the resulting point through WNAF by evaluating (the (\sum_i (16ⁱ⋅
    // (a_i ∈ {-15,-13,-11,-9,-7,-5,-3,-1,1,3,5,7,9,11,13,15}))) - skew), where skew is 0 or 1. The result of the sum is
    // always odd and skew is used to reconstruct an even scalar. This means that to construct scalar p-1, where p is
    // the order of the scalar field, we first compute p through the sums and then subtract 1. However, since we are
    // computing p⋅Point, we get a point at infinity, which is an edge case, and we don't want to handle edge cases in
    // the hot loop since they slow the computation down. So it's better to just handle it here.
    if (scalar == -Fr::one()) {
        std::vector<affine_element> results(num_points);
        parallel_for_heuristic(num_points, [&](size_t i) { results[i] = -points[i]; }, thread_heuristics::FF_COPY_COST);
        return results;
    }
    // Compute wnaf for scalar
    const Fr converted_scalar = scalar.from_montgomery_form();

    // If the scalar is zero, just set results to the point at infinity
    if (converted_scalar.is_zero()) {
        affine_element result{ Fq::zero(), Fq::zero() };
        result.self_set_infinity();
        std::vector<affine_element> results(num_points);
        parallel_for_heuristic(num_points, [&](size_t i) { results[i] = result; }, thread_heuristics::FF_COPY_COST);
        return results;
    }

    constexpr size_t LOOKUP_SIZE = 8;
    constexpr size_t NUM_ROUNDS = 32;
    std::array<std::vector<affine_element>, LOOKUP_SIZE> lookup_table;
    for (auto& table : lookup_table) {
        table.resize(num_points);
    }
    // Initialize first entries in lookup table
    std::vector<affine_element> temp_point_vector(num_points);
    parallel_for_heuristic(
        num_points,
        [&](size_t i) {
            // If the point is at infinity we fix-up the result later
            // To avoid 'trying to invert zero in the field' we set the point to 'one' here
            const affine_element safe_point = points[i].is_point_at_infinity() ? affine_element::one() : points[i];
            temp_point_vector[i] = safe_point;
            lookup_table[0][i] = safe_point;
        },
        thread_heuristics::FF_COPY_COST * 2);

    // Construct lookup table: temp_point_vector becomes 2P, then table[j] = table[j - 1] + 2P,
    // i.e. the odd multiples P, 3P, ..., 15P.
    batch_affine_double(temp_point_vector.data());
    for (size_t j = 1; j < LOOKUP_SIZE; ++j) {
        parallel_for_heuristic(
            num_points,
            [&](size_t i) { lookup_table[j][i] = lookup_table[j - 1][i]; },
            thread_heuristics::FF_COPY_COST);
        batch_affine_add_internal(temp_point_vector.data(), lookup_table[j].data());
    }

    detail::EndoScalars endo_scalars = Fr::split_into_endomorphism_scalars(converted_scalar);
    detail::EndomorphismWnaf<element, NUM_ROUNDS> wnaf{ endo_scalars };

    std::vector<affine_element> work_elements(num_points);

    constexpr Fq beta = Fq::cube_root_of_unity();
    uint64_t wnaf_entry = 0;
    uint64_t index = 0;
    bool sign = false;
    // Prepare elements for the first batch addition: entry 0 seeds work_elements, entry 1 seeds
    // temp_point_vector.
    for (size_t j = 0; j < 2; ++j) {
        wnaf_entry = wnaf.table[j];
        index = wnaf_entry & 0x0fffffffU;
        sign = static_cast<bool>((wnaf_entry >> 31) & 1);
        const bool is_odd = ((j & 1) == 1);
        parallel_for_heuristic(
            num_points,
            [&](size_t i) {
                auto to_add = lookup_table[static_cast<size_t>(index)][i];
                to_add.y.self_conditional_negate(sign ^ is_odd);
                // odd slots belong to the endomorphism scalar
                if (is_odd) {
                    to_add.x *= beta;
                }
                if (j == 0) {
                    work_elements[i] = to_add;
                } else {
                    temp_point_vector[i] = to_add;
                }
            },
            (is_odd ? thread_heuristics::FF_MULTIPLICATION_COST : 0) + thread_heuristics::FF_COPY_COST +
                thread_heuristics::FF_ADDITION_COST);
    }
    // First cycle of addition
    batch_affine_add_internal(temp_point_vector.data(), work_elements.data());
    // Run through SM logic in wnaf form (excluding the skew)
    for (size_t j = 2; j < NUM_ROUNDS * 2; ++j) {
        wnaf_entry = wnaf.table[j];
        index = wnaf_entry & 0x0fffffffU;
        sign = static_cast<bool>((wnaf_entry >> 31) & 1);
        const bool is_odd = ((j & 1) == 1);
        // A new pair of entries starts at every even slot: shift the accumulator by one 4-bit window.
        if (!is_odd) {
            for (size_t k = 0; k < 4; ++k) {
                batch_affine_double(work_elements.data());
            }
        }
        parallel_for_heuristic(
            num_points,
            [&](size_t i) {
                auto to_add = lookup_table[static_cast<size_t>(index)][i];
                to_add.y.self_conditional_negate(sign ^ is_odd);
                if (is_odd) {
                    to_add.x *= beta;
                }
                temp_point_vector[i] = to_add;
            },
            (is_odd ? thread_heuristics::FF_MULTIPLICATION_COST : 0) + thread_heuristics::FF_COPY_COST +
                thread_heuristics::FF_ADDITION_COST);
        batch_affine_add_internal(temp_point_vector.data(), work_elements.data());
    }

    // Apply skew for the first endo scalar
    if (wnaf.skew) {
        parallel_for_heuristic(
            num_points,
            [&](size_t i) { temp_point_vector[i] = -lookup_table[0][i]; },
            thread_heuristics::FF_ADDITION_COST + thread_heuristics::FF_COPY_COST);
        batch_affine_add_internal(temp_point_vector.data(), work_elements.data());
    }
    // Apply skew for the second endo scalar
    if (wnaf.endo_skew) {
        parallel_for_heuristic(
            num_points,
            [&](size_t i) {
                temp_point_vector[i] = lookup_table[0][i];
                temp_point_vector[i].x *= beta;
            },
            thread_heuristics::FF_MULTIPLICATION_COST + thread_heuristics::FF_COPY_COST);
        batch_affine_add_internal(temp_point_vector.data(), work_elements.data());
    }
    // handle points at infinity explicitly
    parallel_for_heuristic(
        num_points,
        [&](size_t i) {
            work_elements[i] = points[i].is_point_at_infinity() ? work_elements[i].set_infinity() : work_elements[i];
        },
        thread_heuristics::FF_COPY_COST);

    return work_elements;
}


/**
 * @brief Copy an affine point into `out`, negating its y-coordinate when `predicate` is non-zero.
 *
 * @param in        Source affine point; its x-coordinate is copied unchanged.
 * @param out       Destination; overwritten with `(in.x, in.y)` or `(in.x, -in.y)`.
 * @param predicate Treated as a boolean: any non-zero value selects the negated y-coordinate.
 */
template <typename Fq, typename Fr, typename T>
void element<Fq, Fr, T>::conditional_negate_affine(const affine_element<Fq, Fr, T>& in,
                                                   affine_element<Fq, Fr, T>& out,
                                                   const uint64_t predicate) noexcept
{
    // Pick the y-coordinate first so the result is assembled in one assignment
    // (keeps the behaviour well-defined even if `in` and `out` refer to the same point).
    Fq selected_y = in.y;
    if (predicate != 0) {
        selected_y = -in.y;
    }
    out = { in.x, selected_y };
}


/**
 * @brief Normalize an array of points in place so that every z-coordinate becomes one,
 *        using a single field inversion for the whole batch.
 *
 * @details For each element that is not the point at infinity, x is multiplied by z^{-2} and
 *          y by z^{-3}, after which z is set to `Fq::one()`. Elements flagged as the point at
 *          infinity keep their x and y untouched and only have z overwritten with `Fq::one()`.
 *
 *          The individual inverses are recovered from one inversion of the product of all
 *          (non-infinity) z-coordinates, combined with the running prefix products.
 *
 *          NOTE(review): this relies on every non-infinity element having an invertible
 *          z-coordinate; a zero z on such an element would poison the shared product —
 *          confirm against callers.
 *
 * @param elements     Pointer to the first of `num_elements` contiguous points; modified in place.
 * @param num_elements Number of points to normalize. Zero is a no-op.
 */
template <typename Fq, typename Fr, typename T>
void element<Fq, Fr, T>::batch_normalize(element* elements, const size_t num_elements) noexcept
{
    // Nothing to do for an empty batch; skip the allocation and the field inversion.
    if (num_elements == 0) {
        return;
    }

    // Exactly one prefix product is stored per element, so reserve exactly that many slots.
    std::vector<Fq> temporaries;
    temporaries.reserve(num_elements);
    Fq accumulator = Fq::one();

    // Forward pass: temporaries[i] holds the product of the z-coordinates of all non-infinity
    // elements with index < i. After the loop, `accumulator` is the product over the whole batch.
    for (size_t i = 0; i < num_elements; ++i) {
        temporaries.emplace_back(accumulator);
        if (!elements[i].is_point_at_infinity()) {
            accumulator *= elements[i].z;
        }
    }

    // For the rest of this method we refer to the product of all z-coordinates as the 'global' z-coordinate.
    // Invert the global z-coordinate and store it in `accumulator`.
    accumulator = accumulator.invert();

    // Backward pass. Invariant at the top of iteration i: `accumulator` is the inverse of the
    // product of the z-coordinates of all non-infinity elements with index <= i.
    for (size_t i = num_elements; i-- > 0;) {
        if (!elements[i].is_point_at_infinity()) {
            // (z_0 * ... * z_i)^{-1} * (z_0 * ... * z_{i-1}) = z_i^{-1}
            const Fq z_inv = accumulator * temporaries[i];
            const Fq zz_inv = z_inv.sqr();
            elements[i].x *= zz_inv;
            elements[i].y *= (zz_inv * z_inv);
            // Strip z_i from the running inverse; this must happen before z is overwritten below.
            accumulator *= elements[i].z;
        }
        elements[i].z = Fq::one();
    }
}


/**
 * @brief Sample coordinates satisfying the curve equation by rejection sampling on x.
 *
 * @details Repeatedly draws a random x from `Fq`, evaluates the right-hand side
 *          x^3 + b (plus a*x when `T::has_a` is set), and attempts a square root.
 *          The first x for which `sqrt()` reports success is returned together with
 *          the root it produced, with the z-coordinate set to `Fq::one()`.
 *
 * @param engine Random number generator forwarded to `Fq::random_element`.
 * @return The element `{ x, y, Fq::one() }` for the first successful sample.
 */
template <typename Fq, typename Fr, typename T>
template <typename>
element<Fq, Fr, T> element<Fq, Fr, T>::random_coordinates_on_curve(numeric::RNG* engine) noexcept
{
    for (;;) {
        const Fq x_candidate = Fq::random_element(engine);

        // Right-hand side of the curve equation evaluated at the candidate x.
        Fq rhs = x_candidate.sqr() * x_candidate + T::b;
        if constexpr (T::has_a) {
            rhs += (x_candidate * T::a);
        }

        // Accept the candidate only if the right-hand side is a square in Fq.
        const auto [is_square, y_candidate] = rhs.sqrt();
        if (is_square) {
            return { x_candidate, y_candidate, Fq::one() };
        }
    }
}


} // namespace bb::group_elements

// NOLINTEND(readability-implicit-bool-conversion, cppcoreguidelines-avoid-c-arrays)

assert.hpp

BB_ASSERT_EQ
#define BB_ASSERT_EQ(actual, expected,...)
Definition assert.hpp:59

bb::group_elements::affine_element
Definition affine_element.hpp:21

bb::group_elements::affine_element::self_set_infinity
constexpr void self_set_infinity() noexcept
Definition affine_element_impl.hpp:110

bb::group_elements::affine_element::x
Fq x
Definition affine_element.hpp:201

bb::group_elements::affine_element::y
Fq y
Definition affine_element.hpp:202

bb::group_elements::affine_element::one
static constexpr affine_element one() noexcept
Definition affine_element.hpp:49

bb::group_elements::element
element class. Implements ecc group arithmetic using Jacobian coordinates See https://hyperelliptic....
Definition element.hpp:33

bb::group_elements::element::operator*=
element operator*=(const Fr &exponent) noexcept
Definition element_impl.hpp:485

bb::group_elements::element::set_infinity
BB_INLINE constexpr element set_infinity() const noexcept
Definition element_impl.hpp:504

bb::group_elements::element::mul_with_endomorphism
element mul_with_endomorphism(const Fr &scalar) const noexcept
Definition element_impl.hpp:658

bb::group_elements::element::infinity
static element infinity()
Definition element_impl.hpp:497

bb::group_elements::element::y
Fq y
Definition element.hpp:107

bb::group_elements::element::batch_mul_with_endomorphism
static std::vector< affine_element< Fq, Fr, Params > > batch_mul_with_endomorphism(const std::span< const affine_element< Fq, Fr, Params > > &points, const Fr &scalar) noexcept
Multiply each point by the same scalar.
Definition element_impl.hpp:794

bb::group_elements::element::operator-=
constexpr element operator-=(const element &other) noexcept
Definition element_impl.hpp:458

bb::group_elements::element::operator-
constexpr element operator-() const noexcept
Definition element_impl.hpp:471

bb::group_elements::element::z
Fq z
Definition element.hpp:108

bb::group_elements::element::operator+
friend constexpr element operator+(const affine_element< Fq, Fr, Params > &left, const element &right) noexcept
Definition element.hpp:75

bb::group_elements::element::dbl
constexpr element dbl() const noexcept
Definition element_impl.hpp:151

bb::group_elements::element::normalize
constexpr element normalize() const noexcept
Definition element_impl.hpp:491

bb::group_elements::element::self_dbl
constexpr void self_dbl() noexcept
Definition element_impl.hpp:82

bb::group_elements::element::random_element
static element random_element(numeric::RNG *engine=nullptr) noexcept
Definition element_impl.hpp:586

bb::group_elements::element::batch_normalize
static void batch_normalize(element *elements, size_t num_elements) noexcept
Definition element_impl.hpp:1044

bb::group_elements::element::operator+=
constexpr element operator+=(const element &other) noexcept
Definition element_impl.hpp:362

bb::group_elements::element::batch_affine_add
static void batch_affine_add(const std::span< affine_element< Fq, Fr, Params > > &first_group, const std::span< affine_element< Fq, Fr, Params > > &second_group, const std::span< affine_element< Fq, Fr, Params > > &results) noexcept
Pairwise affine add points in first and second group.
Definition element_impl.hpp:722

bb::group_elements::element::on_curve
BB_INLINE constexpr bool on_curve() const noexcept
Definition element_impl.hpp:538

bb::group_elements::element::operator==
BB_INLINE constexpr bool operator==(const element &other) const noexcept
Definition element_impl.hpp:559

bb::group_elements::element::operator*
element operator*(const Fr &exponent) const noexcept
Definition element_impl.hpp:477

bb::group_elements::element::self_mixed_add_or_sub
constexpr void self_mixed_add_or_sub(const affine_element< Fq, Fr, Params > &other, uint64_t predicate) noexcept
Definition element_impl.hpp:159

bb::group_elements::element::x
Fq x
Definition element.hpp:106

bb::group_elements::element::element
element() noexcept=default

bb::group_elements::element::conditional_negate_affine
static void conditional_negate_affine(const affine_element< Fq, Fr, Params > &in, affine_element< Fq, Fr, Params > &out, uint64_t predicate) noexcept
Definition element_impl.hpp:1036

bb::group_elements::element::random_coordinates_on_curve
static element random_coordinates_on_curve(numeric::RNG *engine=nullptr) noexcept

bb::group_elements::element::mul_without_endomorphism
element mul_without_endomorphism(const Fr &scalar) const noexcept
Definition element_impl.hpp:603

bb::group_elements::element::operator=
constexpr element & operator=(const element &other) noexcept
Definition element_impl.hpp:46

bb::group_elements::element::self_set_infinity
BB_INLINE constexpr void self_set_infinity() noexcept
Definition element_impl.hpp:511

bb::group_elements::element::is_point_at_infinity
BB_INLINE constexpr bool is_point_at_infinity() const noexcept
Definition element_impl.hpp:527

bb::numeric::RNG
Definition engine.hpp:17

bb::numeric::uint256_t
Definition uint256.hpp:32

bb::numeric::uint256_t::get_bit
constexpr bool get_bit(uint64_t bit_index) const
Definition uint256_impl.hpp:318

bb::numeric::uint256_t::data
uint64_t data[4]
Definition uint256.hpp:208

bb::numeric::uint256_t::get_msb
constexpr uint64_t get_msb() const
Definition uint256_impl.hpp:329

BB_UNUSED
#define BB_UNUSED
Definition compiler_hints.hpp:30

a
FF a
Definition field_gt.test.cpp:51

b
FF b
Definition field_gt.test.cpp:52

engine
numeric::RNG & engine
Definition eccvm_transcript.test.cpp:282

element.hpp

bb::group_elements::detail::EndoScalars
std::pair< std::array< uint64_t, 2 >, std::array< uint64_t, 2 > > EndoScalars
Definition element_impl.hpp:626

bb::group_elements
Definition affine_element.hpp:18

bb::stdlib::element
std::conditional_t< IsGoblinBigGroup< C, Fq, Fr, G >, element_goblin::goblin_element< C, goblin_field< C >, Fr, G >, element_default::element< C, Fq, Fr, G > > element
element wraps either element_default::element or element_goblin::goblin_element depending on parametr...
Definition biggroup.hpp:1055

bb::thread_heuristics::FF_COPY_COST
constexpr size_t FF_COPY_COST
Definition thread.hpp:146

bb::thread_heuristics::FF_ADDITION_COST
constexpr size_t FF_ADDITION_COST
Definition thread.hpp:134

bb::thread_heuristics::FF_MULTIPLICATION_COST
constexpr size_t FF_MULTIPLICATION_COST
Definition thread.hpp:136

bb::wnaf::fixed_wnaf
void fixed_wnaf(const uint64_t *scalar, uint64_t *wnaf, bool &skew_map, const uint64_t point_index, const uint64_t num_points, const size_t wnaf_bits) noexcept
Performs fixed-window non-adjacent form (WNAF) computation for scalar multiplication.
Definition wnaf.hpp:178

bb::operator*
Univariate< Fr, domain_end, domain_start, skip_count > operator*(const Fr &ff, const Univariate< Fr, domain_end, domain_start, skip_count > &uv)
Definition univariate.hpp:634

bb::parallel_for_heuristic
void parallel_for_heuristic(size_t num_points, const std::function< void(size_t, size_t, size_t)> &func, size_t heuristic_cost)
Split a loop into several loops running in parallel based on operations in 1 iteration.
Definition thread.cpp:132

std::get
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Definition tuple.hpp:13

op_count.hpp

PROFILE_THIS
#define PROFILE_THIS()
Definition op_count.hpp:15

bb::field< Bn254FqParams >

bb::field< Bn254FqParams >::cube_root_of_unity
static constexpr field cube_root_of_unity()
Definition field_declarations.hpp:218

bb::field< Bn254FqParams >::one
static constexpr field one()
Definition field_declarations.hpp:242

bb::field< Bn254FqParams >::modulus
static constexpr uint256_t modulus
Definition field_declarations.hpp:197

bb::field::self_conditional_negate
BB_INLINE constexpr void self_conditional_negate(uint64_t predicate) &noexcept
Definition field_impl.hpp:216

bb::field< Bn254FrParams >::split_into_endomorphism_scalars
static void split_into_endomorphism_scalars(const field &k, field &k1, field &k2)
Definition field_declarations.hpp:424

bb::field::self_sqr
BB_INLINE constexpr void self_sqr() &noexcept
Definition field_impl.hpp:83

bb::field::invert
constexpr field invert() const noexcept
Definition field_impl.hpp:378

bb::field::is_msb_set
BB_INLINE constexpr bool is_msb_set() const noexcept
Definition field_impl.hpp:636

bb::field< Bn254FqParams >::random_element
static field random_element(numeric::RNG *engine=nullptr) noexcept
Definition field_impl.hpp:665

bb::field::sqr
BB_INLINE constexpr field sqr() const noexcept
Definition field_impl.hpp:70

bb::field::sqrt
constexpr std::pair< bool, field > sqrt() const noexcept
Compute square root of the field element.
Definition field_impl.hpp:598

bb::field::is_zero
BB_INLINE constexpr bool is_zero() const noexcept
Definition field_impl.hpp:646

bb::field::from_montgomery_form
BB_INLINE constexpr field from_montgomery_form() const noexcept
Definition field_impl.hpp:301

bb::field< Bn254FqParams >::zero
static constexpr field zero()
Definition field_declarations.hpp:240

bb::group_elements::detail::EndomorphismWnaf
Handles the WNAF computation for scalars that are split using an endomorphism, achieved through split...
Definition element_impl.hpp:636

bb::group_elements::detail::EndomorphismWnaf::EndomorphismWnaf
EndomorphismWnaf(const EndoScalars &scalars)
Definition element_impl.hpp:648

bb::group_elements::detail::EndomorphismWnaf::table
std::array< uint64_t, NUM_ROUNDS *2 > table
Definition element_impl.hpp:640

bb::group_elements::detail::EndomorphismWnaf::endo_skew
bool endo_skew
Definition element_impl.hpp:643

bb::group_elements::detail::EndomorphismWnaf::skew
bool skew
Definition element_impl.hpp:642

bb::group_elements::detail::EndomorphismWnaf::NUM_WNAF_BITS
static constexpr size_t NUM_WNAF_BITS
Definition element_impl.hpp:638

thread.hpp

Fq
bb::fq Fq
Definition transcript.test.cpp:8