ristretto255/internal/radix51/fe_mul.go

127 lines
3.8 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright (c) 2017 George Tankersley. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64 noasm
package radix51
// FeMul sets out = a * b
func FeMul(out, x, y *FieldElement) {
var x0, x1, x2, x3, x4 uint64
var y0, y1, y2, y3, y4 uint64
x0 = x[0]
x1 = x[1]
x2 = x[2]
x3 = x[3]
x4 = x[4]
y0 = y[0]
y1 = y[1]
y2 = y[2]
y3 = y[3]
y4 = y[4]
// Reduction can be carried out simultaneously to multiplication. For
// example, we do not compute a coefficient r_5 . Whenever the result of a
// mul instruction belongs to r_5 , for example in the multiplication of
// x_3*y_2 , we multiply one of the inputs by 19 and add the result to r_0.
x1_19 := x1 * 19
x2_19 := x2 * 19
x3_19 := x3 * 19
x4_19 := x4 * 19
// calculate r0 = x0*y0 + 19*(x1*y4 + x2*y3 + x3*y2 + x4*y1)
r00, r01 := mul64x64(0, 0, x0, y0)
r00, r01 = mul64x64(r00, r01, x1_19, y4)
r00, r01 = mul64x64(r00, r01, x2_19, y3)
r00, r01 = mul64x64(r00, r01, x3_19, y2)
r00, r01 = mul64x64(r00, r01, x4_19, y1)
// calculate r1 = x0*y1 + x1*y0 + 19*(x2*y4 + x3*y3 + x4*y2)
r10, r11 := mul64x64(0, 0, x0, y1)
r10, r11 = mul64x64(r10, r11, x1, y0)
r10, r11 = mul64x64(r10, r11, x2_19, y4)
r10, r11 = mul64x64(r10, r11, x3_19, y3)
r10, r11 = mul64x64(r10, r11, x4_19, y2)
// calculate r2 = x0*y2 + x1*y1 + x2*y0 + 19*(x3*y4 + x4*y3)
r20, r21 := mul64x64(0, 0, x0, y2)
r20, r21 = mul64x64(r20, r21, x1, y1)
r20, r21 = mul64x64(r20, r21, x2, y0)
r20, r21 = mul64x64(r20, r21, x3_19, y4)
r20, r21 = mul64x64(r20, r21, x4_19, y3)
// calculate r3 = x0*y3 + x1*y2 + x2*y1 + x3*y0 + 19*x4*y4
r30, r31 := mul64x64(0, 0, x0, y3)
r30, r31 = mul64x64(r30, r31, x1, y2)
r30, r31 = mul64x64(r30, r31, x2, y1)
r30, r31 = mul64x64(r30, r31, x3, y0)
r30, r31 = mul64x64(r30, r31, x4_19, y4)
// calculate r4 = x0*y4 + x1*y3 + x2*y2 + x3*y1 + x4*y0
r40, r41 := mul64x64(0, 0, x0, y4)
r40, r41 = mul64x64(r40, r41, x1, y3)
r40, r41 = mul64x64(r40, r41, x2, y2)
r40, r41 = mul64x64(r40, r41, x3, y1)
r40, r41 = mul64x64(r40, r41, x4, y0)
// After the multiplication we need to reduce (carry) the 5 coefficients to
// obtain a result with coefficients that are at most slightly larger than
// 2^51 . Denote the two registers holding coefficient r_0 as r_00 and r_01
// with r_0 = 2^64*r_01 + r_00 . Similarly denote the two registers holding
// coefficient r_1 as r_10 and r_11 . We first shift r_01 left by 13, while
// shifting in the most significant bits of r_00 (shld instruction) and
// then compute the logical and of r_00 with 2^51 1. We do the same with
// r_10 and r_11 and add r_01 into r_10 after the logical and with 2^51
// 1. We proceed this way for coefficients r_2,...,r_4; register r_41 is
// multiplied by 19 before adding it to r_00 .
r01 = (r01 << 13) | (r00 >> 51)
r00 &= maskLow51Bits
r11 = (r11 << 13) | (r10 >> 51)
r10 &= maskLow51Bits
r10 += r01
r21 = (r21 << 13) | (r20 >> 51)
r20 &= maskLow51Bits
r20 += r11
r31 = (r31 << 13) | (r30 >> 51)
r30 &= maskLow51Bits
r30 += r21
r41 = (r41 << 13) | (r40 >> 51)
r40 &= maskLow51Bits
r40 += r31
r41 *= 19
r00 += r41
// Now all 5 coefficients fit into 64-bit registers but are still too large
// to be used as input to another multiplication. We therefore carry from
// r_0 to r_1 , from r_1 to r_2 , from r_2 to r_3 , from r_3 to r_4 , and
// finally from r_4 to r_0 . Each of these carries is done as one copy, one
// right shift by 51, one logical and with 2^51 1, and one addition.
r10 += r00 >> 51
r00 &= maskLow51Bits
r20 += r10 >> 51
r10 &= maskLow51Bits
r30 += r20 >> 51
r20 &= maskLow51Bits
r40 += r30 >> 51
r30 &= maskLow51Bits
r00 += (r40 >> 51) * 19
r40 &= maskLow51Bits
out[0] = r00
out[1] = r10
out[2] = r20
out[3] = r30
out[4] = r40
}