Tor 0.4.9.8
Loading...
Searching...
No Matches
polyval.c
1/* Copyright (c) 2025, The Tor Project, Inc. */
2/* See LICENSE for licensing information */
3
4/**
5 * \file polyval.h
6 * \brief Implementation for polyval universal hash function.
7 *
8 * Polyval, which was first defined for AES-GCM-SIV, is a
9 * universal hash based on multiplication in GF(2^128).
10 * Unlike the more familiar GHASH, it is defined to work on
11 * little-endian inputs, and so is more straightforward and efficient
12 * on little-endian architectures.
13 *
14 * In Tor we use it as part of the Counter Galois Onion relay
15 * encryption format.
16 **/
17
18/* Implementation notes:
19 *
20 * Our implementation is based on the GHASH code from BearSSL
21 * by Thomas Pornin. There are three implementations:
22 *
23 * pclmul.c -- An x86-only version, based on the CLMUL instructions
24 * introduced for westlake processors in 2010.
25 *
26 * ctmul64.c -- A portable contant-time implementation for 64-bit
27 * processors.
28 *
29 * ctmul.c -- A portable constant-time implementation for 32-bit
30 * processors with an efficient 32x32->64 multiply instruction.
31 *
32 * Note that the "ctmul" implementations are only constant-time
33 * if the corresponding CPU multiply and rotate instructions are
34 * also constant-time. But that's an architectural assumption we
35 * make in Tor.
36 */
37
38#include "ext/polyval/polyval.h"
40
41#include <string.h>
42
43#ifdef PV_USE_PCLMUL_DETECT
44#include <cpuid.h>
45#endif
46
47typedef pv_u128_ u128;
48
49/* ========
50 * We declare these functions, to help implement polyval.
51 *
52 * They have different definitions depending on our representation
53 * of 128-bit integers.
54 */
55#if 0
56/**
57 * Read a u128-bit little-endian integer from 'bytes',
58 * which may not be aligned.
59 */
60static inline u128 u128_from_bytes(const uint8_t *bytes);
61/**
62 * Store a u128-bit little-endian integer to 'bytes_out',
63 * which may not be aligned.
64 */
65static inline void u128_to_bytes(u128, uint8_t *bytes_out);
66/**
67 * XOR a u128 into the y field of a polyval_t struct.
68 *
69 * (Within the polyval struct, perform "y ^= v").
70 */
71static inline void pv_xor_y(polyval_t *, u128 v);
72
73/* ========
74 * The function which we expect our backend to implement.
75 */
76/**
77 * Within the polyval struct, perform "y *= h".
78 *
79 * (This is a carryless multiply in the Polyval galois field)
80 */
81static void pv_mul_y_h(polyval_t *);h
82#endif
83
84/* =====
85 * Endianness conversion for big-endian platforms
86 */
87#ifdef WORDS_BIGENDIAN
88#ifdef __GNUC__
89#define bswap64(x) __builtin_bswap64(x)
90#define bswap32(x) __builtin_bswap32(x)
91#else
92/* The (compiler should optimize these into a decent bswap instruction) */
93static inline uint64_t
94bswap64(uint64_t value)
95{
96 return
97 ((value & 0xFF00000000000000) >> 56) |
98 ((value & 0x00FF000000000000) >> 40) |
99 ((value & 0x0000FF0000000000) >> 24) |
100 ((value & 0x000000FF00000000) >> 8) |
101 ((value & 0x00000000FF000000) << 8) |
102 ((value & 0x0000000000FF0000) << 24) |
103 ((value & 0x000000000000FF00) << 40) |
104 ((value & 0x00000000000000FF) << 56);
105}
106
107static inline uint32_t
108bswap32(uint32_t value)
109{
110 return
111 ((value & 0xFF000000) >> 24) |
112 ((value & 0x00FF0000) >> 8) |
113 ((value & 0x0000FF00) << 8) |
114 ((value & 0x000000FF) << 24);
115}
116#endif
117#endif
118
119#ifdef WORDS_BIGENDIAN
120#define convert_byte_order64(x) bswap64(x)
121#define convert_byte_order32(x) bswap32(x)
122#else
123#define convert_byte_order64(x) (x)
124#define convert_byte_order32(x) (x)
125#endif
126
127#if defined PV_USE_PCLMUL_UNCONDITIONAL
128#define PCLMUL_MEMBER(v) (v)
129#define PV_USE_PCLMUL
130
131#elif defined PV_USE_PCLMUL_DETECT
132#define PCLMUL_MEMBER(v) (v).u128x1
133#define CTMUL64_MEMBER(v) (v).u64x2
134#define PV_USE_PCLMUL
135#define PV_USE_CTMUL64
136
137#elif defined PV_USE_CTMUL64
138#define CTMUL64_MEMBER(v) (v)
139#endif
140
141#ifdef PV_USE_PCLMUL
142#include "ext/polyval/pclmul.c"
143
144DISABLE_GCC_WARNING("-Waggregate-return")
145static inline u128
146u128_from_bytes_pclmul(const uint8_t *bytes)
147{
148 u128 r;
149 PCLMUL_MEMBER(r) = _mm_loadu_si128((const __m128i*)bytes);
150 return r;
151}
152ENABLE_GCC_WARNING("-Waggregate-return")
153
154static inline void
155u128_to_bytes_pclmul(u128 val, uint8_t *bytes_out)
156{
157 _mm_storeu_si128((__m128i*)bytes_out, PCLMUL_MEMBER(val));
158}
159static inline void
160pv_xor_y_pclmul(polyval_t *pv, u128 v)
161{
162 PCLMUL_MEMBER(pv->y) = _mm_xor_si128(PCLMUL_MEMBER(pv->y),
163 PCLMUL_MEMBER(v));
164}
165#endif
166
167#if defined(PV_USE_CTMUL64)
168#include "ext/polyval/ctmul64.c"
169
170DISABLE_GCC_WARNING("-Waggregate-return")
171static inline u128
172u128_from_bytes_ctmul64(const uint8_t *bytes)
173{
174 u128 r;
175 memcpy(&CTMUL64_MEMBER(r).lo, bytes, 8);
176 memcpy(&CTMUL64_MEMBER(r).hi, bytes + 8, 8);
177 CTMUL64_MEMBER(r).lo = convert_byte_order64(CTMUL64_MEMBER(r).lo);
178 CTMUL64_MEMBER(r).hi = convert_byte_order64(CTMUL64_MEMBER(r).hi);
179 return r;
180}
181ENABLE_GCC_WARNING("-Waggregate-return")
182
183static inline void
184u128_to_bytes_ctmul64(u128 val, uint8_t *bytes_out)
185{
186 uint64_t lo = convert_byte_order64(CTMUL64_MEMBER(val).lo);
187 uint64_t hi = convert_byte_order64(CTMUL64_MEMBER(val).hi);
188 memcpy(bytes_out, &lo, 8);
189 memcpy(bytes_out + 8, &hi, 8);
190}
191static inline void
192pv_xor_y_ctmul64(polyval_t *pv, u128 val)
193{
194 CTMUL64_MEMBER(pv->y).lo ^= CTMUL64_MEMBER(val).lo;
195 CTMUL64_MEMBER(pv->y).hi ^= CTMUL64_MEMBER(val).hi;
196}
197#endif
198
199#if defined(PV_USE_CTMUL)
200#include "ext/polyval/ctmul.c"
201
202DISABLE_GCC_WARNING("-Waggregate-return")
203static inline u128
204u128_from_bytes_ctmul(const uint8_t *bytes)
205{
206 u128 r;
207 memcpy(&r.v, bytes, 16);
208 for (int i = 0; i < 4; ++i) {
209 r.v[i] = convert_byte_order32(r.v[i]);
210 }
211 return r;
212}
213ENABLE_GCC_WARNING("-Waggregate-return")
214
215static inline void
216u128_to_bytes_ctmul(u128 val, uint8_t *bytes_out)
217{
218 uint32_t v[4];
219 for (int i = 0; i < 4; ++i) {
220 v[i] = convert_byte_order32(val.v[i]);
221 }
222 memcpy(bytes_out, v, 16);
223}
224static inline void
225pv_xor_y_ctmul(polyval_t *pv, u128 val)
226{
227 for (int i = 0; i < 4; ++i) {
228 pv->y.v[i] ^= val.v[i];
229 }
230}
231#endif
232
234static inline void add_multiple_none(polyval_t *pv,
235 const uint8_t *input,
236 const struct expanded_key_none *expanded)
237{
238 (void) pv;
239 (void) input;
240 (void) expanded;
241}
242static inline void expand_key_none(const polyval_t *inp,
243 struct expanded_key_none *out)
244{
245 (void) inp;
246 (void) out;
247}
248
249/* Kludge: a special value to use for block_stride when we don't support
250 * processing multiple blocks at once. Previously we used 0, but that
251 * caused warnings with some comparisons. */
252#define BLOCK_STRIDE_NONE 0xffff
253
254DISABLE_GCC_WARNING("-Waggregate-return")
255#define PV_DECLARE(prefix, \
256 st, \
257 u128_from_bytes, \
258 u128_to_bytes, \
259 pv_xor_y, \
260 pv_mul_y_h, \
261 block_stride, \
262 expanded_key_tp, expand_fn, add_multiple_fn) \
263 st void \
264 prefix ## polyval_key_init(polyval_key_t *pvk, const uint8_t *key) \
265 { \
266 pvk->h = u128_from_bytes(key); \
267 } \
268 st void \
269 prefix ## polyval_init(polyval_t *pv, const uint8_t *key) \
270 { \
271 polyval_key_init(&pv->key, key); \
272 memset(&pv->y, 0, sizeof(u128)); \
273 } \
274 st void \
275 prefix ## polyval_init_from_key(polyval_t *pv, const polyval_key_t *key) \
276 { \
277 memcpy(&pv->key, key, sizeof(polyval_key_t)); \
278 memset(&pv->y, 0, sizeof(u128)); \
279 } \
280 st void \
281 prefix ## polyval_add_block(polyval_t *pv, const uint8_t *block) \
282 { \
283 u128 b = u128_from_bytes(block); \
284 pv_xor_y(pv, b); \
285 pv_mul_y_h(pv); \
286 } \
287 st void \
288 prefix ## polyval_add_zpad(polyval_t *pv, const uint8_t *data, size_t n) \
289 { \
290 /* since block_stride is a constant, this should get optimized */ \
291 if ((block_stride != BLOCK_STRIDE_NONE) \
292 && n >= (block_stride) * 16) { \
293 expanded_key_tp expanded_key; \
294 expand_fn(pv, &expanded_key); \
295 while (n >= (block_stride) * 16) { \
296 add_multiple_fn(pv, data, &expanded_key); \
297 n -= block_stride*16; \
298 data += block_stride * 16; \
299 } \
300 } \
301 while (n > 16) { \
302 polyval_add_block(pv, data); \
303 data += 16; \
304 n -= 16; \
305 } \
306 if (n) { \
307 uint8_t block[16]; \
308 memset(&block, 0, sizeof(block)); \
309 memcpy(block, data, n); \
310 polyval_add_block(pv, block); \
311 } \
312 } \
313 st void \
314 prefix ## polyval_get_tag(const polyval_t *pv, uint8_t *tag_out) \
315 { \
316 u128_to_bytes(pv->y, tag_out); \
317 } \
318 st void \
319 prefix ## polyval_reset(polyval_t *pv) \
320 { \
321 memset(&pv->y, 0, sizeof(u128)); \
322 }
323ENABLE_GCC_WARNING("-Waggregate-return")
324
325#ifdef PV_USE_PCLMUL_DETECT
326/* We use a boolean to distinguish whether to use the PCLMUL instructions,
327 * but instead we could use function pointers. It's probably worth
328 * benchmarking, though it's unlikely to make a measurable difference.
329 */
330static bool use_pclmul = false;
331
332/* Declare _both_ variations of our code, statically,
333 * with different prefixes. */
334DISABLE_GCC_WARNING("-Waggregate-return")
335PV_DECLARE(pclmul_, static,
336 u128_from_bytes_pclmul,
337 u128_to_bytes_pclmul,
338 pv_xor_y_pclmul,
339 pv_mul_y_h_pclmul,
340 PV_BLOCK_STRIDE,
341 pv_expanded_key_t,
342 expand_key_pclmul,
343 pv_add_multiple_pclmul)
344
345PV_DECLARE(ctmul64_, static,
346 u128_from_bytes_ctmul64,
347 u128_to_bytes_ctmul64,
348 pv_xor_y_ctmul64,
349 pv_mul_y_h_ctmul64,
350 BLOCK_STRIDE_NONE,
351 struct expanded_key_none,
352 expand_key_none,
353 add_multiple_none)
354ENABLE_GCC_WARNING("-Waggregate-return")
355
356void
357polyval_key_init(polyval_key_t *pv, const uint8_t *key)
358{
359 if (use_pclmul)
360 pclmul_polyval_key_init(pv, key);
361 else
362 ctmul64_polyval_key_init(pv, key);
363}
364void
365polyval_init(polyval_t *pv, const uint8_t *key)
366{
367 if (use_pclmul)
368 pclmul_polyval_init(pv, key);
369 else
370 ctmul64_polyval_init(pv, key);
371}
372void
374{
375 if (use_pclmul)
376 pclmul_polyval_init_from_key(pv, key);
377 else
378 ctmul64_polyval_init_from_key(pv, key);
379}
380void
381polyval_add_block(polyval_t *pv, const uint8_t *block)
382{
383 if (use_pclmul)
384 pclmul_polyval_add_block(pv, block);
385 else
386 ctmul64_polyval_add_block(pv, block);
387}
388void
389polyval_add_zpad(polyval_t *pv, const uint8_t *data, size_t n)
390{
391 if (use_pclmul)
392 pclmul_polyval_add_zpad(pv, data, n);
393 else
394 ctmul64_polyval_add_zpad(pv, data, n);
395}
396void
397polyval_get_tag(const polyval_t *pv, uint8_t *tag_out)
398{
399 if (use_pclmul)
400 pclmul_polyval_get_tag(pv, tag_out);
401 else
402 ctmul64_polyval_get_tag(pv, tag_out);
403}
404void
406{
407 if (use_pclmul)
408 pclmul_polyval_reset(pv);
409 else
410 ctmul64_polyval_reset(pv);
411}
412
413#elif defined(PV_USE_PCLMUL)
414PV_DECLARE(, ,
415 u128_from_bytes_pclmul,
416 u128_to_bytes_pclmul,
417 pv_xor_y_pclmul,
418 pv_mul_y_h_pclmul,
419 PV_BLOCK_STRIDE,
420 pv_expanded_key_t,
421 expand_key_pclmul,
422 pv_add_multiple_pclmul)
423#elif defined(PV_USE_CTMUL64)
424PV_DECLARE(, ,
425 u128_from_bytes_ctmul64,
426 u128_to_bytes_ctmul64,
427 pv_xor_y_ctmul64,
428 pv_mul_y_h_ctmul64,
429 BLOCK_STRIDE_NONE,
430 struct expanded_key_none,
431 expand_key_none,
432 add_multiple_none)
433
434#elif defined(PV_USE_CTMUL)
435DISABLE_GCC_WARNING("-Waggregate-return")
436PV_DECLARE(, , u128_from_bytes_ctmul,
437 u128_to_bytes_ctmul,
438 pv_xor_y_ctmul,
439 pv_mul_y_h_ctmul,
440 BLOCK_STRIDE_NONE,
441 struct expanded_key_none,
442 expand_key_none,
443 add_multiple_none)
444ENABLE_GCC_WARNING("-Waggregate-return")
445#endif
446
447#ifdef PV_USE_PCLMUL_DETECT
448void
449polyval_detect_implementation(void)
450{
451 unsigned int eax, ebx, ecx, edx;
452 use_pclmul = false;
453 if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
454 if (0 != (ecx & bit_PCLMUL)) {
455 use_pclmul = true;
456 }
457 }
458}
459#else
460void
461polyval_detect_implementation(void)
462{
463}
464#endif
465
466#ifdef POLYVAL_USE_EXPANDED_KEYS
467
468#ifdef PV_USE_PCLMUL_DETECT
469#define SHOULD_EXPAND() (use_pclmul)
470#else
471#define SHOULD_EXPAND() (1)
472#endif
473
474void
475polyvalx_init(polyvalx_t *pvx, const uint8_t *key)
476{
477 polyval_init(&pvx->pv, key);
478 if (SHOULD_EXPAND()) {
479 expand_key_pclmul(&pvx->pv, &pvx->expanded);
480 }
481}
482void
483polyvalx_init_from_key(polyvalx_t *pvx, const polyval_key_t *key)
484{
485 polyval_init_from_key(&pvx->pv, key);
486 if (SHOULD_EXPAND()) {
487 expand_key_pclmul(&pvx->pv, &pvx->expanded);
488 }
489}
490void
491polyvalx_add_block(polyvalx_t *pvx, const uint8_t *block)
492{
493 polyval_add_block(&pvx->pv, block);
494}
495void
496polyvalx_add_zpad(polyvalx_t *pvx, const uint8_t *data, size_t n)
497{
498 if (SHOULD_EXPAND() && n >= PV_BLOCK_STRIDE * 16) {
499 while (n > PV_BLOCK_STRIDE * 16) {
500 pv_add_multiple_pclmul(&pvx->pv, data, &pvx->expanded);
501 data += PV_BLOCK_STRIDE * 16;
502 n -= PV_BLOCK_STRIDE * 16;
503 }
504 }
505 while (n > 16) {
506 polyval_add_block(&pvx->pv, data);
507 data += 16;
508 n -= 16;
509 }
510 if (n) {
511 uint8_t block[16];
512 memset(&block, 0, sizeof(block));
513 memcpy(block, data, n);
514 polyval_add_block(&pvx->pv, block);
515 }
516}
517void
518polyvalx_get_tag(const polyvalx_t *pvx, uint8_t *tag_out)
519{
520 polyval_get_tag(&pvx->pv, tag_out);
521}
522void polyvalx_reset(polyvalx_t *pvx)
523{
524 polyval_reset(&pvx->pv);
525}
526#endif
527
528#if 0
529#include <stdio.h>
530int
531main(int c, char **v)
532{
533 // From RFC 8452 appendix A
534 uint8_t key[16] =
535 { 0x25, 0x62, 0x93, 0x47, 0x58, 0x92, 0x42, 0x76,
536 0x1d, 0x31, 0xf8, 0x26, 0xba, 0x4b, 0x75, 0x7b };
537 uint8_t block1[16] =
538 { 0x4f, 0x4f, 0x95, 0x66, 0x8c, 0x83, 0xdf, 0xb6,
539 0x40, 0x17, 0x62, 0xbb, 0x2d, 0x01, 0xa2, 0x62 };
540 uint8_t block2[16] = {
541 0xd1, 0xa2, 0x4d, 0xdd, 0x27, 0x21, 0xd0, 0x06,
542 0xbb, 0xe4, 0x5f, 0x20, 0xd3, 0xc9, 0xf3, 0x62 };
543 uint8_t expect_result[16] = {
544 0xf7, 0xa3, 0xb4, 0x7b, 0x84, 0x61, 0x19, 0xfa,
545 0xe5, 0xb7, 0x86, 0x6c, 0xf5, 0xe5, 0xb7, 0x7e };
546
547 polyval_t pv;
548 uint8_t tag[16];
549 polyval_init(&pv, key);
550 polyval_add_block(&pv, block1);
551 polyval_add_block(&pv, block2);
552 polyval_get_tag(&pv, tag);
553 if (!memcmp(expect_result, tag, 16)) {
554 puts("OK");
555 return 0;
556 } else {
557 puts("NOPE");
558 return 1;
559 }
560}
561#endif
Utility macros to handle different features and behavior in different compilers.
Implementation for polyval universal hash function.
void polyval_reset(polyval_t *)
void polyval_init(polyval_t *, const uint8_t *key)
void polyval_add_zpad(polyval_t *, const uint8_t *data, size_t n)
void polyval_add_block(polyval_t *, const uint8_t *block)
void polyval_get_tag(const polyval_t *, uint8_t *tag_out)
void polyval_init_from_key(polyval_t *, const polyval_key_t *key)
void polyval_key_init(polyval_key_t *, const uint8_t *key)
pv_u128_ y
Definition polyval.h:94
int main(int argc, char *argv[])
Definition tor_main.c:25