Branch data Line data Source code
1 : : // Copyright (c) 2017-2021 The Bitcoin Core developers
2 : : // Distributed under the MIT software license, see the accompanying
3 : : // file COPYING or http://www.opensource.org/licenses/mit-license.php.
4 : :
5 : : #ifndef BITCOIN_CRYPTO_MUHASH_H
6 : : #define BITCOIN_CRYPTO_MUHASH_H
7 : :
8 : : #include <serialize.h>
9 : : #include <uint256.h>
10 : :
11 : : #include <stdint.h>
12 : :
13 : : class Num3072
14 : : {
15 : : private:
16 : : void FullReduce();
17 : : bool IsOverflow() const;
18 : : Num3072 GetInverse() const;
19 : :
20 : : public:
21 : : static constexpr size_t BYTE_SIZE = 384;
22 : :
23 : : #ifdef __SIZEOF_INT128__
24 : : typedef unsigned __int128 double_limb_t;
25 : : typedef signed __int128 signed_double_limb_t;
26 : : typedef uint64_t limb_t;
27 : : typedef int64_t signed_limb_t;
28 : : static constexpr int LIMBS = 48;
29 : : static constexpr int SIGNED_LIMBS = 50;
30 : : static constexpr int LIMB_SIZE = 64;
31 : : static constexpr int SIGNED_LIMB_SIZE = 62;
32 : : #else
33 : : typedef uint64_t double_limb_t;
34 : : typedef int64_t signed_double_limb_t;
35 : : typedef uint32_t limb_t;
36 : : typedef int32_t signed_limb_t;
37 : : static constexpr int LIMBS = 96;
38 : : static constexpr int SIGNED_LIMBS = 103;
39 : : static constexpr int LIMB_SIZE = 32;
40 : : static constexpr int SIGNED_LIMB_SIZE = 30;
41 : : #endif
42 : : limb_t limbs[LIMBS];
43 : :
44 : : // Sanity check for Num3072 constants
45 : : static_assert(LIMB_SIZE * LIMBS == 3072, "Num3072 isn't 3072 bits");
46 : : static_assert(sizeof(double_limb_t) == sizeof(limb_t) * 2, "bad size for double_limb_t");
47 : : static_assert(sizeof(limb_t) * 8 == LIMB_SIZE, "LIMB_SIZE is incorrect");
48 : : static_assert(SIGNED_LIMB_SIZE * SIGNED_LIMBS > 3072, "SIGNED_LIMBS * SIGNED_LIMB_SIZE is too small");
49 : : static_assert(3072 / SIGNED_LIMB_SIZE == SIGNED_LIMBS - 1, "Bit 3072 must land in top signed limb");
50 : :
51 : : // Hard coded values in MuHash3072 constructor and Finalize
52 : : static_assert(sizeof(limb_t) == 4 || sizeof(limb_t) == 8, "bad size for limb_t");
53 : :
54 : : void Multiply(const Num3072& a);
55 : : void Divide(const Num3072& a);
56 : : void SetToOne();
57 : : void ToBytes(unsigned char (&out)[BYTE_SIZE]);
58 : :
59 : 47402 : Num3072() { this->SetToOne(); };
60 : : Num3072(const unsigned char (&data)[BYTE_SIZE]);
61 : :
62 : 930 : SERIALIZE_METHODS(Num3072, obj)
63 : : {
64 [ + + ]: 16170 : for (auto& limb : obj.limbs) {
65 : 15840 : READWRITE(limb);
66 : : }
67 : 60 : }
68 : : };
69 : :
70 : : /** A class representing MuHash sets
71 : : *
72 : : * MuHash is a hashing algorithm that supports adding set elements in any
73 : : * order but also deleting in any order. As a result, it can maintain a
74 : : * running sum for a set of data as a whole, and add/remove when data
75 : : * is added to or removed from it. A downside of MuHash is that computing
76 : : * an inverse is relatively expensive. This is solved by representing
77 : : * the running value as a fraction, and multiplying added elements into
78 : : * the numerator and removed elements into the denominator. Only when the
79 : : * final hash is desired, a single modular inverse and multiplication is
80 : : * needed to combine the two. The combination is also run on serialization
81 : : * to allow for space-efficient storage on disk.
82 : : *
83 : : * As the update operations are also associative, H(a)+H(b)+H(c)+H(d) can
84 : : * in fact be computed as (H(a)+H(b)) + (H(c)+H(d)). This implies that
85 : : * all of this is perfectly parallellizable: each thread can process an
86 : : * arbitrary subset of the update operations, allowing them to be
87 : : * efficiently combined later.
88 : : *
89 : : * MuHash does not support checking if an element is already part of the
90 : : * set. That is why this class does not enforce the use of a set as the
91 : : * data it represents because there is no efficient way to do so.
92 : : * It is possible to add elements more than once and also to remove
93 : : * elements that have not been added before. However, this implementation
94 : : * is intended to represent a set of elements.
95 : : *
96 : : * See also https://cseweb.ucsd.edu/~mihir/papers/inchash.pdf and
97 : : * https://lists.linuxfoundation.org/pipermail/bitcoin-dev/2017-May/014337.html.
98 : : */
99 : : class MuHash3072
100 : : {
101 : : private:
102 : : Num3072 m_numerator;
103 : : Num3072 m_denominator;
104 : :
105 : : Num3072 ToNum3072(Span<const unsigned char> in);
106 : :
107 : : public:
108 : : /* The empty set. */
109 [ + - + - ]: 111 : MuHash3072() noexcept = default;
[ + - ]
110 : :
111 : : /* A singleton with variable sized data in it. */
112 : : explicit MuHash3072(Span<const unsigned char> in) noexcept;
113 : :
114 : : /* Insert a single piece of data into the set. */
115 : : MuHash3072& Insert(Span<const unsigned char> in) noexcept;
116 : :
117 : : /* Remove a single piece of data from the set. */
118 : : MuHash3072& Remove(Span<const unsigned char> in) noexcept;
119 : :
120 : : /* Multiply (resulting in a hash for the union of the sets) */
121 : : MuHash3072& operator*=(const MuHash3072& mul) noexcept;
122 : :
123 : : /* Divide (resulting in a hash for the difference of the sets) */
124 : : MuHash3072& operator/=(const MuHash3072& div) noexcept;
125 : :
126 : : /* Finalize into a 32-byte hash. Does not change this object's value. */
127 : : void Finalize(uint256& out) noexcept;
128 : :
129 : 495 : SERIALIZE_METHODS(MuHash3072, obj)
130 : : {
131 : 165 : READWRITE(obj.m_numerator);
132 : 165 : READWRITE(obj.m_denominator);
133 : : }
134 : : };
135 : :
136 : : #endif // BITCOIN_CRYPTO_MUHASH_H
|