Branch data Line data Source code
1 : : // Copyright (c) The Bitcoin Core developers
2 : : // Distributed under the MIT software license, see the accompanying
3 : : // file COPYING or http://www.opensource.org/licenses/mit-license.php.
4 : :
5 : : #ifndef BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H
6 : : #define BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H
7 : :
8 : : #include <cluster_linearize.h>
9 : : #include <serialize.h>
10 : : #include <span.h>
11 : : #include <streams.h>
12 : : #include <util/bitset.h>
13 : : #include <util/feefrac.h>
14 : :
15 : : #include <stdint.h>
16 : : #include <numeric>
17 : : #include <vector>
18 : : #include <utility>
19 : :
20 : : namespace {
21 : :
22 : : using namespace cluster_linearize;
23 : :
24 : : using TestBitSet = BitSet<32>;
25 : :
26 : : /** Check if a graph is acyclic. */
27 : : template<typename SetType>
28 : 6 : bool IsAcyclic(const DepGraph<SetType>& depgraph) noexcept
29 : : {
30 [ + + ]: 21 : for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) {
31 [ + - ]: 15 : if ((depgraph.Ancestors(i) & depgraph.Descendants(i)) != SetType::Singleton(i)) {
32 : : return false;
33 : : }
34 : : }
35 : : return true;
36 : : }
37 : :
38 : : /** A formatter for a bespoke serialization for acyclic DepGraph objects.
39 : : *
40 : : * The serialization format outputs information about transactions in a topological order (parents
41 : : * before children), together with position information so transactions can be moved back to their
42 : : * correct position on deserialization.
43 : : *
44 : : * - For each transaction t in the DepGraph (in some topological order);
45 : : * - The size: VARINT(t.size), which cannot be 0.
46 : : * - The fee: VARINT(SignedToUnsigned(t.fee)), see below for SignedToUnsigned.
47 : : * - For each direct dependency:
48 : : * - VARINT(skip)
49 : : * - The position of t in the cluster: VARINT(skip)
50 : : * - The end of the graph: VARINT(0)
51 : : *
52 : : * The list of skip values encodes the dependencies of t, as well as its position in the cluster.
53 : : * Each skip value is the number of possibilities that were available, but were not taken. These
54 : : * possibilities are, in order:
55 : : * - For each previous transaction in the graph, in reverse serialization order, whether it is a
56 : : * direct parent of t (but excluding transactions which are already implied to be dependencies
57 : : * by parent relations that were serialized before it).
58 : : * - The various insertion positions in the cluster, from the very end of the cluster, to the
59 : : * front.
60 : : *
61 : : * Let's say you have a 7-transaction cluster, consisting of transactions F,A,C,B,G,E,D, but
62 : : * serialized in order A,B,C,D,E,F,G, because that happens to be a topological ordering. By the
63 : : * time G gets serialized, what has been serialized already represents the cluster F,A,C,B,E,D (in
64 : : * that order). G has B and E as direct parents, and E depends on C.
65 : : *
66 : : * In this case, the possibilities are, in order:
67 : : * - [ ] the dependency G->F
68 : : * - [X] the dependency G->E
69 : : * - [ ] the dependency G->D
70 : : * - [X] the dependency G->B
71 : : * - [ ] the dependency G->A
72 : : * - [ ] put G at the end of the cluster
73 : : * - [ ] put G before D
74 : : * - [X] put G before E
75 : : * - [ ] put G before B
76 : : * - [ ] put G before C
77 : : * - [ ] put G before A
78 : : * - [ ] put G before F
79 : : *
80 : : * The skip values in this case are 1 (G->F), 1 (G->D), 3 (G->A, G at end, G before D). No skip
81 : : * after 3 is needed (or permitted), because there can only be one position for G. Also note that
82 : : * G->C is not included in the list of possibilities, as it is implied by the included G->E and
83 : : * E->C that came before it. On deserialization, if the last skip value was 8 or larger (putting
84 : : * G before the beginning of the cluster), it is interpreted as wrapping around back to the end.
85 : : *
86 : : *
87 : : * Rationale:
88 : : * - Why VARINTs? They are flexible enough to represent large numbers where needed, but more
89 : : * compact for smaller numbers. The serialization format is designed so that simple structures
90 : : * involve smaller numbers, so smaller size maps to simpler graphs.
91 : : * - Why use SignedToUnsigned? It results in small unsigned values for signed values with small
92 : : * absolute value. This way we can encode negative fees in graphs, but still let small negative
93 : : * numbers have small encodings.
94 : : * - Why are the parents emitted in reverse order compared to the transactions themselves? This
95 : : * naturally lets us skip parents-of-parents, as they will be reflected as implied dependencies.
96 : : * - Why encode skip values and not a bitmask to convey the list positions? It turns out that the
97 : : * most complex graphs (in terms of linearization complexity) are ones with ~1 dependency per
98 : : * transaction. The current encoding uses ~1 byte per transaction for dependencies in this case,
99 : : * while a bitmask would require ~N/2 bits per transaction.
100 : : */
101 : :
102 : : struct DepGraphFormatter
103 : : {
104 : : /** Convert x>=0 to 2x (even), x<0 to -2x-1 (odd). */
105 : 30 : static uint64_t SignedToUnsigned(int64_t x) noexcept
106 : : {
107 [ + + ]: 30 : if (x < 0) {
108 : 6 : return 2 * uint64_t(-(x + 1)) + 1;
109 : : } else {
110 : 24 : return 2 * uint64_t(x);
111 : : }
112 : : }
113 : :
114 : : /** Convert even x to x/2 (>=0), odd x to -(x/2)-1 (<0). */
115 : 45 : static int64_t UnsignedToSigned(uint64_t x) noexcept
116 : : {
117 : 45 : if (x & 1) {
118 : 9 : return -int64_t(x / 2) - 1;
119 : : } else {
120 : 36 : return int64_t(x / 2);
121 : : }
122 : : }
123 : :
124 : : template <typename Stream, typename SetType>
125 : 12 : static void Ser(Stream& s, const DepGraph<SetType>& depgraph)
126 : : {
127 : : /** Construct a topological order to serialize the transactions in. */
128 : 12 : std::vector<ClusterIndex> topo_order(depgraph.TxCount());
129 : 12 : std::iota(topo_order.begin(), topo_order.end(), ClusterIndex{0});
130 : 12 : std::sort(topo_order.begin(), topo_order.end(), [&](ClusterIndex a, ClusterIndex b) {
131 [ + + ]: 42 : auto anc_a = depgraph.Ancestors(a).Count(), anc_b = depgraph.Ancestors(b).Count();
132 [ + + ]: 42 : if (anc_a != anc_b) return anc_a < anc_b;
133 : 10 : return a < b;
134 : : });
135 : :
136 : : /** Which transactions the deserializer already knows when it has deserialized what has
137 : : * been serialized here so far, and in what order. */
138 [ + - ]: 12 : std::vector<ClusterIndex> rebuilt_order;
139 [ + - ]: 12 : rebuilt_order.reserve(depgraph.TxCount());
140 : :
141 : : // Loop over the transactions in topological order.
142 [ + + ]: 42 : for (ClusterIndex topo_idx = 0; topo_idx < topo_order.size(); ++topo_idx) {
143 : : /** Which depgraph index we are currently writing. */
144 [ + - ]: 30 : ClusterIndex idx = topo_order[topo_idx];
145 : : // Write size, which must be larger than 0.
146 [ + - ]: 30 : s << VARINT_MODE(depgraph.FeeRate(idx).size, VarIntMode::NONNEGATIVE_SIGNED);
147 : : // Write fee, encoded as an unsigned varint (odd=negative, even=non-negative).
148 [ + + + - ]: 60 : s << VARINT(SignedToUnsigned(depgraph.FeeRate(idx).fee));
149 : : // Write dependency information.
150 : 30 : SetType written_parents;
151 : 30 : uint64_t diff = 0; //!< How many potential parent/child relations we have skipped over.
152 [ + + ]: 70 : for (ClusterIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) {
153 : : /** Which depgraph index we are currently considering as parent of idx. */
154 [ + + ]: 40 : ClusterIndex dep_idx = topo_order[topo_idx - 1 - dep_dist];
155 : : // Ignore transactions which are already known to be ancestors.
156 [ + + ]: 40 : if (depgraph.Descendants(dep_idx).Overlaps(written_parents)) continue;
157 [ + + ]: 34 : if (depgraph.Ancestors(idx)[dep_idx]) {
158 : : // When an actual parent is encounted, encode how many non-parents were skipped
159 : : // before it.
160 [ + - ]: 20 : s << VARINT(diff);
161 : 20 : diff = 0;
162 : 20 : written_parents.Set(dep_idx);
163 : : } else {
164 : : // When a non-parent is encountered, increment the skip counter.
165 : 14 : ++diff;
166 : : }
167 : : }
168 : : // Write position information.
169 : : ClusterIndex insert_distance = 0;
170 [ + + ]: 40 : while (insert_distance < rebuilt_order.size()) {
171 : : // Loop to find how far from the end in rebuilt_order to insert.
172 [ + + ]: 26 : if (idx > *(rebuilt_order.end() - 1 - insert_distance)) break;
173 : 10 : ++insert_distance;
174 : : }
175 [ + - ]: 30 : rebuilt_order.insert(rebuilt_order.end() - insert_distance, idx);
176 [ + - ]: 60 : s << VARINT(diff + insert_distance);
177 : : }
178 : :
179 : : // Output a final 0 to denote the end of the graph.
180 [ + - ]: 24 : s << uint8_t{0};
181 : 12 : }
182 : :
183 : : template <typename Stream, typename SetType>
184 : 18 : void Unser(Stream& s, DepGraph<SetType>& depgraph)
185 : : {
186 : : /** The dependency graph which we deserialize into first, with transactions in
187 : : * topological serialization order, not original cluster order. */
188 : 18 : DepGraph<SetType> topo_depgraph;
189 : : /** Mapping from cluster order to serialization order, used later to reconstruct the
190 : : * cluster order. */
191 : 18 : std::vector<ClusterIndex> reordering;
192 : :
193 : : // Read transactions in topological order.
194 : : try {
195 : : while (true) {
196 : : // Read size. Size 0 signifies the end of the DepGraph.
197 : : int32_t size;
198 [ + + ]: 63 : s >> VARINT_MODE(size, VarIntMode::NONNEGATIVE_SIGNED);
199 : 57 : size &= 0x3FFFFF; // Enough for size up to 4M.
200 : : static_assert(0x3FFFFF >= 4000000);
201 [ + + + - ]: 57 : if (size == 0 || topo_depgraph.TxCount() == SetType::Size()) break;
202 : : // Read fee, encoded as an unsigned varint (odd=negative, even=non-negative).
203 : : uint64_t coded_fee;
204 [ + - ]: 45 : s >> VARINT(coded_fee);
205 : 45 : coded_fee &= 0xFFFFFFFFFFFFF; // Enough for fee between -21M...21M BTC.
206 : : static_assert(0xFFFFFFFFFFFFF > uint64_t{2} * 21000000 * 100000000);
207 [ + + ]: 45 : auto fee = UnsignedToSigned(coded_fee);
208 : : // Extend topo_depgraph with the new transaction (at the end).
209 : 45 : auto topo_idx = topo_depgraph.AddTransaction({fee, size});
210 [ + - ]: 45 : reordering.push_back(topo_idx);
211 : : // Read dependency information.
212 [ + - ]: 45 : uint64_t diff = 0; //!< How many potential parents we have to skip.
213 [ + - ]: 45 : s >> VARINT(diff);
214 [ + + ]: 105 : for (ClusterIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) {
215 : : /** Which topo_depgraph index we are currently considering as parent of topo_idx. */
216 [ + + ]: 60 : ClusterIndex dep_topo_idx = topo_idx - 1 - dep_dist;
217 : : // Ignore transactions which are already known ancestors of topo_idx.
218 [ + + ]: 60 : if (topo_depgraph.Descendants(dep_topo_idx)[topo_idx]) continue;
219 [ + + ]: 51 : if (diff == 0) {
220 : : // When the skip counter has reached 0, add an actual dependency.
221 [ + - ]: 30 : topo_depgraph.AddDependency(dep_topo_idx, topo_idx);
222 : : // And read the number of skips after it.
223 [ + - ]: 90 : s >> VARINT(diff);
224 : : } else {
225 : : // Otherwise, dep_topo_idx is not a parent. Decrement and continue.
226 : 21 : --diff;
227 : : }
228 : : }
229 : : // If we reach this point, we can interpret the remaining skip value as how far from the
230 : : // end of reordering topo_idx should be placed (wrapping around), so move it to its
231 : : // correct location. The preliminary reordering.push_back(topo_idx) above was to make
232 : : // sure that if a deserialization exception occurs, topo_idx still appears somewhere.
233 : 45 : reordering.pop_back();
234 [ + - ]: 45 : reordering.insert(reordering.end() - (diff % (reordering.size() + 1)), topo_idx);
235 : : }
236 [ - + ]: 6 : } catch (const std::ios_base::failure&) {}
237 : :
238 : : // Construct the original cluster order depgraph.
239 : 18 : depgraph = {};
240 : : // Add transactions to depgraph in the original cluster order.
241 [ + + ]: 63 : for (auto topo_idx : reordering) {
242 : 45 : depgraph.AddTransaction(topo_depgraph.FeeRate(topo_idx));
243 : : }
244 : : // Translate dependencies from topological to cluster order.
245 [ + + ]: 63 : for (ClusterIndex idx = 0; idx < reordering.size(); ++idx) {
246 : 45 : ClusterIndex topo_idx = reordering[idx];
247 [ + + ]: 210 : for (ClusterIndex dep_idx = 0; dep_idx < reordering.size(); ++dep_idx) {
248 [ + + ]: 165 : ClusterIndex dep_topo_idx = reordering[dep_idx];
249 [ + + ]: 165 : if (topo_depgraph.Ancestors(topo_idx)[dep_topo_idx]) {
250 : 84 : depgraph.AddDependency(dep_idx, idx);
251 : : }
252 : : }
253 : : }
254 : 18 : }
255 : : };
256 : :
257 : : /** Perform a sanity/consistency check on a DepGraph. */
258 : : template<typename SetType>
259 : 6 : void SanityCheck(const DepGraph<SetType>& depgraph)
260 : : {
261 : : // Consistency check between ancestors internally.
262 [ + + ]: 21 : for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) {
263 : : // Transactions include themselves as ancestors.
264 [ - + ]: 15 : assert(depgraph.Ancestors(i)[i]);
265 : : // If a is an ancestor of b, then b's ancestors must include all of a's ancestors.
266 [ + - - + : 58 : for (auto a : depgraph.Ancestors(i)) {
+ + ]
267 [ - + ]: 28 : assert(depgraph.Ancestors(i).IsSupersetOf(depgraph.Ancestors(a)));
268 : : }
269 : : }
270 : : // Consistency check between ancestors and descendants.
271 [ + + ]: 21 : for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) {
272 [ + + ]: 70 : for (ClusterIndex j = 0; j < depgraph.TxCount(); ++j) {
273 [ - + ]: 55 : assert(depgraph.Ancestors(i)[j] == depgraph.Descendants(j)[i]);
274 : : }
275 : : }
276 : : // If DepGraph is acyclic, serialize + deserialize must roundtrip.
277 [ + - ]: 6 : if (IsAcyclic(depgraph)) {
278 : 6 : std::vector<unsigned char> ser;
279 [ + - ]: 6 : VectorWriter writer(ser, 0);
280 [ + - + - ]: 12 : writer << Using<DepGraphFormatter>(depgraph);
281 [ + - ]: 6 : SpanReader reader(ser);
282 [ + - ]: 6 : DepGraph<TestBitSet> decoded_depgraph;
283 [ + - ]: 12 : reader >> Using<DepGraphFormatter>(decoded_depgraph);
284 [ - + ]: 6 : assert(depgraph == decoded_depgraph);
285 [ - + ]: 6 : assert(reader.empty());
286 : : // It must also deserialize correctly without the terminal 0 byte (as the deserializer
287 : : // will upon EOF still return what it read so far).
288 [ + - - + ]: 6 : assert(ser.size() >= 1 && ser.back() == 0);
289 : 6 : ser.pop_back();
290 : 6 : reader = SpanReader{ser};
291 [ + - ]: 6 : decoded_depgraph = {};
292 [ + - ]: 12 : reader >> Using<DepGraphFormatter>(decoded_depgraph);
293 [ - + ]: 6 : assert(depgraph == decoded_depgraph);
294 [ - + ]: 6 : assert(reader.empty());
295 : 6 : }
296 : 6 : }
297 : :
298 : : /** Verify that a DepGraph corresponds to the information in a cluster. */
299 : : template<typename SetType>
300 : 6 : void VerifyDepGraphFromCluster(const Cluster<SetType>& cluster, const DepGraph<SetType>& depgraph)
301 : : {
302 : : // Sanity check the depgraph, which includes a check for correspondence between ancestors and
303 : : // descendants, so it suffices to check just ancestors below.
304 [ - + ]: 6 : SanityCheck(depgraph);
305 : : // Verify transaction count.
306 [ - + ]: 6 : assert(cluster.size() == depgraph.TxCount());
307 : : // Verify feerates.
308 [ + + ]: 21 : for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) {
309 [ + - ]: 30 : assert(depgraph.FeeRate(i) == cluster[i].first);
310 : : }
311 : : // Verify ancestors.
312 [ + + ]: 21 : for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) {
313 : : // Start with the transaction having itself as ancestor.
314 : 15 : auto ancestors = SetType::Singleton(i);
315 : : // Add parents of ancestors to the set of ancestors until it stops changing.
316 : : while (true) {
317 : 25 : const auto old_ancestors = ancestors;
318 [ + - + + ]: 90 : for (auto ancestor : ancestors) {
319 : 40 : ancestors |= cluster[ancestor].second;
320 : : }
321 [ + + ]: 25 : if (old_ancestors == ancestors) break;
322 : : }
323 : : // Compare against depgraph.
324 [ - + - + ]: 15 : assert(depgraph.Ancestors(i) == ancestors);
325 : : // Some additional sanity tests:
326 : : // - Every transaction has itself as ancestor.
327 [ - + ]: 15 : assert(ancestors[i]);
328 : : // - Every transaction has its direct parents as ancestors.
329 [ + + - + : 33 : for (auto parent : cluster[i].second) {
+ + ]
330 [ - + ]: 10 : assert(ancestors[parent]);
331 : : }
332 : : }
333 : 6 : }
334 : :
335 : : /** Perform a sanity check on a linearization. */
336 : : template<typename SetType>
337 [ # # ]: 0 : void SanityCheck(const DepGraph<SetType>& depgraph, Span<const ClusterIndex> linearization)
338 : : {
339 : : // Check completeness.
340 [ # # ]: 0 : assert(linearization.size() == depgraph.TxCount());
341 : 0 : TestBitSet done;
342 [ # # ]: 0 : for (auto i : linearization) {
343 : : // Check transaction position is in range.
344 [ # # ]: 0 : assert(i < depgraph.TxCount());
345 : : // Check topology and lack of duplicates.
346 [ # # ]: 0 : assert((depgraph.Ancestors(i) - done) == TestBitSet::Singleton(i));
347 : 0 : done.Set(i);
348 : : }
349 : 0 : }
350 : :
351 : : } // namespace
352 : :
353 : : #endif // BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H
|