Branch data Line data Source code
1 : : // Copyright (c) The Bitcoin Core developers
2 : : // Distributed under the MIT software license, see the accompanying
3 : : // file COPYING or http://www.opensource.org/licenses/mit-license.php.
4 : :
5 : : #ifndef BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H
6 : : #define BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H
7 : :
8 : : #include <cluster_linearize.h>
9 : : #include <serialize.h>
10 : : #include <span.h>
11 : : #include <streams.h>
12 : : #include <util/bitset.h>
13 : : #include <util/feefrac.h>
14 : :
15 : : #include <stdint.h>
16 : : #include <numeric>
17 : : #include <vector>
18 : : #include <utility>
19 : :
20 : : namespace {
21 : :
22 : : using namespace cluster_linearize;
23 : :
24 : : using TestBitSet = BitSet<32>;
25 : :
26 : : /** A formatter for a bespoke serialization for acyclic DepGraph objects.
27 : : *
28 : : * The serialization format outputs information about transactions in a topological order (parents
29 : : * before children), together with position information so transactions can be moved back to their
30 : : * correct position on deserialization.
31 : : *
32 : : * - For each transaction t in the DepGraph (in some topological order):
33 : : * - The size: VARINT(t.size), which cannot be 0.
34 : : * - The fee: VARINT(SignedToUnsigned(t.fee)), see below for SignedToUnsigned.
35 : : * - For each direct dependency:
36 : : * - VARINT(skip)
37 : : * - The position of t in the cluster: VARINT(skip)
38 : : * - The end of the graph: VARINT(0)
39 : : *
40 : : * The list of skip values encodes the dependencies of t, as well as its position in the cluster.
41 : : * Each skip value is the number of possibilities that were available, but were not taken. These
42 : : * possibilities are, in order:
43 : : * - For each previous transaction in the graph, in reverse serialization order, whether it is a
44 : : * direct parent of t (but excluding transactions which are already implied to be dependencies
45 : : * by parent relations that were serialized before it).
46 : : * - The various insertion positions in the cluster, from the very end of the cluster, to the
47 : : * front.
48 : : * - The appending of 1, 2, 3, ... holes at the end of the cluster, followed by appending the new
49 : : * transaction.
50 : : *
51 : : * Let's say you have a 7-transaction cluster, consisting of transactions F,A,C,B,_,G,E,_,D
52 : : * (where _ represent holes; unused positions within the DepGraph) but serialized in order
53 : : * A,B,C,D,E,F,G, because that happens to be a topological ordering. By the time G gets serialized,
54 : : * what has been serialized already represents the cluster F,A,C,B,_,E,_,D (in that order). G has B
55 : : * and E as direct parents, and E depends on C.
56 : : *
57 : : * In this case, the possibilities are, in order:
58 : : * - [ ] the dependency G->F
59 : : * - [X] the dependency G->E
60 : : * - [ ] the dependency G->D
61 : : * - [X] the dependency G->B
62 : : * - [ ] the dependency G->A
63 : : * - [ ] put G at the end of the cluster
64 : : * - [ ] put G before D
65 : : * - [ ] put G before the hole before D
66 : : * - [X] put G before E
67 : : * - [ ] put G before the hole before E
68 : : * - [ ] put G before B
69 : : * - [ ] put G before C
70 : : * - [ ] put G before A
71 : : * - [ ] put G before F
72 : : * - [ ] add 1 hole at the end of the cluster, followed by G
73 : : * - [ ] add 2 holes at the end of the cluster, followed by G
74 : : * - [ ] add ...
75 : : *
76 : : * The skip values in this case are 1 (G->F), 1 (G->D), 4 (G->A, G at end, G before D, G before
77 : : * hole). No skip after 4 is needed (or permitted), because there can only be one position for G.
78 : : * Also note that G->C is not included in the list of possibilities, as it is implied by the
79 : : * included G->E and E->C that came before it. On deserialization, the skip value that remains
80 : : * after the dependency choices is reduced modulo the maximum cluster size, so oversized values
81 : : * wrap around instead of being rejected.
82 : : *
83 : : *
84 : : * Rationale:
85 : : * - Why VARINTs? They are flexible enough to represent large numbers where needed, but more
86 : : * compact for smaller numbers. The serialization format is designed so that simple structures
87 : : * involve smaller numbers, so smaller size maps to simpler graphs.
88 : : * - Why use SignedToUnsigned? It results in small unsigned values for signed values with small
89 : : * absolute value. This way we can encode negative fees in graphs, but still let small negative
90 : : * numbers have small encodings.
91 : : * - Why are the parents emitted in reverse order compared to the transactions themselves? This
92 : : * naturally lets us skip parents-of-parents, as they will be reflected as implied dependencies.
93 : : * - Why encode skip values and not a bitmask to convey the list positions? It turns out that the
94 : : * most complex graphs (in terms of linearization complexity) are ones with ~1 dependency per
95 : : * transaction. The current encoding uses ~1 byte per transaction for dependencies in this case,
96 : : * while a bitmask would require ~N/2 bits per transaction.
97 : : */
98 : :
99 : : struct DepGraphFormatter
100 : : {
101 : : /** Convert x>=0 to 2x (even), x<0 to -2x-1 (odd). */
102 : 6252 : [[maybe_unused]] static uint64_t SignedToUnsigned(int64_t x) noexcept
103 : : {
104 : 6252 : if (x < 0) {
105 : 3550 : return 2 * uint64_t(-(x + 1)) + 1; // Add 1 before negating so the most negative int64_t stays representable.
106 : : } else {
107 : 2702 : return 2 * uint64_t(x);
108 : : }
109 : : }
110 : :
111 : : /** Convert even x to x/2 (>=0), odd x to -(x/2)-1 (<0). */
112 : 42162 : [[maybe_unused]] static int64_t UnsignedToSigned(uint64_t x) noexcept
113 : : {
114 : 42162 : if (x & 1) {
115 : 20722 : return -int64_t(x / 2) - 1;
116 : : } else {
117 : 21440 : return int64_t(x / 2);
118 : : }
119 : : }
120 : : /** Serialize depgraph to s: per transaction its size, fee, parent skips and position skip, then a terminating 0 byte. */
121 : : template <typename Stream, typename SetType>
122 : 363 : static void Ser(Stream& s, const DepGraph<SetType>& depgraph)
123 : : {
124 : : /** Construct a topological order to serialize the transactions in. */
125 [ + - ]: 363 : std::vector<DepGraphIndex> topo_order;
126 [ + - ]: 363 : topo_order.reserve(depgraph.TxCount());
127 [ + + + - : 6959 : for (auto i : depgraph.Positions()) topo_order.push_back(i);
+ + ]
128 : 363 : std::sort(topo_order.begin(), topo_order.end(), [&](DepGraphIndex a, DepGraphIndex b) {
129 [ + + ]: 36262 : auto anc_a = depgraph.Ancestors(a).Count(), anc_b = depgraph.Ancestors(b).Count();
130 [ + + ]: 36262 : if (anc_a != anc_b) return anc_a < anc_b; // Fewer ancestors sorts first.
131 : 16771 : return a < b; // Tie-break on index so the order is deterministic.
132 : : });
133 : :
134 : : /** Which positions (incl. holes) the deserializer already knows when it has deserialized
135 : : * what has been serialized here so far. */
136 : 363 : SetType done;
137 : :
138 : : // Loop over the transactions in topological order.
139 [ + + ]: 6615 : for (DepGraphIndex topo_idx = 0; topo_idx < topo_order.size(); ++topo_idx) {
140 : : /** Which depgraph index we are currently writing. */
141 [ + - ]: 6252 : DepGraphIndex idx = topo_order[topo_idx];
142 : : // Write size, which must be larger than 0.
143 [ + - + + ]: 12504 : s << VARINT_MODE(depgraph.FeeRate(idx).size, VarIntMode::NONNEGATIVE_SIGNED);
144 : : // Write fee, encoded as an unsigned varint (odd=negative, even=non-negative).
145 [ + + + - ]: 12504 : s << VARINT(SignedToUnsigned(depgraph.FeeRate(idx).fee));
146 : : // Write dependency information.
147 : 6252 : SetType written_parents;
148 : 6252 : uint64_t diff = 0; //!< How many potential parent/child relations we have skipped over.
149 [ + + ]: 82620 : for (DepGraphIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) {
150 : : /** Which depgraph index we are currently considering as parent of idx. */
151 [ + + ]: 76368 : DepGraphIndex dep_idx = topo_order[topo_idx - 1 - dep_dist];
152 : : // Ignore transactions which are already known to be ancestors.
153 [ + + ]: 76368 : if (depgraph.Descendants(dep_idx).Overlaps(written_parents)) continue;
154 [ + + ]: 57960 : if (depgraph.Ancestors(idx)[dep_idx]) {
155 : : // When an actual parent is encountered, encode how many non-parents were skipped
156 : : // before it.
157 [ + - ]: 6181 : s << VARINT(diff);
158 : 6181 : diff = 0;
159 : 6181 : written_parents.Set(dep_idx);
160 : : } else {
161 : : // When a non-parent is encountered, increment the skip counter.
162 : 51779 : ++diff;
163 : : }
164 : : }
165 : : // Write position information.
166 [ + + ]: 6252 : auto add_holes = SetType::Fill(idx) - done - depgraph.Positions();
167 [ + + ]: 6252 : if (add_holes.None()) {
168 : : // The new transaction is to be inserted N positions back from the end of the
169 : : // cluster. Emit N to indicate that that many insertion choices are skipped.
170 [ + - ]: 5596 : auto skips = (done - SetType::Fill(idx)).Count();
171 [ + - ]: 5596 : s << VARINT(diff + skips);
172 : : } else {
173 : : // The new transaction is to be appended at the end of the cluster, after N holes.
174 : : // Emit current_cluster_size + N, to indicate all insertion choices are skipped,
175 : : // plus N possibilities for the number of holes.
176 [ + - ]: 1312 : s << VARINT(diff + done.Count() + add_holes.Count());
177 : 656 : done |= add_holes;
178 : : }
179 : 6252 : done.Set(idx);
180 : : }
181 : :
182 : : // Output a final 0 to denote the end of the graph.
183 [ + - ]: 726 : s << uint8_t{0};
184 : 363 : }
185 : : /** Deserialize a DepGraph from s into depgraph; on a read failure, whatever was decoded so far is kept. */
186 : : template <typename Stream, typename SetType>
187 : 2329 : void Unser(Stream& s, DepGraph<SetType>& depgraph)
188 : : {
189 : : /** The dependency graph which we deserialize into first, with transactions in
190 : : * topological serialization order, not original cluster order. */
191 : 2329 : DepGraph<SetType> topo_depgraph;
192 : : /** Mapping from serialization order to cluster order, used later to reconstruct the
193 : : * cluster order. */
194 : 2329 : std::vector<DepGraphIndex> reordering;
195 : : /** How big the entries vector in the reconstructed depgraph will be (including holes). */
196 : 2329 : DepGraphIndex total_size{0};
197 : :
198 : : // Read transactions in topological order.
199 : : while (true) {
200 : 43820 : FeeFrac new_feerate; //!< The new transaction's fee and size.
201 : 43820 : SetType new_ancestors; //!< The new transaction's ancestors (excluding itself).
202 : 43820 : uint64_t diff{0}; //!< How many potential parents/insertions we have to skip.
203 [ + + ]: 43820 : bool read_error{false};
204 : : try {
205 : : // Read size. Size 0 signifies the end of the DepGraph.
206 : : int32_t size;
207 [ + + ]: 43820 : s >> VARINT_MODE(size, VarIntMode::NONNEGATIVE_SIGNED);
208 : 43125 : size &= 0x3FFFFF; // Enough for size up to 4M.
209 : : static_assert(0x3FFFFF >= 4000000);
210 [ + + + + ]: 43125 : if (size == 0 || topo_depgraph.TxCount() == SetType::Size()) break; // Also stop once the graph holds SetType::Size() transactions.
211 : : // Read fee, encoded as an unsigned varint (odd=negative, even=non-negative).
212 : : uint64_t coded_fee;
213 [ + + ]: 42288 : s >> VARINT(coded_fee);
214 : 42162 : coded_fee &= 0xFFFFFFFFFFFFF; // Enough for fee between -21M...21M BTC.
215 : : static_assert(0xFFFFFFFFFFFFF > uint64_t{2} * 21000000 * 100000000);
216 [ + + + + ]: 84324 : new_feerate = {UnsignedToSigned(coded_fee), size};
217 : : // Read dependency information.
218 [ + + ]: 42162 : auto topo_idx = reordering.size();
219 [ + + ]: 42162 : s >> VARINT(diff);
220 [ + + ]: 557353 : for (DepGraphIndex dep_dist = 0; dep_dist < topo_idx; ++dep_dist) {
221 : : /** Which topo_depgraph index we are currently considering as parent of topo_idx. */
222 : 515862 : DepGraphIndex dep_topo_idx = topo_idx - 1 - dep_dist;
223 : : // Ignore transactions which are already known ancestors of topo_idx.
224 [ + + ]: 515862 : if (new_ancestors[dep_topo_idx]) continue;
225 [ + + ]: 439187 : if (diff == 0) {
226 : : // When the skip counter has reached 0, add an actual dependency.
227 [ + + ]: 29686 : new_ancestors |= topo_depgraph.Ancestors(dep_topo_idx);
228 : : // And read the number of skips after it.
229 [ + + ]: 545271 : s >> VARINT(diff);
230 : : } else {
231 : : // Otherwise, dep_topo_idx is not a parent. Decrement and continue.
232 : 409501 : --diff;
233 : : }
234 : : }
235 [ - + ]: 1492 : } catch (const std::ios_base::failure&) {
236 : : // Continue even if a read error was encountered.
237 : 1492 : read_error = true;
238 : : }
239 : : // Construct a new transaction whenever we made it past the new_feerate construction.
240 [ + + ]: 42983 : if (new_feerate.IsEmpty()) break;
241 [ - + ]: 42162 : assert(reordering.size() < SetType::Size());
242 : 42162 : auto topo_idx = topo_depgraph.AddTransaction(new_feerate);
243 : 42162 : topo_depgraph.AddDependencies(new_ancestors, topo_idx);
244 [ + + ]: 42162 : if (total_size < SetType::Size()) {
245 : : // Normal case.
246 : 23994 : diff %= SetType::Size(); // Wrap oversized skip values into range.
247 [ + + ]: 23994 : if (diff <= total_size) {
248 : : // Insert the new transaction at distance diff back from the end.
249 [ + + ]: 230742 : for (auto& pos : reordering) {
250 : 210775 : pos += (pos >= total_size - diff); // Shift entries at or after the insertion point up by one.
251 : : }
252 [ + - ]: 19967 : reordering.push_back(total_size++ - diff);
253 : : } else {
254 : : // Append diff - total_size holes at the end, plus the new transaction.
255 : 4027 : total_size = diff;
256 [ + - ]: 4027 : reordering.push_back(total_size++);
257 : : }
258 : : } else {
259 : : // In case total_size == SetType::Size, it is not possible to insert the new
260 : : // transaction without exceeding SetType's size. Instead, interpret diff as an
261 : : // index into the holes, and overwrite a position there. This branch is never used
262 : : // when deserializing the output of the serializer, but gives meaning to otherwise
263 : : // invalid input.
264 : 18168 : diff %= (SetType::Size() - reordering.size());
265 : 18168 : SetType holes = SetType::Fill(SetType::Size());
266 [ + + ]: 320068 : for (auto pos : reordering) holes.Reset(pos);
267 [ + - + - ]: 148240 : for (auto pos : holes) {
268 [ + + ]: 130072 : if (diff == 0) {
269 [ + - ]: 18168 : reordering.push_back(pos);
270 : : break;
271 : : }
272 : 111904 : --diff;
273 : : }
274 : : }
275 : : // Stop if a read error was encountered during deserialization.
276 [ + + ]: 42162 : if (read_error) break;
277 : : }
278 : :
279 : : // Construct the original cluster order depgraph.
280 : 2329 : depgraph = DepGraph(topo_depgraph, reordering, total_size);
281 : 2329 : }
282 : : };
283 : :
284 : : /** Assert internal consistency of a DepGraph; acyclic graphs must additionally roundtrip through DepGraphFormatter. */
285 : : template<typename SetType>
286 : 378 : void SanityCheck(const DepGraph<SetType>& depgraph)
287 : : {
288 : : // Verify Positions and PositionRange consistency.
289 : 378 : DepGraphIndex num_positions{0};
290 [ + + ]: 378 : DepGraphIndex position_range{0};
291 [ + + + + ]: 7162 : for (DepGraphIndex i : depgraph.Positions()) {
292 : 6425 : ++num_positions;
293 : 6425 : position_range = i + 1;
294 : : }
295 [ - + ]: 378 : assert(num_positions == depgraph.TxCount());
296 [ - + ]: 378 : assert(position_range == depgraph.PositionRange());
297 [ - + ]: 378 : assert(position_range >= num_positions);
298 [ - + ]: 378 : assert(position_range <= SetType::Size());
299 : : // Consistency check between ancestors internally.
300 [ + + + + ]: 7162 : for (DepGraphIndex i : depgraph.Positions()) {
301 : : // Transactions include themselves as ancestors.
302 [ - + ]: 6425 : assert(depgraph.Ancestors(i)[i]);
303 : : // If a is an ancestor of b, then b's ancestors must include all of a's ancestors.
304 [ + - - + : 46161 : for (auto a : depgraph.Ancestors(i)) {
+ + ]
305 [ - + ]: 33311 : assert(depgraph.Ancestors(i).IsSupersetOf(depgraph.Ancestors(a)));
306 : : }
307 : : }
308 : : // Consistency check between ancestors and descendants.
309 [ + + + - : 7162 : for (DepGraphIndex i : depgraph.Positions()) {
+ + ]
310 [ + - + + ]: 175689 : for (DepGraphIndex j : depgraph.Positions()) {
311 [ - + ]: 162839 : assert(depgraph.Ancestors(i)[j] == depgraph.Descendants(j)[i]);
312 : : }
313 : : // No transaction is a parent or child of itself.
314 : 6425 : auto parents = depgraph.GetReducedParents(i);
315 : 6425 : auto children = depgraph.GetReducedChildren(i);
316 [ - + ]: 6425 : assert(!parents[i]);
317 [ - + ]: 6425 : assert(!children[i]);
318 : : // Parents of a transaction do not have ancestors inside those parents (except itself).
319 : : // Note that even the transaction itself may be missing (if it is part of a cycle).
320 [ + + + + ]: 15906 : for (auto parent : parents) {
321 [ - + ]: 6289 : assert((depgraph.Ancestors(parent) & parents).IsSubsetOf(SetType::Singleton(parent)));
322 : : }
323 : : // Similar for children and descendants.
324 [ + + + + ]: 17129 : for (auto child : children) {
325 [ - + ]: 6331 : assert((depgraph.Descendants(child) & children).IsSubsetOf(SetType::Singleton(child)));
326 : : }
327 : : }
328 [ + + ]: 378 : if (depgraph.IsAcyclic()) {
329 : : // If DepGraph is acyclic, serialize + deserialize must roundtrip.
330 : 363 : std::vector<unsigned char> ser;
331 [ + - ]: 363 : VectorWriter writer(ser, 0);
332 [ + - + - ]: 726 : writer << Using<DepGraphFormatter>(depgraph);
333 [ + - ]: 363 : SpanReader reader(ser);
334 [ + - ]: 363 : DepGraph<SetType> decoded_depgraph; // Same set type as the input, so this template is not tied to TestBitSet.
335 [ + - ]: 363 : reader >> Using<DepGraphFormatter>(decoded_depgraph);
336 [ - + ]: 363 : assert(depgraph == decoded_depgraph);
337 [ - + ]: 363 : assert(reader.empty());
338 : : // It must also deserialize correctly without the terminal 0 byte (as the deserializer
339 : : // will upon EOF still return what it read so far).
340 [ + - - + ]: 363 : assert(ser.size() >= 1 && ser.back() == 0);
341 : 363 : ser.pop_back();
342 : 363 : reader = SpanReader{ser};
343 [ + - ]: 363 : decoded_depgraph = {};
344 [ + - ]: 363 : reader >> Using<DepGraphFormatter>(decoded_depgraph);
345 [ - + ]: 363 : assert(depgraph == decoded_depgraph);
346 [ - + ]: 363 : assert(reader.empty());
347 : :
348 : : // In acyclic graphs, the union of parents with parents of parents etc. yields the
349 : : // full ancestor set (and similar for children and descendants).
350 [ + - + - ]: 363 : std::vector<SetType> parents(depgraph.PositionRange()), children(depgraph.PositionRange());
351 [ + + + + ]: 6959 : for (DepGraphIndex i : depgraph.Positions()) {
352 : 6252 : parents[i] = depgraph.GetReducedParents(i);
353 : 6252 : children[i] = depgraph.GetReducedChildren(i);
354 : : }
355 [ + + + + ]: 6959 : for (auto i : depgraph.Positions()) {
356 : : // Initialize the set of ancestors with just the current transaction itself.
357 : 6252 : SetType ancestors = SetType::Singleton(i);
358 : : // Iteratively add parents of all transactions in the ancestor set to itself.
359 : : while (true) {
360 : 21895 : const auto old_ancestors = ancestors;
361 [ + - + + ]: 176530 : for (auto j : ancestors) ancestors |= parents[j];
362 : : // Stop when no more changes are being made.
363 [ + + ]: 21895 : if (old_ancestors == ancestors) break;
364 : : }
365 [ + - ]: 6252 : assert(ancestors == depgraph.Ancestors(i));
366 : :
367 : : // Initialize the set of descendants with just the current transaction itself.
368 : 6252 : SetType descendants = SetType::Singleton(i);
369 : : // Iteratively add children of all transactions in the descendant set to itself.
370 : : while (true) {
371 : 24264 : const auto old_descendants = descendants;
372 [ + - + + ]: 185811 : for (auto j : descendants) descendants |= children[j];
373 : : // Stop when no more changes are being made.
374 [ + + ]: 24264 : if (old_descendants == descendants) break;
375 : : }
376 [ - + ]: 6252 : assert(descendants == depgraph.Descendants(i));
377 : : }
378 : 363 : }
379 : 378 : }
380 : :
381 : : /** Assert that linearization lists every transaction of depgraph exactly once, with all ancestors before descendants. */
382 : : template<typename SetType>
383 [ - + ]: 1632 : void SanityCheck(const DepGraph<SetType>& depgraph, std::span<const DepGraphIndex> linearization)
384 : : {
385 : : // Check completeness.
386 [ - + ]: 1632 : assert(linearization.size() == depgraph.TxCount());
387 : 1632 : SetType done; // Transactions seen so far; uses the graph's own set type rather than TestBitSet.
388 [ + + ]: 35525 : for (auto i : linearization) {
389 : : // Check transaction position is in range.
390 [ - + ]: 33893 : assert(depgraph.Positions()[i]);
391 : : // Check topology and lack of duplicates.
392 [ + - ]: 33893 : assert((depgraph.Ancestors(i) - done) == SetType::Singleton(i));
393 : 33893 : done.Set(i);
394 : : }
395 : 1632 : }
396 : :
397 : : } // namespace
398 : :
399 : : #endif // BITCOIN_TEST_UTIL_CLUSTER_LINEARIZE_H
|