Branch data Line data Source code
1 : : // Copyright (c) 2019-present The Bitcoin Core developers
2 : : // Distributed under the MIT software license, see the accompanying
3 : : // file COPYING or http://www.opensource.org/licenses/mit-license.php.
4 : :
5 : : #include <util/asmap.h>
6 : :
7 : : #include <clientversion.h>
8 : : #include <hash.h>
9 : : #include <serialize.h>
10 : : #include <streams.h>
11 : : #include <uint256.h>
12 : : #include <util/fs.h>
13 : : #include <util/log.h>
14 : :
15 : : #include <algorithm>
16 : : #include <bit>
17 : : #include <cassert>
18 : : #include <cstddef>
19 : : #include <cstdio>
20 : : #include <span>
21 : : #include <utility>
22 : : #include <vector>
23 : :
24 : : /*
25 : : * ASMap (Autonomous System Map) Implementation
26 : : *
27 : : * Provides a compressed mapping from IP address prefixes to Autonomous System Numbers (ASNs).
28 : : * Uses a binary trie structure encoded as bytecode instructions that are interpreted
29 : : * at runtime to find the ASN for a given IP address.
30 : : *
31 : : * The format of the asmap data is a bit-packed binary format where the entire mapping
32 : : * is treated as a continuous sequence of bits. Instructions and their arguments are
33 : : * encoded using variable numbers of bits and concatenated together without regard for
34 : : * byte boundaries. The bits are stored in bytes using little-endian bit ordering.
35 : : *
36 : : * The data structure internally represents the mapping as a binary trie where:
37 : : * - Unassigned subnets (no ASN mapping present) map to 0
38 : : * - Subnets mapped entirely to one ASN become leaf nodes
39 : : * - Subnets whose lower and upper halves have different mappings branch into subtrees
40 : : *
41 : : * The encoding uses variable-length integers and four instruction types (RETURN, JUMP,
42 : : * MATCH, DEFAULT) to efficiently represent the trie.
43 : : */
44 : :
45 : : namespace {
46 : :
47 : : // Indicates decoding errors or invalid data
48 : : constexpr uint32_t INVALID = 0xFFFFFFFF;
49 : :
50 : : /**
51 : : * Extract a single bit from byte array using little-endian bit ordering (LSB first).
52 : : * Used for ASMap data.
53 : : */
54 : 526753642 : inline bool ConsumeBitLE(size_t& bitpos, std::span<const std::byte> bytes) noexcept
55 : : {
56 : 526753642 : const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (bitpos % 8)) & 1;
57 : 526753642 : ++bitpos;
58 [ + + ]: 145369377 : return bit;
59 : : }
60 : :
61 : : /**
62 : : * Extract a single bit from byte array using big-endian bit ordering (MSB first).
63 : : * Used for IP addresses to match network byte order conventions.
64 : : */
65 : 18589009 : inline bool ConsumeBitBE(uint8_t& bitpos, std::span<const std::byte> bytes) noexcept
66 : : {
67 : 18589009 : const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (7 - (bitpos % 8))) & 1;
68 : 18589009 : ++bitpos;
69 [ + + + + ]: 18589009 : return bit;
70 : : }
71 : :
72 : : /**
73 : : * Variable-length integer decoder using a custom encoding scheme.
74 : : *
75 : : * The encoding is easiest to describe using an example. Let's say minval=100 and
76 : : * bit_sizes=[4,2,2,3]. In that case:
77 : : * - x in [100..115]: encoded as [0] + [4-bit BE encoding of (x-100)]
78 : : * - x in [116..119]: encoded as [1,0] + [2-bit BE encoding of (x-116)]
79 : : * - x in [120..123]: encoded as [1,1,0] + [2-bit BE encoding of (x-120)]
80 : : * - x in [124..131]: encoded as [1,1,1] + [3-bit BE encoding of (x-124)]
81 : : *
82 : : * In general, every number is encoded as:
83 : : * - First, k "1"-bits, where k is the class the number falls in
84 : : * - Then, a "0"-bit, unless k is the highest class
85 : : * - Lastly, bit_sizes[k] bits encoding in big endian the position within that class
86 : : */
87 : 68338508 : uint32_t DecodeBits(size_t& bitpos, const std::span<const std::byte> data, uint8_t minval, const std::span<const uint8_t> bit_sizes)
88 : : {
89 : 68338508 : uint32_t val = minval; // Start with minimum encodable value
90 : 68338508 : bool bit;
91 [ + - ]: 162098512 : for (auto bit_sizes_it = bit_sizes.begin(); bit_sizes_it != bit_sizes.end(); ++bit_sizes_it) {
92 : : // Read continuation bit to determine if we're in this class
93 [ + + ]: 162098512 : if (bit_sizes_it + 1 != bit_sizes.end()) { // Unless we're in the last class
94 [ + + ]: 145365113 : if (bitpos >= data.size() * 8) break;
95 : 145358907 : bit = ConsumeBitLE(bitpos, data);
96 : : } else {
97 : : bit = 0; // Last class has no continuation bit
98 : : }
99 [ + + ]: 162092306 : if (bit) {
100 : : // If the value will not fit in this class, subtract its range from val,
101 : : // emit a "1" bit and continue with the next class
102 : 93760004 : val += (1 << *bit_sizes_it); // Add size of this class
103 : : } else {
104 : : // Decode the position within this class in big endian
105 [ + + ]: 449716567 : for (int b = 0; b < *bit_sizes_it; b++) {
106 [ + + ]: 381450193 : if (bitpos >= data.size() * 8) return INVALID; // Reached EOF in mantissa
107 : 381384265 : bit = ConsumeBitLE(bitpos, data);
108 : 381384265 : val += bit << (*bit_sizes_it - 1 - b); // Big-endian within the class
109 : : }
110 : : return val;
111 : : }
112 : : }
113 : : return INVALID; // Reached EOF in exponent
114 : : }
115 : :
116 : : /**
117 : : * Instruction Set
118 : : *
119 : : * The instruction set is designed to efficiently encode a binary trie
120 : : * that maps IP prefixes to ASNs. Each instruction type serves a specific
121 : : * role in trie traversal and evaluation.
122 : : */
123 : : enum class Instruction : uint32_t
124 : : {
125 : : // A return instruction, encoded as [0], returns a constant ASN.
126 : : // It is followed by an integer using the ASN encoding.
127 : : RETURN = 0,
128 : : // A jump instruction, encoded as [1,0], inspects the next unused bit in the input
129 : : // and either continues execution (if 0), or skips a specified number of bits (if 1).
130 : : // It is followed by an integer using jump encoding.
131 : : JUMP = 1,
132 : : // A match instruction, encoded as [1,1,0], inspects 1 or more of the next unused bits
133 : : // in the input. If they all match, execution continues. If not, the default ASN is returned
134 : : // (or 0 if unset). The match value encodes both the pattern and its length.
135 : : MATCH = 2,
136 : : // A default instruction, encoded as [1,1,1], sets the default variable to its argument,
137 : : // and continues execution. It is followed by an integer in ASN encoding.
138 : : DEFAULT = 3,
139 : : };
140 : :
141 : : // Instruction type encoding: RETURN=[0], JUMP=[1,0], MATCH=[1,1,0], DEFAULT=[1,1,1]
142 : : constexpr uint8_t TYPE_BIT_SIZES[]{0, 0, 1};
143 : 34173622 : Instruction DecodeType(size_t& bitpos, const std::span<const std::byte> data)
144 : : {
145 : 34173622 : return Instruction(DecodeBits(bitpos, data, 0, TYPE_BIT_SIZES));
146 : : }
147 : :
148 : : // ASN encoding: Can encode ASNs from 1 to ~16.7 million.
149 : : // Uses variable-length encoding optimized for real-world ASN distribution.
150 : : // ASN 0 is reserved and used if there isn't a match.
151 : : constexpr uint8_t ASN_BIT_SIZES[]{15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
152 : 17521102 : uint32_t DecodeASN(size_t& bitpos, const std::span<const std::byte> data)
153 : : {
154 : 17521102 : return DecodeBits(bitpos, data, 1, ASN_BIT_SIZES);
155 : : }
156 : :
157 : : // MATCH argument: Values in [2, 511]. The highest set bit determines the match length
158 : : // n ∈ [1,8]; the lower n-1 bits are the pattern to compare.
159 : : constexpr uint8_t MATCH_BIT_SIZES[]{1, 2, 3, 4, 5, 6, 7, 8};
160 : 8615730 : uint32_t DecodeMatch(size_t& bitpos, const std::span<const std::byte> data)
161 : : {
162 : 8615730 : return DecodeBits(bitpos, data, 2, MATCH_BIT_SIZES);
163 : : }
164 : :
165 : : // JUMP offset: Minimum value 17. Variable-length coded and may be large
166 : : // for skipping big subtrees.
167 : : constexpr uint8_t JUMP_BIT_SIZES[]{5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
168 : 8028054 : uint32_t DecodeJump(size_t& bitpos, const std::span<const std::byte> data)
169 : : {
170 : 8028054 : return DecodeBits(bitpos, data, 17, JUMP_BIT_SIZES);
171 : : }
172 : :
173 : : } // anonymous namespace
174 : :
175 : : /**
176 : : * Execute the ASMap bytecode to find the ASN for an IP
177 : : *
178 : : * This function interprets the asmap bytecode and uses bits from the IP
179 : : * address to navigate through the encoded trie structure, ultimately
180 : : * returning an ASN value.
181 : : */
182 : 13820136 : uint32_t Interpret(const std::span<const std::byte> asmap, const std::span<const std::byte> ip)
183 : : {
184 : 13820136 : size_t pos{0};
185 : 13820136 : const size_t endpos{asmap.size() * 8};
186 : 13820136 : uint8_t ip_bit{0};
187 : 13820136 : const uint8_t ip_bits_end = ip.size() * 8;
188 : 13820136 : uint32_t default_asn = 0;
189 [ + - ]: 27106176 : while (pos < endpos) {
190 : 27106176 : Instruction opcode = DecodeType(pos, asmap);
191 [ + + ]: 27106176 : if (opcode == Instruction::RETURN) {
192 : : // Found leaf node - return the ASN
193 : 8890379 : uint32_t asn = DecodeASN(pos, asmap);
194 [ - + ]: 8890379 : if (asn == INVALID) break; // ASN straddles EOF
195 : : return asn;
196 [ + + ]: 18215797 : } else if (opcode == Instruction::JUMP) {
197 : : // Binary branch: if IP bit is 1, jump forward; else continue
198 : 5960880 : uint32_t jump = DecodeJump(pos, asmap);
199 [ + - ]: 5960880 : if (jump == INVALID) break; // Jump offset straddles EOF
200 [ + - ]: 5960880 : if (ip_bit == ip_bits_end) break; // No input bits left
201 [ + - ]: 5960880 : if (int64_t{jump} >= static_cast<int64_t>(endpos - pos)) break; // Jumping past EOF
202 [ + + ]: 5960880 : if (ConsumeBitBE(ip_bit, ip)) { // Check next IP bit (big-endian)
203 : 2112279 : pos += jump; // Bit = 1: skip to right subtree
204 : : }
205 : : // Bit = 0: fall through to left subtree
206 [ + + ]: 12254917 : } else if (opcode == Instruction::MATCH) {
207 : : // Compare multiple IP bits against a pattern
208 : : // The match value encodes both length and pattern:
209 : : // - highest set bit position determines length (bit_width - 1)
210 : : // - lower bits contain the pattern to compare
211 : 7508788 : uint32_t match = DecodeMatch(pos, asmap);
212 [ + - ]: 7508788 : if (match == INVALID) break; // Match bits straddle EOF
213 [ + - ]: 7508788 : int matchlen = std::bit_width(match) - 1; // An n-bit value matches n-1 input bits
214 [ + - ]: 7508788 : if ((ip_bits_end - ip_bit) < matchlen) break; // Not enough input bits
215 [ + + ]: 15207160 : for (int bit = 0; bit < matchlen; bit++) {
216 [ + + ]: 12628129 : if (ConsumeBitBE(ip_bit, ip) != ((match >> (matchlen - 1 - bit)) & 1)) {
217 : : return default_asn; // Pattern mismatch - use default
218 : : }
219 : : }
220 : : // Pattern matched - continue execution
221 [ + - ]: 4746129 : } else if (opcode == Instruction::DEFAULT) {
222 : : // Update the default ASN for subsequent MATCH failures
223 : 4746129 : default_asn = DecodeASN(pos, asmap);
224 [ + - ]: 4746129 : if (default_asn == INVALID) break; // ASN straddles EOF
225 : : } else {
226 : : break; // Instruction straddles EOF
227 : : }
228 : : }
229 : : // Reached EOF without RETURN, or aborted (see any of the breaks above)
230 : : // - should have been caught by SanityCheckAsmap below
231 : 0 : assert(false);
232 : : return 0; // 0 is not a valid ASN
233 : : }
234 : :
235 : : /**
236 : : * Validates ASMap structure by simulating all possible execution paths.
237 : : * Ensures well-formed bytecode, valid jumps, and proper termination.
238 : : */
239 : 128130 : bool SanityCheckAsmap(const std::span<const std::byte> asmap, int bits)
240 : : {
241 : 128130 : size_t pos{0};
242 [ + - ]: 128130 : const size_t endpos{asmap.size() * 8};
243 : 128130 : std::vector<std::pair<uint32_t, int>> jumps; // All future positions we may jump to (bit offset in asmap -> bits to consume left)
244 [ + - ]: 128130 : jumps.reserve(bits);
245 : : Instruction prevopcode = Instruction::JUMP;
246 : : bool had_incomplete_match = false; // Track <8 bit matches for efficiency check
247 : :
248 [ + + ]: 7075134 : while (pos != endpos) {
249 : : // There was a jump into the middle of the previous instruction
250 [ + + + + ]: 7067510 : if (!jumps.empty() && pos >= jumps.back().first) return false;
251 : :
252 : 7067446 : Instruction opcode = DecodeType(pos, asmap);
253 [ + + ]: 7067446 : if (opcode == Instruction::RETURN) {
254 : : // There should not be any RETURN immediately after a DEFAULT (could be combined into just RETURN)
255 [ + + ]: 2051032 : if (prevopcode == Instruction::DEFAULT) return false;
256 : 2043685 : uint32_t asn = DecodeASN(pos, asmap);
257 [ + + ]: 2043685 : if (asn == INVALID) return false; // ASN straddles EOF
258 [ + + ]: 2029769 : if (jumps.empty()) {
259 : : // Nothing to execute anymore
260 [ + + ]: 5401 : if (endpos - pos > 7) return false; // Excessive padding
261 [ + + ]: 13857 : while (pos != endpos) {
262 [ + + ]: 10470 : if (ConsumeBitLE(pos, asmap)) return false; // Nonzero padding bit
263 : : }
264 : : return true; // Sanely reached EOF
265 : : } else {
266 : : // Continue by pretending we jumped to the next instruction
267 [ + + ]: 2024368 : if (pos != jumps.back().first) return false; // Unreachable code
268 : 2024180 : bits = jumps.back().second; // Restore the number of bits we would have had left after this jump
269 : 2024180 : jumps.pop_back();
270 : 2024180 : prevopcode = Instruction::JUMP;
271 : : }
272 [ + + ]: 5016414 : } else if (opcode == Instruction::JUMP) {
273 : 2067174 : uint32_t jump = DecodeJump(pos, asmap);
274 [ + + ]: 2067174 : if (jump == INVALID) return false; // Jump offset straddles EOF
275 [ + + ]: 2058824 : if (int64_t{jump} > static_cast<int64_t>(endpos - pos)) return false; // Jump out of range
276 [ + + ]: 2025007 : if (bits == 0) return false; // Consuming bits past the end of the input
277 : 2024988 : --bits;
278 : 2024988 : uint32_t jump_offset = pos + jump;
279 [ + + + + ]: 2024988 : if (!jumps.empty() && jump_offset >= jumps.back().first) return false; // Intersecting jumps
280 [ + - ]: 2024823 : jumps.emplace_back(jump_offset, bits); // Queue jump target for validation
281 : : prevopcode = Instruction::JUMP;
282 [ + + ]: 2949240 : } else if (opcode == Instruction::MATCH) {
283 : 1106942 : uint32_t match = DecodeMatch(pos, asmap);
284 [ + + ]: 1106942 : if (match == INVALID) return false; // Match bits straddle EOF
285 [ + - ]: 1104319 : int matchlen = std::bit_width(match) - 1;
286 [ + + ]: 1104319 : if (prevopcode != Instruction::MATCH) had_incomplete_match = false;
287 : : // Within a sequence of matches only at most one should be incomplete
288 [ + + ]: 1104319 : if (matchlen < 8 && had_incomplete_match) return false;
289 : 1103282 : had_incomplete_match = (matchlen < 8);
290 [ + + ]: 1103282 : if (bits < matchlen) return false; // Consuming bits past the end of the input
291 : 1103254 : bits -= matchlen;
292 : 1103254 : prevopcode = Instruction::MATCH;
293 [ + + ]: 1842298 : } else if (opcode == Instruction::DEFAULT) {
294 : : // There should not be two successive DEFAULTs (they could be combined into one)
295 [ + + ]: 1841215 : if (prevopcode == Instruction::DEFAULT) return false;
296 : 1840909 : uint32_t asn = DecodeASN(pos, asmap);
297 [ + + ]: 1840909 : if (asn == INVALID) return false; // ASN straddles EOF
298 : : prevopcode = Instruction::DEFAULT;
299 : : } else {
300 : : return false; // Instruction straddles EOF
301 : : }
302 : : }
303 : : return false; // Reached EOF without RETURN instruction
304 : 128130 : }
305 : :
306 : : /**
307 : : * Provides a safe interface for validating ASMap data before use.
308 : : * Returns true if the data is valid for 128 bits long inputs.
309 : : */
310 : 11453 : bool CheckStandardAsmap(const std::span<const std::byte> data)
311 : : {
312 [ + + ]: 11453 : if (!SanityCheckAsmap(data, 128)) {
313 : 8252 : LogWarning("Sanity check of asmap data failed\n");
314 : 8252 : return false;
315 : : }
316 : : return true;
317 : : }
318 : :
319 : : /**
320 : : * Loads an ASMap file from disk and validates it.
321 : : */
322 : 0 : std::vector<std::byte> DecodeAsmap(fs::path path)
323 : : {
324 : 0 : FILE *filestr = fsbridge::fopen(path, "rb");
325 : 0 : AutoFile file{filestr};
326 [ # # ]: 0 : if (file.IsNull()) {
327 [ # # ]: 0 : LogWarning("Failed to open asmap file from disk");
328 : 0 : return {};
329 : : }
330 [ # # ]: 0 : int64_t length{file.size()};
331 [ # # # # ]: 0 : LogInfo("Opened asmap file %s (%d bytes) from disk", fs::quoted(fs::PathToString(path)), length);
332 : :
333 : : // Read entire file into memory
334 [ # # ]: 0 : std::vector<std::byte> buffer(length);
335 [ # # # # ]: 0 : file.read(buffer);
336 : :
337 [ # # # # : 0 : if (!CheckStandardAsmap(buffer)) {
# # ]
338 [ # # # # ]: 0 : LogWarning("Sanity check of asmap file %s failed", fs::quoted(fs::PathToString(path)));
339 : 0 : return {};
340 : : }
341 : :
342 : 0 : return buffer;
343 : 0 : }
344 : :
345 : : /**
346 : : * Computes SHA256 hash of ASMap data for versioning and consistency checks.
347 : : */
348 : 8757 : uint256 AsmapVersion(const std::span<const std::byte> data)
349 : : {
350 [ + + ]: 8757 : if (data.empty()) return {};
351 : :
352 : 3230 : HashWriter asmap_hasher;
353 : 3230 : asmap_hasher << data;
354 : 3230 : return asmap_hasher.GetHash();
355 : : }
|