Branch data Line data Source code
1 : : // Copyright (c) 2019-present The Bitcoin Core developers
2 : : // Distributed under the MIT software license, see the accompanying
3 : : // file COPYING or http://www.opensource.org/licenses/mit-license.php.
4 : :
5 : : #include <util/asmap.h>
6 : :
7 : : #include <hash.h>
8 : : #include <streams.h>
9 : : #include <uint256.h>
10 : : #include <util/check.h>
11 : : #include <util/fs.h>
12 : : #include <util/log.h>
13 : :
14 : : #include <bit>
15 : : #include <cstddef>
16 : : #include <cstdio>
17 : : #include <span>
18 : : #include <string>
19 : : #include <utility>
20 : : #include <vector>
21 : :
22 : : /*
23 : : * ASMap (Autonomous System Map) Implementation
24 : : *
25 : : * Provides a compressed mapping from IP address prefixes to Autonomous System Numbers (ASNs).
26 : : * Uses a binary trie structure encoded as bytecode instructions that are interpreted
27 : : * at runtime to find the ASN for a given IP address.
28 : : *
29 : : * The format of the asmap data is a bit-packed binary format where the entire mapping
30 : : * is treated as a continuous sequence of bits. Instructions and their arguments are
31 : : * encoded using variable numbers of bits and concatenated together without regard for
32 : : * byte boundaries. The bits are stored in bytes using little-endian bit ordering.
33 : : *
34 : : * The data structure internally represents the mapping as a binary trie where:
35 : : * - Unassigned subnets (no ASN mapping present) map to 0
36 : : * - Subnets mapped entirely to one ASN become leaf nodes
37 : : * - Subnets whose lower and upper halves have different mappings branch into subtrees
38 : : *
39 : : * The encoding uses variable-length integers and four instruction types (RETURN, JUMP,
40 : : * MATCH, DEFAULT) to efficiently represent the trie.
41 : : */
42 : :
43 : : namespace {
44 : :
45 : : // Indicates decoding errors or invalid data
46 : : constexpr uint32_t INVALID = 0xFFFFFFFF;
47 : :
48 : : /**
49 : : * Extract a single bit from byte array using little-endian bit ordering (LSB first).
50 : : * Used for ASMap data.
51 : : */
52 : 25355293 : inline bool ConsumeBitLE(size_t& bitpos, std::span<const std::byte> bytes) noexcept
53 : : {
54 : 25355293 : const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (bitpos % 8)) & 1;
55 : 25355293 : ++bitpos;
56 [ + - ]: 7305002 : return bit;
57 : : }
58 : :
59 : : /**
60 : : * Extract a single bit from byte array using big-endian bit ordering (MSB first).
61 : : * Used for IP addresses to match network byte order conventions.
62 : : */
63 : 412421 : inline bool ConsumeBitBE(uint8_t& bitpos, std::span<const std::byte> bytes) noexcept
64 : : {
65 : 412421 : const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (7 - (bitpos % 8))) & 1;
66 : 412421 : ++bitpos;
67 [ + + + + ]: 412421 : return bit;
68 : : }
69 : :
70 : : /**
71 : : * Variable-length integer decoder using a custom encoding scheme.
72 : : *
73 : : * The encoding is easiest to describe using an example. Let's say minval=100 and
74 : : * bit_sizes=[4,2,2,3]. In that case:
75 : : * - x in [100..115]: encoded as [0] + [4-bit BE encoding of (x-100)]
76 : : * - x in [116..119]: encoded as [1,0] + [2-bit BE encoding of (x-116)]
77 : : * - x in [120..123]: encoded as [1,1,0] + [2-bit BE encoding of (x-120)]
78 : : * - x in [124..131]: encoded as [1,1,1] + [3-bit BE encoding of (x-124)]
79 : : *
80 : : * In general, every number is encoded as:
81 : : * - First, k "1"-bits, where k is the class the number falls in
82 : : * - Then, a "0"-bit, unless k is the highest class
83 : : * - Lastly, bit_sizes[k] bits encoding in big endian the position within that class
84 : : */
85 : 3760136 : uint32_t DecodeBits(size_t& bitpos, const std::span<const std::byte> data, uint8_t minval, const std::span<const uint8_t> bit_sizes)
86 : : {
87 : 3760136 : uint32_t val = minval; // Start with minimum encodable value
88 : 3760136 : bool bit;
89 [ + - ]: 8372888 : for (auto bit_sizes_it = bit_sizes.begin(); bit_sizes_it != bit_sizes.end(); ++bit_sizes_it) {
90 : : // Read continuation bit to determine if we're in this class
91 [ + + ]: 8372888 : if (bit_sizes_it + 1 != bit_sizes.end()) { // Unless we're in the last class
92 [ + - ]: 7304984 : if (bitpos >= data.size() * 8) break;
93 : 7304984 : bit = ConsumeBitLE(bitpos, data);
94 : : } else {
95 : : bit = 0; // Last class has no continuation bit
96 : : }
97 [ + + ]: 8372888 : if (bit) {
98 : : // If the value will not fit in this class, subtract its range from val,
99 : : // emit a "1" bit and continue with the next class
100 : 4612752 : val += (1 << *bit_sizes_it); // Add size of this class
101 : : } else {
102 : : // Decode the position within this class in big endian
103 [ + + ]: 21810427 : for (int b = 0; b < *bit_sizes_it; b++) {
104 [ + - ]: 18050291 : if (bitpos >= data.size() * 8) return INVALID; // Reached EOF in mantissa
105 : 18050291 : bit = ConsumeBitLE(bitpos, data);
106 : 18050291 : val += bit << (*bit_sizes_it - 1 - b); // Big-endian within the class
107 : : }
108 : : return val;
109 : : }
110 : : }
111 : : return INVALID; // Reached EOF in exponent
112 : : }
113 : :
114 : : /**
115 : : * Instruction Set
116 : : *
117 : : * The instruction set is designed to efficiently encode a binary trie
118 : : * that maps IP prefixes to ASNs. Each instruction type serves a specific
119 : : * role in trie traversal and evaluation.
120 : : */
121 : : enum class Instruction : uint32_t
122 : : {
123 : : // A return instruction, encoded as [0], returns a constant ASN.
124 : : // It is followed by an integer using the ASN encoding.
125 : : RETURN = 0,
126 : : // A jump instruction, encoded as [1,0], inspects the next unused bit in the input
127 : : // and either continues execution (if 0), or skips a specified number of bits (if 1).
128 : : // It is followed by an integer using jump encoding.
129 : : JUMP = 1,
130 : : // A match instruction, encoded as [1,1,0], inspects 1 or more of the next unused bits
131 : : // in the input. If they all match, execution continues. If not, the default ASN is returned
132 : : // (or 0 if unset). The match value encodes both the pattern and its length.
133 : : MATCH = 2,
134 : : // A default instruction, encoded as [1,1,1], sets the default variable to its argument,
135 : : // and continues execution. It is followed by an integer in ASN encoding.
136 : : DEFAULT = 3,
137 : : };
138 : :
139 : : // Instruction type encoding: RETURN=[0], JUMP=[1,0], MATCH=[1,1,0], DEFAULT=[1,1,1]
140 : : constexpr uint8_t TYPE_BIT_SIZES[]{0, 0, 1};
141 : 1880068 : Instruction DecodeType(size_t& bitpos, const std::span<const std::byte> data)
142 : : {
143 : 1880068 : return Instruction(DecodeBits(bitpos, data, 0, TYPE_BIT_SIZES));
144 : : }
145 : :
146 : : // ASN encoding: Can encode ASNs from 1 to ~16.7 million.
147 : : // Uses variable-length encoding optimized for real-world ASN distribution.
148 : : // ASN 0 is reserved and used if there isn't a match.
149 : : constexpr uint8_t ASN_BIT_SIZES[]{15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
150 : 802891 : uint32_t DecodeASN(size_t& bitpos, const std::span<const std::byte> data)
151 : : {
152 : 802891 : return DecodeBits(bitpos, data, 1, ASN_BIT_SIZES);
153 : : }
154 : :
155 : : // MATCH argument: Values in [2, 511]. The highest set bit determines the match length
156 : : // n ∈ [1,8]; the lower n-1 bits are the pattern to compare.
157 : : constexpr uint8_t MATCH_BIT_SIZES[]{1, 2, 3, 4, 5, 6, 7, 8};
158 : 637589 : uint32_t DecodeMatch(size_t& bitpos, const std::span<const std::byte> data)
159 : : {
160 : 637589 : return DecodeBits(bitpos, data, 2, MATCH_BIT_SIZES);
161 : : }
162 : :
163 : : // JUMP offset: Minimum value 17. Variable-length coded and may be large
164 : : // for skipping big subtrees.
165 : : constexpr uint8_t JUMP_BIT_SIZES[]{5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
166 : 439588 : uint32_t DecodeJump(size_t& bitpos, const std::span<const std::byte> data)
167 : : {
168 : 439588 : return DecodeBits(bitpos, data, 17, JUMP_BIT_SIZES);
169 : : }
170 : :
171 : : } // anonymous namespace
172 : :
173 : : /**
174 : : * Execute the ASMap bytecode to find the ASN for an IP
175 : : *
176 : : * This function interprets the asmap bytecode and uses bits from the IP
177 : : * address to navigate through the encoded trie structure, ultimately
178 : : * returning an ASN value.
179 : : */
180 : 4210 : uint32_t Interpret(const std::span<const std::byte> asmap, const std::span<const std::byte> ip)
181 : : {
182 : 4210 : size_t pos{0};
183 : 4210 : const size_t endpos{asmap.size() * 8};
184 : 4210 : uint8_t ip_bit{0};
185 : 4210 : const uint8_t ip_bits_end = ip.size() * 8;
186 : 4210 : uint32_t default_asn = 0;
187 [ + - ]: 59605 : while (pos < endpos) {
188 : 59605 : Instruction opcode = DecodeType(pos, asmap);
189 [ + + ]: 59605 : if (opcode == Instruction::RETURN) {
190 : : // Found leaf node - return the ASN
191 : 3724 : uint32_t asn = DecodeASN(pos, asmap);
192 [ - + ]: 3724 : if (asn == INVALID) break; // ASN straddles EOF
193 : : return asn;
194 [ + + ]: 55881 : } else if (opcode == Instruction::JUMP) {
195 : : // Binary branch: if IP bit is 1, jump forward; else continue
196 : 4606 : uint32_t jump = DecodeJump(pos, asmap);
197 [ + - ]: 4606 : if (jump == INVALID) break; // Jump offset straddles EOF
198 [ + - ]: 4606 : if (ip_bit == ip_bits_end) break; // No input bits left
199 [ + - ]: 4606 : if (int64_t{jump} >= static_cast<int64_t>(endpos - pos)) break; // Jumping past EOF
200 [ + + ]: 4606 : if (ConsumeBitBE(ip_bit, ip)) { // Check next IP bit (big-endian)
201 : 3682 : pos += jump; // Bit = 1: skip to right subtree
202 : : }
203 : : // Bit = 0: fall through to left subtree
204 [ + + ]: 51275 : } else if (opcode == Instruction::MATCH) {
205 : : // Compare multiple IP bits against a pattern
206 : : // The match value encodes both length and pattern:
207 : : // - highest set bit position determines length (bit_width - 1)
208 : : // - lower bits contain the pattern to compare
209 : 51253 : uint32_t match = DecodeMatch(pos, asmap);
210 [ + - ]: 51253 : if (match == INVALID) break; // Match bits straddle EOF
211 [ + - ]: 51253 : int matchlen = std::bit_width(match) - 1; // An n-bit value matches n-1 input bits
212 [ + - ]: 51253 : if ((ip_bits_end - ip_bit) < matchlen) break; // Not enough input bits
213 [ + + ]: 458582 : for (int bit = 0; bit < matchlen; bit++) {
214 [ + + ]: 407815 : if (ConsumeBitBE(ip_bit, ip) != ((match >> (matchlen - 1 - bit)) & 1)) {
215 : : return default_asn; // Pattern mismatch - use default
216 : : }
217 : : }
218 : : // Pattern matched - continue execution
219 [ + - ]: 22 : } else if (opcode == Instruction::DEFAULT) {
220 : : // Update the default ASN for subsequent MATCH failures
221 : 22 : default_asn = DecodeASN(pos, asmap);
222 [ + - ]: 22 : if (default_asn == INVALID) break; // ASN straddles EOF
223 : : } else {
224 : : break; // Instruction straddles EOF
225 : : }
226 : : }
227 : : // Reached EOF without RETURN, or aborted (see any of the breaks above)
228 : : // - should have been caught by SanityCheckAsmap below
229 : 0 : assert(false);
230 : : return 0; // 0 is not a valid ASN
231 : : }
232 : :
233 : : /**
234 : : * Validates ASMap structure by simulating all possible execution paths.
235 : : * Ensures well-formed bytecode, valid jumps, and proper termination.
236 : : */
237 : 8 : bool SanityCheckAsmap(const std::span<const std::byte> asmap, int bits)
238 : : {
239 : 8 : size_t pos{0};
240 [ + - ]: 8 : const size_t endpos{asmap.size() * 8};
241 : 8 : std::vector<std::pair<uint32_t, int>> jumps; // All future positions we may jump to (bit offset in asmap -> bits to consume left)
242 [ + - ]: 8 : jumps.reserve(bits);
243 : : Instruction prevopcode = Instruction::JUMP;
244 : : bool had_incomplete_match = false; // Track <8 bit matches for efficiency check
245 : :
246 [ + + ]: 1820464 : while (pos != endpos) {
247 : : // There was a jump into the middle of the previous instruction
248 [ + + + - ]: 1820463 : if (!jumps.empty() && pos >= jumps.back().first) return false;
249 : :
250 : 1820463 : Instruction opcode = DecodeType(pos, asmap);
251 [ + + ]: 1820463 : if (opcode == Instruction::RETURN) {
252 : : // There should not be any RETURN immediately after a DEFAULT (could be combined into just RETURN)
253 [ + - ]: 434989 : if (prevopcode == Instruction::DEFAULT) return false;
254 : 434989 : uint32_t asn = DecodeASN(pos, asmap);
255 [ + - ]: 434989 : if (asn == INVALID) return false; // ASN straddles EOF
256 [ + + ]: 434989 : if (jumps.empty()) {
257 : : // Nothing to execute anymore
258 [ + - ]: 7 : if (endpos - pos > 7) return false; // Excessive padding
259 [ + + ]: 25 : while (pos != endpos) {
260 [ + - ]: 18 : if (ConsumeBitLE(pos, asmap)) return false; // Nonzero padding bit
261 : : }
262 : : return true; // Sanely reached EOF
263 : : } else {
264 : : // Continue by pretending we jumped to the next instruction
265 [ + - ]: 434982 : if (pos != jumps.back().first) return false; // Unreachable code
266 : 434982 : bits = jumps.back().second; // Restore the number of bits we would have had left after this jump
267 : 434982 : jumps.pop_back();
268 : 434982 : prevopcode = Instruction::JUMP;
269 : : }
270 [ + + ]: 1385474 : } else if (opcode == Instruction::JUMP) {
271 : 434982 : uint32_t jump = DecodeJump(pos, asmap);
272 [ + - ]: 434982 : if (jump == INVALID) return false; // Jump offset straddles EOF
273 [ + - ]: 434982 : if (int64_t{jump} > static_cast<int64_t>(endpos - pos)) return false; // Jump out of range
274 [ + - ]: 434982 : if (bits == 0) return false; // Consuming bits past the end of the input
275 : 434982 : --bits;
276 : 434982 : uint32_t jump_offset = pos + jump;
277 [ + + + - ]: 434982 : if (!jumps.empty() && jump_offset >= jumps.back().first) return false; // Intersecting jumps
278 [ + - ]: 434982 : jumps.emplace_back(jump_offset, bits); // Queue jump target for validation
279 : : prevopcode = Instruction::JUMP;
280 [ + + ]: 950492 : } else if (opcode == Instruction::MATCH) {
281 : 586336 : uint32_t match = DecodeMatch(pos, asmap);
282 [ + - ]: 586336 : if (match == INVALID) return false; // Match bits straddle EOF
283 [ + - ]: 586336 : int matchlen = std::bit_width(match) - 1;
284 [ + + ]: 586336 : if (prevopcode != Instruction::MATCH) had_incomplete_match = false;
285 : : // Within a sequence of matches only at most one should be incomplete
286 [ + - ]: 586336 : if (matchlen < 8 && had_incomplete_match) return false;
287 : 586336 : had_incomplete_match = (matchlen < 8);
288 [ + - ]: 586336 : if (bits < matchlen) return false; // Consuming bits past the end of the input
289 : 586336 : bits -= matchlen;
290 : 586336 : prevopcode = Instruction::MATCH;
291 [ + - ]: 364156 : } else if (opcode == Instruction::DEFAULT) {
292 : : // There should not be two successive DEFAULTs (they could be combined into one)
293 [ + - ]: 364156 : if (prevopcode == Instruction::DEFAULT) return false;
294 : 364156 : uint32_t asn = DecodeASN(pos, asmap);
295 [ + - ]: 364156 : if (asn == INVALID) return false; // ASN straddles EOF
296 : : prevopcode = Instruction::DEFAULT;
297 : : } else {
298 : : return false; // Instruction straddles EOF
299 : : }
300 : : }
301 : : return false; // Reached EOF without RETURN instruction
302 : 8 : }
303 : :
304 : : /**
305 : : * Provides a safe interface for validating ASMap data before use.
306 : : * Returns true if the data is valid for 128 bits long inputs.
307 : : */
308 : 8 : bool CheckStandardAsmap(const std::span<const std::byte> data)
309 : : {
310 [ + + ]: 8 : if (!SanityCheckAsmap(data, 128)) {
311 : 1 : LogWarning("Sanity check of asmap data failed\n");
312 : 1 : return false;
313 : : }
314 : : return true;
315 : : }
316 : :
317 : : /**
318 : : * Loads an ASMap file from disk and validates it.
319 : : */
320 : 6 : std::vector<std::byte> DecodeAsmap(fs::path path)
321 : : {
322 : 6 : FILE *filestr = fsbridge::fopen(path, "rb");
323 : 12 : AutoFile file{filestr};
324 [ - + ]: 6 : if (file.IsNull()) {
325 [ # # ]: 0 : LogWarning("Failed to open asmap file from disk");
326 : 0 : return {};
327 : : }
328 [ + - ]: 6 : int64_t length{file.size()};
329 [ - + + - ]: 12 : LogInfo("Opened asmap file %s (%d bytes) from disk", fs::quoted(fs::PathToString(path)), length);
330 : :
331 : : // Read entire file into memory
332 [ + - ]: 6 : std::vector<std::byte> buffer(length);
333 [ - + + - ]: 6 : file.read(buffer);
334 : :
335 [ - + + - : 6 : if (!CheckStandardAsmap(buffer)) {
+ + ]
336 [ - + + - ]: 2 : LogWarning("Sanity check of asmap file %s failed", fs::quoted(fs::PathToString(path)));
337 : 1 : return {};
338 : : }
339 : :
340 : 5 : return buffer;
341 : 6 : }
342 : :
343 : : /**
344 : : * Computes SHA256 hash of ASMap data for versioning and consistency checks.
345 : : */
346 : 2149 : uint256 AsmapVersion(const std::span<const std::byte> data)
347 : : {
348 [ + + ]: 2149 : if (data.empty()) return {};
349 : :
350 : 26 : HashWriter asmap_hasher;
351 : 26 : asmap_hasher << data;
352 : 26 : return asmap_hasher.GetHash();
353 : : }
|