LCOV - code coverage report
Current view: top level - src/util - asmap.cpp (source / functions) Coverage Total Hit
Test: total_coverage.info Lines: 97.6 % 126 123
Test Date: 2026-04-07 04:59:12 Functions: 100.0 % 10 10
Branches: 65.9 % 138 91

             Branch data     Line data    Source code
       1                 :             : // Copyright (c) 2019-present The Bitcoin Core developers
       2                 :             : // Distributed under the MIT software license, see the accompanying
       3                 :             : // file COPYING or http://www.opensource.org/licenses/mit-license.php.
       4                 :             : 
       5                 :             : #include <util/asmap.h>
       6                 :             : 
       7                 :             : #include <hash.h>
       8                 :             : #include <streams.h>
       9                 :             : #include <uint256.h>
      10                 :             : #include <util/check.h>
      11                 :             : #include <util/fs.h>
      12                 :             : #include <util/log.h>
      13                 :             : 
      14                 :             : #include <bit>
      15                 :             : #include <cstddef>
      16                 :             : #include <cstdio>
      17                 :             : #include <span>
      18                 :             : #include <string>
      19                 :             : #include <utility>
      20                 :             : #include <vector>
      21                 :             : 
      22                 :             : /*
      23                 :             :  * ASMap (Autonomous System Map) Implementation
      24                 :             :  *
      25                 :             :  * Provides a compressed mapping from IP address prefixes to Autonomous System Numbers (ASNs).
      26                 :             :  * Uses a binary trie structure encoded as bytecode instructions that are interpreted
      27                 :             :  * at runtime to find the ASN for a given IP address.
      28                 :             :  *
      29                 :             :  * The format of the asmap data is a bit-packed binary format where the entire mapping
      30                 :             :  * is treated as a continuous sequence of bits. Instructions and their arguments are
      31                 :             :  * encoded using variable numbers of bits and concatenated together without regard for
      32                 :             :  * byte boundaries. The bits are stored in bytes using little-endian bit ordering.
      33                 :             :  *
      34                 :             :  * The data structure internally represents the mapping as a binary trie where:
      35                 :             :  * - Unassigned subnets (no ASN mapping present) map to 0
      36                 :             :  * - Subnets mapped entirely to one ASN become leaf nodes
      37                 :             :  * - Subnets whose lower and upper halves have different mappings branch into subtrees
      38                 :             :  *
      39                 :             :  * The encoding uses variable-length integers and four instruction types (RETURN, JUMP,
      40                 :             :  * MATCH, DEFAULT) to efficiently represent the trie.
      41                 :             :  */
      42                 :             : 
      43                 :             : namespace {
      44                 :             : 
      45                 :             : // Indicates decoding errors or invalid data
      46                 :             : constexpr uint32_t INVALID = 0xFFFFFFFF;
      47                 :             : 
      48                 :             : /**
      49                 :             :  * Extract a single bit from byte array using little-endian bit ordering (LSB first).
      50                 :             :  * Used for ASMap data.
      51                 :             :  */
      52                 :    25355293 : inline bool ConsumeBitLE(size_t& bitpos, std::span<const std::byte> bytes) noexcept
      53                 :             : {
      54                 :    25355293 :     const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (bitpos % 8)) & 1;
      55                 :    25355293 :     ++bitpos;
      56         [ +  - ]:     7305002 :     return bit;
      57                 :             : }
      58                 :             : 
      59                 :             : /**
      60                 :             :  * Extract a single bit from byte array using big-endian bit ordering (MSB first).
      61                 :             :  * Used for IP addresses to match network byte order conventions.
      62                 :             :  */
      63                 :      412421 : inline bool ConsumeBitBE(uint8_t& bitpos, std::span<const std::byte> bytes) noexcept
      64                 :             : {
      65                 :      412421 :     const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (7 - (bitpos % 8))) & 1;
      66                 :      412421 :     ++bitpos;
      67   [ +  +  +  + ]:      412421 :     return bit;
      68                 :             : }
      69                 :             : 
      70                 :             : /**
      71                 :             :  * Variable-length integer decoder using a custom encoding scheme.
      72                 :             :  *
      73                 :             :  * The encoding is easiest to describe using an example. Let's say minval=100 and
      74                 :             :  * bit_sizes=[4,2,2,3]. In that case:
      75                 :             :  * - x in [100..115]: encoded as [0] + [4-bit BE encoding of (x-100)]
      76                 :             :  * - x in [116..119]: encoded as [1,0] + [2-bit BE encoding of (x-116)]
      77                 :             :  * - x in [120..123]: encoded as [1,1,0] + [2-bit BE encoding of (x-120)]
      78                 :             :  * - x in [124..131]: encoded as [1,1,1] + [3-bit BE encoding of (x-124)]
      79                 :             :  *
      80                 :             :  * In general, every number is encoded as:
      81                 :             :  * - First, k "1"-bits, where k is the class the number falls in
      82                 :             :  * - Then, a "0"-bit, unless k is the highest class
      83                 :             :  * - Lastly, bit_sizes[k] bits encoding in big endian the position within that class
      84                 :             :  */
      85                 :     3760136 : uint32_t DecodeBits(size_t& bitpos, const std::span<const std::byte> data, uint8_t minval, const std::span<const uint8_t> bit_sizes)
      86                 :             : {
      87                 :     3760136 :     uint32_t val = minval;  // Start with minimum encodable value
      88                 :     3760136 :     bool bit;
      89         [ +  - ]:     8372888 :     for (auto bit_sizes_it = bit_sizes.begin(); bit_sizes_it != bit_sizes.end(); ++bit_sizes_it) {
      90                 :             :         // Read continuation bit to determine if we're in this class
      91         [ +  + ]:     8372888 :         if (bit_sizes_it + 1 != bit_sizes.end()) {  // Unless we're in the last class
      92         [ +  - ]:     7304984 :             if (bitpos >= data.size() * 8) break;
      93                 :     7304984 :             bit = ConsumeBitLE(bitpos, data);
      94                 :             :         } else {
      95                 :             :             bit = 0;  // Last class has no continuation bit
      96                 :             :         }
      97         [ +  + ]:     8372888 :         if (bit) {
      98                 :             :             // If the value will not fit in this class, subtract its range from val,
      99                 :             :             // emit a "1" bit and continue with the next class
     100                 :     4612752 :             val += (1 << *bit_sizes_it);  // Add size of this class
     101                 :             :         } else {
     102                 :             :             // Decode the position within this class in big endian
     103         [ +  + ]:    21810427 :             for (int b = 0; b < *bit_sizes_it; b++) {
     104         [ +  - ]:    18050291 :                 if (bitpos >= data.size() * 8) return INVALID; // Reached EOF in mantissa
     105                 :    18050291 :                 bit = ConsumeBitLE(bitpos, data);
     106                 :    18050291 :                 val += bit << (*bit_sizes_it - 1 - b); // Big-endian within the class
     107                 :             :             }
     108                 :             :             return val;
     109                 :             :         }
     110                 :             :     }
     111                 :             :     return INVALID; // Reached EOF in exponent
     112                 :             : }
     113                 :             : 
     114                 :             : /**
     115                 :             :  * Instruction Set
     116                 :             :  *
     117                 :             :  * The instruction set is designed to efficiently encode a binary trie
     118                 :             :  * that maps IP prefixes to ASNs. Each instruction type serves a specific
     119                 :             :  * role in trie traversal and evaluation.
     120                 :             :  */
     121                 :             : enum class Instruction : uint32_t
     122                 :             : {
     123                 :             :     // A return instruction, encoded as [0], returns a constant ASN.
     124                 :             :     // It is followed by an integer using the ASN encoding.
     125                 :             :     RETURN = 0,
     126                 :             :     // A jump instruction, encoded as [1,0], inspects the next unused bit in the input
     127                 :             :     // and either continues execution (if 0), or skips a specified number of bits (if 1).
     128                 :             :     // It is followed by an integer using jump encoding.
     129                 :             :     JUMP = 1,
     130                 :             :     // A match instruction, encoded as [1,1,0], inspects 1 or more of the next unused bits
     131                 :             :     // in the input. If they all match, execution continues. If not, the default ASN is returned
     132                 :             :     // (or 0 if unset). The match value encodes both the pattern and its length.
     133                 :             :     MATCH = 2,
     134                 :             :     // A default instruction, encoded as [1,1,1], sets the default variable to its argument,
     135                 :             :     // and continues execution. It is followed by an integer in ASN encoding.
     136                 :             :     DEFAULT = 3,
     137                 :             : };
     138                 :             : 
     139                 :             : // Instruction type encoding: RETURN=[0], JUMP=[1,0], MATCH=[1,1,0], DEFAULT=[1,1,1]
     140                 :             : constexpr uint8_t TYPE_BIT_SIZES[]{0, 0, 1};
     141                 :     1880068 : Instruction DecodeType(size_t& bitpos, const std::span<const std::byte> data)
     142                 :             : {
     143                 :     1880068 :     return Instruction(DecodeBits(bitpos, data, 0, TYPE_BIT_SIZES));
     144                 :             : }
     145                 :             : 
     146                 :             : // ASN encoding: Can encode ASNs from 1 to ~16.7 million.
     147                 :             : // Uses variable-length encoding optimized for real-world ASN distribution.
     148                 :             : // ASN 0 is reserved and used if there isn't a match.
     149                 :             : constexpr uint8_t ASN_BIT_SIZES[]{15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
     150                 :      802891 : uint32_t DecodeASN(size_t& bitpos, const std::span<const std::byte> data)
     151                 :             : {
     152                 :      802891 :     return DecodeBits(bitpos, data, 1, ASN_BIT_SIZES);
     153                 :             : }
     154                 :             : 
     155                 :             : // MATCH argument: Values in [2, 511]. The highest set bit determines the match length
     156                 :             : // n ∈ [1,8]; the lower n-1 bits are the pattern to compare.
     157                 :             : constexpr uint8_t MATCH_BIT_SIZES[]{1, 2, 3, 4, 5, 6, 7, 8};
     158                 :      637589 : uint32_t DecodeMatch(size_t& bitpos, const std::span<const std::byte> data)
     159                 :             : {
     160                 :      637589 :     return DecodeBits(bitpos, data, 2, MATCH_BIT_SIZES);
     161                 :             : }
     162                 :             : 
     163                 :             : // JUMP offset: Minimum value 17. Variable-length coded and may be large
     164                 :             : // for skipping big subtrees.
     165                 :             : constexpr uint8_t JUMP_BIT_SIZES[]{5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
     166                 :      439588 : uint32_t DecodeJump(size_t& bitpos, const std::span<const std::byte> data)
     167                 :             : {
     168                 :      439588 :     return DecodeBits(bitpos, data, 17, JUMP_BIT_SIZES);
     169                 :             : }
     170                 :             : 
     171                 :             : } // anonymous namespace
     172                 :             : 
     173                 :             : /**
     174                 :             :  * Execute the ASMap bytecode to find the ASN for an IP
     175                 :             :  *
     176                 :             :  * This function interprets the asmap bytecode and uses bits from the IP
     177                 :             :  * address to navigate through the encoded trie structure, ultimately
     178                 :             :  * returning an ASN value.
     179                 :             :  */
     180                 :        4210 : uint32_t Interpret(const std::span<const std::byte> asmap, const std::span<const std::byte> ip)
     181                 :             : {
     182                 :        4210 :     size_t pos{0};
     183                 :        4210 :     const size_t endpos{asmap.size() * 8};
     184                 :        4210 :     uint8_t ip_bit{0};
     185                 :        4210 :     const uint8_t ip_bits_end = ip.size() * 8;
     186                 :        4210 :     uint32_t default_asn = 0;
     187         [ +  - ]:       59605 :     while (pos < endpos) {
     188                 :       59605 :         Instruction opcode = DecodeType(pos, asmap);
     189         [ +  + ]:       59605 :         if (opcode == Instruction::RETURN) {
     190                 :             :             // Found leaf node - return the ASN
     191                 :        3724 :             uint32_t asn = DecodeASN(pos, asmap);
     192         [ -  + ]:        3724 :             if (asn == INVALID) break; // ASN straddles EOF
     193                 :             :             return asn;
     194         [ +  + ]:       55881 :         } else if (opcode == Instruction::JUMP) {
     195                 :             :             // Binary branch: if IP bit is 1, jump forward; else continue
     196                 :        4606 :             uint32_t jump = DecodeJump(pos, asmap);
     197         [ +  - ]:        4606 :             if (jump == INVALID) break; // Jump offset straddles EOF
     198         [ +  - ]:        4606 :             if (ip_bit == ip_bits_end) break; // No input bits left
     199         [ +  - ]:        4606 :             if (int64_t{jump} >= static_cast<int64_t>(endpos - pos)) break; // Jumping past EOF
     200         [ +  + ]:        4606 :             if (ConsumeBitBE(ip_bit, ip)) {  // Check next IP bit (big-endian)
     201                 :        3682 :                 pos += jump;  // Bit = 1: skip to right subtree
     202                 :             :             }
     203                 :             :             // Bit = 0: fall through to left subtree
     204         [ +  + ]:       51275 :         } else if (opcode == Instruction::MATCH) {
     205                 :             :             // Compare multiple IP bits against a pattern
     206                 :             :             // The match value encodes both length and pattern:
     207                 :             :             // - highest set bit position determines length (bit_width - 1)
     208                 :             :             // - lower bits contain the pattern to compare
     209                 :       51253 :             uint32_t match = DecodeMatch(pos, asmap);
     210         [ +  - ]:       51253 :             if (match == INVALID) break; // Match bits straddle EOF
     211         [ +  - ]:       51253 :             int matchlen = std::bit_width(match) - 1;  // An n-bit value matches n-1 input bits
     212         [ +  - ]:       51253 :             if ((ip_bits_end - ip_bit) < matchlen) break; // Not enough input bits
     213         [ +  + ]:      458582 :             for (int bit = 0; bit < matchlen; bit++) {
     214         [ +  + ]:      407815 :                 if (ConsumeBitBE(ip_bit, ip) != ((match >> (matchlen - 1 - bit)) & 1)) {
     215                 :             :                     return default_asn;  // Pattern mismatch - use default
     216                 :             :                 }
     217                 :             :             }
     218                 :             :             // Pattern matched - continue execution
     219         [ +  - ]:          22 :         } else if (opcode == Instruction::DEFAULT) {
     220                 :             :             // Update the default ASN for subsequent MATCH failures
     221                 :          22 :             default_asn = DecodeASN(pos, asmap);
     222         [ +  - ]:          22 :             if (default_asn == INVALID) break; // ASN straddles EOF
     223                 :             :         } else {
     224                 :             :             break; // Instruction straddles EOF
     225                 :             :         }
     226                 :             :     }
     227                 :             :     // Reached EOF without RETURN, or aborted (see any of the breaks above)
     228                 :             :     // - should have been caught by SanityCheckAsmap below
     229                 :           0 :     assert(false);
     230                 :             :     return 0; // 0 is not a valid ASN
     231                 :             : }
     232                 :             : 
     233                 :             : /**
     234                 :             :  * Validates ASMap structure by simulating all possible execution paths.
     235                 :             :  * Ensures well-formed bytecode, valid jumps, and proper termination.
     236                 :             :  */
     237                 :           8 : bool SanityCheckAsmap(const std::span<const std::byte> asmap, int bits)
     238                 :             : {
     239                 :           8 :     size_t pos{0};
     240         [ +  - ]:           8 :     const size_t endpos{asmap.size() * 8};
     241                 :           8 :     std::vector<std::pair<uint32_t, int>> jumps; // All future positions we may jump to (bit offset in asmap -> bits to consume left)
     242         [ +  - ]:           8 :     jumps.reserve(bits);
     243                 :             :     Instruction prevopcode = Instruction::JUMP;
     244                 :             :     bool had_incomplete_match = false;  // Track <8 bit matches for efficiency check
     245                 :             : 
     246         [ +  + ]:     1820464 :     while (pos != endpos) {
     247                 :             :         // There was a jump into the middle of the previous instruction
     248   [ +  +  +  - ]:     1820463 :         if (!jumps.empty() && pos >= jumps.back().first) return false;
     249                 :             : 
     250                 :     1820463 :         Instruction opcode = DecodeType(pos, asmap);
     251         [ +  + ]:     1820463 :         if (opcode == Instruction::RETURN) {
     252                 :             :             // There should not be any RETURN immediately after a DEFAULT (could be combined into just RETURN)
     253         [ +  - ]:      434989 :             if (prevopcode == Instruction::DEFAULT) return false;
     254                 :      434989 :             uint32_t asn = DecodeASN(pos, asmap);
     255         [ +  - ]:      434989 :             if (asn == INVALID) return false; // ASN straddles EOF
     256         [ +  + ]:      434989 :             if (jumps.empty()) {
     257                 :             :                 // Nothing to execute anymore
     258         [ +  - ]:           7 :                 if (endpos - pos > 7) return false; // Excessive padding
     259         [ +  + ]:          25 :                 while (pos != endpos) {
     260         [ +  - ]:          18 :                     if (ConsumeBitLE(pos, asmap)) return false; // Nonzero padding bit
     261                 :             :                 }
     262                 :             :                 return true; // Sanely reached EOF
     263                 :             :             } else {
     264                 :             :                 // Continue by pretending we jumped to the next instruction
     265         [ +  - ]:      434982 :                 if (pos != jumps.back().first) return false; // Unreachable code
     266                 :      434982 :                 bits = jumps.back().second; // Restore the number of bits we would have had left after this jump
     267                 :      434982 :                 jumps.pop_back();
     268                 :      434982 :                 prevopcode = Instruction::JUMP;
     269                 :             :             }
     270         [ +  + ]:     1385474 :         } else if (opcode == Instruction::JUMP) {
     271                 :      434982 :             uint32_t jump = DecodeJump(pos, asmap);
     272         [ +  - ]:      434982 :             if (jump == INVALID) return false; // Jump offset straddles EOF
     273         [ +  - ]:      434982 :             if (int64_t{jump} > static_cast<int64_t>(endpos - pos)) return false; // Jump out of range
     274         [ +  - ]:      434982 :             if (bits == 0) return false; // Consuming bits past the end of the input
     275                 :      434982 :             --bits;
     276                 :      434982 :             uint32_t jump_offset = pos + jump;
     277   [ +  +  +  - ]:      434982 :             if (!jumps.empty() && jump_offset >= jumps.back().first) return false; // Intersecting jumps
     278         [ +  - ]:      434982 :             jumps.emplace_back(jump_offset, bits);  // Queue jump target for validation
     279                 :             :             prevopcode = Instruction::JUMP;
     280         [ +  + ]:      950492 :         } else if (opcode == Instruction::MATCH) {
     281                 :      586336 :             uint32_t match = DecodeMatch(pos, asmap);
     282         [ +  - ]:      586336 :             if (match == INVALID) return false; // Match bits straddle EOF
     283         [ +  - ]:      586336 :             int matchlen = std::bit_width(match) - 1;
     284         [ +  + ]:      586336 :             if (prevopcode != Instruction::MATCH) had_incomplete_match = false;
     285                 :             :             // Within a sequence of matches only at most one should be incomplete
     286         [ +  - ]:      586336 :             if (matchlen < 8 && had_incomplete_match) return false;
     287                 :      586336 :             had_incomplete_match = (matchlen < 8);
     288         [ +  - ]:      586336 :             if (bits < matchlen) return false; // Consuming bits past the end of the input
     289                 :      586336 :             bits -= matchlen;
     290                 :      586336 :             prevopcode = Instruction::MATCH;
     291         [ +  - ]:      364156 :         } else if (opcode == Instruction::DEFAULT) {
     292                 :             :             // There should not be two successive DEFAULTs (they could be combined into one)
     293         [ +  - ]:      364156 :             if (prevopcode == Instruction::DEFAULT) return false;
     294                 :      364156 :             uint32_t asn = DecodeASN(pos, asmap);
     295         [ +  - ]:      364156 :             if (asn == INVALID) return false; // ASN straddles EOF
     296                 :             :             prevopcode = Instruction::DEFAULT;
     297                 :             :         } else {
     298                 :             :             return false; // Instruction straddles EOF
     299                 :             :         }
     300                 :             :     }
     301                 :             :     return false; // Reached EOF without RETURN instruction
     302                 :           8 : }
     303                 :             : 
     304                 :             : /**
     305                 :             :  * Provides a safe interface for validating ASMap data before use.
     306                 :             :  * Returns true if the data is valid for 128 bits long inputs.
     307                 :             :  */
     308                 :           8 : bool CheckStandardAsmap(const std::span<const std::byte> data)
     309                 :             : {
     310         [ +  + ]:           8 :     if (!SanityCheckAsmap(data, 128)) {
     311                 :           1 :         LogWarning("Sanity check of asmap data failed\n");
     312                 :           1 :         return false;
     313                 :             :     }
     314                 :             :     return true;
     315                 :             : }
     316                 :             : 
     317                 :             : /**
     318                 :             :  * Loads an ASMap file from disk and validates it.
     319                 :             :  */
     320                 :           6 : std::vector<std::byte> DecodeAsmap(fs::path path)
     321                 :             : {
     322                 :           6 :     FILE *filestr = fsbridge::fopen(path, "rb");
     323                 :          12 :     AutoFile file{filestr};
     324         [ -  + ]:           6 :     if (file.IsNull()) {
     325         [ #  # ]:           0 :         LogWarning("Failed to open asmap file from disk");
     326                 :           0 :         return {};
     327                 :             :     }
     328         [ +  - ]:           6 :     int64_t length{file.size()};
     329   [ -  +  +  - ]:          12 :     LogInfo("Opened asmap file %s (%d bytes) from disk", fs::quoted(fs::PathToString(path)), length);
     330                 :             : 
     331                 :             :     // Read entire file into memory
     332         [ +  - ]:           6 :     std::vector<std::byte> buffer(length);
     333   [ -  +  +  - ]:           6 :     file.read(buffer);
     334                 :             : 
     335   [ -  +  +  -  :           6 :     if (!CheckStandardAsmap(buffer)) {
                   +  + ]
     336   [ -  +  +  - ]:           2 :         LogWarning("Sanity check of asmap file %s failed", fs::quoted(fs::PathToString(path)));
     337                 :           1 :         return {};
     338                 :             :     }
     339                 :             : 
     340                 :           5 :     return buffer;
     341                 :           6 : }
     342                 :             : 
     343                 :             : /**
     344                 :             :  * Computes SHA256 hash of ASMap data for versioning and consistency checks.
     345                 :             :  */
     346                 :        2149 : uint256 AsmapVersion(const std::span<const std::byte> data)
     347                 :             : {
     348         [ +  + ]:        2149 :     if (data.empty()) return {};
     349                 :             : 
     350                 :          26 :     HashWriter asmap_hasher;
     351                 :          26 :     asmap_hasher << data;
     352                 :          26 :     return asmap_hasher.GetHash();
     353                 :             : }
        

Generated by: LCOV version 2.0-1