LCOV - code coverage report
Current view: top level - src/util - asmap.cpp (source / functions) Coverage Total Hit
Test: fuzz_coverage.info Lines: 87.3 % 126 110
Test Date: 2026-02-10 04:14:08 Functions: 90.0 % 10 9
Branches: 70.3 % 138 97

             Branch data     Line data    Source code
       1                 :             : // Copyright (c) 2019-present The Bitcoin Core developers
       2                 :             : // Distributed under the MIT software license, see the accompanying
       3                 :             : // file COPYING or http://www.opensource.org/licenses/mit-license.php.
       4                 :             : 
       5                 :             : #include <util/asmap.h>
       6                 :             : 
       7                 :             : #include <clientversion.h>
       8                 :             : #include <hash.h>
       9                 :             : #include <serialize.h>
      10                 :             : #include <streams.h>
      11                 :             : #include <uint256.h>
      12                 :             : #include <util/fs.h>
      13                 :             : #include <util/log.h>
      14                 :             : 
      15                 :             : #include <algorithm>
      16                 :             : #include <bit>
      17                 :             : #include <cassert>
      18                 :             : #include <cstddef>
      19                 :             : #include <cstdio>
      20                 :             : #include <span>
      21                 :             : #include <utility>
      22                 :             : #include <vector>
      23                 :             : 
      24                 :             : /*
      25                 :             :  * ASMap (Autonomous System Map) Implementation
      26                 :             :  *
      27                 :             :  * Provides a compressed mapping from IP address prefixes to Autonomous System Numbers (ASNs).
      28                 :             :  * Uses a binary trie structure encoded as bytecode instructions that are interpreted
      29                 :             :  * at runtime to find the ASN for a given IP address.
      30                 :             :  *
      31                 :             :  * The format of the asmap data is a bit-packed binary format where the entire mapping
      32                 :             :  * is treated as a continuous sequence of bits. Instructions and their arguments are
      33                 :             :  * encoded using variable numbers of bits and concatenated together without regard for
      34                 :             :  * byte boundaries. The bits are stored in bytes using little-endian bit ordering.
      35                 :             :  *
      36                 :             :  * The data structure internally represents the mapping as a binary trie where:
      37                 :             :  * - Unassigned subnets (no ASN mapping present) map to 0
      38                 :             :  * - Subnets mapped entirely to one ASN become leaf nodes
      39                 :             :  * - Subnets whose lower and upper halves have different mappings branch into subtrees
      40                 :             :  *
      41                 :             :  * The encoding uses variable-length integers and four instruction types (RETURN, JUMP,
      42                 :             :  * MATCH, DEFAULT) to efficiently represent the trie.
      43                 :             :  */
      44                 :             : 
      45                 :             : namespace {
      46                 :             : 
      47                 :             : // Indicates decoding errors or invalid data
      48                 :             : constexpr uint32_t INVALID = 0xFFFFFFFF;
      49                 :             : 
      50                 :             : /**
      51                 :             :  * Extract a single bit from byte array using little-endian bit ordering (LSB first).
      52                 :             :  * Used for ASMap data.
      53                 :             :  */
      54                 :   526753642 : inline bool ConsumeBitLE(size_t& bitpos, std::span<const std::byte> bytes) noexcept
      55                 :             : {
      56                 :   526753642 :     const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (bitpos % 8)) & 1;
      57                 :   526753642 :     ++bitpos;
      58         [ +  + ]:   145369377 :     return bit;
      59                 :             : }
      60                 :             : 
      61                 :             : /**
      62                 :             :  * Extract a single bit from byte array using big-endian bit ordering (MSB first).
      63                 :             :  * Used for IP addresses to match network byte order conventions.
      64                 :             :  */
      65                 :    18589009 : inline bool ConsumeBitBE(uint8_t& bitpos, std::span<const std::byte> bytes) noexcept
      66                 :             : {
      67                 :    18589009 :     const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (7 - (bitpos % 8))) & 1;
      68                 :    18589009 :     ++bitpos;
      69   [ +  +  +  + ]:    18589009 :     return bit;
      70                 :             : }
      71                 :             : 
      72                 :             : /**
      73                 :             :  * Variable-length integer decoder using a custom encoding scheme.
      74                 :             :  *
      75                 :             :  * The encoding is easiest to describe using an example. Let's say minval=100 and
      76                 :             :  * bit_sizes=[4,2,2,3]. In that case:
      77                 :             :  * - x in [100..115]: encoded as [0] + [4-bit BE encoding of (x-100)]
      78                 :             :  * - x in [116..119]: encoded as [1,0] + [2-bit BE encoding of (x-116)]
      79                 :             :  * - x in [120..123]: encoded as [1,1,0] + [2-bit BE encoding of (x-120)]
      80                 :             :  * - x in [124..131]: encoded as [1,1,1] + [3-bit BE encoding of (x-124)]
      81                 :             :  *
      82                 :             :  * In general, every number is encoded as:
      83                 :             :  * - First, k "1"-bits, where k is the class the number falls in
      84                 :             :  * - Then, a "0"-bit, unless k is the highest class
      85                 :             :  * - Lastly, bit_sizes[k] bits encoding in big endian the position within that class
      86                 :             :  */
      87                 :    68338508 : uint32_t DecodeBits(size_t& bitpos, const std::span<const std::byte> data, uint8_t minval, const std::span<const uint8_t> bit_sizes)
      88                 :             : {
      89                 :    68338508 :     uint32_t val = minval;  // Start with minimum encodable value
      90                 :    68338508 :     bool bit;
      91         [ +  - ]:   162098512 :     for (auto bit_sizes_it = bit_sizes.begin(); bit_sizes_it != bit_sizes.end(); ++bit_sizes_it) {
      92                 :             :         // Read continuation bit to determine if we're in this class
      93         [ +  + ]:   162098512 :         if (bit_sizes_it + 1 != bit_sizes.end()) {  // Unless we're in the last class
      94         [ +  + ]:   145365113 :             if (bitpos >= data.size() * 8) break;
      95                 :   145358907 :             bit = ConsumeBitLE(bitpos, data);
      96                 :             :         } else {
      97                 :             :             bit = 0;  // Last class has no continuation bit
      98                 :             :         }
      99         [ +  + ]:   162092306 :         if (bit) {
     100                 :             :             // If the value will not fit in this class, subtract its range from val,
     101                 :             :             // emit a "1" bit and continue with the next class
     102                 :    93760004 :             val += (1 << *bit_sizes_it);  // Add size of this class
     103                 :             :         } else {
     104                 :             :             // Decode the position within this class in big endian
     105         [ +  + ]:   449716567 :             for (int b = 0; b < *bit_sizes_it; b++) {
     106         [ +  + ]:   381450193 :                 if (bitpos >= data.size() * 8) return INVALID; // Reached EOF in mantissa
     107                 :   381384265 :                 bit = ConsumeBitLE(bitpos, data);
     108                 :   381384265 :                 val += bit << (*bit_sizes_it - 1 - b); // Big-endian within the class
     109                 :             :             }
     110                 :             :             return val;
     111                 :             :         }
     112                 :             :     }
     113                 :             :     return INVALID; // Reached EOF in exponent
     114                 :             : }
     115                 :             : 
     116                 :             : /**
     117                 :             :  * Instruction Set
     118                 :             :  *
     119                 :             :  * The instruction set is designed to efficiently encode a binary trie
     120                 :             :  * that maps IP prefixes to ASNs. Each instruction type serves a specific
     121                 :             :  * role in trie traversal and evaluation.
     122                 :             :  */
     123                 :             : enum class Instruction : uint32_t
     124                 :             : {
     125                 :             :     // A return instruction, encoded as [0], returns a constant ASN.
     126                 :             :     // It is followed by an integer using the ASN encoding.
     127                 :             :     RETURN = 0,
     128                 :             :     // A jump instruction, encoded as [1,0], inspects the next unused bit in the input
     129                 :             :     // and either continues execution (if 0), or skips a specified number of bits (if 1).
     130                 :             :     // It is followed by an integer using jump encoding.
     131                 :             :     JUMP = 1,
     132                 :             :     // A match instruction, encoded as [1,1,0], inspects 1 or more of the next unused bits
     133                 :             :     // in the input. If they all match, execution continues. If not, the default ASN is returned
     134                 :             :     // (or 0 if unset). The match value encodes both the pattern and its length.
     135                 :             :     MATCH = 2,
     136                 :             :     // A default instruction, encoded as [1,1,1], sets the default variable to its argument,
     137                 :             :     // and continues execution. It is followed by an integer in ASN encoding.
     138                 :             :     DEFAULT = 3,
     139                 :             : };
     140                 :             : 
     141                 :             : // Instruction type encoding: RETURN=[0], JUMP=[1,0], MATCH=[1,1,0], DEFAULT=[1,1,1]
     142                 :             : constexpr uint8_t TYPE_BIT_SIZES[]{0, 0, 1};
     143                 :    34173622 : Instruction DecodeType(size_t& bitpos, const std::span<const std::byte> data)
     144                 :             : {
     145                 :    34173622 :     return Instruction(DecodeBits(bitpos, data, 0, TYPE_BIT_SIZES));
     146                 :             : }
     147                 :             : 
     148                 :             : // ASN encoding: Can encode ASNs from 1 to ~16.7 million.
     149                 :             : // Uses variable-length encoding optimized for real-world ASN distribution.
     150                 :             : // ASN 0 is reserved and used if there isn't a match.
     151                 :             : constexpr uint8_t ASN_BIT_SIZES[]{15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
     152                 :    17521102 : uint32_t DecodeASN(size_t& bitpos, const std::span<const std::byte> data)
     153                 :             : {
     154                 :    17521102 :     return DecodeBits(bitpos, data, 1, ASN_BIT_SIZES);
     155                 :             : }
     156                 :             : 
     157                 :             : // MATCH argument: Values in [2, 511]. The highest set bit determines the match length
     158                 :             : // n ∈ [1,8]; the lower n-1 bits are the pattern to compare.
     159                 :             : constexpr uint8_t MATCH_BIT_SIZES[]{1, 2, 3, 4, 5, 6, 7, 8};
     160                 :     8615730 : uint32_t DecodeMatch(size_t& bitpos, const std::span<const std::byte> data)
     161                 :             : {
     162                 :     8615730 :     return DecodeBits(bitpos, data, 2, MATCH_BIT_SIZES);
     163                 :             : }
     164                 :             : 
     165                 :             : // JUMP offset: Minimum value 17. Variable-length coded and may be large
     166                 :             : // for skipping big subtrees.
     167                 :             : constexpr uint8_t JUMP_BIT_SIZES[]{5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
     168                 :     8028054 : uint32_t DecodeJump(size_t& bitpos, const std::span<const std::byte> data)
     169                 :             : {
     170                 :     8028054 :     return DecodeBits(bitpos, data, 17, JUMP_BIT_SIZES);
     171                 :             : }
     172                 :             : 
     173                 :             : } // anonymous namespace
     174                 :             : 
     175                 :             : /**
     176                 :             :  * Execute the ASMap bytecode to find the ASN for an IP
     177                 :             :  *
     178                 :             :  * This function interprets the asmap bytecode and uses bits from the IP
     179                 :             :  * address to navigate through the encoded trie structure, ultimately
     180                 :             :  * returning an ASN value.
     181                 :             :  */
     182                 :    13820136 : uint32_t Interpret(const std::span<const std::byte> asmap, const std::span<const std::byte> ip)
     183                 :             : {
     184                 :    13820136 :     size_t pos{0};
     185                 :    13820136 :     const size_t endpos{asmap.size() * 8};
     186                 :    13820136 :     uint8_t ip_bit{0};
     187                 :    13820136 :     const uint8_t ip_bits_end = ip.size() * 8;
     188                 :    13820136 :     uint32_t default_asn = 0;
     189         [ +  - ]:    27106176 :     while (pos < endpos) {
     190                 :    27106176 :         Instruction opcode = DecodeType(pos, asmap);
     191         [ +  + ]:    27106176 :         if (opcode == Instruction::RETURN) {
     192                 :             :             // Found leaf node - return the ASN
     193                 :     8890379 :             uint32_t asn = DecodeASN(pos, asmap);
     194         [ -  + ]:     8890379 :             if (asn == INVALID) break; // ASN straddles EOF
     195                 :             :             return asn;
     196         [ +  + ]:    18215797 :         } else if (opcode == Instruction::JUMP) {
     197                 :             :             // Binary branch: if IP bit is 1, jump forward; else continue
     198                 :     5960880 :             uint32_t jump = DecodeJump(pos, asmap);
     199         [ +  - ]:     5960880 :             if (jump == INVALID) break; // Jump offset straddles EOF
     200         [ +  - ]:     5960880 :             if (ip_bit == ip_bits_end) break; // No input bits left
     201         [ +  - ]:     5960880 :             if (int64_t{jump} >= static_cast<int64_t>(endpos - pos)) break; // Jumping past EOF
     202         [ +  + ]:     5960880 :             if (ConsumeBitBE(ip_bit, ip)) {  // Check next IP bit (big-endian)
     203                 :     2112279 :                 pos += jump;  // Bit = 1: skip to right subtree
     204                 :             :             }
     205                 :             :             // Bit = 0: fall through to left subtree
     206         [ +  + ]:    12254917 :         } else if (opcode == Instruction::MATCH) {
     207                 :             :             // Compare multiple IP bits against a pattern
     208                 :             :             // The match value encodes both length and pattern:
     209                 :             :             // - highest set bit position determines length (bit_width - 1)
     210                 :             :             // - lower bits contain the pattern to compare
     211                 :     7508788 :             uint32_t match = DecodeMatch(pos, asmap);
     212         [ +  - ]:     7508788 :             if (match == INVALID) break; // Match bits straddle EOF
     213         [ +  - ]:     7508788 :             int matchlen = std::bit_width(match) - 1;  // An n-bit value matches n-1 input bits
     214         [ +  - ]:     7508788 :             if ((ip_bits_end - ip_bit) < matchlen) break; // Not enough input bits
     215         [ +  + ]:    15207160 :             for (int bit = 0; bit < matchlen; bit++) {
     216         [ +  + ]:    12628129 :                 if (ConsumeBitBE(ip_bit, ip) != ((match >> (matchlen - 1 - bit)) & 1)) {
     217                 :             :                     return default_asn;  // Pattern mismatch - use default
     218                 :             :                 }
     219                 :             :             }
     220                 :             :             // Pattern matched - continue execution
     221         [ +  - ]:     4746129 :         } else if (opcode == Instruction::DEFAULT) {
     222                 :             :             // Update the default ASN for subsequent MATCH failures
     223                 :     4746129 :             default_asn = DecodeASN(pos, asmap);
     224         [ +  - ]:     4746129 :             if (default_asn == INVALID) break; // ASN straddles EOF
     225                 :             :         } else {
     226                 :             :             break; // Instruction straddles EOF
     227                 :             :         }
     228                 :             :     }
     229                 :             :     // Reached EOF without RETURN, or aborted (see any of the breaks above)
     230                 :             :     // - should have been caught by SanityCheckAsmap below
     231                 :           0 :     assert(false);
     232                 :             :     return 0; // 0 is not a valid ASN
     233                 :             : }
     234                 :             : 
     235                 :             : /**
     236                 :             :  * Validates ASMap structure by simulating all possible execution paths.
     237                 :             :  * Ensures well-formed bytecode, valid jumps, and proper termination.
     238                 :             :  */
     239                 :      128130 : bool SanityCheckAsmap(const std::span<const std::byte> asmap, int bits)
     240                 :             : {
     241                 :      128130 :     size_t pos{0};
     242         [ +  - ]:      128130 :     const size_t endpos{asmap.size() * 8};
     243                 :      128130 :     std::vector<std::pair<uint32_t, int>> jumps; // All future positions we may jump to (bit offset in asmap -> bits to consume left)
     244         [ +  - ]:      128130 :     jumps.reserve(bits);
     245                 :             :     Instruction prevopcode = Instruction::JUMP;
     246                 :             :     bool had_incomplete_match = false;  // Track <8 bit matches for efficiency check
     247                 :             : 
     248         [ +  + ]:     7075134 :     while (pos != endpos) {
     249                 :             :         // There was a jump into the middle of the previous instruction
     250   [ +  +  +  + ]:     7067510 :         if (!jumps.empty() && pos >= jumps.back().first) return false;
     251                 :             : 
     252                 :     7067446 :         Instruction opcode = DecodeType(pos, asmap);
     253         [ +  + ]:     7067446 :         if (opcode == Instruction::RETURN) {
     254                 :             :             // There should not be any RETURN immediately after a DEFAULT (could be combined into just RETURN)
     255         [ +  + ]:     2051032 :             if (prevopcode == Instruction::DEFAULT) return false;
     256                 :     2043685 :             uint32_t asn = DecodeASN(pos, asmap);
     257         [ +  + ]:     2043685 :             if (asn == INVALID) return false; // ASN straddles EOF
     258         [ +  + ]:     2029769 :             if (jumps.empty()) {
     259                 :             :                 // Nothing to execute anymore
     260         [ +  + ]:        5401 :                 if (endpos - pos > 7) return false; // Excessive padding
     261         [ +  + ]:       13857 :                 while (pos != endpos) {
     262         [ +  + ]:       10470 :                     if (ConsumeBitLE(pos, asmap)) return false; // Nonzero padding bit
     263                 :             :                 }
     264                 :             :                 return true; // Sanely reached EOF
     265                 :             :             } else {
     266                 :             :                 // Continue by pretending we jumped to the next instruction
     267         [ +  + ]:     2024368 :                 if (pos != jumps.back().first) return false; // Unreachable code
     268                 :     2024180 :                 bits = jumps.back().second; // Restore the number of bits we would have had left after this jump
     269                 :     2024180 :                 jumps.pop_back();
     270                 :     2024180 :                 prevopcode = Instruction::JUMP;
     271                 :             :             }
     272         [ +  + ]:     5016414 :         } else if (opcode == Instruction::JUMP) {
     273                 :     2067174 :             uint32_t jump = DecodeJump(pos, asmap);
     274         [ +  + ]:     2067174 :             if (jump == INVALID) return false; // Jump offset straddles EOF
     275         [ +  + ]:     2058824 :             if (int64_t{jump} > static_cast<int64_t>(endpos - pos)) return false; // Jump out of range
     276         [ +  + ]:     2025007 :             if (bits == 0) return false; // Consuming bits past the end of the input
     277                 :     2024988 :             --bits;
     278                 :     2024988 :             uint32_t jump_offset = pos + jump;
     279   [ +  +  +  + ]:     2024988 :             if (!jumps.empty() && jump_offset >= jumps.back().first) return false; // Intersecting jumps
     280         [ +  - ]:     2024823 :             jumps.emplace_back(jump_offset, bits);  // Queue jump target for validation
     281                 :             :             prevopcode = Instruction::JUMP;
     282         [ +  + ]:     2949240 :         } else if (opcode == Instruction::MATCH) {
     283                 :     1106942 :             uint32_t match = DecodeMatch(pos, asmap);
     284         [ +  + ]:     1106942 :             if (match == INVALID) return false; // Match bits straddle EOF
     285         [ +  - ]:     1104319 :             int matchlen = std::bit_width(match) - 1;
     286         [ +  + ]:     1104319 :             if (prevopcode != Instruction::MATCH) had_incomplete_match = false;
     287                 :             :             // Within a sequence of matches only at most one should be incomplete
     288         [ +  + ]:     1104319 :             if (matchlen < 8 && had_incomplete_match) return false;
     289                 :     1103282 :             had_incomplete_match = (matchlen < 8);
     290         [ +  + ]:     1103282 :             if (bits < matchlen) return false; // Consuming bits past the end of the input
     291                 :     1103254 :             bits -= matchlen;
     292                 :     1103254 :             prevopcode = Instruction::MATCH;
     293         [ +  + ]:     1842298 :         } else if (opcode == Instruction::DEFAULT) {
     294                 :             :             // There should not be two successive DEFAULTs (they could be combined into one)
     295         [ +  + ]:     1841215 :             if (prevopcode == Instruction::DEFAULT) return false;
     296                 :     1840909 :             uint32_t asn = DecodeASN(pos, asmap);
     297         [ +  + ]:     1840909 :             if (asn == INVALID) return false; // ASN straddles EOF
     298                 :             :             prevopcode = Instruction::DEFAULT;
     299                 :             :         } else {
     300                 :             :             return false; // Instruction straddles EOF
     301                 :             :         }
     302                 :             :     }
     303                 :             :     return false; // Reached EOF without RETURN instruction
     304                 :      128130 : }
     305                 :             : 
     306                 :             : /**
     307                 :             :  * Provides a safe interface for validating ASMap data before use.
     308                 :             :  * Returns true if the data is valid for 128 bits long inputs.
     309                 :             :  */
     310                 :       11453 : bool CheckStandardAsmap(const std::span<const std::byte> data)
     311                 :             : {
     312         [ +  + ]:       11453 :     if (!SanityCheckAsmap(data, 128)) {
     313                 :        8252 :         LogWarning("Sanity check of asmap data failed\n");
     314                 :        8252 :         return false;
     315                 :             :     }
     316                 :             :     return true;
     317                 :             : }
     318                 :             : 
     319                 :             : /**
     320                 :             :  * Loads an ASMap file from disk and validates it.
     321                 :             :  */
     322                 :           0 : std::vector<std::byte> DecodeAsmap(fs::path path)
     323                 :             : {
     324                 :           0 :     FILE *filestr = fsbridge::fopen(path, "rb");
     325                 :           0 :     AutoFile file{filestr};
     326         [ #  # ]:           0 :     if (file.IsNull()) {
     327         [ #  # ]:           0 :         LogWarning("Failed to open asmap file from disk");
     328                 :           0 :         return {};
     329                 :             :     }
     330         [ #  # ]:           0 :     int64_t length{file.size()};
     331   [ #  #  #  # ]:           0 :     LogInfo("Opened asmap file %s (%d bytes) from disk", fs::quoted(fs::PathToString(path)), length);
     332                 :             : 
     333                 :             :     // Read entire file into memory
     334         [ #  # ]:           0 :     std::vector<std::byte> buffer(length);
     335   [ #  #  #  # ]:           0 :     file.read(buffer);
     336                 :             : 
     337   [ #  #  #  #  :           0 :     if (!CheckStandardAsmap(buffer)) {
                   #  # ]
     338   [ #  #  #  # ]:           0 :         LogWarning("Sanity check of asmap file %s failed", fs::quoted(fs::PathToString(path)));
     339                 :           0 :         return {};
     340                 :             :     }
     341                 :             : 
     342                 :           0 :     return buffer;
     343                 :           0 : }
     344                 :             : 
     345                 :             : /**
     346                 :             :  * Computes SHA256 hash of ASMap data for versioning and consistency checks.
     347                 :             :  */
     348                 :        8757 : uint256 AsmapVersion(const std::span<const std::byte> data)
     349                 :             : {
     350         [ +  + ]:        8757 :     if (data.empty()) return {};
     351                 :             : 
     352                 :        3230 :     HashWriter asmap_hasher;
     353                 :        3230 :     asmap_hasher << data;
     354                 :        3230 :     return asmap_hasher.GetHash();
     355                 :             : }
        

Generated by: LCOV version 2.0-1