LCOV - total_coverage.info - src/util/strencodings.h

LCOV - code coverage report

Current view:	top level - src/util - strencodings.h (source / functions)		Coverage	Total	Hit
Test:	total_coverage.info	Lines:	100.0 %	52	52
Test Date:	2025-08-01 05:08:13	Functions:	100.0 %	33	33
		Branches:	66.4 %	116	77

             Branch data     Line data    Source code

       1                 :             : // Copyright (c) 2009-2010 Satoshi Nakamoto
       2                 :             : // Copyright (c) 2009-present The Bitcoin Core developers
       3                 :             : // Distributed under the MIT software license, see the accompanying
       4                 :             : // file COPYING or http://www.opensource.org/licenses/mit-license.php.
       5                 :             : 
       6                 :             : /**
       7                 :             :  * Utilities for converting data from/to strings.
       8                 :             :  */
       9                 :             : #ifndef BITCOIN_UTIL_STRENCODINGS_H
      10                 :             : #define BITCOIN_UTIL_STRENCODINGS_H
      11                 :             : 
      12                 :             : #include <crypto/hex_base.h> // IWYU pragma: export
      13                 :             : #include <span.h>
      14                 :             : #include <util/string.h>
      15                 :             : 
      16                 :             : #include <array>
      17                 :             : #include <bit>
      18                 :             : #include <charconv>
      19                 :             : #include <cstddef>
      20                 :             : #include <cstdint>
      21                 :             : #include <limits>
      22                 :             : #include <optional>
      23                 :             : #include <string>      // IWYU pragma: export
      24                 :             : #include <string_view> // IWYU pragma: export
      25                 :             : #include <system_error>
      26                 :             : #include <type_traits>
      27                 :             : #include <vector>
      28                 :             : 
      29                 :             : /** Character-set rules accepted by the `rule` argument of SanitizeString().
                         :             :  *  SafeChars is an unscoped enum, so its enumerators convert implicitly to
                         :             :  *  the `int` type of that argument. */
      30                 :             : enum SafeChars
      31                 :             : {
      32                 :             :     SAFE_CHARS_DEFAULT, //!< The full set of allowed chars (default rule of SanitizeString())
      33                 :             :     SAFE_CHARS_UA_COMMENT, //!< BIP-0014 subset
      34                 :             :     SAFE_CHARS_FILENAME, //!< Chars allowed in filenames
      35                 :             :     SAFE_CHARS_URI, //!< Chars allowed in URIs (RFC 3986)
      36                 :             : };
      37                 :             : 
      38                 :             : /**
      39                 :             :  * Unit multipliers used by ParseByteUnits().
      40                 :             :  * Lowercase enumerators are base 1000: k = 10^3, m = 10^6, g = 10^9, t = 10^12.
      41                 :             :  * Uppercase enumerators are base 1024: K = 2^10, M = 2^20, G = 2^30, T = 2^40.
                         :             :  * NOOP has the value 1, i.e. it is the identity multiplier.
                         :             :  * The underlying type is uint64_t, which every enumerator value fits into.
      42                 :             : */
      43                 :             : enum class ByteUnit : uint64_t {
      44                 :             :     NOOP = 1ULL,
      45                 :             :     k = 1000ULL,
      46                 :             :     K = 1024ULL,
      47                 :             :     m = 1'000'000ULL,
      48                 :             :     M = 1ULL << 20,
      49                 :             :     g = 1'000'000'000ULL,
      50                 :             :     G = 1ULL << 30,
      51                 :             :     t = 1'000'000'000'000ULL,
      52                 :             :     T = 1ULL << 40,
      53                 :             : };
      54                 :             : 
      55                 :             : /**
      56                 :             :  * Remove unsafe chars. Safe chars are chosen to allow simple messages/URLs/email
      57                 :             :  * addresses, but avoid anything even possibly remotely dangerous like & or >
      58                 :             :  * @param[in] str    The string to sanitize
      59                 :             :  * @param[in] rule   One of the SafeChars values selecting the set of safe chars
                         :             :  *                   (default: SAFE_CHARS_DEFAULT, the least restrictive set)
      60                 :             :  * @return           A new string without unsafe chars
      61                 :             :  */
      62                 :             : std::string SanitizeString(std::string_view str, int rule = SAFE_CHARS_DEFAULT);
      63                 :             : /** Parse the hex string into bytes (uint8_t or std::byte). Ignores whitespace. Returns nullopt on invalid input.
                         :             :  *  @tparam Byte     element type of the returned vector; defaults to std::byte.
                         :             :  *  @param[in] str   the hex text to parse.
                         :             :  *  @note Only the declaration is visible in this header. */
      64                 :             : template <typename Byte = std::byte>
      65                 :             : std::optional<std::vector<Byte>> TryParseHex(std::string_view str);
      66                 :             : /** Like TryParseHex, but returns an empty vector on invalid input.
                         :             :  *  @tparam Byte         element type of the returned vector; defaults to uint8_t
                         :             :  *                       (note that TryParseHex defaults to std::byte instead).
                         :             :  *  @param[in] hex_str   the hex text to parse.
                         :             :  *  @note An empty result is ambiguous if TryParseHex can also succeed with
                         :             :  *        zero bytes; call TryParseHex directly when the difference matters. */
      67                 :             : template <typename Byte = uint8_t>
      68                 :       91620 : std::vector<Byte> ParseHex(std::string_view hex_str)
      69                 :             : {
      70                 :       91620 :     return TryParseHex<Byte>(hex_str).value_or(std::vector<Byte>{}); // nullopt collapses to an empty vector
      71                 :             : }
      72                 :             : /* Returns true if every character in str is a hex digit and str contains an
      73                 :             :  * even number of them. */
      74                 :             : bool IsHex(std::string_view str);
      75                 :             : std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str); // Base64 decode; presumably nullopt signals invalid input (TODO confirm in the definition)
      76                 :             : std::string EncodeBase64(std::span<const unsigned char> input); // Base64 encode a span of bytes
      77                 :          79 : inline std::string EncodeBase64(std::span<const std::byte> input) { return EncodeBase64(MakeUCharSpan(input)); } // convenience overload: converts with MakeUCharSpan() and calls the unsigned char overload
      78                 :        1637 : inline std::string EncodeBase64(std::string_view str) { return EncodeBase64(MakeUCharSpan(str)); } // convenience overload for string data, forwarded the same way
      79                 :             : std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str); // Base32 decode; presumably nullopt signals invalid input (TODO confirm in the definition)
      80                 :             : 
      81                 :             : /**
      82                 :             :  * Base32 encode a span of bytes.
      83                 :             :  * If `pad` is true, then the output will be padded with '=' so that its length
      84                 :             :  * is a multiple of 8. Padding is enabled by default.
      85                 :             :  */
      86                 :             : std::string EncodeBase32(std::span<const unsigned char> input, bool pad = true);
      87                 :             : 
      88                 :             : /**
      89                 :             :  * Base32 encode, taking the input as a std::string_view.
      90                 :             :  * If `pad` is true, then the output will be padded with '=' so that its length
      91                 :             :  * is a multiple of 8. Padding is enabled by default.
      92                 :             :  */
      93                 :             : std::string EncodeBase32(std::string_view str, bool pad = true);
      94                 :             : 
      95                 :             : /**
      96                 :             :  * Splits a socket address string into a host string and a port value.
      97                 :             :  * Validates the port value.
      98                 :             :  *
      99                 :             :  * @param[in] in        The socket address string to split.
     100                 :             :  * @param[out] portOut  Port-portion of the input, if found and parsable.
     101                 :             :  * @param[out] hostOut  Host-portion of the input, if found.
     102                 :             :  * @return              true if port-portion is absent or within its allowed range, otherwise false
                         :             :  *
                         :             :  * NOTE(review): this declaration does not show what portOut and hostOut hold
                         :             :  * when nothing is found or when false is returned; confirm in the definition
                         :             :  * before relying on either.
     103                 :             :  */
     104                 :             : bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut);
     105                 :             : 
     106                 :             : // LocaleIndependentAtoi is provided for backwards compatibility reasons.
     107                 :             : // New code should use ToIntegral.
     108                 :             : //
     109                 :             : // The goal of LocaleIndependentAtoi is to replicate the defined behaviour of
     110                 :             : // std::atoi as it behaves under the "C" locale, and to replace its undefined
     111                 :             : // behaviour on overflow: if the parsed value is bigger than T's maximum or
     112                 :             : // smaller than T's minimum, this function returns the maximum or minimum
     113                 :             : // value, respectively. The input is first passed through
     114                 :             : // util::TrimStringView, one leading '+' is skipped, characters following the
     115                 :             : // parsed number are ignored, and 0 is returned when nothing can be parsed.
     116                 :             : template <typename T>
     117                 :      355052 : T LocaleIndependentAtoi(std::string_view str)
     118                 :             : {
     119                 :             :     static_assert(std::is_integral_v<T>);
     120                 :             :     T result; // only read after std::from_chars reported success
     121                 :             :     // Emulate atoi(...) handling of white space and leading +/-.
     122         [ +  + ]:      355052 :     std::string_view s = util::TrimStringView(str);
     123   [ +  +  +  + ]:      355052 :     if (!s.empty() && s[0] == '+') {
     124   [ +  -  +  + ]:          24 :         if (s.length() >= 2 && s[1] == '-') { // "+-" must not be read as a negative number
     125                 :             :             return 0;
     126                 :             :         }
     127                 :          18 :         s = s.substr(1); // std::from_chars does not accept a leading '+', so drop it here
     128                 :             :     }
     129                 :      355046 :     auto [_, error_condition] = std::from_chars(s.data(), s.data() + s.size(), result); // end pointer discarded: trailing characters are ignored
     130         [ +  + ]:      355046 :     if (error_condition == std::errc::result_out_of_range) {
     131   [ +  -  +  + ]:          38 :         if (s.length() >= 1 && s[0] == '-') {
     132                 :             :             // Saturate underflow, per strtoll's behavior.
     133                 :             :             return std::numeric_limits<T>::min();
     134                 :             :         } else {
     135                 :             :             // Saturate overflow, per strtoll's behavior.
     136                 :             :             return std::numeric_limits<T>::max();
     137                 :             :         }
     138         [ +  + ]:      355008 :     } else if (error_condition != std::errc{}) { // any other error, e.g. no digits at all
     139                 :             :         return 0;
     140                 :             :     }
     141                 :      354921 :     return result;
     142                 :             : }
     143                 :             : 
     144                 :             : /**
     145                 :             :  * Tests if the given character is a decimal digit.
                         :             :  * Only the characters '0' through '9' match; the check is a plain range
                         :             :  * comparison and does not consult the locale.
     146                 :             :  * @param[in] c     character to test
     147                 :             :  * @return          true if the argument is a decimal digit; otherwise false.
     148                 :             :  */
     149                 :      905473 : constexpr bool IsDigit(char c)
     150                 :             : {
     151   [ +  +  +  +  :      905473 :     return c >= '0' && c <= '9';
           +  + ][ +  +  
          +  -  +  +  +  
                -  +  - ]
     152                 :             : }
     153                 :             : 
     154                 :             : /**
     155                 :             :  * Tests if the given character is a whitespace character. The whitespace characters
     156                 :             :  * are: space, form-feed ('\f'), newline ('\n'), carriage return ('\r'), horizontal
     157                 :             :  * tab ('\t'), and vertical tab ('\v').
     158                 :             :  *
     159                 :             :  * This function is locale independent. Under the C locale this function gives the
     160                 :             :  * same result as std::isspace.
                         :             :  * It only compares `c` against the six characters above, so any char value,
                         :             :  * including a negative one, may be passed directly.
     161                 :             :  *
     162                 :             :  * @param[in] c     character to test
     163                 :             :  * @return          true if the argument is a whitespace character; otherwise false
     164                 :             :  */
     165                 :  1725835713 : constexpr inline bool IsSpace(char c) noexcept {
     166   [ +  +  +  + ]:  1725835713 :     return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v';
     167                 :             : }
     168                 :             : 
     169                 :             : /**
     170                 :             :  * Convert string to integral type T. Leading whitespace, a leading +, or any
     171                 :             :  * trailing character fail the parsing. The required format expressed as regex
     172                 :             :  * is `-?[0-9]+`. The minus sign is only permitted for signed integer types.
     173                 :             :  *
                         :             :  * @tparam T        the integral type to parse into (enforced by static_assert).
                         :             :  * @param[in] str   the text to parse; it must consist of the number only.
     174                 :             :  * @returns std::nullopt if the entire string could not be parsed, or if the
     175                 :             :  *   parsed value is not in the range representable by the type T.
     176                 :             :  */
     177                 :             : template <typename T>
     178                 :       41821 : std::optional<T> ToIntegral(std::string_view str)
     179                 :             : {
     180                 :             :     static_assert(std::is_integral_v<T>);
     181                 :             :     T result; // only returned after std::from_chars reported success
     182                 :       41821 :     const auto [first_nonmatching, error_condition] = std::from_chars(str.data(), str.data() + str.size(), result);
     183   [ +  +  +  + ]:       41821 :     if (first_nonmatching != str.data() + str.size() || error_condition != std::errc{}) { // partial parse or conversion error
     184                 :         669 :         return std::nullopt;
     185                 :             :     }
     186                 :       41152 :     return result;
     187                 :             : }
     188                 :             : 
     189                 :             : /**
     190                 :             :  * Format a paragraph of text to a fixed width, adding spaces for
     191                 :             :  * indentation to any added line.
                         :             :  *
                         :             :  * @param[in] in      the text to format
                         :             :  * @param[in] width   the fixed width to format to (default: 79)
                         :             :  * @param[in] indent  number of spaces used to indent each added line (default: 0)
                         :             :  * @return            the formatted text as a new string
     192                 :             :  */
     193                 :             : std::string FormatParagraph(std::string_view in, size_t width = 79, size_t indent = 0);
     194                 :             : 
     195                 :             : /**
     196                 :             :  * Timing-attack-resistant comparison.
     197                 :             :  * Takes time proportional to length
     198                 :             :  * of first argument.
                         :             :  *
                         :             :  * Unless `b` is empty, every element of `a` is visited. Differences are OR-ed
                         :             :  * into an accumulator instead of returning at the first mismatch. The
                         :             :  * accumulator starts as the XOR of both sizes, so arguments of different
                         :             :  * length never compare equal.
                         :             :  *
                         :             :  * @tparam T  a type providing size() and operator[]
                         :             :  * @return    true if a and b have the same size and no element pair differs
     199                 :             :  */
     200                 :             : template <typename T>
     201         [ +  + ]:      384276 : bool TimingResistantEqual(const T& a, const T& b)
     202                 :             : {
     203         [ +  + ]:      384276 :     if (b.size() == 0) return a.size() == 0; // empty b: the i % b.size() below would divide by zero
     204                 :      384272 :     size_t accumulator = a.size() ^ b.size(); // non-zero whenever the sizes differ
     205         [ +  + ]:    14597417 :     for (size_t i = 0; i < a.size(); i++)
     206                 :    14213145 :         accumulator |= size_t(a[i] ^ b[i%b.size()]); // b is indexed modulo its size, so a longer a never reads past the end of b
     207                 :      384272 :     return accumulator == 0;
     208                 :             : }
     209                 :             : 
     210                 :             : /** Parse number as fixed point according to JSON number syntax.
                         :             :  * @param[in]  (first, unnamed)  the text to parse
                         :             :  * @param[in]  decimals          presumably the number of fractional digits of the fixed-point result (TODO confirm in the definition)
                         :             :  * @param[out] amount_out        presumably receives the parsed value on success (TODO confirm in the definition)
     211                 :             :  * @returns true on success, false on error. The function is [[nodiscard]], so the result has to be checked.
     212                 :             :  * @note The result must be in the range (-10^18,10^18), otherwise an overflow error will trigger.
     213                 :             :  */
     214                 :             : [[nodiscard]] bool ParseFixedPoint(std::string_view, int decimals, int64_t *amount_out);
     215                 :             : 
     216                 :             : namespace {
     217                 :             : /** Helper class for the default infn argument to ConvertBits (just returns the input).
                         :             :  *
                         :             :  *  NOTE(review): it lives in an unnamed namespace inside a header, so every
                         :             :  *  translation unit that includes this file gets its own IntIdentity type. */
     218                 :             : struct IntIdentity
     219                 :             : {
     220                 :             :     [[maybe_unused]] int operator()(int x) const { return x; } // identity mapping: the input value is used as is
     221                 :             : };
     222                 :             : 
     223                 :             : } // namespace
     224                 :             : 
     225                 :             : /** Convert from one power-of-2 number base to another.
                         :             :  *
                         :             :  * Reads values of `frombits` bits each from [it, end) and regroups the bit
                         :             :  * stream into values of `tobits` bits each, which are handed to `outfn` one
                         :             :  * at a time.
                         :             :  *
                         :             :  * @tparam frombits  number of bits carried by each input value
                         :             :  * @tparam tobits    number of bits carried by each output value
                         :             :  * @tparam pad       if true, leftover bits are shifted up, filled with zero
                         :             :  *                   bits below, and emitted as one final value; if false, the
                         :             :  *                   conversion fails unless fewer than `frombits` bits are
                         :             :  *                   left over and all of them are zero
                         :             :  * @param outfn      callable invoked with every output value
                         :             :  * @param it         start of the input range
                         :             :  * @param end        end of the input range
                         :             :  * @param infn       maps a dereferenced input element to its integer value; a
                         :             :  *                   negative result marks invalid input (default: IntIdentity)
                         :             :  * @return           false on invalid input or rejected leftover bits, true
                         :             :  *                   otherwise. `outfn` may already have been called by the
                         :             :  *                   time a failure is detected.
                         :             :  */
     226                 :             : template<int frombits, int tobits, bool pad, typename O, typename It, typename I = IntIdentity>
     227                 :      261193 : bool ConvertBits(O outfn, It it, It end, I infn = {}) {
     228                 :      261193 :     size_t acc = 0; // bit accumulator; the newest input bits sit in the low end
     229                 :      261193 :     size_t bits = 0; // number of bits in acc that have not been emitted yet
     230                 :      261193 :     constexpr size_t maxv = (1 << tobits) - 1; // mask selecting one tobits-wide output value
     231                 :      261193 :     constexpr size_t max_acc = (1 << (frombits + tobits - 1)) - 1; // accumulator mask: at most tobits - 1 pending bits plus frombits new ones
     232         [ +  + ]:    29409014 :     while (it != end) {
     233         [ +  + ]:    29147833 :         int v = infn(*it);
     234         [ +  + ]:    24742401 :         if (v < 0) return false; // infn reported an invalid input element
     235                 :    29147821 :         acc = ((acc << frombits) | v) & max_acc; // append the new bits and drop bits that were already emitted
     236                 :    29147821 :         bits += frombits;
     237         [ +  + ]:    53577938 :         while (bits >= tobits) { // emit every complete output value
     238                 :    24430117 :             bits -= tobits;
     239                 :    24430117 :             outfn((acc >> bits) & maxv);
     240                 :             :         }
     241                 :    29147821 :         ++it;
     242                 :             :     }
     243                 :             :     if (pad) {
     244         [ +  + ]:       53792 :         if (bits) outfn((acc << (tobits - bits)) & maxv); // move the leftover bits to the top of a final value, zeros below
     245   [ +  +  +  + ]:      207389 :     } else if (bits >= frombits || ((acc << (tobits - bits)) & maxv)) { // too many leftover bits, or some of them are set
     246                 :           7 :         return false;
     247                 :             :     }
     248                 :             :     return true;
     249                 :             : }
     250                 :             : 
     251                 :             : /**
     252                 :             :  * Converts the given character to its lowercase equivalent.
     253                 :             :  * This function is locale independent. It only converts uppercase
     254                 :             :  * characters in the standard 7-bit ASCII range.
     255                 :             :  * This is a feature, not a limitation.
     256                 :             :  *
     257                 :             :  * @param[in] c     the character to convert to lowercase.
     258                 :             :  * @return          the lowercase equivalent of c; or the argument
     259                 :             :  *                  if no conversion is possible.
     260                 :             :  */
     261                 :       95878 : constexpr char ToLower(char c)
     262                 :             : {
     263         [ +  + ]:       95878 :     return (c >= 'A' && c <= 'Z' ? (c - 'A') + 'a' : c); // 'A'..'Z' keep their offset and move to 'a'..'z'; everything else passes through
     264                 :             : }
     265                 :             : 
     266                 :             : /**
     267                 :             :  * Returns the lowercase equivalent of the given string.
     268                 :             :  * This function is locale independent. It only converts uppercase
     269                 :             :  * characters in the standard 7-bit ASCII range.
     270                 :             :  * This is a feature, not a limitation.
     271                 :             :  *
     272                 :             :  * @param[in] str   the string to convert to lowercase.
     273                 :             :  * @returns         lowercased equivalent of str, as a new std::string
     274                 :             :  */
     275                 :             : std::string ToLower(std::string_view str);
     276                 :             : 
     277                 :             : /**
     278                 :             :  * Converts the given character to its uppercase equivalent.
     279                 :             :  * This function is locale independent. It only converts lowercase
     280                 :             :  * characters in the standard 7-bit ASCII range.
     281                 :             :  * This is a feature, not a limitation.
     282                 :             :  *
     283                 :             :  * @param[in] c     the character to convert to uppercase.
     284                 :             :  * @return          the uppercase equivalent of c; or the argument
     285                 :             :  *                  if no conversion is possible.
     286                 :             :  */
     287                 :        2055 : constexpr char ToUpper(char c)
     288                 :             : {
     289   [ +  -  +  + ]:        2055 :     return (c >= 'a' && c <= 'z' ? (c - 'a') + 'A' : c); // 'a'..'z' keep their offset and move to 'A'..'Z'; everything else passes through
     290                 :             : }
     291                 :             : 
     292                 :             : /**
     293                 :             :  * Returns the uppercase equivalent of the given string.
     294                 :             :  * This function is locale independent. It only converts lowercase
     295                 :             :  * characters in the standard 7-bit ASCII range.
     296                 :             :  * This is a feature, not a limitation.
     297                 :             :  *
     298                 :             :  * @param[in] str   the string to convert to uppercase.
     299                 :             :  * @returns         UPPERCASED EQUIVALENT OF str, as a new std::string
     300                 :             :  */
     301                 :             : std::string ToUpper(std::string_view str);
     302                 :             : 
     303                 :             : /**
     304                 :             :  * Capitalizes the first character of the given string.
     305                 :             :  * This function is locale independent. It only converts lowercase
     306                 :             :  * characters in the standard 7-bit ASCII range.
     307                 :             :  * This is a feature, not a limitation.
     308                 :             :  *
     309                 :             :  * @param[in] str   the string to capitalize; it is taken by value, so the
                         :             :  *                  caller's string is not modified.
     310                 :             :  * @returns         string with the first letter capitalized.
     311                 :             :  */
     312                 :             : std::string Capitalize(std::string str);
     313                 :             : 
     314                 :             : /**
     315                 :             :  * Parse a string with suffix unit [k|K|m|M|g|G|t|T].
     316                 :             :  * Must be a whole integer, fractions not allowed (0.5t), no whitespace or +-
     317                 :             :  * Lowercase units are 1000 base. Uppercase units are 1024 base.
     318                 :             :  * Examples: 2m,27M,19g,41T
     319                 :             :  *
     320                 :             :  * @param[in] str                  the string to convert into bytes
     321                 :             :  * @param[in] default_multiplier   the unit to use if no unit is found in str
     322                 :             :  * @returns                        the number of bytes described by str, or
     323                 :             :  *                                 std::nullopt if str is empty, has trailing
                         :             :  *                                 whitespace, its numeric part is rejected by
                         :             :  *                                 ToIntegral, or the result overflows
     324                 :             :  */
     325                 :             : std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier);
     326                 :             : 
     327                 :             : namespace util {
     328                 :             : /** consteval version of HexDigit() without the lookup table.
                         :             :  *
                         :             :  *  @param[in] c  the character to convert; '0'-'9' or lowercase 'a'-'f'
                         :             :  *  @return       the value (0-15) of the hex digit
                         :             :  *
                         :             :  *  Any other character, including uppercase 'A'-'F', reaches the throw
                         :             :  *  statement. Because the function is consteval, that is a compile-time
                         :             :  *  error rather than a runtime exception. */
     329                 :             : consteval uint8_t ConstevalHexDigit(const char c)
     330                 :             : {
     331                 :             :     if (c >= '0' && c <= '9') return c - '0';
     332                 :             :     if (c >= 'a' && c <= 'f') return c - 'a' + 0xa;
     333                 :             : 
     334                 :             :     throw "Only lowercase hex digits are allowed, for consistency";
     335                 :             : }
     336                 :             : 
     337                 :             : namespace detail {
     338                 :             : template <size_t N> // N: size of the character array, including the null terminator
     339                 :             : struct Hex { // hex string decoded at compile time; used as the template parameter type of the literal operators in this file
     340                 :             :     std::array<std::byte, N / 2> bytes{}; // decoded bytes, two hex digits each (N / 2 rounds the terminator away)
     341                 :             :     consteval Hex(const char (&hex_str)[N]) // not explicit, so a string literal converts to Hex implicitly
     342                 :             :         // 2 hex digits required per byte + implicit null terminator
     343                 :             :         requires(N % 2 == 1)
     344                 :             :     {
     345                 :             :         if (hex_str[N - 1]) throw "null terminator required"; // the last array element has to be the null terminator
     346                 :             :         for (std::size_t i = 0; i < bytes.size(); ++i) {
     347                 :             :             bytes[i] = static_cast<std::byte>(
     348                 :             :                 (ConstevalHexDigit(hex_str[2 * i]) << 4) | // first digit of the pair: high nibble
     349                 :             :                  ConstevalHexDigit(hex_str[2 * i + 1])); // second digit of the pair: low nibble
     350                 :             :         }
     351                 :             :     }
     352                 :             : };
     353                 :             : } // namespace detail
     354                 :             : 
     355                 :             : /**
     356                 :             :  * ""_hex is a compile-time user-defined literal returning a
     357                 :             :  * `std::array<std::byte>`, equivalent to ParseHex(). Variants provided:
     358                 :             :  *
     359                 :             :  * - ""_hex_v: Returns `std::vector<std::byte>`, useful for heap allocation or
     360                 :             :  *   variable-length serialization.
     361                 :             :  *
     362                 :             :  * - ""_hex_u8: Returns `std::array<uint8_t>`, for cases where `std::byte` is
     363                 :             :  *   incompatible.
     364                 :             :  *
     365                 :             :  * - ""_hex_v_u8: Returns `std::vector<uint8_t>`, combining heap allocation with
     366                 :             :  *   `uint8_t`.
     367                 :             :  *
                         :             :  * @note Unlike ParseHex(), the literals accept only lowercase hex digits and
                         :             :  *   no whitespace, and require an even number of digits. Anything else is
                         :             :  *   rejected at compile time by util::detail::Hex and ConstevalHexDigit().
                         :             :  *
     368                 :             :  * @warning It could be necessary to use vector instead of array variants when
     369                 :             :  *   serializing, or vice versa, because vectors are assumed to be variable-
     370                 :             :  *   length and serialized with a size prefix, while arrays are considered fixed
     371                 :             :  *   length and serialized with no prefix.
     372                 :             :  *
     373                 :             :  * @warning It may be preferable to use vector variants to save stack space when
     374                 :             :  *   declaring local variables if hex strings are large. Alternatively variables
     375                 :             :  *   could be declared constexpr to avoid using stack space.
     376                 :             :  *
     377                 :             :  * @warning Avoid `uint8_t` variants when not necessary, as the codebase
     378                 :             :  *   migrates to use `std::byte` instead of `unsigned char` and `uint8_t`.
     379                 :             :  *
     380                 :             :  * @note One reason ""_hex uses `std::array` instead of `std::vector` like
     381                 :             :  *   ParseHex() does is because heap-based containers cannot cross the compile-
     382                 :             :  *   time/runtime barrier.
                         :             :  *
                         :             :  * NOTE(review): ""_hex, ""_hex_u8 and ""_hex_v are declared constexpr, while
                         :             :  *   ""_hex_v_u8 is declared inline only. Confirm that this is intentional.
     383                 :             :  */
     384                 :             : inline namespace hex_literals {
     385                 :             : 
     386                 :             : template <util::detail::Hex str>
     387                 :             : constexpr auto operator""_hex() { return str.bytes; } // returns a copy of the decoded std::array<std::byte, ...>
     388                 :             : 
     389                 :             : template <util::detail::Hex str>
     390                 :             : constexpr auto operator""_hex_u8() { return std::bit_cast<std::array<uint8_t, str.bytes.size()>>(str.bytes); } // same bytes, viewed as uint8_t
     391                 :             : 
     392                 :             : template <util::detail::Hex str>
     393   [ +  -  +  - ]:           4 : constexpr auto operator""_hex_v() { return std::vector<std::byte>{str.bytes.begin(), str.bytes.end()}; } // copies the decoded bytes into a vector
     394                 :             : 
     395                 :             : template <util::detail::Hex str>
     396   [ +  -  +  -  :        1872 : inline auto operator""_hex_v_u8() { return std::vector<uint8_t>{UCharCast(str.bytes.data()), UCharCast(str.bytes.data() + str.bytes.size())}; }
          +  -  +  -  +  
          -  +  -  +  -  
          +  -  +  -  +  
           - ][ +  -  #  
          #  #  #  #  #  
          #  #  #  #  #  
          #  #  #  #  #  
                   #  # ]
           [ +  -  +  - ]
     397                 :             : 
     398                 :             : } // inline namespace hex_literals
     399                 :             : } // namespace util
     400                 :             : 
     401                 :             : #endif // BITCOIN_UTIL_STRENCODINGS_H

Generated by: LCOV version 2.0-1