LCOV - code coverage report
Current view: top level - src/test/fuzz - FuzzedDataProvider.h (source / functions) Coverage Total Hit
Test: fuzz_coverage.info Lines: 89.4 % 104 93
Test Date: 2025-01-22 04:09:46 Functions: 98.4 % 62 61
Branches: 77.2 % 92 71

             Branch data     Line data    Source code
       1                 :             : //===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===//
       2                 :             : //
       3                 :             : // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
       4                 :             : // See https://llvm.org/LICENSE.txt for license information.
       5                 :             : // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
       6                 :             : //
       7                 :             : //===----------------------------------------------------------------------===//
       8                 :             : // A single header library providing a utility class to break up an array of
       9                 :             : // bytes. Whenever run on the same input, provides the same output, as long as
      10                 :             : // its methods are called in the same order, with the same arguments.
      11                 :             : //===----------------------------------------------------------------------===//
      12                 :             : 
      13                 :             : #ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
      14                 :             : #define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
      15                 :             : 
      16                 :             : #include <algorithm>
      17                 :             : #include <array>
      18                 :             : #include <climits>
      19                 :             : #include <cstddef>
      20                 :             : #include <cstdint>
      21                 :             : #include <cstdlib>
      22                 :             : #include <cstring>
      23                 :             : #include <initializer_list>
      24                 :             : #include <limits>
      25                 :             : #include <string>
      26                 :             : #include <type_traits>
      27                 :             : #include <utility>
      28                 :             : #include <vector>
      29                 :             : 
      30                 :             : // In addition to the comments below, the API is also briefly documented at
      31                 :             : // https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#fuzzed-data-provider
      32                 :             : class FuzzedDataProvider {
      33                 :             :  public:
      34                 :             :   // |data| is an array of length |size| that the FuzzedDataProvider wraps to
      35                 :             :   // provide more granular access. |data| must outlive the FuzzedDataProvider.
      36                 :      149676 :   FuzzedDataProvider(const uint8_t *data, size_t size)
      37         [ +  - ]:      155168 :       : data_ptr_(data), remaining_bytes_(size) {}
      38                 :             :   ~FuzzedDataProvider() = default;
      39                 :             : 
      40                 :             :   // See the implementation below (after the class definition) for more verbose
      41                 :             :   // comments for each of the methods.
      42                 :             : 
      43                 :             :   // Methods returning std::vector of bytes. These are the most popular choice
      44                 :             :   // when splitting fuzzing input into pieces, as every piece is put into a
      45                 :             :   // separate buffer (i.e. ASan would catch any under-/overflow) and the memory
      46                 :             :   // will be released automatically.
      47                 :             :   template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes);
      48                 :             :   template <typename T>
      49                 :             :   std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes, T terminator = 0);
      50                 :             :   template <typename T> std::vector<T> ConsumeRemainingBytes();
      51                 :             : 
      52                 :             :   // Methods returning strings. Use only when you need a std::string or a null
      53                 :             :   // terminated C-string. Otherwise, prefer the methods returning std::vector.
      54                 :             :   std::string ConsumeBytesAsString(size_t num_bytes);
      55                 :             :   std::string ConsumeRandomLengthString(size_t max_length);
      56                 :             :   std::string ConsumeRandomLengthString();
      57                 :             :   std::string ConsumeRemainingBytesAsString();
      58                 :             : 
      59                 :             :   // Methods returning integer values.
      60                 :             :   template <typename T> T ConsumeIntegral();
      61                 :             :   template <typename T> T ConsumeIntegralInRange(T min, T max);
      62                 :             : 
      63                 :             :   // Methods returning floating point values.
      64                 :             :   template <typename T> T ConsumeFloatingPoint();
      65                 :             :   template <typename T> T ConsumeFloatingPointInRange(T min, T max);
      66                 :             : 
      67                 :             :   // 0 <= return value <= 1.
      68                 :             :   template <typename T> T ConsumeProbability();
      69                 :             : 
      70                 :             :   bool ConsumeBool();
      71                 :             : 
      72                 :             :   // Returns a value chosen from the given enum.
      73                 :             :   template <typename T> T ConsumeEnum();
      74                 :             : 
      75                 :             :   // Returns a value from the given array.
      76                 :             :   template <typename T, size_t size> T PickValueInArray(const T (&array)[size]);
      77                 :             :   template <typename T, size_t size>
      78                 :             :   T PickValueInArray(const std::array<T, size> &array);
      79                 :             :   template <typename T> T PickValueInArray(std::initializer_list<const T> list);
      80                 :             : 
      81                 :             :   // Writes data to the given destination and returns number of bytes written.
      82                 :             :   size_t ConsumeData(void *destination, size_t num_bytes);
      83                 :             : 
      84                 :             :   // Reports the remaining bytes available for fuzzed input.
      85 [ +  + ][ +  +  :    18515655 :   size_t remaining_bytes() { return remaining_bytes_; }
          +  +  +  +  +  
          +  +  +  +  +  
                   +  + ]
           [ +  +  +  + ]
      86                 :             : 
      87                 :             :  private:
      88                 :             :   FuzzedDataProvider(const FuzzedDataProvider &) = delete;
      89                 :             :   FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete;
      90                 :             : 
      91                 :             :   void CopyAndAdvance(void *destination, size_t num_bytes);
      92                 :             : 
      93                 :             :   void Advance(size_t num_bytes);
      94                 :             : 
      95                 :             :   template <typename T>
      96                 :             :   std::vector<T> ConsumeBytes(size_t size, size_t num_bytes);
      97                 :             : 
      98                 :             :   template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value);
      99                 :             : 
     100                 :             :   const uint8_t *data_ptr_;
     101                 :             :   size_t remaining_bytes_;
     102                 :             : };
     103                 :             : 
     104                 :             : // Returns a std::vector containing |num_bytes| of input data. If fewer than
     105                 :             : // |num_bytes| of data remain, returns a shorter std::vector containing all
     106                 :             : // of the data that's left. Can be used with any byte sized type, such as
     107                 :             : // char, unsigned char, uint8_t, etc.
     108                 :             : template <typename T>
     109                 :     7996278 : std::vector<T> FuzzedDataProvider::ConsumeBytes(size_t num_bytes) {
     110         [ +  + ]:     7996278 :   num_bytes = std::min(num_bytes, remaining_bytes_);
     111                 :     7996278 :   return ConsumeBytes<T>(num_bytes, num_bytes);
     112                 :             : }
     113                 :             : 
     114                 :             : // Similar to |ConsumeBytes|, but also appends the terminator value at the end
     115                 :             : // of the resulting vector. Useful, when a mutable null-terminated C-string is
     116                 :             : // needed, for example. But that is a rare case. Better avoid it, if possible,
     117                 :             : // and prefer using |ConsumeBytes| or |ConsumeBytesAsString| methods.
     118                 :             : template <typename T>
     119                 :             : std::vector<T> FuzzedDataProvider::ConsumeBytesWithTerminator(size_t num_bytes,
     120                 :             :                                                               T terminator) {
     121                 :             :   num_bytes = std::min(num_bytes, remaining_bytes_);
     122                 :             :   std::vector<T> result = ConsumeBytes<T>(num_bytes + 1, num_bytes);
     123                 :             :   result.back() = terminator;
     124                 :             :   return result;
     125                 :             : }
     126                 :             : 
     127                 :             : // Returns a std::vector containing all remaining bytes of the input data.
     128                 :             : template <typename T>
     129                 :        2641 : std::vector<T> FuzzedDataProvider::ConsumeRemainingBytes() {
     130                 :        2641 :   return ConsumeBytes<T>(remaining_bytes_);
     131                 :             : }
     132                 :             : 
     133                 :             : // Returns a std::string containing |num_bytes| of input data. Using this and
     134                 :             : // |.c_str()| on the resulting string is the best way to get an immutable
     135                 :             : // null-terminated C string. If fewer than |num_bytes| of data remain, returns
     136                 :             : // a shorter std::string containing all of the data that's left.
     137                 :      317157 : inline std::string FuzzedDataProvider::ConsumeBytesAsString(size_t num_bytes) {
     138                 :      317157 :   static_assert(sizeof(std::string::value_type) == sizeof(uint8_t),
     139                 :             :                 "ConsumeBytesAsString cannot convert the data to a string.");
     140                 :             : 
     141         [ +  + ]:      317157 :   num_bytes = std::min(num_bytes, remaining_bytes_);
     142                 :      317157 :   std::string result(
     143                 :      317157 :       reinterpret_cast<const std::string::value_type *>(data_ptr_), num_bytes);
     144                 :      317157 :   Advance(num_bytes);
     145                 :      317157 :   return result;
     146                 :             : }
     147                 :             : 
     148                 :             : // Returns a std::string of length from 0 to |max_length|. When it runs out of
     149                 :             : // input data, returns what remains of the input. Designed to be more stable
     150                 :             : // with respect to a fuzzer inserting characters than just picking a random
     151                 :             : // length and then consuming that many bytes with |ConsumeBytes|.
     152                 :             : inline std::string
     153                 :     5697295 : FuzzedDataProvider::ConsumeRandomLengthString(size_t max_length) {
     154                 :             :   // Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", and maps "\"
     155                 :             :   // followed by anything else to the end of the string. As a result of this
     156                 :             :   // logic, a fuzzer can insert characters into the string, and the string
     157                 :             :   // will be lengthened to include those new characters, resulting in a more
     158                 :             :   // stable fuzzer than picking the length of a string independently from
     159                 :             :   // picking its contents.
     160         [ +  + ]:     5697295 :   std::string result;
     161                 :             : 
     162                 :             :   // Reserve the anticipated capacity to prevent several reallocations.
     163   [ +  +  +  - ]:    11202726 :   result.reserve(std::min(max_length, remaining_bytes_));
     164   [ +  +  +  + ]:  1523660311 :   for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) {
     165                 :  1522503552 :     char next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
     166                 :  1522503552 :     Advance(1);
     167   [ +  +  +  + ]:  1522503552 :     if (next == '\\' && remaining_bytes_ != 0) {
     168                 :     5326619 :       next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
     169                 :     5326619 :       Advance(1);
     170         [ +  + ]:     5326619 :       if (next != '\\')
     171                 :             :         break;
     172                 :             :     }
     173         [ +  - ]:  3035926032 :     result += next;
     174                 :             :   }
     175                 :             : 
     176                 :     5697295 :   result.shrink_to_fit();
     177                 :     5697295 :   return result;
     178                 :           0 : }
     179                 :             : 
     180                 :             : // Returns a std::string of length from 0 to |remaining_bytes_|.
     181                 :     2840656 : inline std::string FuzzedDataProvider::ConsumeRandomLengthString() {
     182         [ +  - ]:     2840656 :   return ConsumeRandomLengthString(remaining_bytes_);
           [ +  -  +  - ]
     183                 :             : }
     184                 :             : 
     185                 :             : // Returns a std::string containing all remaining bytes of the input data.
     186                 :             : // Prefer using |ConsumeRemainingBytes| unless you actually need a std::string
     187                 :             : // object.
     188                 :         177 : inline std::string FuzzedDataProvider::ConsumeRemainingBytesAsString() {
     189         [ +  - ]:         177 :   return ConsumeBytesAsString(remaining_bytes_);
     190                 :             : }
     191                 :             : 
     192                 :             : // Returns a number in the range [Type's min, Type's max]. The value might
     193                 :             : // not be uniformly distributed in the given range. If there's no input data
     194                 :             : // left, always returns |min|.
     195                 :   135421526 : template <typename T> T FuzzedDataProvider::ConsumeIntegral() {
     196                 :   135421526 :   return ConsumeIntegralInRange(std::numeric_limits<T>::min(),
     197                 :   135421526 :                                 std::numeric_limits<T>::max());
     198                 :             : }
     199                 :             : 
     200                 :             : // Returns a number in the range [min, max] by consuming bytes from the
     201                 :             : // input data. The value might not be uniformly distributed in the given
     202                 :             : // range. If there's no input data left, always returns |min|. |min| must
     203                 :             : // be less than or equal to |max|.
     204                 :             : template <typename T>
     205                 :   257931903 : T FuzzedDataProvider::ConsumeIntegralInRange(T min, T max) {
     206                 :             :   static_assert(std::is_integral<T>::value, "An integral type is required.");
     207                 :             :   static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");
     208                 :             : 
     209         [ -  + ]:   257931903 :   if (min > max)
     210                 :           0 :     abort();
     211                 :             : 
     212                 :             :   // Use the biggest type possible to hold the range and the result.
     213                 :   257931903 :   uint64_t range = static_cast<uint64_t>(max) - static_cast<uint64_t>(min);
     214                 :   257931903 :   uint64_t result = 0;
     215                 :   257931903 :   size_t offset = 0;
     216                 :             : 
     217   [ +  +  +  + ]:   635459423 :   while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 &&
     218         [ +  + ]:   386870561 :          remaining_bytes_ != 0) {
     219                 :             :     // Pull bytes off the end of the seed data. Experimentally, this seems to
     220                 :             :     // allow the fuzzer to more easily explore the input space. This makes
     221                 :             :     // sense, since it works by modifying inputs that caused new code to run,
     222                 :             :     // and this data is often used to encode length of data read by
     223                 :             :     // |ConsumeBytes|. Separating out read lengths makes it easier to modify
     224                 :             :     // the contents of the data that is actually read.
     225                 :   377527520 :     --remaining_bytes_;
     226                 :   377527520 :     result = (result << CHAR_BIT) | data_ptr_[remaining_bytes_];
     227                 :   377527520 :     offset += CHAR_BIT;
     228                 :             :   }
     229                 :             : 
     230                 :             :   // Avoid division by 0, in case |range + 1| results in overflow.
     231         [ +  + ]:   257931903 :   if (range != std::numeric_limits<decltype(range)>::max())
     232                 :   250996641 :     result = result % (range + 1);
     233                 :             : 
     234                 :   257931903 :   return static_cast<T>(static_cast<uint64_t>(min) + result);
     235                 :             : }
     236                 :             : 
     237                 :             : // Returns a floating point value in the range [Type's lowest, Type's max] by
     238                 :             : // consuming bytes from the input data. If there's no input data left, always
     239                 :             : // returns the type's lowest value (|std::numeric_limits<T>::lowest()|).
     240                 :       73423 : template <typename T> T FuzzedDataProvider::ConsumeFloatingPoint() {
     241                 :       73423 :   return ConsumeFloatingPointInRange<T>(std::numeric_limits<T>::lowest(),
     242                 :       73423 :                                         std::numeric_limits<T>::max());
     243                 :             : }
     244                 :             : 
     245                 :             : // Returns a floating point value in the given range by consuming bytes from
     246                 :             : // the input data. If there's no input data left, returns |min|. Note that
     247                 :             : // |min| must be less than or equal to |max|.
     248                 :             : template <typename T>
     249                 :       73423 : T FuzzedDataProvider::ConsumeFloatingPointInRange(T min, T max) {
     250         [ -  + ]:       73423 :   if (min > max)
     251                 :           0 :     abort();
     252                 :             : 
     253                 :       73423 :   T range = .0;
     254                 :       73423 :   T result = min;
     255                 :       73423 :   constexpr T zero(.0);
     256   [ +  -  +  -  :       73423 :   if (max > zero && min < zero && max > min + std::numeric_limits<T>::max()) {
                   +  - ]
     257                 :             :     // The diff |max - min| would overflow the given floating point type. Use
     258                 :             :     // the half of the diff as the range and consume a bool to decide whether
     259                 :             :     // the result is in the first or the second part of the diff.
     260                 :       73423 :     range = (max / 2.0) - (min / 2.0);
     261         [ +  + ]:       73423 :     if (ConsumeBool()) {
     262                 :       52874 :       result += range;
     263                 :             :     }
     264                 :             :   } else {
     265                 :           0 :     range = max - min;
     266                 :             :   }
     267                 :             : 
     268                 :       73423 :   return result + range * ConsumeProbability<T>();
     269                 :             : }
     270                 :             : 
     271                 :             : // Returns a floating point number in the range [0.0, 1.0]. If there's no
     272                 :             : // input data left, always returns 0.
     273                 :       73423 : template <typename T> T FuzzedDataProvider::ConsumeProbability() {
     274                 :             :   static_assert(std::is_floating_point<T>::value,
     275                 :             :                 "A floating point type is required.");
     276                 :             : 
     277                 :             :   // Use different integral types for different floating point types in order
     278                 :             :   // to provide better density of the resulting values.
     279                 :             :   using IntegralType =
     280                 :             :       typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), uint32_t,
     281                 :             :                                 uint64_t>::type;
     282                 :             : 
     283                 :       73423 :   T result = static_cast<T>(ConsumeIntegral<IntegralType>());
     284                 :       73423 :   result /= static_cast<T>(std::numeric_limits<IntegralType>::max());
     285                 :       73423 :   return result;
     286                 :             : }
     287                 :             : 
     288                 :             : // Reads one byte and returns a bool, or false when no data remains.
     289                 :   100113950 : inline bool FuzzedDataProvider::ConsumeBool() {
     290 [ +  + ][ +  -  :    99200983 :   return 1 & ConsumeIntegral<uint8_t>();
             +  -  +  - ]
     291                 :             : }
     292                 :             : 
     293                 :             : // Returns an enum value. The enum must start at 0 and be contiguous. It must
     294                 :             : // also contain |kMaxValue| aliased to its largest (inclusive) value. Such as:
     295                 :             : // enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue };
     296                 :             : template <typename T> T FuzzedDataProvider::ConsumeEnum() {
     297                 :             :   static_assert(std::is_enum<T>::value, "|T| must be an enum type.");
     298                 :             :   return static_cast<T>(
     299                 :             :       ConsumeIntegralInRange<uint32_t>(0, static_cast<uint32_t>(T::kMaxValue)));
     300                 :             : }
     301                 :             : 
     302                 :             : // Returns a copy of the value selected from the given fixed-size |array|.
     303                 :             : template <typename T, size_t size>
     304                 :     4513491 : T FuzzedDataProvider::PickValueInArray(const T (&array)[size]) {
     305                 :             :   static_assert(size > 0, "The array must be non empty.");
     306                 :     4513491 :   return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
     307                 :             : }
     308                 :             : 
     309                 :             : template <typename T, size_t size>
     310                 :      657454 : T FuzzedDataProvider::PickValueInArray(const std::array<T, size> &array) {
     311                 :             :   static_assert(size > 0, "The array must be non empty.");
     312                 :      657454 :   return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
     313                 :             : }
     314                 :             : 
     315                 :             : template <typename T>
     316         [ -  + ]:    14804737 : T FuzzedDataProvider::PickValueInArray(std::initializer_list<const T> list) {
     317                 :             :   // TODO(Dor1s): switch to static_assert once C++14 is allowed.
     318         [ -  + ]:    14804737 :   if (!list.size())
     319                 :           0 :     abort();
     320                 :             : 
     321                 :    14804737 :   return *(list.begin() + ConsumeIntegralInRange<size_t>(0, list.size() - 1));
     322                 :             : }
     323                 :             : 
     324                 :             : // Writes |num_bytes| of input data to the given destination pointer. If there
     325                 :             : // is not enough data left, writes all remaining bytes. Return value is the
     326                 :             : // number of bytes written.
     327                 :             : // In general, it's better to avoid using this function, but it may be useful
     328                 :             : // in cases when it's necessary to fill a certain buffer or object with
     329                 :             : // fuzzing data.
     330                 :           0 : inline size_t FuzzedDataProvider::ConsumeData(void *destination,
     331                 :             :                                               size_t num_bytes) {
     332         [ #  # ]:           0 :   num_bytes = std::min(num_bytes, remaining_bytes_);
     333                 :           0 :   CopyAndAdvance(destination, num_bytes);
     334                 :           0 :   return num_bytes;
     335                 :             : }
     336                 :             : 
     337                 :             : // Private methods.
     338                 :     7356705 : inline void FuzzedDataProvider::CopyAndAdvance(void *destination,
     339                 :             :                                                size_t num_bytes) {
     340                 :     7356705 :   std::memcpy(destination, data_ptr_, num_bytes);
     341                 :     7356705 :   Advance(num_bytes);
     342                 :     7356705 : }
     343                 :             : 
     344                 :  1535504033 : inline void FuzzedDataProvider::Advance(size_t num_bytes) {
     345         [ -  + ]:  1535504033 :   if (num_bytes > remaining_bytes_)
     346                 :           0 :     abort();
     347                 :             : 
     348                 :  1535504033 :   data_ptr_ += num_bytes;
     349                 :  1535504033 :   remaining_bytes_ -= num_bytes;
     350                 :  1535504033 : }
     351                 :             : 
     352                 :             : template <typename T>
     353                 :     7996278 : std::vector<T> FuzzedDataProvider::ConsumeBytes(size_t size, size_t num_bytes) {
     354                 :             :   static_assert(sizeof(T) == sizeof(uint8_t), "Incompatible data type.");
     355                 :             : 
     356                 :             :   // The point of using the size-based constructor below is to increase the
     357                 :             :   // odds of having a vector object with capacity being equal to the length.
     358                 :             :   // That part is always implementation specific, but at least both libc++ and
     359                 :             :   // libstdc++ allocate the requested number of bytes in that constructor,
     360                 :             :   // which seems to be a natural choice for other implementations as well.
     361                 :             :   // To increase the odds even more, we also call |shrink_to_fit| below.
     362                 :     7996278 :   std::vector<T> result(size);
     363         [ +  + ]:     7996278 :   if (size == 0) {
     364         [ -  + ]:      639573 :     if (num_bytes != 0)
     365                 :           0 :       abort();
     366                 :             :     return result;
     367                 :             :   }
     368                 :             : 
     369                 :     7356705 :   CopyAndAdvance(result.data(), num_bytes);
     370                 :             : 
     371                 :             :   // Even though |shrink_to_fit| is also implementation specific, we expect it
     372                 :             :   // to provide an additional assurance in case vector's constructor allocated
     373                 :             :   // a buffer which is larger than the actual amount of data we put inside it.
     374                 :     7356705 :   result.shrink_to_fit();
     375                 :     7356705 :   return result;
     376                 :             : }
     377                 :             : 
     378                 :             : template <typename TS, typename TU>
     379                 :  1527830171 : TS FuzzedDataProvider::ConvertUnsignedToSigned(TU value) {
     380                 :             :   static_assert(sizeof(TS) == sizeof(TU), "Incompatible data types.");
     381                 :             :   static_assert(!std::numeric_limits<TU>::is_signed,
     382                 :             :                 "Source type must be unsigned.");
     383                 :             : 
     384                 :             :   // TODO(Dor1s): change to `if constexpr` once C++17 becomes mainstream.
     385                 :             :   if (std::numeric_limits<TS>::is_modulo)
     386                 :             :     return static_cast<TS>(value);
     387                 :             : 
     388                 :             :   // Avoid using implementation-defined unsigned to signed conversions.
     389                 :             :   // To learn more, see https://stackoverflow.com/questions/13150449.
     390                 :  1527830171 :   if (value <= std::numeric_limits<TS>::max()) {
     391                 :             :     return static_cast<TS>(value);
     392                 :             :   } else {
     393                 :  1527830171 :     constexpr auto TS_min = std::numeric_limits<TS>::min();
     394                 :             :     return TS_min + static_cast<TS>(value - TS_min);
     395                 :             :   }
     396                 :             : }
     397                 :             : 
     398                 :             : #endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
        

Generated by: LCOV version 2.0-1