LCOV - code coverage report
Current view: top level - src/test/fuzz - FuzzedDataProvider.h (source / functions) Coverage Total Hit
Test: fuzz_coverage.info Lines: 89.4 % 104 93
Test Date: 2024-12-04 04:00:22 Functions: 98.4 % 62 61
Branches: 76.0 % 96 73

             Branch data     Line data    Source code
       1                 :             : //===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===//
       2                 :             : //
       3                 :             : // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
       4                 :             : // See https://llvm.org/LICENSE.txt for license information.
       5                 :             : // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
       6                 :             : //
       7                 :             : //===----------------------------------------------------------------------===//
       8                 :             : // A single header library providing a utility class to break up an array of
       9                 :             : // bytes. Whenever run on the same input, provides the same output, as long as
      10                 :             : // its methods are called in the same order, with the same arguments.
      11                 :             : //===----------------------------------------------------------------------===//
      12                 :             : 
      13                 :             : #ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
      14                 :             : #define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
      15                 :             : 
      16                 :             : #include <algorithm>
      17                 :             : #include <array>
      18                 :             : #include <climits>
      19                 :             : #include <cstddef>
      20                 :             : #include <cstdint>
      21                 :             : #include <cstring>
      22                 :             : #include <initializer_list>
      23                 :             : #include <limits>
      24                 :             : #include <string>
      25                 :             : #include <type_traits>
      26                 :             : #include <utility>
      27                 :             : #include <vector>
      28                 :             : 
      29                 :             : // In addition to the comments below, the API is also briefly documented at
      30                 :             : // https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#fuzzed-data-provider
      31                 :             : class FuzzedDataProvider {
      32                 :             :  public:
      33                 :             :   // |data| is an array of length |size| that the FuzzedDataProvider wraps to
      34                 :             :   // provide more granular access. |data| must outlive the FuzzedDataProvider.
      35                 :      102996 :   FuzzedDataProvider(const uint8_t *data, size_t size)
      36         [ +  - ]:      103447 :       : data_ptr_(data), remaining_bytes_(size) {}
           [ +  -  +  - ]
      37                 :             :   ~FuzzedDataProvider() = default;
      38                 :             : 
      39                 :             :   // See the implementation below (after the class definition) for more verbose
      40                 :             :   // comments for each of the methods.
      41                 :             : 
      42                 :             :   // Methods returning std::vector of bytes. These are the most popular choice
      43                 :             :   // when splitting fuzzing input into pieces, as every piece is put into a
      44                 :             :   // separate buffer (i.e. ASan would catch any under-/overflow) and the memory
      45                 :             :   // will be released automatically.
      46                 :             :   template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes);
      47                 :             :   template <typename T>
      48                 :             :   std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes, T terminator = 0);
      49                 :             :   template <typename T> std::vector<T> ConsumeRemainingBytes();
      50                 :             : 
      51                 :             :   // Methods returning strings. Use only when you need a std::string or a null
      52                 :             :   // terminated C-string. Otherwise, prefer the methods returning std::vector.
      53                 :             :   std::string ConsumeBytesAsString(size_t num_bytes);
      54                 :             :   std::string ConsumeRandomLengthString(size_t max_length);
      55                 :             :   std::string ConsumeRandomLengthString();
      56                 :             :   std::string ConsumeRemainingBytesAsString();
      57                 :             : 
      58                 :             :   // Methods returning integer values.
      59                 :             :   template <typename T> T ConsumeIntegral();
      60                 :             :   template <typename T> T ConsumeIntegralInRange(T min, T max);
      61                 :             : 
      62                 :             :   // Methods returning floating point values.
      63                 :             :   template <typename T> T ConsumeFloatingPoint();
      64                 :             :   template <typename T> T ConsumeFloatingPointInRange(T min, T max);
      65                 :             : 
      66                 :             :   // 0 <= return value <= 1.
      67                 :             :   template <typename T> T ConsumeProbability();
      68                 :             : 
      69                 :             :   bool ConsumeBool();
      70                 :             : 
      71                 :             :   // Returns a value chosen from the given enum.
      72                 :             :   template <typename T> T ConsumeEnum();
      73                 :             : 
      74                 :             :   // Returns a value from the given array.
      75                 :             :   template <typename T, size_t size> T PickValueInArray(const T (&array)[size]);
      76                 :             :   template <typename T, size_t size>
      77                 :             :   T PickValueInArray(const std::array<T, size> &array);
      78                 :             :   template <typename T> T PickValueInArray(std::initializer_list<const T> list);
      79                 :             : 
      80                 :             :   // Writes data to the given destination and returns number of bytes written.
      81                 :             :   size_t ConsumeData(void *destination, size_t num_bytes);
      82                 :             : 
      83                 :             :   // Reports the remaining bytes available for fuzzed input.
      84         [ +  + ]:    14927323 :   size_t remaining_bytes() { return remaining_bytes_; }
           [ +  +  +  + ]
           [ +  +  +  +  
          +  +  +  +  +  
             +  +  +  +  
                      + ]
      85                 :             : 
      86                 :             :  private:
      87                 :             :   FuzzedDataProvider(const FuzzedDataProvider &) = delete;
      88                 :             :   FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete;
      89                 :             : 
      90                 :             :   void CopyAndAdvance(void *destination, size_t num_bytes);
      91                 :             : 
      92                 :             :   void Advance(size_t num_bytes);
      93                 :             : 
      94                 :             :   template <typename T>
      95                 :             :   std::vector<T> ConsumeBytes(size_t size, size_t num_bytes);
      96                 :             : 
      97                 :             :   template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value);
      98                 :             : 
      99                 :             :   const uint8_t *data_ptr_;
     100                 :             :   size_t remaining_bytes_;
     101                 :             : };
     102                 :             : 
     103                 :             : // Returns a std::vector containing |num_bytes| of input data. If fewer than
     104                 :             : // |num_bytes| of data remain, returns a shorter std::vector containing all
     105                 :             : // of the data that's left. Can be used with any byte sized type, such as
     106                 :             : // char, unsigned char, uint8_t, etc.
     107                 :             : template <typename T>
     108                 :     5280392 : std::vector<T> FuzzedDataProvider::ConsumeBytes(size_t num_bytes) {
     109         [ +  + ]:     5280392 :   num_bytes = std::min(num_bytes, remaining_bytes_);
     110                 :     5280392 :   return ConsumeBytes<T>(num_bytes, num_bytes);
     111                 :             : }
     112                 :             : 
     113                 :             : // Similar to |ConsumeBytes|, but also appends the terminator value at the end
     114                 :             : // of the resulting vector. Useful, when a mutable null-terminated C-string is
     115                 :             : // needed, for example. But that is a rare case. Better avoid it, if possible,
     116                 :             : // and prefer using |ConsumeBytes| or |ConsumeBytesAsString| methods.
     117                 :             : template <typename T>
     118                 :             : std::vector<T> FuzzedDataProvider::ConsumeBytesWithTerminator(size_t num_bytes,
     119                 :             :                                                               T terminator) {
     120                 :             :   num_bytes = std::min(num_bytes, remaining_bytes_);
     121                 :             :   std::vector<T> result = ConsumeBytes<T>(num_bytes + 1, num_bytes);
     122                 :             :   result.back() = terminator;
     123                 :             :   return result;
     124                 :             : }
     125                 :             : 
     126                 :             : // Returns a std::vector containing all remaining bytes of the input data.
     127                 :             : template <typename T>
     128                 :        1532 : std::vector<T> FuzzedDataProvider::ConsumeRemainingBytes() {
     129                 :        1532 :   return ConsumeBytes<T>(remaining_bytes_);
     130                 :             : }
     131                 :             : 
     132                 :             : // Returns a std::string containing |num_bytes| of input data. Using this and
     133                 :             : // |.c_str()| on the resulting string is the best way to get an immutable
     134                 :             : // null-terminated C string. If fewer than |num_bytes| of data remain, returns
     135                 :             : // a shorter std::string containing all of the data that's left.
     136                 :      126808 : inline std::string FuzzedDataProvider::ConsumeBytesAsString(size_t num_bytes) {
     137                 :      126808 :   static_assert(sizeof(std::string::value_type) == sizeof(uint8_t),
     138                 :             :                 "ConsumeBytesAsString cannot convert the data to a string.");
     139                 :             : 
     140         [ +  + ]:      126808 :   num_bytes = std::min(num_bytes, remaining_bytes_);
     141                 :      126808 :   std::string result(
     142                 :      126808 :       reinterpret_cast<const std::string::value_type *>(data_ptr_), num_bytes);
     143                 :      126808 :   Advance(num_bytes);
     144                 :      126808 :   return result;
     145                 :             : }
     146                 :             : 
     147                 :             : // Returns a std::string of length from 0 to |max_length|. When it runs out of
     148                 :             : // input data, returns what remains of the input. Designed to be more stable
     149                 :             : // with respect to a fuzzer inserting characters than just picking a random
     150                 :             : // length and then consuming that many bytes with |ConsumeBytes|.
     151                 :             : inline std::string
     152                 :     3675164 : FuzzedDataProvider::ConsumeRandomLengthString(size_t max_length) {
     153                 :             :   // Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", and maps "\"
     154                 :             :   // followed by anything else to the end of the string. As a result of this
     155                 :             :   // logic, a fuzzer can insert characters into the string, and the string
     156                 :             :   // will be lengthened to include those new characters, resulting in a more
     157                 :             :   // stable fuzzer than picking the length of a string independently from
     158                 :             :   // picking its contents.
     159         [ +  + ]:     3675164 :   std::string result;
     160                 :             : 
     161                 :             :   // Reserve the anticipated capacity to prevent several reallocations.
     162   [ +  +  +  - ]:     7217669 :   result.reserve(std::min(max_length, remaining_bytes_));
     163   [ +  +  +  + ]:   987789374 :   for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) {
     164                 :   987068367 :     char next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
     165                 :   987068367 :     Advance(1);
     166   [ +  +  +  + ]:   987068367 :     if (next == '\\' && remaining_bytes_ != 0) {
     167                 :     3504671 :       next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
     168                 :     3504671 :       Advance(1);
     169         [ +  + ]:     3504671 :       if (next != '\\')
     170                 :             :         break;
     171                 :             :     }
     172         [ +  - ]:  1968228420 :     result += next;
     173                 :             :   }
     174                 :             : 
     175                 :     3675164 :   result.shrink_to_fit();
     176                 :     3675164 :   return result;
     177                 :           0 : }
     178                 :             : 
     179                 :             : // Returns a std::string of length from 0 to |remaining_bytes_|.
     180                 :     1783951 : inline std::string FuzzedDataProvider::ConsumeRandomLengthString() {
     181         [ +  - ]:     1783951 :   return ConsumeRandomLengthString(remaining_bytes_);
           [ +  -  +  - ]
     182                 :             : }
     183                 :             : 
     184                 :             : // Returns a std::string containing all remaining bytes of the input data.
     185                 :             : // Prefer using |ConsumeRemainingBytes| unless you actually need a std::string
     186                 :             : // object.
     187                 :         163 : inline std::string FuzzedDataProvider::ConsumeRemainingBytesAsString() {
     188         [ +  - ]:         163 :   return ConsumeBytesAsString(remaining_bytes_);
     189                 :             : }
     190                 :             : 
     191                 :             : // Returns a number in the range [Type's min, Type's max]. The value might
     192                 :             : // not be uniformly distributed in the given range. If there's no input data
     193                 :             : // left, always returns |min|.
     194                 :    89197833 : template <typename T> T FuzzedDataProvider::ConsumeIntegral() {
     195                 :    89197833 :   return ConsumeIntegralInRange(std::numeric_limits<T>::min(),
     196                 :    89197833 :                                 std::numeric_limits<T>::max());
     197                 :             : }
     198                 :             : 
     199                 :             : // Returns a number in the range [min, max] by consuming bytes from the
     200                 :             : // input data. The value might not be uniformly distributed in the given
     201                 :             : // range. If there's no input data left, always returns |min|. |min| must
     202                 :             : // be less than or equal to |max|.
     203                 :             : template <typename T>
     204                 :   168298999 : T FuzzedDataProvider::ConsumeIntegralInRange(T min, T max) {
     205                 :             :   static_assert(std::is_integral<T>::value, "An integral type is required.");
     206                 :             :   static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");
     207                 :             : 
     208         [ -  + ]:   168298999 :   if (min > max)
     209                 :           0 :     abort();
     210                 :             : 
     211                 :             :   // Use the biggest type possible to hold the range and the result.
     212                 :   168298999 :   uint64_t range = static_cast<uint64_t>(max) - static_cast<uint64_t>(min);
     213                 :   168298999 :   uint64_t result = 0;
     214                 :   168298999 :   size_t offset = 0;
     215                 :             : 
     216   [ +  +  +  + ]:   418641670 :   while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 &&
     217         [ +  + ]:   257619353 :          remaining_bytes_ != 0) {
     218                 :             :     // Pull bytes off the end of the seed data. Experimentally, this seems to
     219                 :             :     // allow the fuzzer to more easily explore the input space. This makes
     220                 :             :     // sense, since it works by modifying inputs that caused new code to run,
     221                 :             :     // and this data is often used to encode length of data read by
     222                 :             :     // |ConsumeBytes|. Separating out read lengths makes it easier to modify the
     223                 :             :     // contents of the data that is actually read.
     224                 :   250342671 :     --remaining_bytes_;
     225                 :   250342671 :     result = (result << CHAR_BIT) | data_ptr_[remaining_bytes_];
     226                 :   250342671 :     offset += CHAR_BIT;
     227                 :             :   }
     228                 :             : 
     229                 :             :   // Avoid division by 0, in case |range + 1| results in overflow.
     230         [ +  + ]:   168298999 :   if (range != std::numeric_limits<decltype(range)>::max())
     231                 :   163050930 :     result = result % (range + 1);
     232                 :             : 
     233                 :   168298999 :   return static_cast<T>(static_cast<uint64_t>(min) + result);
     234                 :             : }
     235                 :             : 
     236                 :             : // Returns a floating point value in the range [Type's lowest, Type's max] by
     237                 :             : // consuming bytes from the input data. If there's no input data left, always
     238                 :             : // returns approximately 0.
     239                 :       65353 : template <typename T> T FuzzedDataProvider::ConsumeFloatingPoint() {
     240                 :       65353 :   return ConsumeFloatingPointInRange<T>(std::numeric_limits<T>::lowest(),
     241                 :       65353 :                                         std::numeric_limits<T>::max());
     242                 :             : }
     243                 :             : 
     244                 :             : // Returns a floating point value in the given range by consuming bytes from
     245                 :             : // the input data. If there's no input data left, returns |min|. Note that
     246                 :             : // |min| must be less than or equal to |max|.
     247                 :             : template <typename T>
     248                 :       65353 : T FuzzedDataProvider::ConsumeFloatingPointInRange(T min, T max) {
     249         [ -  + ]:       65353 :   if (min > max)
     250                 :           0 :     abort();
     251                 :             : 
     252                 :       65353 :   T range = .0;
     253                 :       65353 :   T result = min;
     254                 :       65353 :   constexpr T zero(.0);
     255   [ +  -  +  -  :       65353 :   if (max > zero && min < zero && max > min + std::numeric_limits<T>::max()) {
                   +  - ]
     256                 :             :     // The diff |max - min| would overflow the given floating point type. Use
     257                 :             :     // the half of the diff as the range and consume a bool to decide whether
     258                 :             :     // the result is in the first or the second part of the diff.
     259                 :       65353 :     range = (max / 2.0) - (min / 2.0);
     260         [ +  + ]:       65353 :     if (ConsumeBool()) {
     261                 :       47719 :       result += range;
     262                 :             :     }
     263                 :             :   } else {
     264                 :           0 :     range = max - min;
     265                 :             :   }
     266                 :             : 
     267                 :       65353 :   return result + range * ConsumeProbability<T>();
     268                 :             : }
     269                 :             : 
     270                 :             : // Returns a floating point number in the range [0.0, 1.0]. If there's no
     271                 :             : // input data left, always returns 0.
     272                 :       65353 : template <typename T> T FuzzedDataProvider::ConsumeProbability() {
     273                 :             :   static_assert(std::is_floating_point<T>::value,
     274                 :             :                 "A floating point type is required.");
     275                 :             : 
     276                 :             :   // Use different integral types for different floating point types in order
     277                 :             :   // to provide better density of the resulting values.
     278                 :             :   using IntegralType =
     279                 :             :       typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), uint32_t,
     280                 :             :                                 uint64_t>::type;
     281                 :             : 
     282                 :       65353 :   T result = static_cast<T>(ConsumeIntegral<IntegralType>());
     283                 :       65353 :   result /= static_cast<T>(std::numeric_limits<IntegralType>::max());
     284                 :       65353 :   return result;
     285                 :             : }
     286                 :             : 
     287                 :             : // Reads one byte and returns a bool, or false when no data remains.
     288                 :    62685539 : inline bool FuzzedDataProvider::ConsumeBool() {
     289 [ +  + ][ +  -  :    63555542 :   return 1 & ConsumeIntegral<uint8_t>();
             +  -  +  - ]
     290                 :             : }
     291                 :             : 
     292                 :             : // Returns an enum value. The enum must start at 0 and be contiguous. It must
     293                 :             : // also contain |kMaxValue| aliased to its largest (inclusive) value. Such as:
     294                 :             : // enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue };
     295                 :             : template <typename T> T FuzzedDataProvider::ConsumeEnum() {
     296                 :             :   static_assert(std::is_enum<T>::value, "|T| must be an enum type.");
     297                 :             :   return static_cast<T>(
     298                 :             :       ConsumeIntegralInRange<uint32_t>(0, static_cast<uint32_t>(T::kMaxValue)));
     299                 :             : }
     300                 :             : 
     301                 :             : // Returns a copy of the value selected from the given fixed-size |array|.
     302                 :             : template <typename T, size_t size>
     303                 :     3295492 : T FuzzedDataProvider::PickValueInArray(const T (&array)[size]) {
     304                 :             :   static_assert(size > 0, "The array must be non empty.");
     305                 :     3295492 :   return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
     306                 :             : }
     307                 :             : 
     308                 :             : template <typename T, size_t size>
     309                 :      796484 : T FuzzedDataProvider::PickValueInArray(const std::array<T, size> &array) {
     310                 :             :   static_assert(size > 0, "The array must be non empty.");
     311                 :      796484 :   return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
     312                 :             : }
     313                 :             : 
     314                 :             : template <typename T>
     315         [ -  + ]:     8729368 : T FuzzedDataProvider::PickValueInArray(std::initializer_list<const T> list) {
     316                 :             :   // TODO(Dor1s): switch to static_assert once C++14 is allowed.
     317         [ -  + ]:     8729368 :   if (!list.size())
     318                 :           0 :     abort();
     319                 :             : 
     320                 :     8729368 :   return *(list.begin() + ConsumeIntegralInRange<size_t>(0, list.size() - 1));
     321                 :             : }
     322                 :             : 
     323                 :             : // Writes |num_bytes| of input data to the given destination pointer. If there
     324                 :             : // is not enough data left, writes all remaining bytes. Return value is the
     325                 :             : // number of bytes written.
     326                 :             : // In general, it's better to avoid using this function, but it may be useful
     327                 :             : // in cases when it's necessary to fill a certain buffer or object with
     328                 :             : // fuzzing data.
     329                 :           0 : inline size_t FuzzedDataProvider::ConsumeData(void *destination,
     330                 :             :                                               size_t num_bytes) {
     331         [ #  # ]:           0 :   num_bytes = std::min(num_bytes, remaining_bytes_);
     332                 :           0 :   CopyAndAdvance(destination, num_bytes);
     333                 :           0 :   return num_bytes;
     334                 :             : }
     335                 :             : 
     336                 :             : // Private methods.
     337                 :     5200086 : inline void FuzzedDataProvider::CopyAndAdvance(void *destination,
     338                 :             :                                                size_t num_bytes) {
     339                 :     5200086 :   std::memcpy(destination, data_ptr_, num_bytes);
     340                 :     5200086 :   Advance(num_bytes);
     341                 :     5200086 : }
     342                 :             : 
     343                 :   995899932 : inline void FuzzedDataProvider::Advance(size_t num_bytes) {
     344         [ -  + ]:   995899932 :   if (num_bytes > remaining_bytes_)
     345                 :           0 :     abort();
     346                 :             : 
     347                 :   995899932 :   data_ptr_ += num_bytes;
     348                 :   995899932 :   remaining_bytes_ -= num_bytes;
     349                 :   995899932 : }
     350                 :             : 
     351                 :             : template <typename T>
     352                 :     5280392 : std::vector<T> FuzzedDataProvider::ConsumeBytes(size_t size, size_t num_bytes) {
     353                 :             :   static_assert(sizeof(T) == sizeof(uint8_t), "Incompatible data type.");
     354                 :             : 
     355                 :             :   // The point of using the size-based constructor below is to increase the
     356                 :             :   // odds of having a vector object with capacity being equal to the length.
     357                 :             :   // That part is always implementation specific, but at least both libc++ and
     358                 :             :   // libstdc++ allocate the requested number of bytes in that constructor,
     359                 :             :   // which seems to be a natural choice for other implementations as well.
     360                 :             :   // To increase the odds even more, we also call |shrink_to_fit| below.
     361                 :     5280392 :   std::vector<T> result(size);
     362         [ +  + ]:     5280392 :   if (size == 0) {
     363         [ -  + ]:       80306 :     if (num_bytes != 0)
     364                 :           0 :       abort();
     365                 :             :     return result;
     366                 :             :   }
     367                 :             : 
     368                 :     5200086 :   CopyAndAdvance(result.data(), num_bytes);
     369                 :             : 
     370                 :             :   // Even though |shrink_to_fit| is also implementation specific, we expect it
     371                 :             :   // to provide an additional assurance in case vector's constructor allocated
     372                 :             :   // a buffer which is larger than the actual amount of data we put inside it.
     373                 :     5200086 :   result.shrink_to_fit();
     374                 :     5200086 :   return result;
     375                 :             : }
     376                 :             : 
     377                 :             : template <typename TS, typename TU>
     378                 :   990573038 : TS FuzzedDataProvider::ConvertUnsignedToSigned(TU value) {
     379                 :             :   static_assert(sizeof(TS) == sizeof(TU), "Incompatible data types.");
     380                 :             :   static_assert(!std::numeric_limits<TU>::is_signed,
     381                 :             :                 "Source type must be unsigned.");
     382                 :             : 
     383                 :             :   // TODO(Dor1s): change to `if constexpr` once C++17 becomes mainstream.
     384                 :             :   if (std::numeric_limits<TS>::is_modulo)
     385                 :             :     return static_cast<TS>(value);
     386                 :             : 
     387                 :             :   // Avoid using implementation-defined unsigned to signed conversions.
     388                 :             :   // To learn more, see https://stackoverflow.com/questions/13150449.
     389                 :   990573038 :   if (value <= std::numeric_limits<TS>::max()) {
     390                 :             :     return static_cast<TS>(value);
     391                 :             :   } else {
     392                 :   990573038 :     constexpr auto TS_min = std::numeric_limits<TS>::min();
     393                 :             :     return TS_min + static_cast<TS>(value - TS_min);
     394                 :             :   }
     395                 :             : }
     396                 :             : 
     397                 :             : #endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
        

Generated by: LCOV version 2.0-1