Branch data Line data Source code
1 : : #ifndef _MINISKETCH_H_
2 : : #define _MINISKETCH_H_ 1
3 : :
4 : : #include <stdint.h>
5 : : #include <stdlib.h>
6 : :
7 : : #ifdef _MSC_VER
8 : : # include <BaseTsd.h>
9 : : typedef SSIZE_T ssize_t;
10 : : #else
11 : : # include <unistd.h>
12 : : #endif
13 : :
14 : : #ifndef MINISKETCH_API
15 : : # if defined(_WIN32)
16 : : # ifdef MINISKETCH_BUILD
17 : : # define MINISKETCH_API __declspec(dllexport)
18 : : # else
19 : : # define MINISKETCH_API
20 : : # endif
21 : : # elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(MINISKETCH_BUILD)
22 : : # define MINISKETCH_API __attribute__ ((visibility ("default")))
23 : : # else
24 : : # define MINISKETCH_API
25 : : # endif
26 : : #endif
27 : :
28 : : #ifdef __cplusplus
29 : : # if __cplusplus >= 201103L
30 : : # include <memory>
31 : : # include <vector>
32 : : # include <cassert>
33 : : # if __cplusplus >= 201703L
34 : : # include <optional>
35 : : # endif // __cplusplus >= 201703L
36 : : # endif // __cplusplus >= 201103L
37 : : extern "C" {
38 : : #endif // __cplusplus
39 : :
40 : : /** Opaque type for decoded sketches. */
41 : : typedef struct minisketch minisketch;
42 : :
43 : : /** Determine whether support for elements of `bits` bits was compiled in. */
44 : : MINISKETCH_API int minisketch_bits_supported(uint32_t bits);
45 : :
46 : : /** Determine the maximum number of implementations available.
47 : : *
48 : : * Multiple implementations may be available for a given element size, with
49 : : * different performance characteristics on different hardware.
50 : : *
51 : : * Each implementation is identified by a number from 0 to the output of this
52 : : * function call, inclusive. Note that not every combination of implementation
53 : : * and element size may exist (see further).
54 : : */
55 : : MINISKETCH_API uint32_t minisketch_implementation_max(void);
56 : :
57 : : /** Determine if the a combination of bits and implementation number is available.
58 : : *
59 : : * Returns 1 if it is, 0 otherwise.
60 : : */
61 : : MINISKETCH_API int minisketch_implementation_supported(uint32_t bits, uint32_t implementation);
62 : :
63 : : /** Construct a sketch for a given element size, implementation and capacity.
64 : : *
65 : : * If the combination of `bits` and `implementation` is unavailable, or when
66 : : * OOM occurs, NULL is returned. If minisketch_implementation_supported
67 : : * returns 1 for the specified bits and implementation, this will always succeed
68 : : * (except when allocation fails).
69 : : *
70 : : * If the result is not NULL, it must be destroyed using minisketch_destroy.
71 : : */
72 : : MINISKETCH_API minisketch* minisketch_create(uint32_t bits, uint32_t implementation, size_t capacity);
73 : :
74 : : /** Get the element size of a sketch in bits. */
75 : : MINISKETCH_API uint32_t minisketch_bits(const minisketch* sketch);
76 : :
77 : : /** Get the capacity of a sketch. */
78 : : MINISKETCH_API size_t minisketch_capacity(const minisketch* sketch);
79 : :
80 : : /** Get the implementation of a sketch. */
81 : : MINISKETCH_API uint32_t minisketch_implementation(const minisketch* sketch);
82 : :
83 : : /** Set the seed for randomizing algorithm choices to a fixed value.
84 : : *
85 : : * By default, sketches are initialized with a random seed. This is important
86 : : * to avoid scenarios where an attacker could force worst-case behavior.
87 : : *
88 : : * This function initializes the seed to a user-provided value (any 64-bit
89 : : * integer is acceptable, regardless of field size).
90 : : *
91 : : * When seed is -1, a fixed internal value with predictable behavior is
92 : : * used. It is only intended for testing.
93 : : */
94 : : MINISKETCH_API void minisketch_set_seed(minisketch* sketch, uint64_t seed);
95 : :
96 : : /** Clone a sketch.
97 : : *
98 : : * The result must be destroyed using minisketch_destroy.
99 : : */
100 : : MINISKETCH_API minisketch* minisketch_clone(const minisketch* sketch);
101 : :
102 : : /** Destroy a sketch.
103 : : *
104 : : * The pointer that was passed in may not be used anymore afterwards.
105 : : */
106 : : MINISKETCH_API void minisketch_destroy(minisketch* sketch);
107 : :
108 : : /** Compute the size in bytes for serializing a given sketch. */
109 : : MINISKETCH_API size_t minisketch_serialized_size(const minisketch* sketch);
110 : :
111 : : /** Serialize a sketch to bytes. */
112 : : MINISKETCH_API void minisketch_serialize(const minisketch* sketch, unsigned char* output);
113 : :
114 : : /** Deserialize a sketch from bytes. */
115 : : MINISKETCH_API void minisketch_deserialize(minisketch* sketch, const unsigned char* input);
116 : :
117 : : /** Add an element to a sketch.
118 : : *
119 : : * If the element to be added is too large for the sketch, the most significant
120 : : * bits of the element are dropped. More precisely, if the element size of
121 : : * `sketch` is b bits, then this function adds the unsigned integer represented
122 : : * by the b least significant bits of `element` to `sketch`.
123 : : *
124 : : * If the element to be added is 0 (after potentially dropping the most significant
125 : : * bits), then this function is a no-op. Sketches cannot contain an element with
126 : : * the value 0.
127 : : *
128 : : * Note that adding the same element a second time removes it again.
129 : : */
130 : : MINISKETCH_API void minisketch_add_uint64(minisketch* sketch, uint64_t element);
131 : :
132 : : /** Merge the elements of another sketch into this sketch.
133 : : *
134 : : * After merging, `sketch` will contain every element that existed in one but not
135 : : * both of the input sketches. It can be seen as an exclusive or operation on
136 : : * the set elements. If the capacity of `other_sketch` is lower than `sketch`'s,
137 : : * merging reduces the capacity of `sketch` to that of `other_sketch`.
138 : : *
139 : : * This function returns the capacity of `sketch` after merging has been performed
140 : : * (where this capacity is at least 1), or 0 to indicate that merging has failed because
141 : : * the two input sketches differ in their element size or implementation. If 0 is
142 : : * returned, `sketch` (and its capacity) have not been modified.
143 : : *
144 : : * It is also possible to perform this operation directly on the serializations
145 : : * of two sketches with the same element size and capacity by performing a bitwise XOR
146 : : * of the serializations.
147 : : */
148 : : MINISKETCH_API size_t minisketch_merge(minisketch* sketch, const minisketch* other_sketch);
149 : :
150 : : /** Decode a sketch.
151 : : *
152 : : * `output` is a pointer to an array of `max_element` uint64_t's, which will be
153 : : * filled with the elements in this sketch.
154 : : *
155 : : * The return value is the number of decoded elements, or -1 if decoding failed.
156 : : */
157 : : MINISKETCH_API ssize_t minisketch_decode(const minisketch* sketch, size_t max_elements, uint64_t* output);
158 : :
159 : : /** Compute the capacity needed to achieve a certain rate of false positives.
160 : : *
161 : : * A sketch with capacity c and no more than c elements can always be decoded
162 : : * correctly. However, if it has more than c elements, or contains just random
163 : : * bytes, it is possible that it will still decode, but the result will be
164 : : * nonsense. This can be counteracted by increasing the capacity slightly.
165 : : *
166 : : * Given a field size bits, an intended number of elements that can be decoded
167 : : * max_elements, and a false positive probability of 1 in 2**fpbits, this
168 : : * function computes the necessary capacity. It is only guaranteed to be
169 : : * accurate up to fpbits=256.
170 : : */
171 : : MINISKETCH_API size_t minisketch_compute_capacity(uint32_t bits, size_t max_elements, uint32_t fpbits);
172 : :
173 : : /** Compute what max_elements can be decoded for a certain rate of false positives.
174 : : *
175 : : * This is the inverse operation of minisketch_compute_capacity. It determines,
176 : : * given a field size bits, a capacity of a sketch, and an acceptable false
177 : : * positive probability of 1 in 2**fpbits, what the maximum allowed
178 : : * max_elements value is. If no value of max_elements would give the desired
179 : : * false positive probability, 0 is returned.
180 : : *
181 : : * Note that this is not an exact inverse of minisketch_compute_capacity. For
182 : : * example, with bits=32, fpbits=16, and max_elements=8,
183 : : * minisketch_compute_capacity will return 9, as capacity 8 would only have a
184 : : * false positive chance of 1 in 2^15.3. Increasing the capacity to 9 however
185 : : * decreases the fp chance to 1 in 2^47.3, enough for max_elements=9 (with fp
186 : : * chance of 1 in 2^18.5). Therefore, minisketch_compute_max_elements with
187 : : * capacity=9 will return 9.
188 : : */
189 : : MINISKETCH_API size_t minisketch_compute_max_elements(uint32_t bits, size_t capacity, uint32_t fpbits);
190 : :
191 : : #ifdef __cplusplus
192 : : }
193 : :
194 : : #if __cplusplus >= 201103L
195 : : /** Simple RAII C++11 wrapper around the minisketch API. */
196 [ + - - - ]: 544 : class Minisketch
[ + - + -
- + + - +
- - - - -
- - - - -
- ]
197 : : {
198 : : struct Deleter
199 : : {
200 : 422 : void operator()(minisketch* ptr) const
201 : : {
202 : 422 : minisketch_destroy(ptr);
203 : 422 : }
204 : : };
205 : :
206 : : std::unique_ptr<minisketch, Deleter> m_minisketch;
207 : :
208 : : public:
209 : : /** Check whether the library supports fields of the given size. */
210 : : static bool BitsSupported(uint32_t bits) noexcept { return minisketch_bits_supported(bits); }
211 : :
212 : : /** Get the highest supported implementation number. */
213 : 1 : static uint32_t MaxImplementation() noexcept { return minisketch_implementation_max(); }
214 : :
215 : : /** Check whether the library supports fields with a given size and implementation number.
216 : : * If a particular field size `bits` is supported, implementation 0 is always supported for it.
217 : : * Higher implementation numbers may or may not be available as well, up to MaxImplementation().
218 : : */
219 : 23 : static bool ImplementationSupported(uint32_t bits, uint32_t implementation) noexcept { return minisketch_implementation_supported(bits, implementation); }
220 : :
221 : : /** Given field size and a maximum number of decodable elements n, compute what capacity c to
222 : : * use so that sketches with more elements than n have a chance no higher than 2^-fpbits of
223 : : * being decoded incorrectly (and will instead fail when decoding for up to n elements).
224 : : *
225 : : * See minisketch_compute_capacity for more details. */
226 : 0 : static size_t ComputeCapacity(uint32_t bits, size_t max_elements, uint32_t fpbits) noexcept { return minisketch_compute_capacity(bits, max_elements, fpbits); }
227 : :
228 : : /** Reverse operation of ComputeCapacity. See minisketch_compute_max_elements. */
229 : : static size_t ComputeMaxElements(uint32_t bits, size_t capacity, uint32_t fpbits) noexcept { return minisketch_compute_max_elements(bits, capacity, fpbits); }
230 : :
231 : : /** Construct a clone of the specified sketch. */
232 : : Minisketch(const Minisketch& sketch) noexcept
233 : : {
234 : : if (sketch.m_minisketch) {
235 : : m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_clone(sketch.m_minisketch.get()));
236 : : }
237 : : }
238 : :
239 : : /** Make this Minisketch a clone of the specified one. */
240 : : Minisketch& operator=(const Minisketch& sketch) noexcept
241 : : {
242 : : if (this != &sketch && sketch.m_minisketch) {
243 : : m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_clone(sketch.m_minisketch.get()));
244 : : }
245 : : return *this;
246 : : }
247 : :
248 : : /** Check whether this Minisketch object is valid. */
249 : : explicit operator bool() const noexcept { return bool{m_minisketch}; }
250 : :
251 : : /** Default constructor is deleted. */
252 : : Minisketch() noexcept = delete;
253 : :
254 : : /** Move constructor. */
255 : 100 : Minisketch(Minisketch&&) noexcept = default;
256 : :
257 : : /** Move assignment. */
258 : : Minisketch& operator=(Minisketch&&) noexcept = default;
259 : :
260 : : /** Construct a Minisketch object with the specified parameters.
261 : : *
262 : : * If bits is not BitsSupported(), or the combination of bits and capacity is not
263 : : * ImplementationSupported(), or OOM occurs internally, an invalid Minisketch
264 : : * object will be constructed. Use operator bool() to check that this isn't the
265 : : * case before performing any other operations. */
266 : 422 : Minisketch(uint32_t bits, uint32_t implementation, size_t capacity) noexcept
267 : 422 : {
268 [ - + ]: 422 : m_minisketch = std::unique_ptr<minisketch, Deleter>(minisketch_create(bits, implementation, capacity));
269 : 422 : }
270 : :
271 : : /** Create a Minisketch object sufficiently large for the specified number of elements at given fpbits.
272 : : * It may construct an invalid object, which you may need to check for. */
273 : 0 : static Minisketch CreateFP(uint32_t bits, uint32_t implementation, size_t max_elements, uint32_t fpbits) noexcept
274 : : {
275 : 0 : return Minisketch(bits, implementation, ComputeCapacity(bits, max_elements, fpbits));
276 : : }
277 : :
278 : : /** Return the field size for a (valid) Minisketch object. */
279 : : uint32_t GetBits() const noexcept { return minisketch_bits(m_minisketch.get()); }
280 : :
281 : : /** Return the capacity for a (valid) Minisketch object. */
282 : : size_t GetCapacity() const noexcept { return minisketch_capacity(m_minisketch.get()); }
283 : :
284 : : /** Return the implementation number for a (valid) Minisketch object. */
285 : : uint32_t GetImplementation() const noexcept { return minisketch_implementation(m_minisketch.get()); }
286 : :
287 : : /** Set the seed for a (valid) Minisketch object. See minisketch_set_seed(). */
288 : : Minisketch& SetSeed(uint64_t seed) noexcept
289 : : {
290 : : minisketch_set_seed(m_minisketch.get(), seed);
291 : : return *this;
292 : : }
293 : :
294 : : /** Add (or remove, if already present) an element to a (valid) Minisketch object.
295 : : * See minisketch_add_uint64(). */
296 : 1058220 : Minisketch& Add(uint64_t element) noexcept
297 : : {
298 : 1058220 : minisketch_add_uint64(m_minisketch.get(), element);
299 : 1058220 : return *this;
300 : : }
301 : :
302 : : /** Merge sketch into *this; both have to be valid Minisketch objects.
303 : : * See minisketch_merge for details. */
304 : 100 : Minisketch& Merge(const Minisketch& sketch) noexcept
305 : : {
306 : 100 : minisketch_merge(m_minisketch.get(), sketch.m_minisketch.get());
307 [ + - ]: 100 : return *this;
308 : : }
309 : :
310 : : /** Decode this (valid) Minisketch object into the result vector, up to as many elements as the
311 : : * vector's size permits. */
312 : : bool Decode(std::vector<uint64_t>& result) const
313 : : {
314 : : ssize_t ret = minisketch_decode(m_minisketch.get(), result.size(), result.data());
315 : : if (ret == -1) return false;
316 : : result.resize(ret);
317 : : return true;
318 : : }
319 : :
320 : : /** Get the serialized size in bytes for this (valid) Minisketch object.. */
321 : 400 : size_t GetSerializedSize() const noexcept { return minisketch_serialized_size(m_minisketch.get()); }
322 : :
323 : : /** Serialize this (valid) Minisketch object as a byte vector. */
324 : 200 : std::vector<unsigned char> Serialize() const
325 : : {
326 : 200 : std::vector<unsigned char> result(GetSerializedSize());
327 [ + - ]: 200 : minisketch_serialize(m_minisketch.get(), result.data());
328 : 200 : return result;
329 : 0 : }
330 : :
331 : : /** Deserialize into this (valid) Minisketch from an object containing its bytes (which has data()
332 : : * and size() members). */
333 : : template<typename T>
334 : 200 : Minisketch& Deserialize(
335 : : const T& obj,
336 : : typename std::enable_if<
337 : : std::is_convertible<typename std::remove_pointer<decltype(obj.data())>::type (*)[], const unsigned char (*)[]>::value &&
338 : : std::is_convertible<decltype(obj.size()), std::size_t>::value,
339 : : std::nullptr_t
340 : : >::type = nullptr) noexcept
341 : : {
342 [ - + - + ]: 200 : assert(GetSerializedSize() == obj.size());
343 : 200 : minisketch_deserialize(m_minisketch.get(), obj.data());
344 : 200 : return *this;
345 : : }
346 : :
347 : : #if __cplusplus >= 201703L
348 : : /** C++17 only: like Decode(), but up to a specified number of elements into an optional vector. */
349 : 122 : std::optional<std::vector<uint64_t>> Decode(size_t max_elements) const
350 : : {
351 : 122 : std::vector<uint64_t> result(max_elements);
352 [ + - ]: 122 : ssize_t ret = minisketch_decode(m_minisketch.get(), max_elements, result.data());
353 [ - + ]: 122 : if (ret == -1) return {};
354 [ + - ]: 122 : result.resize(ret);
355 : 122 : return result;
356 : 122 : }
357 : :
358 : : /** C++17 only: similar to Decode(), but with specified false positive probability. */
359 : : std::optional<std::vector<uint64_t>> DecodeFP(uint32_t fpbits) const
360 : : {
361 : : return Decode(ComputeMaxElements(GetBits(), GetCapacity(), fpbits));
362 : : }
363 : : #endif // __cplusplus >= 201703L
364 : : };
365 : : #endif // __cplusplus >= 201103L
366 : : #endif // __cplusplus
367 : :
368 : : #endif // _MINISKETCH_H_
|