// // LinearHashTable.h // // Library: Foundation // Package: Hashing // Module: LinearHashTable // // Definition of the LinearHashTable class. // // Copyright (c) 2006, Applied Informatics Software Engineering GmbH. // and Contributors. // // SPDX-License-Identifier: BSL-1.0 // #ifndef Foundation_LinearHashTable_INCLUDED #define Foundation_LinearHashTable_INCLUDED #include "Poco/Foundation.h" #include "Poco/Hash.h" #include #include #include namespace Poco { template > class LinearHashTable /// This class implements a linear hash table. /// /// In a linear hash table, the available address space /// grows or shrinks dynamically. A linear hash table thus /// supports any number of insertions or deletions without /// lookup or insertion performance deterioration. /// /// Linear hashing was discovered by Witold Litwin in 1980 /// and described in the paper LINEAR HASHING: A NEW TOOL FOR FILE AND TABLE ADDRESSING. /// /// For more information on linear hashing, see . /// /// The LinearHashTable is not thread safe. /// /// Value must support comparison for equality. /// /// Find, insert and delete operations are basically O(1) with regard /// to the total number of elements in the table, and O(N) with regard /// to the number of elements in the bucket where the element is stored. /// On average, every bucket stores one element; the exact number depends /// on the quality of the hash function. In most cases, the maximum number of /// elements in a bucket should not exceed 3. { public: using ValueType = Value; using Reference = Value &; using ConstReference = const Value &; using Pointer = Value *; using ConstPointer = const Value *; using Hash = HashFunc; using Bucket = std::vector; using BucketVec = std::vector; using BucketIterator = typename Bucket::iterator; using BucketVecIterator = typename BucketVec::iterator; class ConstIterator { public: using iterator_category = std::forward_iterator_tag; using value_type = Value; using difference_type = ptrdiff_t; using pointer = Value*; using reference = Value&; ConstIterator(): _initialized(false) { } ConstIterator(const BucketVecIterator& vecIt, const BucketVecIterator& endIt, const BucketIterator& buckIt): _vecIt(vecIt), _endIt(endIt), _buckIt(buckIt), _initialized(true) { } ConstIterator(const ConstIterator& it): _vecIt(it._vecIt), _endIt(it._endIt), _buckIt(it._buckIt), _initialized(it._initialized) { } ConstIterator& operator = (const ConstIterator& it) { ConstIterator tmp(it); swap(tmp); return *this; } void swap(ConstIterator& it) noexcept { using std::swap; // uninitialized iterators crash when swapped if (_initialized) { swap(_vecIt, it._vecIt); swap(_endIt, it._endIt); swap(_buckIt, it._buckIt); swap(_initialized, it._initialized); } else { _vecIt = it._vecIt; _endIt = it._endIt; _buckIt = it._buckIt; _initialized = it._initialized; } } bool operator == (const ConstIterator& it) const { return _vecIt == it._vecIt && (_vecIt == _endIt || _buckIt == it._buckIt); } bool operator != (const ConstIterator& it) const { return _vecIt != it._vecIt || (_vecIt != _endIt && _buckIt != it._buckIt); } const typename Bucket::value_type& operator * () const { return *_buckIt; } const typename Bucket::value_type* operator -> () const { return &*_buckIt; } ConstIterator& operator ++ () // prefix { if (_vecIt != _endIt) { ++_buckIt; while (_vecIt != _endIt && _buckIt == _vecIt->end()) { ++_vecIt; if (_vecIt != _endIt) _buckIt = _vecIt->begin(); } } return *this; } ConstIterator operator ++ (int) // postfix { ConstIterator tmp(*this); ++*this; return tmp; } protected: BucketVecIterator _vecIt; BucketVecIterator _endIt; BucketIterator _buckIt; bool _initialized; friend class LinearHashTable; }; class Iterator: public ConstIterator { public: Iterator() = default; Iterator(const BucketVecIterator& vecIt, const BucketVecIterator& endIt, const BucketIterator& buckIt): ConstIterator(vecIt, endIt, buckIt) { } Iterator(const Iterator& it): ConstIterator(it) { } Iterator& operator = (const Iterator& it) { Iterator tmp(it); ConstIterator::swap(tmp); return *this; } void swap(Iterator& it) noexcept { ConstIterator::swap(it); } typename Bucket::value_type& operator * () { return *this->_buckIt; } const typename Bucket::value_type& operator * () const { return *this->_buckIt; } typename Bucket::value_type* operator -> () { return &*this->_buckIt; } const typename Bucket::value_type* operator -> () const { return &*this->_buckIt; } Iterator& operator ++ () // prefix { ConstIterator::operator ++ (); return *this; } Iterator operator ++ (int) // postfix { Iterator tmp(*this); ++*this; return tmp; } friend class LinearHashTable; }; LinearHashTable(std::size_t initialReserve = 64) /// Creates the LinearHashTable, using the given initialReserve. { _buckets.reserve(calcSize(initialReserve)); _buckets.push_back(Bucket()); } LinearHashTable(const LinearHashTable& table): _buckets(table._buckets), _split(table._split), _front(table._front), _size(table._size) /// Creates the LinearHashTable by copying another one. { } ~LinearHashTable() = default; /// Destroys the LinearHashTable. LinearHashTable& operator = (const LinearHashTable& table) /// Assigns another LinearHashTable. { LinearHashTable tmp(table); swap(tmp); return *this; } void swap(LinearHashTable& table) noexcept /// Swaps the LinearHashTable with another one. { using std::swap; swap(_buckets, table._buckets); swap(_split, table._split); swap(_front, table._front); swap(_size, table._size); } ConstIterator begin() const /// Returns an iterator pointing to the first entry, if one exists. { BucketVecIterator it(_buckets.begin()); BucketVecIterator itEnd(_buckets.end()); while (it != itEnd && it->empty()) { ++it; } if (it == itEnd) return end(); else return ConstIterator(it, itEnd, it->begin()); } ConstIterator end() const /// Returns an iterator pointing to the end of the table. { return ConstIterator(_buckets.end(), _buckets.end(), _buckets.front().end()); } Iterator begin() /// Returns an iterator pointing to the first entry, if one exists. { BucketVecIterator it(_buckets.begin()); BucketVecIterator itEnd(_buckets.end()); while (it != itEnd && it->empty()) { ++it; } if (it == itEnd) return end(); else return Iterator(it, itEnd, it->begin()); } Iterator end() /// Returns an iterator pointing to the end of the table. { return Iterator(_buckets.end(), _buckets.end(), _buckets.front().end()); } ConstIterator find(const Value& value) const /// Finds an entry in the table. { std::size_t addr = bucketAddress(value); BucketVecIterator it(_buckets.begin() + addr); BucketIterator buckIt(std::find(it->begin(), it->end(), value)); if (buckIt != it->end()) return ConstIterator(it, _buckets.end(), buckIt); else return end(); } Iterator find(const Value& value) /// Finds an entry in the table. { std::size_t addr = bucketAddress(value); BucketVecIterator it(_buckets.begin() + addr); BucketIterator buckIt(std::find(it->begin(), it->end(), value)); if (buckIt != it->end()) return Iterator(it, _buckets.end(), buckIt); else return end(); } std::size_t count(const Value& value) const /// Returns the number of elements with the given /// value, with is either 1 or 0. { return find(value) != end() ? 1 : 0; } std::pair insert(const Value& value) /// Inserts an element into the table. /// /// If the element already exists in the table, /// a pair(iterator, false) with iterator pointing to the /// existing element is returned. /// Otherwise, the element is inserted an a /// pair(iterator, true) with iterator /// pointing to the new element is returned. { std::size_t hash = _hash(value); std::size_t addr = bucketAddressForHash(hash); BucketVecIterator it(_buckets.begin() + addr); BucketIterator buckIt(std::find(it->begin(), it->end(), value)); if (buckIt == it->end()) { split(); addr = bucketAddressForHash(hash); it = _buckets.begin() + addr; buckIt = it->insert(it->end(), value); ++_size; return std::make_pair(Iterator(it, _buckets.end(), buckIt), true); } else { return std::make_pair(Iterator(it, _buckets.end(), buckIt), false); } } void erase(Iterator it) /// Erases the element pointed to by it. { if (it != end()) { it._vecIt->erase(it._buckIt); --_size; merge(); } } void erase(const Value& value) /// Erases the element with the given value, if it exists. { Iterator it = find(value); erase(it); } void clear() /// Erases all elements. { LinearHashTable emptyTable; swap(emptyTable); } std::size_t size() const /// Returns the number of elements in the table. { return _size; } bool empty() const /// Returns true iff the table is empty. { return _size == 0; } std::size_t buckets() const /// Returns the number of allocated buckets. { return _buckets.size(); } protected: std::size_t bucketAddress(const Value& value) const { std::size_t n = _hash(value); if (n % _front >= _split) return n % _front; else return n % (2*_front); } std::size_t bucketAddressForHash(std::size_t hash) { if (hash % _front >= _split) return hash % _front; else return hash % (2*_front); } void split() { if (_split == _front) { _split = 0; _front *= 2; _buckets.reserve(_front*2); } Bucket tmp; _buckets.push_back(tmp); _buckets[_split].swap(tmp); ++_split; for (BucketIterator it = tmp.begin(); it != tmp.end(); ++it) { using std::swap; std::size_t addr = bucketAddress(*it); _buckets[addr].push_back(Value()); swap(*it, _buckets[addr].back()); } } void merge() { if (_split == 0) { _front /= 2; _split = _front; } --_split; Bucket tmp; tmp.swap(_buckets.back()); _buckets.pop_back(); for (BucketIterator it = tmp.begin(); it != tmp.end(); ++it) { using std::swap; std::size_t addr = bucketAddress(*it); _buckets[addr].push_back(Value()); swap(*it, _buckets[addr].back()); } } static std::size_t calcSize(std::size_t initialSize) { std::size_t size = 32; while (size < initialSize) size *= 2; return size; } private: // Evil hack: _buckets must be mutable because both ConstIterator and Iterator hold // ordinary iterator's (not const_iterator's). mutable BucketVec _buckets; std::size_t _split{0}; std::size_t _front{1}; std::size_t _size{0}; HashFunc _hash; }; } // namespace Poco #endif // Foundation_LinearHashTable_INCLUDED