// VectorSearch/lib_vector_search/include/bucket_finder.h
//
// 50 lines
// 1.5 KiB
// C++

#pragma once
#include "finder.h"
#include "word_list.h"
#include <cstddef>
#include <string_view>
#include <unordered_map>
#include <vector>
/** Holds one part of a WordRefList, with its entries split up and
 * (hash-)mapped by their first characters.
 *
 * A Bucket is intended to be read and written by a single thread, so it
 * needs no synchronization of its own.
 */
class Bucket {
public:
  /// Adds references to the words of word_list whose indices lie in the
  /// half-open range [first_index, last_index): first_index is included,
  /// last_index is not.
  void insert(const WordList &word_list, size_t first_index, size_t last_index);

  /// Looks up every word that begins with search_term.
  /// @return A list with references to the matching words.
  WordRefList find_prefix(std::string_view search_term) const;

private:
  /// Word references grouped under a char key (the first character, per the
  /// class description above).
  std::unordered_map<char, WordRefList> directory_;
};
/** Efficient, parallel prefix search over a list of strings.
 *
 * References to all input strings are stored in a tree-like structure that
 * provides fast and lock-free parallel insertion and parallel (with minimal
 * synchronization) search for all words that start with a given term.
 */
class BucketFinder : public Finder {
public:
  /// Creates a BucketFinder over all words in word_list.
  ///
  /// Declared explicit so that a WordList is never implicitly converted into
  /// a BucketFinder (for example when passed where a finder is expected),
  /// which would otherwise build a complete index without the caller asking
  /// for one.
  explicit BucketFinder(const WordList &word_list);

  /// Find all words that start with search_term.
  /// @param search_term Prefix that every returned word must start with.
  /// @return A list with references to the results.
  WordRefList find_prefix(std::string_view search_term) const override;

private:
  /// Inserts references to all words from word_list.
  void insert(const WordList &word_list);

  /// The buckets that together hold the references to the indexed words.
  std::vector<Bucket> buckets_;
};