VectorSearch/lib_vector_search/include/bucket_finder.h

50 lines
1.5 KiB
C
Raw Normal View History

2024-03-21 12:23:26 +00:00
#pragma once
#include "finder.h"
#include "word_list.h"
2024-03-22 14:37:13 +00:00
#include <unordered_map>
2024-03-21 12:23:26 +00:00
2024-03-23 11:01:46 +00:00
/** A Bucket contains a partial WordRefList, split-up and (hash-)mapped by their
* first characters.
*
* It's meant to be read and written by a single thread without the need for
* synchronization.
*/
2024-03-21 12:23:26 +00:00
class Bucket {
private:
2024-03-22 14:37:13 +00:00
std::unordered_map<char, WordRefList> directory_;
2024-03-21 12:23:26 +00:00
public:
2024-03-23 10:24:46 +00:00
/// Inserts references to all words from word_list between first_index and
/// last_index, including first_index, excluding last_index.
2024-03-21 12:23:26 +00:00
void insert(const WordList &word_list, size_t first_index, size_t last_index);
2024-03-23 10:24:46 +00:00
/// Find all words that start with search_term
/// @return A list with references to the results.
WordRefList find_prefix(std::string_view search_term) const;
2024-03-21 12:23:26 +00:00
};
2024-03-23 10:24:46 +00:00
/** This class provides efficient, parallel search over a list of strings.
*
* References to all input strings are stored in a tree-like structure that
* provides fast and lock-free parallel insertion and parallel (with minimal
* synchronization) search for all words that start with a given term.
*/
2024-03-21 12:23:26 +00:00
class BucketFinder : public Finder {
private:
std::vector<Bucket> buckets_;
public:
2024-03-23 10:24:46 +00:00
/// Creates a BucketFinder over all words in word_list.
2024-03-21 12:23:26 +00:00
BucketFinder(const WordList &word_list);
2024-03-23 10:24:46 +00:00
/// Find all words that start with search_term
/// @return A list with references to the results.
WordRefList find_prefix(std::string_view search_term) const override;
2024-03-23 10:24:46 +00:00
private:
/// Inserts references to all words from word_list.
void insert(const WordList &word_list);
2024-03-21 12:23:26 +00:00
};