docs: describe BucketFinder
This commit is contained in:
parent
c1b4fc67e9
commit
e4c643880a
2 changed files with 35 additions and 14 deletions
|
@ -10,17 +10,34 @@ private:
|
|||
std::unordered_map<char, WordRefList> directory_;
|
||||
|
||||
public:
|
||||
/// Inserts references to all words from word_list between first_index and
|
||||
/// last_index, including first_index, excluding last_index.
|
||||
void insert(const WordList &word_list, size_t first_index, size_t last_index);
|
||||
|
||||
/// Finds all words that start with search_term.
|
||||
/// @return A list with references to the results.
|
||||
WordRefList find_prefix(std::string_view search_term) const;
|
||||
};
|
||||
|
||||
/** This class provides efficient, parallel search over a list of strings.
|
||||
*
|
||||
* References to all input strings are stored in a tree-like structure that
|
||||
* provides fast and lock-free parallel insertion and parallel (with minimal
|
||||
* synchronization) search for all words that start with a given term.
|
||||
*
* The words are distributed over several Bucket instances. The constructor
* hands the word list to insert(), which fills every bucket from its own
* thread. find_prefix() runs the per-bucket searches in threads and merges
* their results into one list while holding a mutex.
*
* NOTE(review): only references to the words are stored, so word_list
* presumably has to outlive this object -- confirm against WordRefList.
*/
|
||||
class BucketFinder : public Finder {
|
||||
private:
|
||||
/// The buckets that hold the word references; resized and filled by insert().
std::vector<Bucket> buckets_;
|
||||

||||
public:
|
||||
/// Creates a BucketFinder over all words in word_list.
/// @param word_list The words to search later; passed on to insert() as is.
|
||||
BucketFinder(const WordList &word_list);
|
||||

||||
/// Finds all words that start with search_term.
/// @param search_term The prefix every result has to start with.
|
||||
/// @return A list with references to the results.
/// NOTE(review): per-bucket results are appended as the search threads
/// finish, so the order across buckets looks scheduling-dependent -- confirm.
|
||||
WordRefList find_prefix(std::string_view search_term) const override;
|
||||

||||
private:
|
||||
/// Inserts references to all words from word_list.
/// The words are split into contiguous index ranges, one range per bucket,
/// and each range is inserted by a separate thread.
|
||||
void insert(const WordList &word_list);
|
||||
};
|
||||
|
|
|
@ -1,8 +1,11 @@
|
|||
#include "bucket_finder.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <mutex>
|
||||
#include <strings.h>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
void Bucket::insert(const WordList &word_list, size_t first_index,
|
||||
size_t last_index) {
|
||||
|
@ -28,34 +31,35 @@ WordRefList Bucket::find_prefix(std::string_view search_term) const {
|
|||
return result;
|
||||
}
|
||||
|
||||
BucketFinder::BucketFinder(const WordList &word_list) {
|
||||
/// Builds the finder by passing word_list straight to insert(); the
/// constructor does no other work.
BucketFinder::BucketFinder(const WordList &word_list) { insert(word_list); }
|
||||
|
||||
/// Splits word_list into contiguous index ranges, one per bucket, and inserts
/// each range into its bucket from a dedicated thread. Returns once every
/// insertion thread has been joined. An empty word_list is a no-op.
void BucketFinder::insert(const WordList &word_list) {
|
||||
if (word_list.empty()) {
|
||||
return;
|
||||
}
|
||||

||||
// NOTE(review): std::thread::hardware_concurrency() is allowed to return 0
// when the value cannot be determined. bucket_count would then be 0 and the
// bucket_size division below would divide by zero. Consider clamping to 1.
const size_t max_threads = std::thread::hardware_concurrency();
|
||||
const size_t word_list_size = word_list.size();
|
||||
const size_t bucket_count =
|
||||
std::min<size_t>(std::thread::hardware_concurrency(), word_list_size);
|
||||
// Never use more buckets than there are words.
const size_t bucket_count = std::min<size_t>(max_threads, word_list_size);
|
||||
// Integer division: the remainder is picked up by the last bucket below.
const size_t bucket_size = word_list_size / bucket_count;
|
||||

||||
buckets_.resize(bucket_count);
|
||||

||||
std::vector<std::thread> threads;
|
||||
std::vector<std::thread> insert_threads;
|
||||
// NOTE(review): `auto bucket_index = 0` deduces int, which is then compared
// with the size_t bucket_count (signed/unsigned mix).
for (auto bucket_index = 0; bucket_index < bucket_count; ++bucket_index) {
|
||||
auto &bucket = buckets_[bucket_index];
|
||||

||||
bool is_last_bucket = bucket_index == bucket_count - 1;
|
||||
// The last bucket's range runs to the end of the list so that no word is
// dropped when word_list_size is not a multiple of bucket_count.
const bool is_last_bucket = bucket_index == bucket_count - 1;
|
||||
const size_t first_word_index = bucket_index * bucket_size;
|
||||
const size_t last_word_index =
|
||||
is_last_bucket ? word_list_size : first_word_index + bucket_size;
|
||||

||||
const size_t first_index = bucket_index * bucket_size;
|
||||
const size_t last_index =
|
||||
is_last_bucket ? word_list_size : first_index + bucket_size;
|
||||

||||
threads.emplace_back([&, first_index, last_index] {
|
||||
bucket.insert(word_list, first_index, last_index);
|
||||
// The range bounds are captured by value because they change on every
// iteration; everything else is captured by reference, and the threads are
// joined below before this function returns.
insert_threads.emplace_back([&, first_word_index, last_word_index] {
|
||||
bucket.insert(word_list, first_word_index, last_word_index);
|
||||
});
|
||||
}
|
||||

||||
for (auto &thread : threads) {
|
||||
// Wait for every insertion thread before returning.
for (auto &thread : insert_threads) {
|
||||
thread.join();
|
||||
}
|
||||
}
|
||||
|
@ -69,7 +73,7 @@ WordRefList BucketFinder::find_prefix(std::string_view search_term) const {
|
|||
threads.emplace_back([&] {
|
||||
auto thread_search_results = bucket.find_prefix(search_term);
|
||||
if (!thread_search_results.empty()) {
|
||||
std::lock_guard result_lock(search_results_mutex);
|
||||
const std::lock_guard result_lock(search_results_mutex);
|
||||
std::move(thread_search_results.begin(), thread_search_results.end(),
|
||||
std::back_inserter(search_results));
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue