2024-03-21 12:23:26 +00:00
|
|
|
#include "bucket_finder.h"
|
|
|
|
|
2024-03-23 10:24:46 +00:00
|
|
|
#include <algorithm>
|
|
|
|
#include <iterator>
|
2024-03-21 12:23:26 +00:00
|
|
|
#include <mutex>
|
2024-03-23 10:24:46 +00:00
|
|
|
#include <string>
|
2024-03-21 12:23:26 +00:00
|
|
|
#include <thread>
|
2024-03-23 10:24:46 +00:00
|
|
|
#include <vector>
|
2024-03-21 12:23:26 +00:00
|
|
|
|
2024-03-23 11:22:53 +00:00
|
|
|
// class Bucket
|
|
|
|
|
2024-03-21 12:23:26 +00:00
|
|
|
void Bucket::insert(const WordList &word_list, size_t first_index,
|
|
|
|
size_t last_index) {
|
2024-03-22 09:59:35 +00:00
|
|
|
for (auto index = first_index; index < last_index; ++index) {
|
2024-03-21 12:23:26 +00:00
|
|
|
const auto ¤t_word = word_list[index];
|
2024-03-22 09:59:35 +00:00
|
|
|
directory_[current_word.front()].push_back(¤t_word);
|
2024-03-21 12:23:26 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-03-21 19:16:00 +00:00
|
|
|
/// Collects every word stored in this bucket that starts with search_term.
///
/// The lookup first narrows the candidates to the words sharing the first
/// character of search_term, then checks each candidate's full prefix.
///
/// @param search_term Prefix to look for. An empty term has no first
///                    character to look up and yields an empty result.
/// @return Pointers to the matching words, in the order they were inserted
///         for that first character; empty when nothing matches.
WordRefList Bucket::find_prefix(std::string_view search_term) const {
  // front() on an empty string_view is undefined behaviour, so bail out
  // before touching the directory.
  if (search_term.empty()) {
    return {};
  }

  const auto directory_it = directory_.find(search_term.front());
  if (directory_it == directory_.cend()) {
    return {};
  }

  WordRefList result;
  for (const auto *word : directory_it->second) {
    if (word->starts_with(search_term)) {
      result.push_back(word);
    }
  }
  return result;
}
|
|
|
|
|
2024-03-23 11:22:53 +00:00
|
|
|
// class BucketFinder
|
|
|
|
|
2024-03-23 10:24:46 +00:00
|
|
|
/// Constructs the finder and immediately indexes word_list by delegating to
/// insert().
BucketFinder::BucketFinder(const WordList &word_list) {
  this->insert(word_list);
}
|
|
|
|
|
|
|
|
void BucketFinder::insert(const WordList &word_list) {
|
2024-03-21 12:23:26 +00:00
|
|
|
if (word_list.empty()) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2024-03-23 10:24:46 +00:00
|
|
|
const size_t max_threads = std::thread::hardware_concurrency();
|
2024-03-21 12:23:26 +00:00
|
|
|
const size_t word_list_size = word_list.size();
|
2024-03-23 10:24:46 +00:00
|
|
|
const size_t bucket_count = std::min<size_t>(max_threads, word_list_size);
|
2024-03-21 12:23:26 +00:00
|
|
|
const size_t bucket_size = word_list_size / bucket_count;
|
|
|
|
|
|
|
|
buckets_.resize(bucket_count);
|
|
|
|
|
2024-03-23 10:24:46 +00:00
|
|
|
std::vector<std::thread> insert_threads;
|
2024-03-21 12:23:26 +00:00
|
|
|
for (auto bucket_index = 0; bucket_index < bucket_count; ++bucket_index) {
|
2024-03-23 11:22:53 +00:00
|
|
|
auto &thread_bucket = buckets_[bucket_index];
|
2024-03-21 12:23:26 +00:00
|
|
|
|
2024-03-23 10:24:46 +00:00
|
|
|
const bool is_last_bucket = bucket_index == bucket_count - 1;
|
|
|
|
const size_t first_word_index = bucket_index * bucket_size;
|
|
|
|
const size_t last_word_index =
|
|
|
|
is_last_bucket ? word_list_size : first_word_index + bucket_size;
|
2024-03-21 19:56:48 +00:00
|
|
|
|
2024-03-23 10:24:46 +00:00
|
|
|
insert_threads.emplace_back([&, first_word_index, last_word_index] {
|
2024-03-23 11:22:53 +00:00
|
|
|
thread_bucket.insert(word_list, first_word_index, last_word_index);
|
2024-03-21 12:23:26 +00:00
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2024-03-23 10:24:46 +00:00
|
|
|
for (auto &thread : insert_threads) {
|
2024-03-21 12:23:26 +00:00
|
|
|
thread.join();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-03-21 19:16:00 +00:00
|
|
|
/// Searches every bucket for words starting with search_term and merges the
/// per-bucket matches into one list.
///
/// One thread is started per bucket; each thread appends its matches to the
/// shared result under a mutex, and all threads are joined before returning.
/// Because the threads append as they finish, the relative order of matches
/// coming from different buckets is not fixed.
///
/// @param search_term Prefix to look for. An empty term yields an empty
///                    result without starting any threads.
/// @return Pointers to all matching words across the buckets; empty when
///         nothing matches.
WordRefList BucketFinder::find_prefix(std::string_view search_term) const {
  WordRefList search_results;
  // The per-bucket lookup reads the first character of the term, which does
  // not exist for an empty term; answer directly instead of spawning threads.
  if (search_term.empty()) {
    return search_results;
  }

  std::mutex search_results_mutex;

  std::vector<std::thread> search_threads;
  search_threads.reserve(buckets_.size());
  for (const auto &bucket : buckets_) {
    search_threads.emplace_back(
        [&bucket, &search_results, &search_results_mutex, search_term] {
          auto thread_search_results = bucket.find_prefix(search_term);
          if (!thread_search_results.empty()) {
            // Search outside the lock; hold it only while merging.
            const std::lock_guard result_lock(search_results_mutex);
            std::move(thread_search_results.begin(),
                      thread_search_results.end(),
                      std::back_inserter(search_results));
          }
        });
  }

  for (auto &thread : search_threads) {
    thread.join();
  }

  return search_results;
}
|