VectorSearch/lib_vector_search/src/bucket_finder.cpp

85 lines
2.3 KiB
C++
Raw Normal View History

2024-03-21 12:23:26 +00:00
#include "bucket_finder.h"
#include <mutex>
#include <strings.h>
#include <thread>
void Bucket::insert(const WordList &word_list, size_t first_index,
size_t last_index) {
2024-03-22 09:59:35 +00:00
for (auto index = first_index; index < last_index; ++index) {
2024-03-21 12:23:26 +00:00
const auto &current_word = word_list[index];
2024-03-22 09:59:35 +00:00
directory_[current_word.front()].push_back(&current_word);
2024-03-21 12:23:26 +00:00
}
}
// Returns pointers to every indexed word that starts with `search_term`.
//
// The lookup first selects the directory entry for the first character of
// `search_term` and then filters that entry with starts_with(), so only words
// sharing the first character are examined.
//
// An empty `search_term` yields an empty result: it has no first character to
// look up, and calling front() on an empty string_view is undefined behaviour.
WordRefList Bucket::find_prefix(std::string_view search_term) const {
  if (search_term.empty()) {
    return {};
  }

  const auto directory_it = directory_.find(search_term.front());
  if (directory_it == directory_.cend()) {
    // No word in this bucket starts with that character.
    return {};
  }

  WordRefList result;
  for (const auto *word : directory_it->second) {
    if (word->starts_with(search_term)) {
      result.push_back(word);
    }
  }
  return result;
}
// Splits `word_list` into contiguous slices, one per bucket, and fills every
// bucket on its own thread. All threads are joined before the constructor
// returns.
//
// The number of buckets is the reported hardware concurrency, capped at the
// number of words so that no bucket is created without work. An empty
// `word_list` creates no buckets at all.
//
// The buckets are filled from `word_list` by reference (see Bucket::insert),
// so the caller is expected to keep `word_list` alive while this finder is
// in use.
BucketFinder::BucketFinder(const WordList &word_list) {
  if (word_list.empty()) {
    return;
  }

  const size_t word_list_size = word_list.size();

  // hardware_concurrency() is only a hint and may return 0 when the value
  // cannot be determined; fall back to a single bucket in that case so the
  // division below never divides by zero.
  const size_t hardware_threads = std::thread::hardware_concurrency();
  const size_t usable_threads =
      hardware_threads == 0 ? size_t{1} : hardware_threads;
  const size_t bucket_count = std::min<size_t>(usable_threads, word_list_size);
  const size_t bucket_size = word_list_size / bucket_count;
  buckets_.resize(bucket_count);

  std::vector<std::thread> threads;
  threads.reserve(bucket_count);
  for (size_t bucket_index = 0; bucket_index < bucket_count; ++bucket_index) {
    auto &bucket = buckets_[bucket_index];
    // The last bucket also takes the remainder left by the integer division.
    const bool is_last_bucket = bucket_index == bucket_count - 1;
    const size_t first_index = bucket_index * bucket_size;
    const size_t last_index =
        is_last_bucket ? word_list_size : first_index + bucket_size;
    // Each thread writes to its own bucket only, so no locking is needed.
    threads.emplace_back([&bucket, &word_list, first_index, last_index] {
      bucket.insert(word_list, first_index, last_index);
    });
  }

  for (auto &thread : threads) {
    thread.join();
  }
}
// Returns pointers to every indexed word that starts with `search_term`.
//
// Each bucket is searched on its own thread and the per-bucket results are
// appended to one shared list under a mutex. The order of the returned words
// therefore depends on the order in which the threads finish.
//
// An empty `search_term` yields an empty result without starting any thread:
// Bucket::find_prefix reads the first character of the search term, which
// does not exist for an empty string_view.
WordRefList BucketFinder::find_prefix(std::string_view search_term) const {
  WordRefList search_results;
  if (search_term.empty()) {
    return search_results;
  }

  std::mutex search_results_mutex;

  std::vector<std::thread> threads;
  threads.reserve(buckets_.size());
  for (const auto &bucket : buckets_) {
    threads.emplace_back([&] {
      // Search without holding the lock; only the merge is serialized.
      auto thread_search_results = bucket.find_prefix(search_term);
      if (!thread_search_results.empty()) {
        std::lock_guard result_lock(search_results_mutex);
        std::move(thread_search_results.begin(), thread_search_results.end(),
                  std::back_inserter(search_results));
      }
    });
  }

  // All threads capture locals by reference, so they must finish before
  // this function returns.
  for (auto &thread : threads) {
    thread.join();
  }
  return search_results;
}