feat: add bucket finder
This commit is contained in:
parent
4b42f4c12a
commit
32be1eaa95
6 changed files with 126 additions and 1 deletions
84
lib_vector_search/src/bucket_finder.cpp
Normal file
84
lib_vector_search/src/bucket_finder.cpp
Normal file
|
@ -0,0 +1,84 @@
|
|||
#include "bucket_finder.h"

#include <algorithm>
#include <mutex>
#include <strings.h>
#include <thread>
|
||||
|
||||
void Bucket::insert(const WordList &word_list, size_t first_index,
|
||||
size_t last_index) {
|
||||
for (size_t index = first_index; index < last_index; ++index) {
|
||||
const auto ¤t_word = word_list[index];
|
||||
groups_[current_word.front()].push_back(¤t_word);
|
||||
}
|
||||
}
|
||||
|
||||
std::forward_list<const std::string *>
|
||||
Bucket::find_prefix(std::string_view search_term) const {
|
||||
const auto group = groups_.find(search_term.front());
|
||||
if (group == groups_.cend()) {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::forward_list<const std::string *> result;
|
||||
for (const auto *word : group->second) {
|
||||
if (word->starts_with(search_term)) {
|
||||
result.push_front(word);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Builds the search index by splitting `word_list` into contiguous slices and
/// filling one bucket per slice, each on its own thread.
///
/// The number of buckets is the hardware thread count, capped at the number of
/// words. Every slice holds `word_list.size() / bucket_count` words, except the
/// last one, which also takes the remainder. An empty `word_list` leaves the
/// finder without buckets.
///
/// @param word_list Words to index. It is passed by reference to
///                  Bucket::insert, which stores addresses of its elements, so
///                  it must outlive this finder.
BucketFinder::BucketFinder(const WordList &word_list) {
  if (word_list.empty()) {
    return;
  }

  const size_t word_list_size = word_list.size();
  // hardware_concurrency() is allowed to return 0 when the value cannot be
  // determined; fall back to a single bucket to avoid dividing by zero below.
  const size_t hardware_threads =
      std::max<size_t>(1, std::thread::hardware_concurrency());
  const size_t bucket_count = std::min(hardware_threads, word_list_size);
  const size_t bucket_size = word_list_size / bucket_count;

  buckets_.resize(bucket_count);

  std::vector<std::thread> insert_threads;
  insert_threads.reserve(bucket_count);
  for (size_t bucket_index = 0; bucket_index < bucket_count; ++bucket_index) {
    const size_t first_index = bucket_index * bucket_size;
    // The last bucket absorbs the words left over by the integer division.
    const size_t last_index = (bucket_index == bucket_count - 1)
                                  ? word_list_size
                                  : (bucket_index + 1) * bucket_size;

    auto &bucket = buckets_[bucket_index];

    // Each thread writes to its own bucket only, so no locking is needed.
    insert_threads.emplace_back([&bucket, &word_list, first_index, last_index] {
      bucket.insert(word_list, first_index, last_index);
    });
  }

  for (auto &thread : insert_threads) {
    thread.join();
  }
}
|
||||
|
||||
std::forward_list<const std::string *>
|
||||
BucketFinder::find_prefix(std::string_view search_term) const {
|
||||
std::forward_list<const std::string *> result;
|
||||
std::mutex result_mutex;
|
||||
|
||||
std::vector<std::thread> search_threads;
|
||||
for (const auto &bucket : buckets_) {
|
||||
|
||||
search_threads.emplace_back([&] {
|
||||
auto thread_result = bucket.find_prefix(search_term);
|
||||
if (!thread_result.empty()) {
|
||||
std::lock_guard<std::mutex> result_lock(result_mutex);
|
||||
result.merge(thread_result);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
for (auto &thread : search_threads) {
|
||||
thread.join();
|
||||
}
|
||||
|
||||
return result;
|
||||
};
|
Loading…
Add table
Add a link
Reference in a new issue