From 95cc7223e8604ea621b692501057189f186cf252 Mon Sep 17 00:00:00 2001 From: Michael Mandl Date: Thu, 21 Mar 2024 09:57:13 +0100 Subject: [PATCH] refactor: extract WordRefList and thread finder --- lib_vector_search/include/grouped_finder.h | 5 +-- lib_vector_search/include/parallel_finder.h | 2 +- .../include/sorted_linear_finder.h | 4 +- lib_vector_search/include/word_list.h | 15 +++++++ lib_vector_search/src/grouped_finder.cpp | 39 ++++++----------- lib_vector_search/src/parallel_finder.cpp | 43 ++++++------------- .../src/sorted_linear_finder.cpp | 1 + lib_vector_search/src/word_list.cpp | 38 ++++++++++++++++ 8 files changed, 84 insertions(+), 63 deletions(-) diff --git a/lib_vector_search/include/grouped_finder.h b/lib_vector_search/include/grouped_finder.h index b562330..572395a 100644 --- a/lib_vector_search/include/grouped_finder.h +++ b/lib_vector_search/include/grouped_finder.h @@ -4,15 +4,14 @@ #include "word_list.h" #include -#include class GroupedFinder : public Finder { private: - std::map> groups_; + std::map groups_; public: GroupedFinder(const WordList &word_list); virtual std::forward_list - find_prefix(std::string_view search_term) const override; + find_prefix(std::string_view search_prefix) const override; }; diff --git a/lib_vector_search/include/parallel_finder.h b/lib_vector_search/include/parallel_finder.h index 55c776b..543096b 100644 --- a/lib_vector_search/include/parallel_finder.h +++ b/lib_vector_search/include/parallel_finder.h @@ -11,5 +11,5 @@ public: ParallelFinder(const WordList &word_list); std::forward_list - find_prefix(std::string_view search_term) const override; + find_prefix(std::string_view search_prefix) const override; }; diff --git a/lib_vector_search/include/sorted_linear_finder.h b/lib_vector_search/include/sorted_linear_finder.h index 866c2b2..b2437d0 100644 --- a/lib_vector_search/include/sorted_linear_finder.h +++ b/lib_vector_search/include/sorted_linear_finder.h @@ -3,11 +3,9 @@ #include "finder.h" #include "word_list.h" -#include - class SortedLinearFinder : public Finder { private: - std::vector word_list_; + WordRefList word_list_; public: SortedLinearFinder(const WordList &word_list); diff --git a/lib_vector_search/include/word_list.h b/lib_vector_search/include/word_list.h index ca52d9a..ba8b22f 100644 --- a/lib_vector_search/include/word_list.h +++ b/lib_vector_search/include/word_list.h @@ -1,6 +1,8 @@ #pragma once #include +#include +#include #include #include @@ -11,4 +13,17 @@ public: static WordList fourCaps(); static WordList fromFile(const std::filesystem::path &path); + + static void find_prefix_in_range( + const WordList &word_list, const std::string_view &search_prefix, + size_t start_index, size_t end_index, + std::forward_list &result, std::mutex &result_mutex); +}; + +class WordRefList : public std::vector { +public: + static void find_prefix_in_range( + const WordRefList &word_list, const std::string_view &search_prefix, + size_t start_index, size_t end_index, + std::forward_list &result, std::mutex &result_mutex); }; diff --git a/lib_vector_search/src/grouped_finder.cpp b/lib_vector_search/src/grouped_finder.cpp index 39b8b43..412ebae 100644 --- a/lib_vector_search/src/grouped_finder.cpp +++ b/lib_vector_search/src/grouped_finder.cpp @@ -5,8 +5,8 @@ #include #include -using std::mutex, std::vector, std::thread, std::lock_guard, std::string, - std::forward_list, std::string_view; +using std::mutex, std::vector, std::thread, std::string, std::forward_list, + std::string_view; GroupedFinder::GroupedFinder(const WordList &word_list) { for (const auto &word : word_list) { @@ -15,8 +15,8 @@ GroupedFinder::GroupedFinder(const WordList &word_list) { } std::forward_list -GroupedFinder::find_prefix(std::string_view search_term) const { - const auto group = groups_.find(search_term.front()); +GroupedFinder::find_prefix(std::string_view search_prefix) const { + const auto group = groups_.find(search_prefix.front()); if (group == groups_.cend()) { return {}; } @@ -27,41 +27,26 @@ GroupedFinder::find_prefix(std::string_view search_term) const { const auto thread_count = std::min(std::thread::hardware_concurrency(), word_list_size); - forward_list matching_words; - mutex matching_words_mutex; + forward_list result; + mutex result_mutex; vector search_threads; for (size_t thread_index = 0; thread_index < thread_count; ++thread_index) { - const size_t first_word_index = - thread_index * (word_list_size / thread_count); - const size_t last_word_index = + const size_t first_index = thread_index * (word_list_size / thread_count); + + const size_t last_index = (thread_index == thread_count - 1) ? word_list_size : (thread_index + 1) * (word_list_size / thread_count); search_threads.emplace_back( - [](const vector &word_list, - const string_view &search_term, forward_list &result, - size_t start_index, size_t end_index, mutex &result_mutex) { - forward_list thread_results; - for (size_t index = start_index; index < end_index; ++index) { - const auto ¤t_word = word_list[index]; - if (current_word->starts_with(search_term)) { - thread_results.push_front(current_word); - } - } - if (!thread_results.empty()) { - const lock_guard lock(result_mutex); - result.merge(thread_results); - } - }, - cref(word_list), cref(search_term), ref(matching_words), - first_word_index, last_word_index, ref(matching_words_mutex)); + WordRefList::find_prefix_in_range, cref(word_list), cref(search_prefix), + first_index, last_index, ref(result), ref(result_mutex)); } for (auto &thread : search_threads) { thread.join(); } - return matching_words; + return result; } diff --git a/lib_vector_search/src/parallel_finder.cpp b/lib_vector_search/src/parallel_finder.cpp index 1b7effe..15c6a06 100644 --- a/lib_vector_search/src/parallel_finder.cpp +++ b/lib_vector_search/src/parallel_finder.cpp @@ -3,51 +3,36 @@ #include #include -using std::mutex, std::thread, std::lock_guard, std::vector, std::forward_list, - std::string, std::string_view; +using std::mutex, std::thread, std::vector, std::forward_list, std::string, + std::string_view; ParallelFinder::ParallelFinder(const WordList &word_list) : word_list_(word_list) {} forward_list -ParallelFinder::find_prefix(string_view search_term) const { +ParallelFinder::find_prefix(string_view search_prefix) const { forward_list result; mutex result_mutex; - const auto word_list_size = word_list_.size(); + const size_t word_list_size = word_list_.size(); + const size_t thread_count = + std::min(thread::hardware_concurrency(), word_list_size); - const size_t thread_count = thread::hardware_concurrency(); - - vector threads; + vector search_threads; for (size_t thread_index = 0; thread_index < thread_count; ++thread_index) { - const size_t first_word_index = - thread_index * (word_list_size / thread_count); - const size_t last_word_index = + const size_t first_index = thread_index * (word_list_size / thread_count); + + const size_t last_index = (thread_index == thread_count - 1) ? word_list_size : (thread_index + 1) * (word_list_size / thread_count); - threads.emplace_back( - [](const WordList &word_list, const string_view &search_term, - forward_list &result, size_t start_index, - size_t end_index, mutex &result_mutex) { - forward_list thread_results; - for (size_t index = start_index; index < end_index; ++index) { - const auto ¤t_word = word_list[index]; - if (current_word.starts_with(search_term)) { - thread_results.push_front(¤t_word); - } - } - if (!thread_results.empty()) { - const lock_guard lock(result_mutex); - result.merge(thread_results); - } - }, - cref(word_list_), cref(search_term), ref(result), first_word_index, - last_word_index, ref(result_mutex)); + search_threads.emplace_back( + WordList::find_prefix_in_range, cref(word_list_), cref(search_prefix), + first_index, last_index, ref(result), ref(result_mutex)); } - for (auto &thread : threads) { + for (auto &thread : search_threads) { thread.join(); } diff --git a/lib_vector_search/src/sorted_linear_finder.cpp b/lib_vector_search/src/sorted_linear_finder.cpp index db11154..6e8fbc1 100644 --- a/lib_vector_search/src/sorted_linear_finder.cpp +++ b/lib_vector_search/src/sorted_linear_finder.cpp @@ -14,6 +14,7 @@ SortedLinearFinder::SortedLinearFinder(const WordList &word_list) { word_list_.begin(), word_list_.end(), [](const string *left, const string *right) { return *left < *right; }); } + forward_list SortedLinearFinder::find_prefix(string_view search_term) const { forward_list matching_words; diff --git a/lib_vector_search/src/word_list.cpp b/lib_vector_search/src/word_list.cpp index db01a19..dc91ede 100644 --- a/lib_vector_search/src/word_list.cpp +++ b/lib_vector_search/src/word_list.cpp @@ -57,3 +57,41 @@ WordList WordList::fromFile(const std::filesystem::path &path) { return word_list; } + +void WordList::find_prefix_in_range( + const WordList &word_list, const std::string_view &search_prefix, + size_t start_index, size_t end_index, + std::forward_list &result, std::mutex &result_mutex) { + std::forward_list local_results; + + for (size_t index = start_index; index < end_index; ++index) { + const auto ¤t_word = word_list[index]; + if (current_word.starts_with(search_prefix)) { + local_results.push_front(¤t_word); + } + } + + if (!local_results.empty()) { + const std::lock_guard lock(result_mutex); + result.merge(local_results); + } +}; + +void WordRefList::find_prefix_in_range( + const WordRefList &word_list, const std::string_view &search_prefix, + size_t start_index, size_t end_index, + std::forward_list &result, std::mutex &result_mutex) { + std::forward_list local_results; + + for (size_t index = start_index; index < end_index; ++index) { + const auto *current_word = word_list[index]; + if (current_word->starts_with(search_prefix)) { + local_results.push_front(current_word); + } + } + + if (!local_results.empty()) { + const std::lock_guard lock(result_mutex); + result.merge(local_results); + } +};