From 95cc7223e8604ea621b692501057189f186cf252 Mon Sep 17 00:00:00 2001 From: Michael Mandl Date: Thu, 21 Mar 2024 09:57:13 +0100 Subject: [PATCH 1/4] refactor: extract WordRefList and thread finder --- lib_vector_search/include/grouped_finder.h | 5 +-- lib_vector_search/include/parallel_finder.h | 2 +- .../include/sorted_linear_finder.h | 4 +- lib_vector_search/include/word_list.h | 15 +++++++ lib_vector_search/src/grouped_finder.cpp | 39 ++++++----------- lib_vector_search/src/parallel_finder.cpp | 43 ++++++------------- .../src/sorted_linear_finder.cpp | 1 + lib_vector_search/src/word_list.cpp | 38 ++++++++++++++++ 8 files changed, 84 insertions(+), 63 deletions(-) diff --git a/lib_vector_search/include/grouped_finder.h b/lib_vector_search/include/grouped_finder.h index b562330..572395a 100644 --- a/lib_vector_search/include/grouped_finder.h +++ b/lib_vector_search/include/grouped_finder.h @@ -4,15 +4,14 @@ #include "word_list.h" #include -#include class GroupedFinder : public Finder { private: - std::map> groups_; + std::map groups_; public: GroupedFinder(const WordList &word_list); virtual std::forward_list - find_prefix(std::string_view search_term) const override; + find_prefix(std::string_view search_prefix) const override; }; diff --git a/lib_vector_search/include/parallel_finder.h b/lib_vector_search/include/parallel_finder.h index 55c776b..543096b 100644 --- a/lib_vector_search/include/parallel_finder.h +++ b/lib_vector_search/include/parallel_finder.h @@ -11,5 +11,5 @@ public: ParallelFinder(const WordList &word_list); std::forward_list - find_prefix(std::string_view search_term) const override; + find_prefix(std::string_view search_prefix) const override; }; diff --git a/lib_vector_search/include/sorted_linear_finder.h b/lib_vector_search/include/sorted_linear_finder.h index 866c2b2..b2437d0 100644 --- a/lib_vector_search/include/sorted_linear_finder.h +++ b/lib_vector_search/include/sorted_linear_finder.h @@ -3,11 +3,9 @@ #include "finder.h" #include "word_list.h" -#include - class SortedLinearFinder : public Finder { private: - std::vector word_list_; + WordRefList word_list_; public: SortedLinearFinder(const WordList &word_list); diff --git a/lib_vector_search/include/word_list.h b/lib_vector_search/include/word_list.h index ca52d9a..ba8b22f 100644 --- a/lib_vector_search/include/word_list.h +++ b/lib_vector_search/include/word_list.h @@ -1,6 +1,8 @@ #pragma once #include +#include +#include #include #include @@ -11,4 +13,17 @@ public: static WordList fourCaps(); static WordList fromFile(const std::filesystem::path &path); + + static void find_prefix_in_range( + const WordList &word_list, const std::string_view &search_prefix, + size_t start_index, size_t end_index, + std::forward_list &result, std::mutex &result_mutex); +}; + +class WordRefList : public std::vector { +public: + static void find_prefix_in_range( + const WordRefList &word_list, const std::string_view &search_prefix, + size_t start_index, size_t end_index, + std::forward_list &result, std::mutex &result_mutex); }; diff --git a/lib_vector_search/src/grouped_finder.cpp b/lib_vector_search/src/grouped_finder.cpp index 39b8b43..412ebae 100644 --- a/lib_vector_search/src/grouped_finder.cpp +++ b/lib_vector_search/src/grouped_finder.cpp @@ -5,8 +5,8 @@ #include #include -using std::mutex, std::vector, std::thread, std::lock_guard, std::string, - std::forward_list, std::string_view; +using std::mutex, std::vector, std::thread, std::string, std::forward_list, + std::string_view; GroupedFinder::GroupedFinder(const WordList &word_list) { for (const auto &word : word_list) { @@ -15,8 +15,8 @@ GroupedFinder::GroupedFinder(const WordList &word_list) { } std::forward_list -GroupedFinder::find_prefix(std::string_view search_term) const { - const auto group = groups_.find(search_term.front()); +GroupedFinder::find_prefix(std::string_view search_prefix) const { + const auto group = groups_.find(search_prefix.front()); if (group == groups_.cend()) { return {}; } @@ -27,41 +27,26 @@ GroupedFinder::find_prefix(std::string_view search_term) const { const auto thread_count = std::min(std::thread::hardware_concurrency(), word_list_size); - forward_list matching_words; - mutex matching_words_mutex; + forward_list result; + mutex result_mutex; vector search_threads; for (size_t thread_index = 0; thread_index < thread_count; ++thread_index) { - const size_t first_word_index = - thread_index * (word_list_size / thread_count); - const size_t last_word_index = + const size_t first_index = thread_index * (word_list_size / thread_count); + + const size_t last_index = (thread_index == thread_count - 1) ? word_list_size : (thread_index + 1) * (word_list_size / thread_count); search_threads.emplace_back( - [](const vector &word_list, - const string_view &search_term, forward_list &result, - size_t start_index, size_t end_index, mutex &result_mutex) { - forward_list thread_results; - for (size_t index = start_index; index < end_index; ++index) { - const auto ¤t_word = word_list[index]; - if (current_word->starts_with(search_term)) { - thread_results.push_front(current_word); - } - } - if (!thread_results.empty()) { - const lock_guard lock(result_mutex); - result.merge(thread_results); - } - }, - cref(word_list), cref(search_term), ref(matching_words), - first_word_index, last_word_index, ref(matching_words_mutex)); + WordRefList::find_prefix_in_range, cref(word_list), cref(search_prefix), + first_index, last_index, ref(result), ref(result_mutex)); } for (auto &thread : search_threads) { thread.join(); } - return matching_words; + return result; } diff --git a/lib_vector_search/src/parallel_finder.cpp b/lib_vector_search/src/parallel_finder.cpp index 1b7effe..15c6a06 100644 --- a/lib_vector_search/src/parallel_finder.cpp +++ b/lib_vector_search/src/parallel_finder.cpp @@ -3,51 +3,36 @@ #include #include -using std::mutex, std::thread, std::lock_guard, std::vector, std::forward_list, - std::string, std::string_view; +using std::mutex, std::thread, std::vector, std::forward_list, std::string, + std::string_view; ParallelFinder::ParallelFinder(const WordList &word_list) : word_list_(word_list) {} forward_list -ParallelFinder::find_prefix(string_view search_term) const { +ParallelFinder::find_prefix(string_view search_prefix) const { forward_list result; mutex result_mutex; - const auto word_list_size = word_list_.size(); + const size_t word_list_size = word_list_.size(); + const size_t thread_count = + std::min(thread::hardware_concurrency(), word_list_size); - const size_t thread_count = thread::hardware_concurrency(); - - vector threads; + vector search_threads; for (size_t thread_index = 0; thread_index < thread_count; ++thread_index) { - const size_t first_word_index = - thread_index * (word_list_size / thread_count); - const size_t last_word_index = + const size_t first_index = thread_index * (word_list_size / thread_count); + + const size_t last_index = (thread_index == thread_count - 1) ? word_list_size : (thread_index + 1) * (word_list_size / thread_count); - threads.emplace_back( - [](const WordList &word_list, const string_view &search_term, - forward_list &result, size_t start_index, - size_t end_index, mutex &result_mutex) { - forward_list thread_results; - for (size_t index = start_index; index < end_index; ++index) { - const auto ¤t_word = word_list[index]; - if (current_word.starts_with(search_term)) { - thread_results.push_front(¤t_word); - } - } - if (!thread_results.empty()) { - const lock_guard lock(result_mutex); - result.merge(thread_results); - } - }, - cref(word_list_), cref(search_term), ref(result), first_word_index, - last_word_index, ref(result_mutex)); + search_threads.emplace_back( + WordList::find_prefix_in_range, cref(word_list_), cref(search_prefix), + first_index, last_index, ref(result), ref(result_mutex)); } - for (auto &thread : threads) { + for (auto &thread : search_threads) { thread.join(); } diff --git a/lib_vector_search/src/sorted_linear_finder.cpp b/lib_vector_search/src/sorted_linear_finder.cpp index db11154..6e8fbc1 100644 --- a/lib_vector_search/src/sorted_linear_finder.cpp +++ b/lib_vector_search/src/sorted_linear_finder.cpp @@ -14,6 +14,7 @@ SortedLinearFinder::SortedLinearFinder(const WordList &word_list) { word_list_.begin(), word_list_.end(), [](const string *left, const string *right) { return *left < *right; }); } + forward_list SortedLinearFinder::find_prefix(string_view search_term) const { forward_list matching_words; diff --git a/lib_vector_search/src/word_list.cpp b/lib_vector_search/src/word_list.cpp index db01a19..dc91ede 100644 --- a/lib_vector_search/src/word_list.cpp +++ b/lib_vector_search/src/word_list.cpp @@ -57,3 +57,41 @@ WordList WordList::fromFile(const std::filesystem::path &path) { return word_list; } + +void WordList::find_prefix_in_range( + const WordList &word_list, const std::string_view &search_prefix, + size_t start_index, size_t end_index, + std::forward_list &result, std::mutex &result_mutex) { + std::forward_list local_results; + + for (size_t index = start_index; index < end_index; ++index) { + const auto ¤t_word = word_list[index]; + if (current_word.starts_with(search_prefix)) { + local_results.push_front(¤t_word); + } + } + + if (!local_results.empty()) { + const std::lock_guard lock(result_mutex); + result.merge(local_results); + } +}; + +void WordRefList::find_prefix_in_range( + const WordRefList &word_list, const std::string_view &search_prefix, + size_t start_index, size_t end_index, + std::forward_list &result, std::mutex &result_mutex) { + std::forward_list local_results; + + for (size_t index = start_index; index < end_index; ++index) { + const auto *current_word = word_list[index]; + if (current_word->starts_with(search_prefix)) { + local_results.push_front(current_word); + } + } + + if (!local_results.empty()) { + const std::lock_guard lock(result_mutex); + result.merge(local_results); + } +}; From 3b1446f049808864e2e7825bf18f045a0d604c97 Mon Sep 17 00:00:00 2001 From: Michael Mandl Date: Thu, 21 Mar 2024 10:58:38 +0100 Subject: [PATCH 2/4] refactor: add WordRefList from WordList constructor --- lib_vector_search/include/word_list.h | 2 ++ lib_vector_search/src/sorted_linear_finder.cpp | 8 ++------ lib_vector_search/src/word_list.cpp | 6 ++++++ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/lib_vector_search/include/word_list.h b/lib_vector_search/include/word_list.h index ba8b22f..3c0b5d9 100644 --- a/lib_vector_search/include/word_list.h +++ b/lib_vector_search/include/word_list.h @@ -22,6 +22,8 @@ public: class WordRefList : public std::vector { public: + WordRefList(const WordList &source); + static void find_prefix_in_range( const WordRefList &word_list, const std::string_view &search_prefix, size_t start_index, size_t end_index, diff --git a/lib_vector_search/src/sorted_linear_finder.cpp b/lib_vector_search/src/sorted_linear_finder.cpp index 6e8fbc1..3648bb2 100644 --- a/lib_vector_search/src/sorted_linear_finder.cpp +++ b/lib_vector_search/src/sorted_linear_finder.cpp @@ -1,15 +1,11 @@ #include "sorted_linear_finder.h" #include -#include using std::forward_list, std::string, std::string_view; -SortedLinearFinder::SortedLinearFinder(const WordList &word_list) { - std::transform(word_list.cbegin(), word_list.cend(), - std::back_inserter(word_list_), - [](const string &word) { return &word; }); - +SortedLinearFinder::SortedLinearFinder(const WordList &word_list) + : word_list_(word_list) { std::sort( word_list_.begin(), word_list_.end(), [](const string *left, const string *right) { return *left < *right; }); diff --git a/lib_vector_search/src/word_list.cpp b/lib_vector_search/src/word_list.cpp index dc91ede..e810451 100644 --- a/lib_vector_search/src/word_list.cpp +++ b/lib_vector_search/src/word_list.cpp @@ -77,6 +77,12 @@ void WordList::find_prefix_in_range( } }; +WordRefList::WordRefList(const WordList &source) { + for (const auto &word : source) { + push_back(&word); + } +} + void WordRefList::find_prefix_in_range( const WordRefList &word_list, const std::string_view &search_prefix, size_t start_index, size_t end_index, From 029196237db5c7665a3688c82d521fecce8830eb Mon Sep 17 00:00:00 2001 From: Michael Mandl Date: Thu, 21 Mar 2024 10:59:20 +0100 Subject: [PATCH 3/4] refactor: clean up namespace --- lib_vector_search/src/grouped_finder.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib_vector_search/src/grouped_finder.cpp b/lib_vector_search/src/grouped_finder.cpp index 412ebae..bb8ee88 100644 --- a/lib_vector_search/src/grouped_finder.cpp +++ b/lib_vector_search/src/grouped_finder.cpp @@ -14,8 +14,8 @@ GroupedFinder::GroupedFinder(const WordList &word_list) { } } -std::forward_list -GroupedFinder::find_prefix(std::string_view search_prefix) const { +forward_list +GroupedFinder::find_prefix(string_view search_prefix) const { const auto group = groups_.find(search_prefix.front()); if (group == groups_.cend()) { return {}; From 4b42f4c12a319b80c44dbc76736ca3ea2503a33d Mon Sep 17 00:00:00 2001 From: Michael Mandl Date: Thu, 21 Mar 2024 10:59:35 +0100 Subject: [PATCH 4/4] feat: add harder test-cases for sorted finders --- vector_search_cli/main.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vector_search_cli/main.cpp b/vector_search_cli/main.cpp index 9922090..117b81f 100644 --- a/vector_search_cli/main.cpp +++ b/vector_search_cli/main.cpp @@ -30,9 +30,8 @@ void test_finder_search(Finder &finder, string_view name, auto result = finder.find_prefix(search_term); find_timer.stop(); - cout << name << "(" << search_term << ") took " << find_timer << endl; - cout << "result list is " << std::distance(result.cbegin(), result.cend()) - << " element(s) long" << endl; + cout << name << "(" << search_term << ") took " << find_timer << " for " + << std::distance(result.cbegin(), result.cend()) << " results" << endl; } template @@ -44,7 +43,8 @@ void test_finder(const WordList &word_list, string_view finder_name) { constructor_timer.stop(); cout << finder_name << " constructor took " << constructor_timer << endl; - for (const auto &search_term : {"A", "AB", "ABC", "ABCD"}) { + for (const auto &search_term : + {"A", "Z", "AB", "ZY", "ABC", "ZYX", "ABCD", "ZYXW"}) { test_finder_search(finder, finder_name, search_term); } }