From c1b4fc67e92efd12d9c0ce1face53effe80cb904 Mon Sep 17 00:00:00 2001 From: Michael Mandl Date: Sat, 23 Mar 2024 11:24:06 +0100 Subject: [PATCH] refactor: integrate finder-threads as lambdas --- lib_vector_search/include/word_list.h | 15 -------- lib_vector_search/src/grouped_finder.cpp | 40 +++++++++++++-------- lib_vector_search/src/parallel_finder.cpp | 35 +++++++++++++------ lib_vector_search/src/word_list.cpp | 42 ----------------------- 4 files changed, 50 insertions(+), 82 deletions(-) diff --git a/lib_vector_search/include/word_list.h b/lib_vector_search/include/word_list.h index f197170..3894a15 100644 --- a/lib_vector_search/include/word_list.h +++ b/lib_vector_search/include/word_list.h @@ -1,12 +1,9 @@ #pragma once #include -#include #include #include -class WordRefList; - class WordList : public std::vector { public: WordList &multiply(size_t factor); @@ -15,22 +12,10 @@ public: static WordList oneCap(); static WordList fourCaps(); static WordList fromFile(const std::filesystem::path &path); - - static void find_prefix_in_range(const WordList &word_list, - const std::string_view &search_prefix, - size_t start_index, size_t end_index, - WordRefList &result, - std::mutex &result_mutex); }; class WordRefList : public std::vector { public: WordRefList() = default; WordRefList(const WordList &source); - - static void find_prefix_in_range(const WordRefList &word_list, - const std::string_view &search_prefix, - size_t start_index, size_t end_index, - WordRefList &result, - std::mutex &result_mutex); }; diff --git a/lib_vector_search/src/grouped_finder.cpp b/lib_vector_search/src/grouped_finder.cpp index 990b95c..d901dda 100644 --- a/lib_vector_search/src/grouped_finder.cpp +++ b/lib_vector_search/src/grouped_finder.cpp @@ -19,31 +19,43 @@ WordRefList GroupedFinder::find_prefix(string_view search_prefix) const { } const auto word_list = group->second; - const auto word_list_size = word_list.size(); - - const auto thread_count = + const size_t word_list_size = 
word_list.size(); + const size_t thread_count = std::min(std::thread::hardware_concurrency(), word_list_size); + const size_t words_per_thread = word_list_size / thread_count; - WordRefList result; - mutex result_mutex; + WordRefList search_results; + mutex search_results_mutex; vector search_threads; for (size_t thread_index = 0; thread_index < thread_count; ++thread_index) { - const size_t first_index = thread_index * (word_list_size / thread_count); + const bool is_last_thread = thread_index == thread_count - 1; + const size_t start_index = thread_index * words_per_thread; + const size_t end_index = + is_last_thread ? word_list_size : start_index + words_per_thread; - const size_t last_index = - (thread_index == thread_count - 1) - ? word_list_size - : (thread_index + 1) * (word_list_size / thread_count); + search_threads.emplace_back([&, start_index, end_index] { + WordRefList local_results; - search_threads.emplace_back( - WordRefList::find_prefix_in_range, cref(word_list), cref(search_prefix), - first_index, last_index, ref(result), ref(result_mutex)); + for (size_t word_index = start_index; word_index < end_index; + ++word_index) { + const auto *current_word = word_list[word_index]; + if (current_word->starts_with(search_prefix)) { + local_results.push_back(current_word); + } + } + + if (!local_results.empty()) { + const std::lock_guard lock(search_results_mutex); + std::move(local_results.begin(), local_results.end(), + std::back_inserter(search_results)); + } + }); } for (auto &thread : search_threads) { thread.join(); } - return result; + return search_results; } diff --git a/lib_vector_search/src/parallel_finder.cpp b/lib_vector_search/src/parallel_finder.cpp index b398b2e..8d3367a 100644 --- a/lib_vector_search/src/parallel_finder.cpp +++ b/lib_vector_search/src/parallel_finder.cpp @@ -9,30 +9,43 @@ ParallelFinder::ParallelFinder(const WordList &word_list) : word_list_(word_list) {} WordRefList ParallelFinder::find_prefix(string_view search_prefix) 
const { - WordRefList result; - mutex result_mutex; + WordRefList search_results; + mutex search_results_mutex; const size_t word_list_size = word_list_.size(); const size_t thread_count = std::min(thread::hardware_concurrency(), word_list_size); + const size_t words_per_thread = word_list_size / thread_count; vector search_threads; for (size_t thread_index = 0; thread_index < thread_count; ++thread_index) { - const size_t first_index = thread_index * (word_list_size / thread_count); + const bool is_last_thread = thread_index == thread_count - 1; + const size_t start_index = thread_index * words_per_thread; + const size_t end_index = + is_last_thread ? word_list_size : start_index + words_per_thread; - const size_t last_index = - (thread_index == thread_count - 1) - ? word_list_size - : (thread_index + 1) * (word_list_size / thread_count); + search_threads.emplace_back([&, start_index, end_index] { + WordRefList local_results; - search_threads.emplace_back( - WordList::find_prefix_in_range, cref(word_list_), cref(search_prefix), - first_index, last_index, ref(result), ref(result_mutex)); + for (size_t word_index = start_index; word_index < end_index; + ++word_index) { + const auto &current_word = word_list_[word_index]; + if (current_word.starts_with(search_prefix)) { + local_results.push_back(&current_word); + } + } + + if (!local_results.empty()) { + const std::lock_guard lock(search_results_mutex); + std::move(local_results.begin(), local_results.end(), + std::back_inserter(search_results)); + } + }); } for (auto &thread : search_threads) { thread.join(); } - return result; + return search_results; } diff --git a/lib_vector_search/src/word_list.cpp b/lib_vector_search/src/word_list.cpp index 039ed83..e1baf7c 100644 --- a/lib_vector_search/src/word_list.cpp +++ b/lib_vector_search/src/word_list.cpp @@ -71,50 +71,8 @@ WordList WordList::fromFile(const std::filesystem::path &path) { return word_list; } -void WordList::find_prefix_in_range(const WordList &word_list, - const 
std::string_view &search_prefix, - size_t start_index, size_t end_index, - WordRefList &result, - std::mutex &result_mutex) { - WordRefList local_results; - - for (size_t index = start_index; index < end_index; ++index) { - const auto &current_word = word_list[index]; - if (current_word.starts_with(search_prefix)) { - local_results.push_back(&current_word); - } - } - - if (!local_results.empty()) { - const std::lock_guard lock(result_mutex); - std::move(local_results.begin(), local_results.end(), - std::back_inserter(result)); - } -}; - WordRefList::WordRefList(const WordList &source) { for (const auto &word : source) { push_back(&word); } } - -void WordRefList::find_prefix_in_range(const WordRefList &word_list, - const std::string_view &search_prefix, - size_t start_index, size_t end_index, - WordRefList &result, - std::mutex &result_mutex) { - WordRefList local_results; - - for (size_t index = start_index; index < end_index; ++index) { - const auto *current_word = word_list[index]; - if (current_word->starts_with(search_prefix)) { - local_results.push_back(current_word); - } - } - - if (!local_results.empty()) { - const std::lock_guard lock(result_mutex); - std::move(local_results.begin(), local_results.end(), - std::back_inserter(result)); - } -};