Compare commits
2 Commits
2ebc2107ba
...
e4c643880a
Author | SHA1 | Date |
---|---|---|
mandlm | e4c643880a | |
mandlm | c1b4fc67e9 |
|
@ -10,17 +10,34 @@ private:
|
||||||
std::unordered_map<char, WordRefList> directory_;
|
std::unordered_map<char, WordRefList> directory_;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
/// Inserts references to all words from word_list between first_index and
|
||||||
|
/// last_index, including first_index, excluding last_index.
|
||||||
void insert(const WordList &word_list, size_t first_index, size_t last_index);
|
void insert(const WordList &word_list, size_t first_index, size_t last_index);
|
||||||
|
|
||||||
|
/// Find all words that start with search_term
|
||||||
|
/// @return A list with references to the results.
|
||||||
WordRefList find_prefix(std::string_view search_term) const;
|
WordRefList find_prefix(std::string_view search_term) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** This class provides efficient, parallel search over a list of strings.
|
||||||
|
*
|
||||||
|
* References to all input strings are stored in a tree-like structure that
|
||||||
|
* provides fast and lock-free parallel insertion and parallel (with minimal
|
||||||
|
* synchronization) search for all words that start with a given term.
|
||||||
|
*/
|
||||||
class BucketFinder : public Finder {
|
class BucketFinder : public Finder {
|
||||||
private:
|
private:
|
||||||
std::vector<Bucket> buckets_;
|
std::vector<Bucket> buckets_;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
/// Creates a BucketFinder over all words in word_list.
|
||||||
BucketFinder(const WordList &word_list);
|
BucketFinder(const WordList &word_list);
|
||||||
|
|
||||||
|
/// Find all words that start with search_term
|
||||||
|
/// @return A list with references to the results.
|
||||||
WordRefList find_prefix(std::string_view search_term) const override;
|
WordRefList find_prefix(std::string_view search_term) const override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
/// Inserts references to all words from word_list.
|
||||||
|
void insert(const WordList &word_list);
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,12 +1,9 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
#include <mutex>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
class WordRefList;
|
|
||||||
|
|
||||||
class WordList : public std::vector<std::string> {
|
class WordList : public std::vector<std::string> {
|
||||||
public:
|
public:
|
||||||
WordList &multiply(size_t factor);
|
WordList &multiply(size_t factor);
|
||||||
|
@ -15,22 +12,10 @@ public:
|
||||||
static WordList oneCap();
|
static WordList oneCap();
|
||||||
static WordList fourCaps();
|
static WordList fourCaps();
|
||||||
static WordList fromFile(const std::filesystem::path &path);
|
static WordList fromFile(const std::filesystem::path &path);
|
||||||
|
|
||||||
static void find_prefix_in_range(const WordList &word_list,
|
|
||||||
const std::string_view &search_prefix,
|
|
||||||
size_t start_index, size_t end_index,
|
|
||||||
WordRefList &result,
|
|
||||||
std::mutex &result_mutex);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class WordRefList : public std::vector<const std::string *> {
|
class WordRefList : public std::vector<const std::string *> {
|
||||||
public:
|
public:
|
||||||
WordRefList() = default;
|
WordRefList() = default;
|
||||||
WordRefList(const WordList &source);
|
WordRefList(const WordList &source);
|
||||||
|
|
||||||
static void find_prefix_in_range(const WordRefList &word_list,
|
|
||||||
const std::string_view &search_prefix,
|
|
||||||
size_t start_index, size_t end_index,
|
|
||||||
WordRefList &result,
|
|
||||||
std::mutex &result_mutex);
|
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,8 +1,11 @@
|
||||||
#include "bucket_finder.h"
|
#include "bucket_finder.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <iterator>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <strings.h>
|
#include <string>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
void Bucket::insert(const WordList &word_list, size_t first_index,
|
void Bucket::insert(const WordList &word_list, size_t first_index,
|
||||||
size_t last_index) {
|
size_t last_index) {
|
||||||
|
@ -28,34 +31,35 @@ WordRefList Bucket::find_prefix(std::string_view search_term) const {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
BucketFinder::BucketFinder(const WordList &word_list) {
|
BucketFinder::BucketFinder(const WordList &word_list) { insert(word_list); }
|
||||||
|
|
||||||
|
void BucketFinder::insert(const WordList &word_list) {
|
||||||
if (word_list.empty()) {
|
if (word_list.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const size_t max_threads = std::thread::hardware_concurrency();
|
||||||
const size_t word_list_size = word_list.size();
|
const size_t word_list_size = word_list.size();
|
||||||
const size_t bucket_count =
|
const size_t bucket_count = std::min<size_t>(max_threads, word_list_size);
|
||||||
std::min<size_t>(std::thread::hardware_concurrency(), word_list_size);
|
|
||||||
const size_t bucket_size = word_list_size / bucket_count;
|
const size_t bucket_size = word_list_size / bucket_count;
|
||||||
|
|
||||||
buckets_.resize(bucket_count);
|
buckets_.resize(bucket_count);
|
||||||
|
|
||||||
std::vector<std::thread> threads;
|
std::vector<std::thread> insert_threads;
|
||||||
for (auto bucket_index = 0; bucket_index < bucket_count; ++bucket_index) {
|
for (auto bucket_index = 0; bucket_index < bucket_count; ++bucket_index) {
|
||||||
auto &bucket = buckets_[bucket_index];
|
auto &bucket = buckets_[bucket_index];
|
||||||
|
|
||||||
bool is_last_bucket = bucket_index == bucket_count - 1;
|
const bool is_last_bucket = bucket_index == bucket_count - 1;
|
||||||
|
const size_t first_word_index = bucket_index * bucket_size;
|
||||||
|
const size_t last_word_index =
|
||||||
|
is_last_bucket ? word_list_size : first_word_index + bucket_size;
|
||||||
|
|
||||||
const size_t first_index = bucket_index * bucket_size;
|
insert_threads.emplace_back([&, first_word_index, last_word_index] {
|
||||||
const size_t last_index =
|
bucket.insert(word_list, first_word_index, last_word_index);
|
||||||
is_last_bucket ? word_list_size : first_index + bucket_size;
|
|
||||||
|
|
||||||
threads.emplace_back([&, first_index, last_index] {
|
|
||||||
bucket.insert(word_list, first_index, last_index);
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto &thread : threads) {
|
for (auto &thread : insert_threads) {
|
||||||
thread.join();
|
thread.join();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -69,7 +73,7 @@ WordRefList BucketFinder::find_prefix(std::string_view search_term) const {
|
||||||
threads.emplace_back([&] {
|
threads.emplace_back([&] {
|
||||||
auto thread_search_results = bucket.find_prefix(search_term);
|
auto thread_search_results = bucket.find_prefix(search_term);
|
||||||
if (!thread_search_results.empty()) {
|
if (!thread_search_results.empty()) {
|
||||||
std::lock_guard result_lock(search_results_mutex);
|
const std::lock_guard result_lock(search_results_mutex);
|
||||||
std::move(thread_search_results.begin(), thread_search_results.end(),
|
std::move(thread_search_results.begin(), thread_search_results.end(),
|
||||||
std::back_inserter(search_results));
|
std::back_inserter(search_results));
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,31 +19,43 @@ WordRefList GroupedFinder::find_prefix(string_view search_prefix) const {
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto word_list = group->second;
|
const auto word_list = group->second;
|
||||||
const auto word_list_size = word_list.size();
|
const size_t word_list_size = word_list.size();
|
||||||
|
const size_t thread_count =
|
||||||
const auto thread_count =
|
|
||||||
std::min<size_t>(std::thread::hardware_concurrency(), word_list_size);
|
std::min<size_t>(std::thread::hardware_concurrency(), word_list_size);
|
||||||
|
const size_t words_per_thread = word_list_size / thread_count;
|
||||||
|
|
||||||
WordRefList result;
|
WordRefList search_results;
|
||||||
mutex result_mutex;
|
mutex search_results_mutex;
|
||||||
|
|
||||||
vector<thread> search_threads;
|
vector<thread> search_threads;
|
||||||
for (size_t thread_index = 0; thread_index < thread_count; ++thread_index) {
|
for (size_t thread_index = 0; thread_index < thread_count; ++thread_index) {
|
||||||
const size_t first_index = thread_index * (word_list_size / thread_count);
|
const bool is_last_thread = thread_index == thread_count - 1;
|
||||||
|
const size_t start_index = thread_index * words_per_thread;
|
||||||
|
const size_t end_index =
|
||||||
|
is_last_thread ? word_list_size : start_index + words_per_thread;
|
||||||
|
|
||||||
const size_t last_index =
|
search_threads.emplace_back([&, start_index, end_index] {
|
||||||
(thread_index == thread_count - 1)
|
WordRefList local_results;
|
||||||
? word_list_size
|
|
||||||
: (thread_index + 1) * (word_list_size / thread_count);
|
|
||||||
|
|
||||||
search_threads.emplace_back(
|
for (size_t word_index = start_index; word_index < end_index;
|
||||||
WordRefList::find_prefix_in_range, cref(word_list), cref(search_prefix),
|
++word_index) {
|
||||||
first_index, last_index, ref(result), ref(result_mutex));
|
const auto *current_word = word_list[word_index];
|
||||||
|
if (current_word->starts_with(search_prefix)) {
|
||||||
|
local_results.push_back(current_word);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!local_results.empty()) {
|
||||||
|
const std::lock_guard lock(search_results_mutex);
|
||||||
|
std::move(local_results.begin(), local_results.end(),
|
||||||
|
std::back_inserter(search_results));
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto &thread : search_threads) {
|
for (auto &thread : search_threads) {
|
||||||
thread.join();
|
thread.join();
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return search_results;
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,30 +9,43 @@ ParallelFinder::ParallelFinder(const WordList &word_list)
|
||||||
: word_list_(word_list) {}
|
: word_list_(word_list) {}
|
||||||
|
|
||||||
WordRefList ParallelFinder::find_prefix(string_view search_prefix) const {
|
WordRefList ParallelFinder::find_prefix(string_view search_prefix) const {
|
||||||
WordRefList result;
|
WordRefList search_results;
|
||||||
mutex result_mutex;
|
mutex search_results_mutex;
|
||||||
|
|
||||||
const size_t word_list_size = word_list_.size();
|
const size_t word_list_size = word_list_.size();
|
||||||
const size_t thread_count =
|
const size_t thread_count =
|
||||||
std::min<size_t>(thread::hardware_concurrency(), word_list_size);
|
std::min<size_t>(thread::hardware_concurrency(), word_list_size);
|
||||||
|
const size_t words_per_thread = word_list_size / thread_count;
|
||||||
|
|
||||||
vector<thread> search_threads;
|
vector<thread> search_threads;
|
||||||
for (size_t thread_index = 0; thread_index < thread_count; ++thread_index) {
|
for (size_t thread_index = 0; thread_index < thread_count; ++thread_index) {
|
||||||
const size_t first_index = thread_index * (word_list_size / thread_count);
|
const bool is_last_thread = thread_index == thread_count - 1;
|
||||||
|
const size_t start_index = thread_index * words_per_thread;
|
||||||
|
const size_t end_index =
|
||||||
|
is_last_thread ? word_list_size : start_index + words_per_thread;
|
||||||
|
|
||||||
const size_t last_index =
|
search_threads.emplace_back([&, start_index, end_index] {
|
||||||
(thread_index == thread_count - 1)
|
WordRefList local_results;
|
||||||
? word_list_size
|
|
||||||
: (thread_index + 1) * (word_list_size / thread_count);
|
|
||||||
|
|
||||||
search_threads.emplace_back(
|
for (size_t word_index = start_index; word_index < end_index;
|
||||||
WordList::find_prefix_in_range, cref(word_list_), cref(search_prefix),
|
++word_index) {
|
||||||
first_index, last_index, ref(result), ref(result_mutex));
|
const auto &current_word = word_list_[word_index];
|
||||||
|
if (current_word.starts_with(search_prefix)) {
|
||||||
|
local_results.push_back(&current_word);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!local_results.empty()) {
|
||||||
|
const std::lock_guard lock(search_results_mutex);
|
||||||
|
std::move(local_results.begin(), local_results.end(),
|
||||||
|
std::back_inserter(search_results));
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto &thread : search_threads) {
|
for (auto &thread : search_threads) {
|
||||||
thread.join();
|
thread.join();
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return search_results;
|
||||||
}
|
}
|
||||||
|
|
|
@ -71,50 +71,8 @@ WordList WordList::fromFile(const std::filesystem::path &path) {
|
||||||
return word_list;
|
return word_list;
|
||||||
}
|
}
|
||||||
|
|
||||||
void WordList::find_prefix_in_range(const WordList &word_list,
|
|
||||||
const std::string_view &search_prefix,
|
|
||||||
size_t start_index, size_t end_index,
|
|
||||||
WordRefList &result,
|
|
||||||
std::mutex &result_mutex) {
|
|
||||||
WordRefList local_results;
|
|
||||||
|
|
||||||
for (size_t index = start_index; index < end_index; ++index) {
|
|
||||||
const auto &current_word = word_list[index];
|
|
||||||
if (current_word.starts_with(search_prefix)) {
|
|
||||||
local_results.push_back(&current_word);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!local_results.empty()) {
|
|
||||||
const std::lock_guard<std::mutex> lock(result_mutex);
|
|
||||||
std::move(local_results.begin(), local_results.end(),
|
|
||||||
std::back_inserter(result));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
WordRefList::WordRefList(const WordList &source) {
|
WordRefList::WordRefList(const WordList &source) {
|
||||||
for (const auto &word : source) {
|
for (const auto &word : source) {
|
||||||
push_back(&word);
|
push_back(&word);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void WordRefList::find_prefix_in_range(const WordRefList &word_list,
|
|
||||||
const std::string_view &search_prefix,
|
|
||||||
size_t start_index, size_t end_index,
|
|
||||||
WordRefList &result,
|
|
||||||
std::mutex &result_mutex) {
|
|
||||||
WordRefList local_results;
|
|
||||||
|
|
||||||
for (size_t index = start_index; index < end_index; ++index) {
|
|
||||||
const auto *current_word = word_list[index];
|
|
||||||
if (current_word->starts_with(search_prefix)) {
|
|
||||||
local_results.push_back(current_word);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!local_results.empty()) {
|
|
||||||
const std::lock_guard<std::mutex> lock(result_mutex);
|
|
||||||
std::move(local_results.begin(), local_results.end(),
|
|
||||||
std::back_inserter(result));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
Loading…
Reference in New Issue