似乎使用基于vector<wstring>
和基于wcscmp
的自定义比较器对wmemcmp
进行排序比默认行为要快得多(约1000毫秒)。
64位版本的内部版本(VS2015):
Default1: 3304.39 ms
wcscmp1 : 2323.26 ms
wmemcmp1: 2300.11 ms
Default2: 3239.75 ms
wcscmp2 : 2303.37 ms
wmemcmp2: 2338.55 ms
Default3: 3293.73 ms
wcscmp3 : 2303.82 ms
wmemcmp3: 2313.88 ms
编译代码:
///////////////////////////////////////////////////////////////////////////////
//
// Compare sorting a vector<wstring> with:
// - default comparator
// - custom wcscmp()-based comparator
// - custom wmemcmp()-based comparator
//
///////////////////////////////////////////////////////////////////////////////
#include <string.h> // wcscmp, wmemcmp
#include <algorithm> // std::shuffle, std::sort
#include <iostream> // std::cout
#include <random> // std::mt19937
#include <string> // std::wstring
#include <vector> // std::vector
#include <Windows.h> // Windows Platform SDK
using namespace std;
//=============================================================================
// Performance Counter Helpers
//=============================================================================
long long Counter()
{
LARGE_INTEGER li;
::QueryPerformanceCounter(&li);
return li.QuadPart;
}
long long Frequency()
{
LARGE_INTEGER li;
::QueryPerformanceFrequency(&li);
return li.QuadPart;
}
void PrintTime(const long long start, const long long finish, const char * const s)
{
cout << s << ": " << (finish - start) * 1000.0 / Frequency() << " ms \n";
}
//=============================================================================
// Performance Tests
//=============================================================================
bool CompareUsingWcscmp(const std::wstring& a, const std::wstring& b) noexcept
{
// a < b
return wcscmp(a.c_str(), b.c_str()) < 0;
}
bool CompareUsingWmemcmp(const std::wstring& a, const std::wstring& b) noexcept
{
const size_t count = min(a.size(), b.size());
return wmemcmp(a.data(), b.data(), count) < 0;
}
int main()
{
// Build a vector of strings generated starting from "Lorem Ipsum"
const auto shuffled = []() -> vector<wstring>
{
const wstring lorem[] =
{
L"Lorem ipsum dolor sit amet, consectetuer adipiscing elit.",
L"Maecenas porttitor congue massa. Fusce posuere, magna sed",
L"pulvinar ultricies, purus lectus malesuada libero,",
L"sit amet commodo magna eros quis urna.",
L"Nunc viverra imperdiet enim. Fusce est. Vivamus a tellus.",
L"Pellentesque habitant morbi tristique senectus et netus et",
L"malesuada fames ac turpis egestas. Proin pharetra nonummy pede.",
L"Mauris et orci. [*** add more chars to prevent SSO ***]"
};
vector<wstring> v;
#ifdef _DEBUG
constexpr int kTestIterationCount = 1000;
#else
constexpr int kTestIterationCount = 200'000;
#endif
for (int i = 0; i < kTestIterationCount; ++i)
{
for (const auto & s : lorem)
{
v.push_back(s + L" (#" + to_wstring(i) + L")");
}
}
mt19937 prng(1980);
shuffle(v.begin(), v.end(), prng);
return v;
}();
long long start = 0;
long long finish = 0;
vector<wstring> v1 = shuffled;
vector<wstring> w1 = shuffled;
vector<wstring> z1 = shuffled;
vector<wstring> v2 = shuffled;
vector<wstring> w2 = shuffled;
vector<wstring> z2 = shuffled;
vector<wstring> v3 = shuffled;
vector<wstring> w3 = shuffled;
vector<wstring> z3 = shuffled;
start = Counter();
sort(v1.begin(), v1.end());
finish = Counter();
PrintTime(start, finish, "Default1");
start = Counter();
sort(w1.begin(), w1.end(), CompareUsingWcscmp);
finish = Counter();
PrintTime(start, finish, "wcscmp1 ");
start = Counter();
sort(z1.begin(), z1.end(), CompareUsingWmemcmp);
finish = Counter();
PrintTime(start, finish, "wmemcmp1");
cout << '\n';
start = Counter();
sort(v2.begin(), v2.end());
finish = Counter();
PrintTime(start, finish, "Default2");
start = Counter();
sort(w2.begin(), w2.end(), CompareUsingWcscmp);
finish = Counter();
PrintTime(start, finish, "wcscmp2 ");
start = Counter();
sort(z2.begin(), z2.end(), CompareUsingWmemcmp);
finish = Counter();
PrintTime(start, finish, "wmemcmp2");
cout << '\n';
start = Counter();
sort(v3.begin(), v3.end());
finish = Counter();
PrintTime(start, finish, "Default3");
start = Counter();
sort(w3.begin(), w3.end(), CompareUsingWcscmp);
finish = Counter();
PrintTime(start, finish, "wcscmp3 ");
start = Counter();
sort(z3.begin(), z3.end(), CompareUsingWmemcmp);
finish = Counter();
PrintTime(start, finish, "wmemcmp3");
}
///////////////////////////////////////////////////////////////////////////////
附言我知道
wcscmp
不适用于包含嵌入式null的wstrings。但这不是问题的重点。 最佳答案
这可能是库的std::less<std::wstring>
(这是std::sort()
的默认比较器)的弱点。为了进行比较,我制作了您的测试的便携式版本:
#include <cstring>
#include <algorithm>
#include <chrono>
#include <functional>
#include <iostream>
#include <random>
#include <string>
#include <vector>
// override this by compiling with (e.g.)
// g++ -DITERATION_COUNT=1000
#ifndef ITERATION_COUNT
#define ITERATION_COUNT 200000
#endif
template<typename T>
struct time_printer
{
T func;
friend std::ostream& operator<<(std::ostream& os, const time_printer& p)
{
using Duration = std::chrono::duration<double, std::chrono::milliseconds::period>;
auto begin = std::chrono::steady_clock::now();
p.func();
auto end = std::chrono::steady_clock::now();
Duration time_taken = end - begin;
return os << time_taken.count();
}
};
template<typename T>
time_printer<T> print_time(T fun) { return {fun}; }
int main()
{
// Build a vector of strings generated starting from "Lorem Ipsum"
const auto shuffled = []() {
static const std::wstring lorem[] = {
L"Lorem ipsum dolor sit amet, consectetuer adipiscing elit.",
L"Maecenas porttitor congue massa. Fusce posuere, magna sed",
L"pulvinar ultricies, purus lectus malesuada libero,",
L"sit amet commodo magna eros quis urna.",
L"Nunc viverra imperdiet enim. Fusce est. Vivamus a tellus.",
L"Pellentesque habitant morbi tristique senectus et netus et",
L"malesuada fames ac turpis egestas. Proin pharetra nonummy pede.",
L"Mauris et orci. [*** add more chars to prevent SSO ***]"
};
std::vector<std::wstring> v;
auto const kTestIterationCount = ITERATION_COUNT;
v.reserve(std::size(lorem) * kTestIterationCount);
for (int i = 0; i < kTestIterationCount; ++i) {
auto const suffix = L" (#" + std::to_wstring(i) + L")";
for (auto const& s: lorem) {
v.push_back(s + suffix);
}
}
std::shuffle(v.begin(), v.end(), std::mt19937{1980});
return v;
}();
// name, function
using comparator = std::pair<const char *, std::function<bool(const std::wstring&,const std::wstring&)>>;
static const comparator comparators[] = {
{" default", [](const auto& a, const auto& b){return a < b;} },
{"std_less", std::less<std::wstring>{} },
{" wcscmp", [](const auto& a, const auto& b){return std::wcscmp(a.c_str(), b.c_str()) < 0;} },
{" wmemcmp", [](const auto& a, const auto& b){return std::wmemcmp(a.data(), b.data(), std::min(a.size(), b.size())) < 0;;} }
};
static const auto passes = 3u;
static const auto ncomp = std::size(comparators);
std::vector<std::vector<std::wstring>> inputs(ncomp * passes, shuffled);
for (auto i = 0u; i < inputs.size(); ++i) {
auto pass = i % ncomp;
auto round = i / ncomp + 1;
std::cout << comparators[pass].first << " round " << round << ": "
<< print_time([&]{std::sort(inputs[i].begin(), inputs[i].end(), comparators[pass].second);})
<< std::endl;
}
// make sure they all sorted correctly
return std::count_if(inputs.begin(), inputs.end(),
[](auto const& v){ return !std::is_sorted(v.begin(), v.end());});
}
使用Intel i7-6700的Linux上的GCC 8,
-O3 -march=native
进行编译时,使用native或std::wmemcmp()
可获得最佳结果,而使用std::wcscmp()
可获得最差的结果:default round 1: 1734.87
wcscmp round 1: 2315.48
wmemcmp round 1: 1699.22
default round 2: 1727.92
wcscmp round 2: 2305.81
wmemcmp round 2: 1635.28
default round 3: 1719.26
wcscmp round 3: 2286.19
wmemcmp round 3: 1638.17
关于c++ - 为什么使用自定义wcscmp和wmemcmp比较器对vector <wstring>进行排序比默认速度快得多?,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/49752748/