本文介绍了集的快速交集:C ++与C#的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧! 问题描述 在运行Vista x64 Business和Visual Studio 2008 SP1的计算机(四核,8GB内存)上,我试图非常快速地将两组数字相交。 这里是C#输出:(发布版本) 在4741.407 ms中找到了1000个交叉点 以下是两种不同方法(发布x64版本)的初始C ++输出: 在21580.7ms中找到交集(使用unordered_map)1000次在22366.6ms中找到1000个交集(使用set_intersection) 以下是最新的C ++输出,三种方法(发布x64版本): 最新基准: 在28827.6ms中找到504个值的交集(使用unordered_map)1000次,找到495个交集值(使用set_intersection)1000次,在9817.69ms 中找到504个值(使用unordered_set)1000次,在24769.1ms中 因此,set_intersection方法现在比C#慢大约2倍,但比初始C ++方法快2倍。 最新的C ++代码: 代码: // MapPerformance.cpp:定义控制台应用程序的入口点。 // #include stdafx.h #include< hash_map> #include< vector> #include< iostream> #include< time.h> #include< algorithm> #include< set> #include< unordered_set> #include< boost\unordered\unordered_map.hpp> #include timer.h 使用命名空间std; 使用命名空间stdext; 使用命名空间提升; 使用名称空间tr1; int runIntersectionTest2(const vector< set1,const vector< int& set2)) { // hash_map< int,int>地图; // map< int,int>地图; unordered_set< int> theSet; theSet.insert(set1.begin(),set1.end()); int相交大小= 0; vector< int> :: const_iterator set2_end = set2.end(); for(vector< int> :: const_iterator迭代器= set2.begin();迭代器!= set2_end; ++迭代器) { if(theSet.find(*迭代器)!= theSet.end()) {sectionSize ++; } } 返回交集大小; } int runIntersectionTest(const vector< int& set1,const vector< int>& set2) { // hash_map< int,int> ;地图; // map< int,int>地图; unordered_map< int,int>地图; vector< int> :: const_iterator set1_end = set1.end(); //现在通过填充地图来使这两个集合相交(vector< int> :: const_iterator iterator = set1.begin();迭代器!= set1_end; ++ iterator) { int值= *迭代器; theMap [value] = 1; } int相交大小= 0; vector< int> :: const_iterator set2_end = set2.end(); for(vector< int> :: const_iterator迭代器= set2.begin();迭代器!= set2_end; ++迭代器) { int值= *迭代器; unordered_map< int,int> :: iterator foundValue = theMap.find(value); if(foundValue!= theMap.end()) { theMap [value] = 2; intersectionSize ++; } } 返回交集大小; } int runSetIntersection(const vector< int>& set1_unsorted,const vector< int>& set2_unsorted) { //创建两个向量 std :: vector< int> set1(set1_unsorted.size()); std :: vector< int> set2(set2_unsorted.size()); //将未排序的数据复制到其中 std :: copy(set1_unsorted.begin(),set1_unsorted.end(),set1.begin()); std :: copy(set2_unsorted.begin(),set2_unsorted.end(),set2.begin()); //对数据进行排序 sort(set1.begin(),set1.end()); sort(set2.begin(),set2.end()); vector< int>路口; junction.reserve(1000); set_intersection(set1.begin(),set1.end(),set2.begin(),set2.end(),back_inserter(intersection)); return交集.size(); } void createSets(vector∫& set1,vector< int>& set2) { srand(time(NULL)); set1.reserve(100000); set2.reserve(1000); //为set1 创建100,000个值,用于(int i = 0; i { int = 1000000000 + i; set1.push_back(value); } //尝试使我们的一半值与浮动比率相交= 200000.0f / RAND_MAX; //为set2 创建1,000个值,用于(int i = 0; i< 1000; i ++) { int random = rand ()*比率+1; int值= 1000000000 +随机; set2.push_back(value); } //确保set1处于随机顺序(未排序) random_shuffle(set1.begin(),set1.end()); } int _tmain(int argc,_TCHAR * argv []) { int交集大小= 0; vector< int> set1,set2; createSets(set1,set2); 计时器计时器; for(int i = 0; i< 1000; i ++) {intersectionSize = runIntersectionTest(set1,set2); } timer.Stop(); cout<< 找到了<<相交尺寸<<值(使用unordered_map)1000次,以<< timer.GetMilliseconds()<< ms<<恩德尔 timer.Reset(); for(int i = 0; i< 1000; i ++) {intersectionSize = runSetIntersection(set1,set2); } timer.Stop(); cout<< 找到了<<相交尺寸<<值(使用set_intersection)1000次,以<< timer.GetMilliseconds()<< ms<<恩德尔 timer.Reset(); for(int i = 0; i< 1000; i ++) { junctionSize = runIntersectionTest2(set1,set2); } timer.Stop(); cout<< 找到了<<相交尺寸<<值(使用unordered_set)1000次,以<< timer.GetMilliseconds()<< ms<<恩德尔 getchar(); 返回0; } C#代码: 使用系统; 使用System.Collections.Generic; 使用System.Linq; 使用System.Text; 命名空间DictionaryPerformance { class Program { static void Main(string [] args) { List< int> set1 =新的List< int>(100000); List< int> set2 = new List< int>(1000); //为set1 创建100,000个值,用于(int i = 0; i { int值= 1000000000 + i; set1.Add(value); } Random random = new Random(DateTime.Now.Millisecond); //为set2 创建1,000个值,用于(int i = 0; i< 1000; i ++) { int值= 1000000000 +(随机。 Next()%200000 +1); set2.Add(value); } 长期= System.Diagnostics.Stopwatch.GetTimestamp(); for(int i = 0; i< 1000; i ++) { runIntersectionTest(set1,set2); } 长持续时间= System.Diagnostics.Stopwatch.GetTimestamp()-开始; Console.WriteLine(String.Format(在{0}毫秒中找到交集1000次,((浮动持续时间* 1000.0f)/ System.Diagnostics.Stopwatch.Frequency)); Console.ReadKey(); } static int runIntersectionTest(List< int> set1,List< int> set2) { Dictionary< int,int> theMap = new Dictionary< int,int>(100000); //现在,通过填充地图 foreach(set1中的int value),将两个集合相交。 { theMap [value] = 1; } int相交大小= 0; foreach(set2中的int值) { int计数; if(theMap.TryGetValue(value,out count))) { theMap [value] = 2; sectionSize ++; } } 返回交集大小; } } } C ++代码: // MapPerformance.cpp:定义控制台应用程序的入口点。 // #include stdafx.h #include< hash_map> #include< vector> #include< iostream> #include< time.h> #include< algorithm> #include< set> #include< boost\unordered\unordered_map.hpp> #include timer.h 使用命名空间std; 使用命名空间stdext; 使用命名空间提升; int runIntersectionTest(vector< int> set1,vector< int> set2) { // hash_map< int,int>地图; // map< int,int>地图; unordered_map< int,int>地图; //现在通过填充地图来将这两个集合相交(vector< int> :: iterator iterator = set1.begin(); iterator!= set1.end(); iterator ++ ) { int值= *迭代器; theMap [value] = 1; } int相交大小= 0; for(vector< int> :: iterator迭代器= set2.begin();迭代器!= set2.end();迭代器++) { int值= *迭代器; unordered_map< int,int> :: iterator foundValue = theMap.find(value); if(foundValue!= theMap.end()) { theMap [value] = 2; intersectionSize ++; } } 返回交集大小; } int runSetIntersection(set< int< set1,set< int< set2)) { set< int>路口; set_intersection(set1.begin(),set1.end(),set2.begin(),set2.end(),inserter(intersection,intersection.end())); return交集.size(); } int _tmain(int argc,_TCHAR * argv []) { srand(time(NULL)); vector< int> set1; vector< int> set2; set1.reserve(10000); set2.reserve(1000); //为set1 创建100,000个值,用于(int i = 0; i { int = 1000000000 + i; set1.push_back(value); } //为set2 创建1,000个值,用于(int i = 0; i< 1000; i ++) { int random = rand()%200000 +1; random * = 10; int值= 1000000000 +随机; set2.push_back(value); } 计时器计时器; for(int i = 0; i< 1000; i ++) { runIntersectionTest(set1,set2); } timer.Stop(); cout<< 在<<中找到了相交1000次(使用unordered_map)。 timer.GetMilliseconds()<< ms<<恩德尔 set< int> set21; set< int> set22; //为set1 创建100,000个值,用于(int i = 0; i { int = 1000000000 + i; set21.insert(value); } //为set2 创建1,000个值,用于(int i = 0; i< 1000; i ++) { int random = rand()%200000 +1; random * = 10; int值= 1000000000 +随机; set22.insert(value); } timer.Reset(); for(int i = 0; i { runSetIntersection(set21,set22); } timer.Stop(); cout<< 找到交集(使用set_intersection)1000次,在<< timer.GetMilliseconds()<< ms<<恩德尔 getchar(); 返回0; } 好,这是最新,但有一些更改: 现在已正确设置了C ++集,因此它们有50%的交集(如C#) Set1被重新排序,因此未排序,set2尚未排序 set_intersection实现现在使用矢量,并首先对其进行排序 C ++(发行版,x64)结果: 找到了503个值的相交(使用unordered_map)1000次,在35131.1ms 中找到494个值的相交(使用set_intersection)1000次,在10317ms 所以它比C#慢2倍。 @Jalf:您得到一些非常快速的数字,在这里我做错了什么吗? C ++代码: // MapPerformance.cpp:定义控制台应用程序的入口点。 // #include stdafx.h #include< hash_map> #include< vector> #include< iostream> #include< time.h> #include< algorithm> #include< set> #include< boost\unordered\unordered_map.hpp> #include timer.h 使用命名空间std; 使用命名空间stdext; 使用命名空间提升; int runIntersectionTest(const vector< int& set1,const vector< int& set2)) { // hash_map< int,int>地图; // map< int,int>地图; unordered_map< int,int>地图; vector< int> :: const_iterator set1_end = set1.end(); //现在通过填充地图来使这两个集合相交(vector< int> :: const_iterator iterator = set1.begin();迭代器!= set1_end; ++ iterator) { int值= *迭代器; theMap [value] = 1; } int相交大小= 0; vector< int> :: const_iterator set2_end = set2.end(); for(vector< int> :: const_iterator迭代器= set2.begin();迭代器!= set2_end; ++迭代器) { int值= *迭代器; unordered_map< int,int> :: iterator foundValue = theMap.find(value); if(foundValue!= theMap.end()) { theMap [value] = 2; intersectionSize ++; } } 返回交集大小; } int runSetIntersection(const vector set1_unsorted,const vector set2_unsorted) { //创建两个向量 std :: vector< int> set1(set1_unsorted.size()); std :: vector< int> set2(set2_unsorted.size()); //将未排序的数据复制到其中 std :: copy(set1_unsorted.begin(),set1_unsorted.end(),set1.begin()); std :: copy(set2_unsorted.begin(),set2_unsorted.end(),set2.begin()); //对数据进行排序 sort(set1.begin(),set1.end()); sort(set2.begin(),set2.end()); vector< int>路口; junction.reserve(1000); set_intersection(set1.begin(),set1.end(),set2.begin(),set2.end(),inserter(intersection,intersection.end())); 返回交集.size(); } void createSets(vector∫& set1,vector< int>& set2) { srand(time(NULL)); set1.reserve(100000); set2.reserve(1000); //为set1 创建100,000个值,用于(int i = 0; i { int = 1000000000 + i; set1.push_back(value); } //尝试使我们的一半值与浮动比率相交= 200000.0f / RAND_MAX; //为set2 创建1,000个值,用于(int i = 0; i< 1000; i ++) { int random = rand ()*比率+1; int值= 1000000000 +随机; set2.push_back(value); } //确保set1处于随机顺序(未排序) random_shuffle(set1.begin(),set1.end()); } int _tmain(int argc,_TCHAR * argv []) { int交集大小= 0; vector< int> set1,set2; createSets(set1,set2); 计时器计时器; for(int i = 0; i< 1000; i ++) {intersectionSize = runIntersectionTest(set1,set2); } timer.Stop(); cout<< 找到了<<相交尺寸<<值(使用unordered_map)1000次,以<< timer.GetMilliseconds()<< ms<<恩德尔 timer.Reset(); for(int i = 0; i< 1000; i ++) {intersectionSize = runSetIntersection(set1,set2); } timer.Stop(); cout<< 找到了<<相交尺寸<<值(使用set_intersection)1000次,以<< timer.GetMilliseconds()<< ms<<恩德尔 getchar(); 返回0; } 解决方案测试。 首先,您不是测试集合交集,而是创建几个数组,用随机数填充它们,然后执行集合交集。您应该只计时您真正感兴趣的部分代码。即使您想做这些事情,也不应在此处对其进行基准测试。一次测量一件事,以减少不确定性。如果希望C ++实现的性能更好,则首先需要知道它的哪一部分比预期的要慢。 第二,您应该运行测试多次,以考虑可能的缓存效果和其他不确定性。 (并且可能总共输出1000次运行的总时间,而不是每次输出一次。这样可以减少计时器的不确定性,因为计时器的分辨率可能有限,并且在0-20ms范围内使用时报告的结果不准确。 / p> 据我所知,应该对set_intersection的输入进行排序,而set2不会这样。似乎没有理由使用 unordered_map ,当 unordered_set 与您正在做的事情更相配时。 关于所需的设置代码,请注意,您可能不需要填充矢量以运行交叉点,您自己的实现和 set_intersection 已经在迭代器上工作,因此您只需将它们的一对迭代器传递给您输入中已经存在的数据结构即可。 A有关您的代码的一些其他特定注释: 使用 ++ iterator 代替 iterator ++ 而不是在每次循环迭代时都调用vector.end(),而是调用一次并缓存结果 使用排序向量vs std :: set vs unordered_set (不是 unordered_map )的实验 编辑: 我没有尝试过C#版本,因此我无法正确比较数字,但这是经过修改的测试。每个处理器在带有4GB RAM的Core 2 Quad 2.5GHz上运行1000次: std :: set_intersection on std :: set :tr1 :: unordered_set上的2606ms std :: set_intersection:1014ms排序后的矢量上的 std :: set_intersection:171ms std :: set_intersection在未排序的矢量上:10140ms 最后一个有点不公平,因为它必须同时复制和排序向量。理想情况下,只有排序应该是基准的一部分。我尝试创建一个使用1000个未排序向量数组的版本(因此,我不必在每次迭代中都复制未排序数据),但是性能大致相同,或稍差一些,因为这将导致持续的高速缓存未命中,所以我又恢复为该版本 我的代码: #define _SECURE_SCL 0 #include< ctime> #include< vector> #include< set> #include< iostream> #include< algorithm> #include< unordered_set> #include< windows.h> 模板< typename T,typename OutIter> void stl_intersect(const T& set1,const T& set2,OutIter out){ std :: set_intersection(set1.begin(),set1.end(),set2.begin(),set2。 end(),out); } 模板< typename T,typename OutIter> void sort_stl_intersect(T& set1,T& set2,OutIter out){ std :: sort(set1.begin(),set1.end()); std :: sort(set2.begin(),set2.end()); std :: set_intersection(set1.begin(),set1.end(),set2.begin(),set2.end(),输出); } 模板< typename T> void init_sorted_vec(T first,T last){ for(T cur = first; cur!= last; ++ cur) { int i = cur-first; int值= 1000000000 + i; * cur =值; } } 模板< typename T> void init_unsorted_vec(T first,T last){ for(T cur = first; cur!= last; ++ cur) { int i = rand()% 200000 +1; i * = 10; int值= 1000000000 + i; * cur =值; } } struct resize_and_shuffle { resize_and_shuffle(int size):size(size){} void operator()( std :: vector< int& vec){ vec.resize(size); } int大小; }; int main() { srand(time(NULL)); std :: vector< int>出(100000); std :: vector< int> sortedvec1(100000); std :: vector< int> sortedvec2(1000); init_sorted_vec(sortedvec1.begin(),sortedvec1.end()); init_unsorted_vec(sortedvec2.begin(),sortedvec2.end()); std :: sort(sortedvec2.begin(),sortedvec2.end()); std :: vector< int> unsortedvec1(sortedvec1.begin(),sortedvec1.end()); std :: vector< int> unsortedvec2(sortedvec2.begin(),sortedvec2.end()); std :: random_shuffle(unsortedvec1.begin(),unsortedvec1.end()); std :: random_shuffle(unsortedvec2.begin(),unsortedvec2.end()); std :: vector< int> vecs1 [1000]; std :: vector< int> vecs2 [1000]; std :: fill(vecs1,vecs1 + 1000,unsortedvec1); std :: fill(vecs2,vecs2 + 1000,unsortedvec2); std :: set< int> set1(sortedvec1.begin(),sortedvec1.end()); std :: set< int> set2(sortedvec2.begin(),sortedvec2.end()); std :: tr1 :: unordered_set< int> uset1(sortedvec1.begin(),sortedvec1.end()); std :: tr1 :: unordered_set< int> uset2(sortedvec2.begin(),sortedvec2.end()); DWORD开始,停止; DWORD delta [4]; start = GetTickCount(); for(int i = 0; i< 1000; ++ i){ stl_intersect(set1,set2,out.begin()); } stop = GetTickCount(); delta [0] =停止-开始; start = GetTickCount(); for(int i = 0; i< 1000; ++ i){ stl_intersect(uset1,uset2,out.begin()); } stop = GetTickCount(); delta [1] =停止-开始; start = GetTickCount(); for(int i = 0; i< 1000; ++ i){ stl_intersect(sortedvec1,sortedvec2,out.begin()); } stop = GetTickCount(); delta [2] =停止-开始; start = GetTickCount(); for(int i = 0; i< 1000; ++ i){ sort_stl_intersect(vecs1 [i],vecs1 [i],out.begin()); } stop = GetTickCount(); delta [3] =停止-开始; std :: cout<< std :: set_intersection on std :: set:<< delta [0]<< ms\n; std :: cout<< tr1 :: unordered_set上的std :: set_intersection:<< delta [1]<< ms\n; std :: cout<< std :: set_intersection在排序的向量上:<< delta [2]<< ms\n; std :: cout<< 未排序向量上的std :: set_intersection:<< delta [3]<< ms\n; 返回0; } 没有理由说明C ++应该总是比C#更快。 C#具有一些关键优势,需要格外小心才能与C ++竞争。 我可以想到的主要方面是,动态分配在.NET-land上非常便宜。每当C ++向量,set或unordered_set(或任何其他容器)需要调整大小或扩展时,这都是非常昂贵的 malloc 操作。在.NET中,堆分配只不过是向指针添加偏移量而已。 因此,如果您想要C ++版本参与竞争,则可能必须解决这样,允许您在不执行实际堆分配的情况下调整容器的大小,可能是通过对容器使用自定义分配器(也许boost :: pool可能是个不错的选择,或者您可以尝试自己滚动) 另一个问题是 set_difference 仅适用于排序输入,并且为了重现涉及排序的测试结果,我们必须重新编写每次迭代中未排序数据的副本非常昂贵(尽管再次使用自定义分配器会有所帮助)。我不知道您的输入采用什么形式,但是有可能您可以直接对输入进行排序而不复制它,然后直接在其上运行 set_difference 。 (如果您的输入至少是数组或STL容器,那将很容易做到。) STL的主要优势之一是它是如此灵活,它几乎可以处理任何输入序列。在C#中,您几乎必须将输入复制到列表或字典之类的东西中,但是在C ++中,您可能可以运行 std :: sort 和 set_intersection 放在原始输入上。 最后,当然,尝试通过探查器运行代码,看看确切的位置时间正在花。您可能还想尝试通过GCC运行代码。我的印象是,MSVC中的STL性能有时有点古怪。也许值得在另一个编译器下进行测试,只是看看您那里是否有类似的计时。 最后,您可能会发现这些博客文章与C ++ vs C#的性能有关: http://blogs.msdn.com/ricom/ archive / 2005/05/10 / 416151.aspx 从本质上讲,其士气是,您可以在C ++中获得更好的性能,但是令人惊讶的工作量。 On my machine (Quad core, 8gb ram), running Vista x64 Business, with Visual Studio 2008 SP1, I am trying to intersect two sets of numbers very quickly.I've implemented two approaches in C++, and one in C#. The C# approach is faster so far, I'd like to improve the C++ approach so its faster than C#, which I expect C++ can do.Here is the C# output: (Release build)Found the intersection 1000 times, in 4741.407 msHere is the initial C++ output, for two different approaches (Release x64 build):Found the intersection (using unordered_map) 1000 times, in 21580.7msFound the intersection (using set_intersection) 1000 times, in 22366.6msHere is the latest C++ output, for three approaches (Release x64 build):Latest benchmark:Found the intersection of 504 values (using unordered_map) 1000 times, in 28827.6msFound the intersection of 495 values (using set_intersection) 1000 times, in 9817.69msFound the intersection of 504 values (using unordered_set) 1000 times, in 24769.1msSo, the set_intersection approach is now approx 2x slower than C#, but 2x faster than the initial C++ approaches.Latest C++ code:Code:// MapPerformance.cpp : Defines the entry point for the console application.//#include "stdafx.h"#include <hash_map>#include <vector>#include <iostream>#include <time.h>#include <algorithm>#include <set>#include <unordered_set>#include <boost\unordered\unordered_map.hpp>#include "timer.h"using namespace std;using namespace stdext;using namespace boost;using namespace tr1;int runIntersectionTest2(const vector<int>& set1, const vector<int>& set2){ // hash_map<int,int> theMap; // map<int,int> theMap; unordered_set<int> theSet; theSet.insert( set1.begin(), set1.end() ); int intersectionSize = 0; vector<int>::const_iterator set2_end = set2.end(); for ( vector<int>::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator ) { if ( theSet.find(*iterator) != theSet.end() ) { intersectionSize++; } } return intersectionSize;}int runIntersectionTest(const vector<int>& set1, const vector<int>& set2){ // hash_map<int,int> theMap; // map<int,int> theMap; unordered_map<int,int> theMap; vector<int>::const_iterator set1_end = set1.end(); // Now intersect the two sets by populating the map for ( vector<int>::const_iterator iterator = set1.begin(); iterator != set1_end; ++iterator ) { int value = *iterator; theMap[value] = 1; } int intersectionSize = 0; vector<int>::const_iterator set2_end = set2.end(); for ( vector<int>::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator ) { int value = *iterator; unordered_map<int,int>::iterator foundValue = theMap.find(value); if ( foundValue != theMap.end() ) { theMap[value] = 2; intersectionSize++; } } return intersectionSize;}int runSetIntersection(const vector<int>& set1_unsorted, const vector<int>& set2_unsorted){ // Create two vectors std::vector<int> set1(set1_unsorted.size()); std::vector<int> set2(set2_unsorted.size()); // Copy the unsorted data into them std::copy(set1_unsorted.begin(), set1_unsorted.end(), set1.begin()); std::copy(set2_unsorted.begin(), set2_unsorted.end(), set2.begin()); // Sort the data sort(set1.begin(),set1.end()); sort(set2.begin(),set2.end()); vector<int> intersection; intersection.reserve(1000); set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), back_inserter(intersection)); return intersection.size(); }void createSets( vector<int>& set1, vector<int>& set2 ){ srand ( time(NULL) ); set1.reserve(100000); set2.reserve(1000); // Create 100,000 values for set1 for ( int i = 0; i < 100000; i++ ) { int value = 1000000000 + i; set1.push_back(value); } // Try to get half of our values intersecting float ratio = 200000.0f / RAND_MAX; // Create 1,000 values for set2 for ( int i = 0; i < 1000; i++ ) { int random = rand() * ratio + 1; int value = 1000000000 + random; set2.push_back(value); } // Make sure set1 is in random order (not sorted) random_shuffle(set1.begin(),set1.end());}int _tmain(int argc, _TCHAR* argv[]){ int intersectionSize = 0; vector<int> set1, set2; createSets( set1, set2 ); Timer timer; for ( int i = 0; i < 1000; i++ ) { intersectionSize = runIntersectionTest(set1, set2); } timer.Stop(); cout << "Found the intersection of " << intersectionSize << " values (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl; timer.Reset(); for ( int i = 0; i < 1000; i++ ) { intersectionSize = runSetIntersection(set1,set2); } timer.Stop(); cout << "Found the intersection of " << intersectionSize << " values (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl; timer.Reset(); for ( int i = 0; i < 1000; i++ ) { intersectionSize = runIntersectionTest2(set1,set2); } timer.Stop(); cout << "Found the intersection of " << intersectionSize << " values (using unordered_set) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl; getchar(); return 0;}C# code:using System;using System.Collections.Generic;using System.Linq;using System.Text;namespace DictionaryPerformance{ class Program { static void Main(string[] args) { List<int> set1 = new List<int>(100000); List<int> set2 = new List<int>(1000); // Create 100,000 values for set1 for (int i = 0; i < 100000; i++) { int value = 1000000000 + i; set1.Add(value); } Random random = new Random(DateTime.Now.Millisecond); // Create 1,000 values for set2 for (int i = 0; i < 1000; i++) { int value = 1000000000 + (random.Next() % 200000 + 1); set2.Add(value); } long start = System.Diagnostics.Stopwatch.GetTimestamp(); for (int i = 0; i < 1000; i++) { runIntersectionTest(set1,set2); } long duration = System.Diagnostics.Stopwatch.GetTimestamp() - start; Console.WriteLine(String.Format("Found the intersection 1000 times, in {0} ms", ((float) duration * 1000.0f) / System.Diagnostics.Stopwatch.Frequency)); Console.ReadKey(); } static int runIntersectionTest(List<int> set1, List<int> set2) { Dictionary<int,int> theMap = new Dictionary<int,int>(100000); // Now intersect the two sets by populating the map foreach( int value in set1 ) { theMap[value] = 1; } int intersectionSize = 0; foreach ( int value in set2 ) { int count; if ( theMap.TryGetValue(value, out count ) ) { theMap[value] = 2; intersectionSize++; } } return intersectionSize; } }}C++ code:// MapPerformance.cpp : Defines the entry point for the console application.//#include "stdafx.h"#include <hash_map>#include <vector>#include <iostream>#include <time.h>#include <algorithm>#include <set>#include <boost\unordered\unordered_map.hpp>#include "timer.h"using namespace std;using namespace stdext;using namespace boost;int runIntersectionTest(vector<int> set1, vector<int> set2){ // hash_map<int,int> theMap; // map<int,int> theMap; unordered_map<int,int> theMap; // Now intersect the two sets by populating the map for ( vector<int>::iterator iterator = set1.begin(); iterator != set1.end(); iterator++ ) { int value = *iterator; theMap[value] = 1; } int intersectionSize = 0; for ( vector<int>::iterator iterator = set2.begin(); iterator != set2.end(); iterator++ ) { int value = *iterator; unordered_map<int,int>::iterator foundValue = theMap.find(value); if ( foundValue != theMap.end() ) { theMap[value] = 2; intersectionSize++; } } return intersectionSize;}int runSetIntersection(set<int> set1, set<int> set2){ set<int> intersection; set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), inserter(intersection, intersection.end())); return intersection.size(); }int _tmain(int argc, _TCHAR* argv[]){ srand ( time(NULL) ); vector<int> set1; vector<int> set2; set1.reserve(10000); set2.reserve(1000); // Create 100,000 values for set1 for ( int i = 0; i < 100000; i++ ) { int value = 1000000000 + i; set1.push_back(value); } // Create 1,000 values for set2 for ( int i = 0; i < 1000; i++ ) { int random = rand() % 200000 + 1; random *= 10; int value = 1000000000 + random; set2.push_back(value); } Timer timer; for ( int i = 0; i < 1000; i++ ) { runIntersectionTest(set1, set2); } timer.Stop(); cout << "Found the intersection (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl; set<int> set21; set<int> set22; // Create 100,000 values for set1 for ( int i = 0; i < 100000; i++ ) { int value = 1000000000 + i; set21.insert(value); } // Create 1,000 values for set2 for ( int i = 0; i < 1000; i++ ) { int random = rand() % 200000 + 1; random *= 10; int value = 1000000000 + random; set22.insert(value); } timer.Reset(); for ( int i = 0; i < 1000; i++ ) { runSetIntersection(set21,set22); } timer.Stop(); cout << "Found the intersection (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl; getchar(); return 0;}Ok, here is the latest, with some changes:The C++ sets are now properly setup so they have a 50% intersection (like the C#)Set1 is shuffled so its not sorted, set2 was already not sortedThe set_intersection implementation now uses vectors, and sorts them firstC++ (Release, x64) Results:Found the intersection of 503 values (using unordered_map) 1000 times, in 35131.1msFound the intersection of 494 values (using set_intersection) 1000 times, in 10317msSo its 2x slower than C#. @Jalf: You're getting some pretty fast numbers, is there something I'm doing wrong here? C++ Code:// MapPerformance.cpp : Defines the entry point for the console application.//#include "stdafx.h"#include <hash_map>#include <vector>#include <iostream>#include <time.h>#include <algorithm>#include <set>#include <boost\unordered\unordered_map.hpp>#include "timer.h"using namespace std;using namespace stdext;using namespace boost;int runIntersectionTest(const vector<int>& set1, const vector<int>& set2){ // hash_map<int,int> theMap; // map<int,int> theMap; unordered_map<int,int> theMap; vector<int>::const_iterator set1_end = set1.end(); // Now intersect the two sets by populating the map for ( vector<int>::const_iterator iterator = set1.begin(); iterator != set1_end; ++iterator ) { int value = *iterator; theMap[value] = 1; } int intersectionSize = 0; vector<int>::const_iterator set2_end = set2.end(); for ( vector<int>::const_iterator iterator = set2.begin(); iterator != set2_end; ++iterator ) { int value = *iterator; unordered_map<int,int>::iterator foundValue = theMap.find(value); if ( foundValue != theMap.end() ) { theMap[value] = 2; intersectionSize++; } } return intersectionSize;}int runSetIntersection(const vector<int> set1_unsorted, const vector<int> set2_unsorted){ // Create two vectors std::vector<int> set1(set1_unsorted.size()); std::vector<int> set2(set2_unsorted.size()); // Copy the unsorted data into them std::copy(set1_unsorted.begin(), set1_unsorted.end(), set1.begin()); std::copy(set2_unsorted.begin(), set2_unsorted.end(), set2.begin()); // Sort the data sort(set1.begin(),set1.end()); sort(set2.begin(),set2.end()); vector<int> intersection; intersection.reserve(1000); set_intersection(set1.begin(),set1.end(), set2.begin(), set2.end(), inserter(intersection, intersection.end())); return intersection.size(); }void createSets( vector<int>& set1, vector<int>& set2 ){ srand ( time(NULL) ); set1.reserve(100000); set2.reserve(1000); // Create 100,000 values for set1 for ( int i = 0; i < 100000; i++ ) { int value = 1000000000 + i; set1.push_back(value); } // Try to get half of our values intersecting float ratio = 200000.0f / RAND_MAX; // Create 1,000 values for set2 for ( int i = 0; i < 1000; i++ ) { int random = rand() * ratio + 1; int value = 1000000000 + random; set2.push_back(value); } // Make sure set1 is in random order (not sorted) random_shuffle(set1.begin(),set1.end());}int _tmain(int argc, _TCHAR* argv[]){ int intersectionSize = 0; vector<int> set1, set2; createSets( set1, set2 ); Timer timer; for ( int i = 0; i < 1000; i++ ) { intersectionSize = runIntersectionTest(set1, set2); } timer.Stop(); cout << "Found the intersection of " << intersectionSize << " values (using unordered_map) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl; timer.Reset(); for ( int i = 0; i < 1000; i++ ) { intersectionSize = runSetIntersection(set1,set2); } timer.Stop(); cout << "Found the intersection of " << intersectionSize << " values (using set_intersection) 1000 times, in " << timer.GetMilliseconds() << "ms" << endl; getchar(); return 0;} 解决方案 There are several problems with your test.First, you are not testing set intersection, but "create a couple of arrays, fill them with random numbers, and then perform set intersection". You should only time the portion of the code you're actually interested in. Even if you're going to want to do those things, they should not be benchmarked here. Measure one thing at a time, to reduce uncertainty. If you want your C++ implementation to perform better, you first need to know which part of it is slower than expected. Which means you have to separate setup code from intersection test.Second, you should run the test a large number of times to take possible caching effects and other uncertainties into account. (And probably output one total time for, say, 1000 runs, rather than an individual time for each. That way you reduce the uncertainty from the timer which might have limited resolution and report inaccurate results when used in the 0-20ms range.Further, as far as I can read from the docs, the input to set_intersection should be sorted, which set2 won't be. An there seems to be no reason to use unordered_map, when unordered_set would be a far better match for what you're doing.About the setup code being needed, note that you probably don't need to populate vectors in order to run the intersection. Both your own implementation and set_intersection work on iterators already, so you can simply pass them a pair of iterators to the data structures your inputs are in already.A few more specific comments on your code:Use ++iterator instead of iterator++rather than calling vector.end() at each loop iteration, call it once and cache the resultexperiment with using sorted vectors vs std::set vs unordered_set (not unordered_map)Edit:I haven't tried your C# version, so I can't compare the numbers properly, but here's my modified test. Each is run 1000 times, on a Core 2 Quad 2.5GHz with 4GB RAM:std::set_intersection on std::set: 2606msstd::set_intersection on tr1::unordered_set: 1014msstd::set_intersection on sorted vectors: 171msstd::set_intersection on unsorted vectors: 10140msThe last one is a bit unfair, because it has to both copy and sort the vectors. Ideally, only the sort should be part of the benchmark. I tried creating a version that used an array of 1000 unsorted vectors (so I woudln't have to copy the unsorted data in each iteration), but the performance was about the same, or a bit worse, because this would cause constant cache misses, so I reverted back to this versionAnd my code:#define _SECURE_SCL 0#include <ctime>#include <vector>#include <set>#include <iostream>#include <algorithm>#include <unordered_set>#include <windows.h>template <typename T, typename OutIter>void stl_intersect(const T& set1, const T& set2, OutIter out){ std::set_intersection(set1.begin(), set1.end(), set2.begin(), set2.end(), out);}template <typename T, typename OutIter>void sort_stl_intersect(T& set1, T& set2, OutIter out){ std::sort(set1.begin(), set1.end()); std::sort(set2.begin(), set2.end()); std::set_intersection(set1.begin(), set1.end(), set2.begin(), set2.end(), out);}template <typename T>void init_sorted_vec(T first, T last){ for ( T cur = first; cur != last; ++cur) { int i = cur - first; int value = 1000000000 + i; *cur = value; }}template <typename T>void init_unsorted_vec(T first, T last){ for ( T cur = first; cur != last; ++cur) { int i = rand() % 200000 + 1; i *= 10; int value = 1000000000 + i; *cur = value; }}struct resize_and_shuffle { resize_and_shuffle(int size) : size(size) {} void operator()(std::vector<int>& vec){ vec.resize(size); } int size;};int main(){ srand ( time(NULL) ); std::vector<int> out(100000); std::vector<int> sortedvec1(100000); std::vector<int> sortedvec2(1000); init_sorted_vec(sortedvec1.begin(), sortedvec1.end()); init_unsorted_vec(sortedvec2.begin(), sortedvec2.end()); std::sort(sortedvec2.begin(), sortedvec2.end()); std::vector<int> unsortedvec1(sortedvec1.begin(), sortedvec1.end()); std::vector<int> unsortedvec2(sortedvec2.begin(), sortedvec2.end()); std::random_shuffle(unsortedvec1.begin(), unsortedvec1.end()); std::random_shuffle(unsortedvec2.begin(), unsortedvec2.end()); std::vector<int> vecs1[1000]; std::vector<int> vecs2[1000]; std::fill(vecs1, vecs1 + 1000, unsortedvec1); std::fill(vecs2, vecs2 + 1000, unsortedvec2); std::set<int> set1(sortedvec1.begin(), sortedvec1.end()); std::set<int> set2(sortedvec2.begin(), sortedvec2.end()); std::tr1::unordered_set<int> uset1(sortedvec1.begin(), sortedvec1.end()); std::tr1::unordered_set<int> uset2(sortedvec2.begin(), sortedvec2.end()); DWORD start, stop; DWORD delta[4]; start = GetTickCount(); for (int i = 0; i < 1000; ++i){ stl_intersect(set1, set2, out.begin()); } stop = GetTickCount(); delta[0] = stop - start; start = GetTickCount(); for (int i = 0; i < 1000; ++i){ stl_intersect(uset1, uset2, out.begin()); } stop = GetTickCount(); delta[1] = stop - start; start = GetTickCount(); for (int i = 0; i < 1000; ++i){ stl_intersect(sortedvec1, sortedvec2, out.begin()); } stop = GetTickCount(); delta[2] = stop - start; start = GetTickCount(); for (int i = 0; i < 1000; ++i){ sort_stl_intersect(vecs1[i], vecs1[i], out.begin()); } stop = GetTickCount(); delta[3] = stop - start; std::cout << "std::set_intersection on std::set: " << delta[0] << "ms\n"; std::cout << "std::set_intersection on tr1::unordered_set: " << delta[1] << "ms\n"; std::cout << "std::set_intersection on sorted vectors: " << delta[2] << "ms\n"; std::cout << "std::set_intersection on unsorted vectors: " << delta[3] << "ms\n"; return 0;}There's no reason why C++ should always be faster than C#. C# has a few key advantages that require a lot of care to compete with in C++.The primary one I can think of is that dynamic allocations are ridiculously cheap in .NET-land. Every time a C++ vector, set or unordered_set (or any other container) has to resize or expand, it is a very costly malloc operation. In .NET, a heap allocation is little more than adding an offset to a pointer.So if you want the C++ version to compete, you'll probably have to solve that, allowing your containers to resize without having to perform actual heap allocations, probably by using custom allocators for the containers (perhaps boost::pool might be a good bet, or you can try rolling your own)Another issue is that set_difference only works on sorted input, and in order to reproduce tests results that involve a sort, we have to make a fresh copy of the unsorted data in each iteration, which is costly (although again, using custom allocators will help a lot). I don't know what form your input takes, but it is possible that you can sort your input directly, without copying it, and then run set_difference directly on that. (That would be easy to do if your input is an array or a STL container at least.)One of the key advantages of the STL is that it is so flexible, it can work on pretty much any input sequence. In C#, you pretty much have to copy the input to a List or Dictionary or something, but in C++, you might be able to get away with running std::sort and set_intersection on the raw input.Finally, of course, try running the code through a profiler and see exactly where the time is being spent. You might also want to try running the code through GCC instead. It's my impression that STL performance in MSVC is sometimes a bit quirky. It might be worth testing under another compiler just to see if you get similar timings there.Finally, you might find these blog posts relevant for performance of C++ vs C#:http://blogs.msdn.com/ricom/archive/2005/05/10/416151.aspxThe morale of those is essentially that yes, you can get better performance in C++, but it is a surprising amount of work. 这篇关于集的快速交集:C ++与C#的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持!
10-16 02:09