c++ - 基于CUDA Thrust的TCP流数据包分组方法

我有捕获的数据包的tcpdumps(.pcap)文件以及数百万个数据包。我需要将这些网络数据包分组为TCP流。

例:
让我们考虑以下数据包
编号 => source_ip，destination_ip，source_port，destination_port

1 => ip1，ip2，s1，s2

2 => ip1，ip3，s3，s4

3 => ip2，ip1，s2，s1

4 => ip3，ip1，s4，s3

现在，在上述四个数据包的示例中，数据包1,3和2,4是相同流的数据包。即我需要将以下数据包解析为[[1,3]，[2,4]]。

我的方法:

由于(ip1，ip2，s1，s2)和(ip2，ip1，s2，s1)表示相同的流，所以我决定对它们都进行哈希处理，并分别命名为forward_hash(正向哈希)和reverse_hash(反向哈希)，因为它们表示同一条流中沿相反方向传输的数据包。

我使用索引数组来跟踪替换和排序过程中的数据包。在最终排序之后，提取相同哈希的开始和结束并将其用于索引数组以获取表示该流的数据包索引

keys is the forward_hash of each packets,
count is number of packets,
packet_ids is the id of each packet corresponding to each of the hash

    thrust::device_vector<unsigned long long> d_keys(keys,(keys+count));
            thrust::device_vector<unsigned long long> d_ids(packet_ids,(packet_ids+count));
            // now sort the ids according to the keys
            thrust::sort_by_key(d_keys.begin(), d_keys.end(), d_ids.begin());
// after sorting, now we need to find the index of each hash
thrust::device_vector<unsigned long long> u_keys(count);
        thrust::device_vector<unsigned long long> output(count);

        thrust::pair<thrust::device_vector<unsigned long long>::iterator, thrust::device_vector<unsigned long long>::iterator> new_end;
        new_end = thrust::reduce_by_key(d_keys.begin(), d_keys.end(),thrust::make_constant_iterator(1),u_keys.begin(),output.begin());
// now we need to find starting index to each hash
....

我已经尝试为唯一的正向和反向哈希实现哈希表查找，以便在排序之前用正向哈希替换每个反向哈希……但是性能相当慢。
有什么建议吗？

谢谢

最佳答案

我提出了另一种方法：首先在每个数据包内部对源端和目的端进行排序，然后再对数据包整体进行排序。

示例代码执行以下步骤:

为了识别相同TCP流的数据包，我们需要对数据包进行排序。
在此之前，我们需要确保每个数据包内部的源端和目的端已经按顺序排列。
示例:20:1 -> 10:4变成10:4 -> 20:1

现在我们可以对数据包进行排序，以便将相同流的数据包分组。
此代码假定输入数据包按时间排序。我们使用稳定排序，以便保持每个流内部数据包的原有(时间)顺序。

我们需要找出每个TCP流的起始位置。该步骤的结果是索引，该索引指向已排序的数据包列表中TCP流的开始。

根据您需要结果的方式，我们可以生成有关流的其他信息，例如每个流的包数。

可能的改进:

如果您知道IP地址仅在某个有限范围内，则可以仅使用16位来表示它们。
然后，您可以将发送方地址，发送方端口，接收方地址，接收方端口压缩为64位整数，这将提高排序性能。

编译并运行
nvcc -std=c++11 sort_packets.cu -o sort_packets && ./sort_packets

输出
input data
d_src_addr: 20 10 20 20 30 30 10 20 30 20
d_src_port: 1 2 3 1 2 2 6 1 1 1
d_dst_addr: 10 20 30 10 20 20 30 10 10 10
d_dst_port: 4 2 3 4 5 5 1 4 6 4

packets after sort_within_packet
d_src_addr: 10 10 20 10 20 20 10 10 10 10
d_src_port: 4 2 3 4 5 5 6 4 6 4
d_dst_addr: 20 20 30 20 30 30 30 20 30 20
d_dst_port: 1 2 3 1 2 2 1 1 1 1

after stable_sort
d_orig_ind: 1 0 3 7 9 6 8 2 4 5

packets after stable_sort
d_src_addr: 10 10 10 10 10 10 10 20 20 20
d_src_port: 2 4 4 4 4 6 6 3 5 5
d_dst_addr: 20 20 20 20 20 30 30 30 30 30
d_dst_port: 2 1 1 1 1 1 1 3 2 2

after copy_if
d_start_indices: 0 1 5 7 8
d_stream_lengths: 1 4 2 1 2

group of streams referencing the original indices
[1] [0,3,7,9] [6,8] [2] [4,5]

sort_packets.cu
#include <stdint.h> #include <iostream> #include <thrust/device_vector.h> #include <thrust/iterator/zip_iterator.h> #include <thrust/iterator/transform_iterator.h> #include <thrust/iterator/counting_iterator.h> #include <thrust/sort.h> #include <thrust/sequence.h> #include <thrust/copy.h> #include <thrust/functional.h> #include <thrust/adjacent_difference.h> #include <thrust/scatter.h> #define PRINTER(name) print(#name, (name)) template <template <typename...> class V, typename T, typename ...Args> void print(const char* name, const V<T,Args...> & v) { std::cout << name << ":\t"; thrust::copy(v.begin(), v.end(), std::ostream_iterator<T>(std::cout, "\t")); std::cout << std::endl; } typedef thrust::tuple<uint32_t, uint16_t, uint32_t, uint16_t> Packet; struct sort_within_packet : public thrust::unary_function<Packet, Packet> { __host__ __device__ Packet operator()(Packet p) const { if (thrust::get<0>(p) > thrust::get<2>(p)) { Packet copy(p); thrust::get<0>(p) = thrust::get<2>(copy); thrust::get<1>(p) = thrust::get<3>(copy); thrust::get<2>(p) = thrust::get<0>(copy); thrust::get<3>(p) = thrust::get<1>(copy); } return p; } }; struct find_start_indices : public thrust::unary_function<thrust::tuple<Packet, Packet>, bool> { __host__ __device__ bool operator()(thrust::tuple<Packet, Packet> p) { return (thrust::get<0>(p) != thrust::get<1>(p)); } }; template<typename... Iterators> __host__ __device__ thrust::zip_iterator<thrust::tuple<Iterators...>> zip(Iterators... 
its) { return thrust::make_zip_iterator(thrust::make_tuple(its...)); } int main() { // in this example we just have 10 packets const int N = 10; // demo data // this example uses very simple "IP addresses" uint32_t srcAddrArray[N] = {20, 10, 20, 20, 30, 30, 10, 20, 30, 20}; uint16_t srcPortArray[N] = {1 , 2 , 3 , 1 , 2 , 2 , 6 , 1 , 1 , 1 }; uint32_t dstAddrArray[N] = {10, 20, 30, 10, 20, 20, 30, 10, 10, 10}; uint16_t dstPortArray[N] = {4 , 2 , 3 , 4 , 5 , 5 , 1 , 4 , 6 , 4 }; // upload data to GPU thrust::device_vector<uint32_t> d_src_addr(srcAddrArray, srcAddrArray+N); thrust::device_vector<uint16_t> d_src_port(srcPortArray, srcPortArray+N); thrust::device_vector<uint32_t> d_dst_addr(dstAddrArray, dstAddrArray+N); thrust::device_vector<uint16_t> d_dst_port(dstPortArray, dstPortArray+N); thrust::device_vector<uint32_t> d_orig_ind(N); thrust::sequence(d_orig_ind.begin(), d_orig_ind.end()); std::cout << "input data" << std::endl; PRINTER(d_src_addr); PRINTER(d_src_port); PRINTER(d_dst_addr); PRINTER(d_dst_port); std::cout << std::endl; // 1. sort within packet auto zip_begin = zip(d_src_addr.begin(), d_src_port.begin(), d_dst_addr.begin(), d_dst_port.begin()); auto zip_end = zip(d_src_addr.end(), d_src_port.end(), d_dst_addr.end(), d_dst_port.end()); thrust::transform(zip_begin, zip_end, zip_begin, sort_within_packet()); std::cout << "packets after sort_within_packet" << std::endl; PRINTER(d_src_addr); PRINTER(d_src_port); PRINTER(d_dst_addr); PRINTER(d_dst_port); std::cout << std::endl; // 2. 
sort packets thrust::stable_sort(zip(d_src_addr.begin(), d_src_port.begin(), d_dst_addr.begin(), d_dst_port.begin(), d_orig_ind.begin()), zip(d_src_addr.end(), d_src_port.end(), d_dst_addr.end(), d_dst_port.end(), d_orig_ind.end())); std::cout << "after stable_sort" << std::endl; PRINTER(d_orig_ind); std::cout << std::endl; std::cout << "packets after stable_sort" << std::endl; PRINTER(d_src_addr); PRINTER(d_src_port); PRINTER(d_dst_addr); PRINTER(d_dst_port); std::cout << std::endl; // 3. find stard indices of each stream thrust::device_vector<uint32_t> d_start_indices(N); using namespace thrust::placeholders; thrust::device_vector<uint32_t>::iterator copyEnd = thrust::copy_if(thrust::make_counting_iterator(1), thrust::make_counting_iterator(N), thrust::make_transform_iterator( zip( zip(d_src_addr.begin(), d_src_port.begin(), d_dst_addr.begin(), d_dst_port.begin()), zip(d_src_addr.begin()+1, d_src_port.begin()+1, d_dst_addr.begin()+1, d_dst_port.begin()+1) ), find_start_indices() ), d_start_indices.begin()+1, _1); uint32_t streamCount = copyEnd-d_start_indices.begin(); d_start_indices.resize(streamCount); std::cout << "after copy_if" << std::endl; PRINTER(d_start_indices); // 4. 
generate some additional information about the result and print result formatted thrust::device_vector<uint32_t> d_stream_lengths(streamCount+1); thrust::adjacent_difference(d_start_indices.begin(), d_start_indices.end(), d_stream_lengths.begin()); d_stream_lengths.erase(d_stream_lengths.begin()); d_stream_lengths.back() = N-d_start_indices.back(); PRINTER(d_stream_lengths); thrust::host_vector<uint32_t> h_start_indices = d_start_indices; thrust::host_vector<uint32_t> h_orig_ind = d_orig_ind; auto index = h_start_indices.begin(); index++; std::cout << std::endl << "group of streams referencing the original indices"<< std::endl << "[" << h_orig_ind[0]; for(int i=1; i<N;++i) { if (i == *index) { index++; std::cout << "]\t["; } else { std::cout << ","; } std::cout << h_orig_ind[i]; } std::cout << "]" << std::endl; return 0; }