我正在使用C++中的多线程使用Eratosthenes筛子来制作质数筛子,但是当我使用多个线程时,结果不一致。它必须能够通过的范围是1-2 ^ 32。当以较小的范围(例如1-1024)运行时,通常会提供正确数量的质数,但是随着范围变大,误差范围也会增大。我假设这是一个竞争条件,因为我不使用互斥锁(并且不希望这样做,因为程序的设置没有必要),或者我发送数据的方式有问题线程功能。我仍然习惯于C++,它是指针/引用。它找到的质数始终大于或等于实际数,从不小于。对于复合数字,位图中的位索引设置为1。由于缺乏C / C++经验,这可能只是我忽略的一些愚蠢的小错误。如果我不清楚任何事情,请告诉我。感谢您的光临。
#include <iostream>
#include <pthread.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sched.h>
#include <vector>
#include <math.h>
using namespace std;
#define NUM_OF_THREADS 4
#define UPPER_LIMIT pow(2, 30)
#define SQRT_UPPER_LIMIT sqrt(UPPER_LIMIT)
typedef struct {
int thread_index;
unsigned long long prime;
unsigned long long marked_up_to;
pthread_t thread;
bool thread_is_done;
} thread_info_t;
typedef struct {
unsigned long long x;
unsigned long long y;
}indexes_t;
static const unsigned long bitmasks[] = {
0x8000000000000000, 0x4000000000000000, 0x2000000000000000, 0x1000000000000000,
0x800000000000000, 0x400000000000000, 0x200000000000000, 0x100000000000000,
0x80000000000000, 0x40000000000000, 0x20000000000000, 0x10000000000000,
0x8000000000000, 0x4000000000000, 0x2000000000000, 0x1000000000000,
0x800000000000, 0x400000000000, 0x200000000000, 0x100000000000,
0x80000000000, 0x40000000000, 0x20000000000, 0x10000000000,
0x8000000000, 0x4000000000, 0x2000000000, 0x1000000000,
0x800000000, 0x400000000, 0x200000000, 0x100000000,
0x80000000, 0x40000000, 0x20000000, 0x10000000,
0x8000000, 0x4000000, 0x2000000, 0x1000000,
0x800000, 0x400000, 0x200000, 0x100000,
0x80000, 0x40000, 0x20000, 0x10000,
0x8000, 0x4000, 0x2000, 0x1000,
0x800, 0x400, 0x200, 0x100,
0x80, 0x40, 0x20, 0x10,
0x8, 0x4, 0x2, 0x1
};
clock_t start;
clock_t stop;
static unsigned long *bitmap; //array of longs
static int bits_in_element = sizeof(unsigned long long)*8;
static thread_info_t info[NUM_OF_THREADS];
indexes_t bit_indexes_from_number(unsigned long long number);
void print_bitmap();
bool check_if_bit_index_is_prime(unsigned long long i, unsigned long long j);
static void * threadFunc(void *arg)
{
thread_info_t *thread_info = (thread_info_t *)arg;
unsigned long long prime = thread_info->prime;
unsigned long long comp_number = prime+prime;
int thread_index = thread_info->thread_index;
indexes_t comp_index;
for(; comp_number <= UPPER_LIMIT; comp_number += prime) // get rid of prime multiples
{
comp_index = bit_indexes_from_number(comp_number);
bitmap[comp_index.x] |= bitmasks[comp_index.y];
info[thread_index].marked_up_to = comp_number; // so main thread only checks for primes past what's been marked
}
thread_info->thread_is_done = true;
return NULL;
}
int main (int argc, char * argv[])
{
long ncpus;
double total_time;
unsigned long long num_to_check;
thread_info_t *thread_to_use;
int thread_ret_val;
start = clock();
for(int i = 0; i < NUM_OF_THREADS; i++)
{
info[i].thread_index = i;
info[i].marked_up_to = 2;
info[i].thread_is_done = true;
}
for(int i = 0; i < NUM_OF_THREADS; i++)
{
bitmap = (unsigned long *)malloc(sizeof(unsigned long *) * UPPER_LIMIT/8);
bitmap[0] |= (bitmasks[0] | bitmasks[1]);
}
for(unsigned long long i = 0; i <= SQRT_UPPER_LIMIT/bits_in_element; i++)// go thru elements in array
{
for(unsigned long long j = (i == 0 ? 2 : 0); j < bits_in_element; j++) //go thru bits in elements
{
num_to_check = (i * bits_in_element) + j;
//make sure all threads are past num_to_check
for(int k = 0; ; k++)
{
if(k == NUM_OF_THREADS)
k = 0;
if(info[k].marked_up_to >= num_to_check)
break;
}
if(check_if_bit_index_is_prime(i, j)) //check if bit index is prime
{
for(int k = 0; ; k++) //wait for a finished thread to use
{
if(k == NUM_OF_THREADS)
k = 0;
if(info[k].thread_is_done)
{
thread_to_use = &info[k];
info[k].thread_is_done = false;
info[k].prime = (i * bits_in_element) + j;
break;
}
}
thread_ret_val = pthread_create(&thread_to_use->thread, NULL, threadFunc, (void *)thread_to_use); //thread gets rid of multiples
if(thread_ret_val != 0)
{
cerr << "thread error: " << strerror(thread_ret_val) << "\n";
return -1;
}
}
}
}
for(int i = 0; i < NUM_OF_THREADS; i++)
{
printf("waiting on %d\n", i);
thread_ret_val = pthread_join(info[i].thread, NULL);
if(thread_ret_val != 0)
{
cout << strerror(thread_ret_val);
}
}
stop = clock();
total_time = (double)(stop - start) / (double)CLOCKS_PER_SEC;
ncpus = sysconf(_SC_NPROCESSORS_ONLN);
/* Print performance results */
printf ("Total time using %d threads : %.6f seconds\n",
NUM_OF_THREADS, total_time / (NUM_OF_THREADS < ncpus ? NUM_OF_THREADS : ncpus));
print_bitmap();
return 1;
}
indexes_t bit_indexes_from_number(unsigned long long number)
{
indexes_t indexes;
indexes.x = ceill(number / bits_in_element); //ceiling or not??
if(indexes.x == 0)
indexes.y = number;
else
indexes.y = number - indexes.x*bits_in_element;
return indexes;
}
void print_bitmap()
{
int count = 0;
for(int i = 0; i <= UPPER_LIMIT; i++)
{
if(check_if_bit_index_is_prime(bit_indexes_from_number(i).x, bit_indexes_from_number(i).y))
{
count++;
}
}
cout << "number of primes between 1 and " << UPPER_LIMIT << ": " << count << "\n";
}
bool check_if_bit_index_is_prime(unsigned long long i, unsigned long long j)
{
if(bitmap[i] & bitmasks[j])
return false;
else
return true;
}
最佳答案
主要的主要问题是您的thread_info_t。您需要确保此结构中成员更改的原子性。这不是原子的事实很可能导致您的问题过多。确保这些操作是原子的将依赖于c++ 11的std::atomic或特定于平台的细节。当然,您可以使用锁来确保一次只有一个线程接触thread_info_t。
以下代码也存在一些问题:
bitmap = (unsigned long *)malloc(sizeof(unsigned long *) * UPPER_LIMIT/8);
bitmap[0] |= (bitmasks[0] | bitmasks[1]);
在这段代码中有两个错误。首先,内存泄漏,除最后一个分配外,每个分配都丢失,没有指针指向它,也没有释放空间。
其次,在为位图分配内存后,数据是不确定的。因此,位图[0] | =(bitmasks [0] | bitmasks [1]);无效,因为bitmap [0]具有未定义的值。
关于c++ - 质数筛子不能总是获得范围内正确的质数,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/9511173/