Cuda 随机数生成

本文介绍了Cuda 随机数生成的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

我想知道，使用 curand 或其他方法生成一个 0 到 49k 之间、并且对每个线程都相同的伪随机数，最佳方式是什么.

I was wondering what was the best way to generate one pseudo random number between 0 and 49k that would be the same for each thread, by using curand or something else.

我更喜欢在内核中生成随机数，因为我必须一次生成一个，但大约 10k 次.

I prefer to generate the random numbers inside the kernel because I will have to generate them one at a time, but about 10k times.

我可以使用 0.0 到 1.0 之间的浮点数，但我不知道如何使我的 PRN 可用于所有线程，因为大多数帖子和示例都显示了如何为每个线程设置不同的 PRN.

And I could use floats between 0.0 and 1.0, but I have no idea how to make my PRN available to all threads, because most posts and examples show how to have a different PRN for each thread.

谢谢

推荐答案

可能你只需要学习 curand 文档，尤其是对于设备 API.为每个线程获取相同序列的关键是为每个线程创建状态(大多数示例都这样做)，然后将相同的序列号传递给每个线程的 init 函数.在curand_init中，参数顺序如下:

Probably you just need to study the curand documentation, especially for the device API. The key to getting the same sequence for each thread is to create state for each thread (most examples do this) and then pass the same sequence number to the init function for each thread. In curand_init, the sequence of parameters is as follows:

curand_init(seed, subsequence number, offset, state)

通过为每个 init 调用设置相同的种子，我们为每个线程生成相同的序列.通过将子序列和偏移量设置为相同，我们在该序列中为每个线程选择相同的起始值.

By setting the same seed for each init call, we generate the same sequence for each thread. By setting the subsequence and offset numbers the same, we select the same starting value within that sequence for each thread.

下面是演示代码:

// compile with: nvcc -arch=sm_20 -lcurand -o t89 t89.cu
#include <stdio.h>
#include <stdlib.h>
#include <curand.h>
#include <curand_kernel.h>

// Multiplier handed to getnextrandscaled() by the testrand kernel.
#define SCALE 49000
// Element count of each host/device int array and of the curandState array.
#define DSIZE 5000
// Threads per block used for both kernel launches in main().
#define nTPB 256

// cudaCheckErrors(msg): reads (and clears) the last CUDA runtime error via
// cudaGetLastError(). If an error is pending, prints `msg`, the CUDA error
// string and the source location to stderr, then terminates with exit(1).
// Wrapped in do { ... } while (0) so the macro behaves as a single statement.
// Every line of the body must end in a backslash so that it stays part of
// the macro definition.
#define cudaCheckErrors(msg) \
    do { \
        cudaError_t __err = cudaGetLastError(); \
        if (__err != cudaSuccess) { \
            fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
                msg, cudaGetErrorString(__err), \
                __FILE__, __LINE__); \
            fprintf(stderr, "*** FAILED - ABORTING\n"); \
            exit(1); \
        } \
    } while (0)

// Advances the generator behind `state` by one step and returns the sample
// produced by curand_uniform() as a float.
// NOTE(review): the cuRAND device API documents curand_uniform() as returning
// values in (0.0, 1.0] -- 0.0 excluded, 1.0 included; confirm against the
// toolkit version in use.
__device__ float getnextrand(curandState *state){
  const float sample = curand_uniform(state);
  return sample;
}

// Returns the next uniform sample from `state` multiplied by `scale` and
// truncated toward zero to an int.
//
// The cast is applied to the whole product. Writing `(int) scale * x` casts
// only `scale` and leaves the float-to-int truncation to the implicit
// conversion at the return, which hides the intent (the result is the same).
//
// NOTE(review): if getnextrand() can return exactly 1.0f, the result can be
// equal to `scale` itself, i.e. the range is inclusive at the top end.
__device__ int getnextrandscaled(curandState *state, int scale){

  return (int)(scale * getnextrand(state));
}


// Initializes one curandState per thread with identical parameters
// (same seed, subsequence 0, offset 0), so every thread's generator starts
// at the same point.
//
// Launch layout: 1D grid of 1D blocks; the flat thread index selects the
// state element. `state` is expected to hold DSIZE elements (that is how
// main() allocates it).
__global__ void initCurand(curandState *state, unsigned long seed){
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
    // The grid is rounded up to a whole number of blocks, so trailing threads
    // have no state element and must not touch memory.
    if (idx >= DSIZE) return;
    curand_init(seed, 0, 0, &state[idx]);
}

// Draws two consecutive scaled random integers from this thread's generator
// and stores the first in a1[idx] and the second in a2[idx].
//
// Launch layout: 1D grid of 1D blocks; the flat thread index selects the
// element. `state`, `a1` and `a2` are expected to hold DSIZE elements each
// (that is how main() allocates them).
__global__ void testrand(curandState *state, int *a1, int *a2){
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
    // The grid is rounded up to a whole number of blocks, so trailing threads
    // fall outside all three arrays and must do nothing.
    if (idx >= DSIZE) return;

    curandState *myState = &state[idx];
    a1[idx] = getnextrandscaled(myState, SCALE);
    a2[idx] = getnextrandscaled(myState, SCALE);
}

// Demo driver: seeds DSIZE generators identically, draws two values per
// thread on the device, copies them back and verifies that every thread
// produced the same first value and the same second value.
// Returns 0 when all values match, 1 on allocation failure or mismatch.
int main() {

    int *h_a1, *h_a2, *d_a1, *d_a2;
    curandState *devState;
    // Ceil-division so that every one of the DSIZE elements gets a thread.
    const int nBlocks = (DSIZE + nTPB - 1) / nTPB;

    h_a1 = (int *)malloc(DSIZE*sizeof(int));
    if (h_a1 == 0) {printf("malloc fail\n"); return 1;}
    h_a2 = (int *)malloc(DSIZE*sizeof(int));
    if (h_a2 == 0) {printf("malloc fail\n"); free(h_a1); return 1;}
    cudaMalloc((void**)&d_a1, DSIZE * sizeof(int));
    cudaMalloc((void**)&d_a2, DSIZE * sizeof(int));
    cudaMalloc((void**)&devState, DSIZE * sizeof(curandState));
    cudaCheckErrors("cudamalloc");

    // Same seed (1) for every thread; the kernel fixes subsequence and offset.
    initCurand<<<nBlocks,nTPB>>>(devState, 1);
    cudaDeviceSynchronize();
    cudaCheckErrors("kernels1");
    testrand<<<nBlocks,nTPB>>>(devState, d_a1, d_a2);
    cudaDeviceSynchronize();
    cudaCheckErrors("kernels2");
    cudaMemcpy(h_a1, d_a1, DSIZE*sizeof(int), cudaMemcpyDeviceToHost);
    cudaMemcpy(h_a2, d_a2, DSIZE*sizeof(int), cudaMemcpyDeviceToHost);
    cudaCheckErrors("cudamemcpy");
    printf("1st returned random value is %d\n", h_a1[0]);
    printf("2nd returned random value is %d\n", h_a2[0]);

    // Compare every thread's pair against thread 0; stop at the first mismatch.
    int status = 0;
    for (int i=1; i< DSIZE; i++){
        if (h_a1[i] != h_a1[0]) {
            printf("mismatch on 1st value at %d, val = %d\n", i, h_a1[i]);
            status = 1;
            break;
        }
        if (h_a2[i] != h_a2[0]) {
            printf("mismatch on 2nd value at %d, val = %d\n", i, h_a2[i]);
            status = 1;
            break;
        }
    }
    if (status == 0) {
        printf("thread values match!\n");
    }

    // Release device and host buffers on every exit path past allocation.
    cudaFree(devState);
    cudaFree(d_a2);
    cudaFree(d_a1);
    free(h_a2);
    free(h_a1);

    return status;
}

这篇关于Cuda 随机数生成的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持！