vscode-cpptools 有什么方法可以抑制 “>>” 错误?我在 settings.json 中将 “*.cu” 与 “cpp” 相关联:

// use normal c++ syntax highlighting for CUDA files
"files.associations": {"*.cu": "cpp"},

并且工作正常,除了一个问题:被 <<< 和 >>> 包围的 kernel execution configuration parameters 被误认为是错误 “expected an expression”:

dim3 dimGrid(2, 2, 1);
dim3 dimBlock(width / 2, width / 2, 1);
MatrixMulKernel<<<dimGrid, dimBlock>>>(d_M, d_N, d_P, width);

有任何建议吗?

最佳答案

搜寻了几个小时,找不到完美的解决方案,但找到了一些解决方法。我在这里总结:

1. 通过编辑 settings.json,对 CUDA 文件使用普通的 C++ 语法突出显示
2. 在程序中包含 CUDA 必要的头文件
3. 包含虚拟头文件以规避 IntelliSense 的误报

下面是一个具体的例子。

settings.json

"files.associations": {
    "*.cu": "cpp",
    "*.cuh": "cpp"
}

cudaDmy.cuh

#pragma once
#ifdef __INTELLISENSE__
void __syncthreads(); // workaround __syncthreads warning
#define KERNEL_ARG2(grid, block)
#define KERNEL_ARG3(grid, block, sh_mem)
#define KERNEL_ARG4(grid, block, sh_mem, stream)
#else
#define KERNEL_ARG2(grid, block) <<< grid, block >>>
#define KERNEL_ARG3(grid, block, sh_mem) <<< grid, block, sh_mem >>>
#define KERNEL_ARG4(grid, block, sh_mem, stream) <<< grid, block, sh_mem, stream >>>
#endif

matrixMul.cu

#include <stdio.h>
#include <math.h>
#include <time.h>
#include <cuda.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <device_functions.h>
#include <cuda_runtime_api.h>
#include "cudaDmy.cuh"

__global__ void MatrixMulKernel(float *M, float *N, float *P, int width)
{
    int Row = blockIdx.y * blockDim.y + threadIdx.y;
    int Col = blockIdx.x * blockDim.x + threadIdx.x;
    if (Row < width && Col < width)
    {
        float Pvalue = 0;
        for (int i = 0; i < width; ++i)
        {
            Pvalue += M[Row * width + i] * N[width * i + Col];
        }
        P[Row * width + Col] = Pvalue;
    }
}

void MatMul(float *M, float *N, float *P, int width)
{
    float *d_M;
    float *d_N;
    float *d_P;
    int size = width * width * sizeof(float);
    cudaMalloc((void **)&d_M, size);
    cudaMemcpy(d_M, M, size, cudaMemcpyHostToDevice);
    cudaMalloc((void **)&d_N, size);
    cudaMemcpy(d_N, N, size, cudaMemcpyHostToDevice);
    cudaMalloc((void **)&d_P, size);
    dim3 dimGrid(2, 2, 1);
    dim3 dimBlock(width / 2, width / 2, 1);
    // <<<>>> will replace macro KERNEL_ARG2 when compiling
    MatrixMulKernel KERNEL_ARG2(dimGrid, dimBlock) (d_M, d_N, d_P, width);
    cudaMemcpy(P, d_P, size, cudaMemcpyDeviceToHost);
    cudaFree(d_M);
    cudaFree(d_N);
    cudaFree(d_P);
}

int main()
{
    int elem = 100;
    float *M = new float[elem];
    float *N = new float[elem];
    float *P = new float[elem];

    for (int i = 0; i < elem; ++i)
        M[i] = i;
    for (int i = 0; i < elem; ++i)
        N[i] = i + elem;

    time_t t1 = time(NULL);
    MatMul(M, N, P, sqrt(elem));
    time_t t2 = time(NULL);

    double seconds = difftime(t2, t1);
    printf("%.3f seconds total time\n", seconds);
    for (int i = 0; i < elem / 1000000; ++i)
        printf("%.1f\t", P[i]);
    printf("\n");

    delete[] M;
    delete[] N;
    delete[] P;
    return 0;
}

让我们用 NVCC 编译它:

nvcc matrixMul.cu -Xcudafe "--diag_suppress=unrecognized_pragma" -o runcuda

有用的链接:
https://devtalk.nvidia.com/default/topic/513485/cuda-programming-and-performance/__syncthreads-is-undefined-need-a-help/post/5189004/#5189004
https://stackoverflow.com/a/6182137/8037585
https://stackoverflow.com/a/27992604/8037585
https://gist.github.com/ruofeidu/df95ba27dfc6b77121b27fd4a6483426

关于c++ - <<< >>> vscode中的cuda,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/51959774/
10-11 22:56
查看更多