Ich lerne 3D-Array-Manipulation in Cuda. Ich habe den folgenden Code implementiert, jedoch kann ich nicht das erwartete Ergebnis erhalten. Das heißt, ich nehme das Array und ändere die Elemente von 0 auf 1. Ich habe versucht, den Fehler zu finden, aber ich kann ihn nicht finden. Kann jemand zeigen, wo mein Fehler im Code ist?So verwenden Sie Cuda Memory 3D mit cudaMalloc3D
int iDivUp(int a, int b) {
return ((a % b) != 0) ? (a/b + 1) : (a/b);
}
__global__
void kernel(cudaPitchedPtr d_pitched_ptr, int COLS, int ROWS, int D) {
int t_idx = threadIdx.x + blockIdx.x * blockDim.x;
int t_idy = threadIdx.y + blockIdx.y * blockDim.y;
char* d_ptr = static_cast<char*>(d_pitched_ptr.ptr);
size_t pitch = d_pitched_ptr.pitch;
float* element = (float*)(d_ptr + t_idy * pitch) + t_idx;
element[0] = 1;
element[1] = 1;
element[2] = 1;
}
void iFilter() {
const int ROWS = 100
const int COLS = 120
const int DEPTH = 3;
int pixels[COLS][ROWS][DEPTH];
for (int j = 0; j < ROWS; j++) {
for (int i = 0; i < COLS; i++) {
for (int k = 0; k < DEPTH; k++) {
pixels[i][j][k] = 0;
}
}
}
cudaExtent extent = make_cudaExtent(COLS * sizeof(int), ROWS, DEPTH);
cudaPitchedPtr d_pitched_ptr;
cudaMalloc3D(&d_pitched_ptr, extent);
cudaMemcpy3DParms d_parms = {0};
d_parms.srcPtr.ptr = pixels;
d_parms.srcPtr.pitch = COLS * sizeof(int);
d_parms.srcPtr.xsize = COLS;
d_parms.srcPtr.ysize = ROWS;
d_parms.dstPtr.ptr = d_pitched_ptr.ptr;
d_parms.dstPtr.pitch = d_pitched_ptr.pitch;
d_parms.dstPtr.xsize = COLS;
d_parms.dstPtr.ysize = ROWS;
d_parms.extent.width = COLS * sizeof(int);
d_parms.extent.height = ROWS;
d_parms.extent.depth = DEPTH;
d_parms.kind = cudaMemcpyHostToDevice;
cudaMemcpy3D(&d_parms);
dim3 block_size(blocksize, blocksize);
dim3 grid_size(iDivUp(COLS, blocksize), iDivUp(ROWS, blocksize));
kernel<<<grid_size, block_size>>>(
d_pitched_ptr, COLS, ROWS, DEPTH);
int download_pixels[COLS][ROWS][DEPTH];
d_parms.srcPtr.ptr = d_pitched_ptr.ptr;
d_parms.srcPtr.pitch = d_pitched_ptr.pitch;
d_parms.dstPtr.ptr = download_pixels;
d_parms.dstPtr.pitch = COLS * sizeof(int);
d_parms.kind = cudaMemcpyDeviceToHost;
cudaMemcpy3D(&d_parms);
for (int j = 0; j < ROWS; j++) {
for (int i = 0; i < COLS; i++) {
for (int k = 0; k < DEPTH; k++) {
image.at<cv::Vec3b>(j, i)[k] = download_pixels[i][j][k];
std::cout << download_pixels[i][j][k] << " ";
}
std::cout << "\n";
}
}
}
OUTPUT: ich alle 0 statt 1
Dank Didnt diesen Fehler nicht erkennen. –