CUDA Kernel for copying array location from neighbour location -
i have cuda kernel copies i+1 th location ith location in device array. copying not done locations index values multiples of 32. [32]->[31] not copied, [64]->[63] not copied. happens irrespective of block size. how resolved? here full program. no calls syncthreads(). still problem exists.
#include <cstdio> struct soda { float *df0; size_t pitch; }; __global__ void stream_kernel (soda da1, soda da2, int m, int n); int main(int argc, char **argv){ int i, m=32, n=32;float *f0; soda da1, da2; dim3 blocksize = dim3(32,32); dim3 gridsize = dim3(1,1); f0 = (float *)malloc(m*n*sizeof(float)); cudamallocpitch((void **)&da1.df0, &da1.pitch, sizeof(float)*m, n); cudamallocpitch((void **)&da2.df0, &da2.pitch, sizeof(float)*m, n); (i=0; i<m*n; i++) f0[i] = (float)rand()/rand_max; cudamemcpy2d((void *)da1.df0, da1.pitch, (void *)f0, sizeof(float)*m, sizeof(float)*m, n, cudamemcpyhosttodevice); printf("\n"); for(int i=28;i<70; i++) printf("%5d ", i); printf("\n\n"); printf("\n"); for(int i=28;i<70; i++) printf("%.3f ", f0[i]); printf("\n\n"); stream_kernel<<<gridsize, blocksize>>>(da1, da2, m, n); cudamemcpy2d( (void *)f0, sizeof(float)*m, (void *)da2.df0, da2.pitch,sizeof(float)*m, n, cudamemcpydevicetohost); printf("\n"); for(int i=28;i<70; i++) printf("%.3f ", f0[i]); printf("\n\n"); free(f0);cudafree(da2.df0); cudafree(da1.df0); printf("\n\n"); return 0; } __global__ void stream_kernel (soda da1, soda da2, int m, int n) { int i, j, i2d; = blockidx.x * blockdim.x + threadidx.x; j = blockidx.y * blockdim.y + threadidx.y; i2d = + j * m; if (i2d>0) { da2.df0[i2d-1] = da1.df0[i2d];} }
the output
28 29 30 31 32 33 .... 0.999 0.218 0.513 0.839 0.613 0.296 0.638.... 0.218 0.513 0.839 0.198 0.296 0.638 ....
thanks comments. in 2d array stored in row major order, kernel moves (i,j)th position previous position. since array pitched, mentioned in comments, previous element of first element in each row not found using -1 offset. special case handled computing last element in previous array. got answer. thanks.
Comments
Post a Comment