CUDA Kernel for copying array location from neighbour location -


i have cuda kernel copies i+1 th location ith location in device array. copying not done locations index values multiples of 32. [32]->[31] not copied, [64]->[63] not copied. happens irrespective of block size. how resolved? here full program. no calls syncthreads(). still problem exists.

#include <cstdio> struct soda { float *df0; size_t pitch; };  __global__ void stream_kernel (soda da1, soda da2, int m, int n);  int main(int argc, char **argv){     int i, m=32, n=32;float *f0;     soda da1, da2;       dim3 blocksize = dim3(32,32);     dim3 gridsize = dim3(1,1);     f0 = (float *)malloc(m*n*sizeof(float));      cudamallocpitch((void **)&da1.df0, &da1.pitch, sizeof(float)*m, n);     cudamallocpitch((void **)&da2.df0, &da2.pitch, sizeof(float)*m, n);      (i=0; i<m*n; i++) f0[i] = (float)rand()/rand_max;      cudamemcpy2d((void *)da1.df0, da1.pitch, (void *)f0, sizeof(float)*m, sizeof(float)*m, n, cudamemcpyhosttodevice);      printf("\n");     for(int i=28;i<70; i++)          printf("%5d ", i);     printf("\n\n");      printf("\n");     for(int i=28;i<70; i++)         printf("%.3f ", f0[i]);      printf("\n\n");      stream_kernel<<<gridsize, blocksize>>>(da1, da2, m, n);      cudamemcpy2d( (void *)f0, sizeof(float)*m, (void *)da2.df0, da2.pitch,sizeof(float)*m, n, cudamemcpydevicetohost);     printf("\n");     for(int i=28;i<70; i++)          printf("%.3f ", f0[i]);     printf("\n\n");      free(f0);cudafree(da2.df0);     cudafree(da1.df0);     printf("\n\n");     return 0; }  __global__ void stream_kernel (soda da1, soda da2, int m, int n) {     int i, j, i2d;       = blockidx.x * blockdim.x + threadidx.x;     j = blockidx.y * blockdim.y + threadidx.y;     i2d = + j * m;     if (i2d>0) { da2.df0[i2d-1] = da1.df0[i2d];} } 

the output

   28    29    30    31    32    33    ....  0.999 0.218 0.513 0.839 0.613 0.296 0.638.... 0.218 0.513 0.839 0.198 0.296 0.638 .... 

thanks comments. in 2d array stored in row major order, kernel moves (i,j)th position previous position. since array pitched, mentioned in comments, previous element of first element in each row not found using -1 offset. special case handled computing last element in previous array. got answer. thanks.


Comments

Popular posts from this blog

dns - How To Use Custom Nameserver On Free Cloudflare? -

python - Pygame screen.blit not working -

c# - Web API response xml language -