summaryrefslogtreecommitdiff
path: root/c,cc/vector_addition.cu
diff options
context:
space:
mode:
authorPatrick Simianer <p@simianer.de>2014-06-15 03:50:12 +0200
committerPatrick Simianer <p@simianer.de>2014-06-15 03:50:12 +0200
commit258e1b92ebbfdebefabc120969ab87c3d8b75c3d (patch)
treeef4ab11fe0bf9d720cea23b35711358a8465feeb /c,cc/vector_addition.cu
parentcf3a29feb5887344b6633ead1b4b6d5657a15a4b (diff)
old c,cc examples
Diffstat (limited to 'c,cc/vector_addition.cu')
-rw-r--r--c,cc/vector_addition.cu61
1 files changed, 61 insertions, 0 deletions
diff --git a/c,cc/vector_addition.cu b/c,cc/vector_addition.cu
new file mode 100644
index 0000000..4f16bc3
--- /dev/null
+++ b/c,cc/vector_addition.cu
@@ -0,0 +1,61 @@
+#include "stdio.h"
+
+
+/* out[i] = in1[i] + in2[i] for i in [0, Ntot); 1D launch, the grid-stride loop covers any grid size. */
+__global__ void add_arrays_gpu( float *in1, float *in2, float *out, int Ntot)
+{
+    const long long stride = (long long) gridDim.x * blockDim.x;  /* total threads in the grid */
+    for (long long i = (long long) blockIdx.x * blockDim.x + threadIdx.x; i < Ntot; i += stride)
+        out[i] = in1[i] + in2[i];
+}
+
+/* Report a failed CUDA runtime call on stderr; returns 1 on failure, 0 on success. */
+static int cuda_failed(cudaError_t err, const char *what)
+{
+    if (err != cudaSuccess) fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
+    return err != cudaSuccess;
+}
+
+/* Adds two N-element float vectors on the GPU and checks the result on the host.
+ * Returns 0 on success, 1 if an allocation, a CUDA call or the check fails. */
+int main(void)
+{
+    const int N = 100000000, block_size = 256;
+    const int grid_size = (N + block_size - 1) / block_size;  /* ceil(N / block_size) */
+    const size_t bytes = sizeof(float) * N;
+    float *a = NULL, *b = NULL, *c = NULL;        /* host buffers */
+    float *a_d = NULL, *b_d = NULL, *c_d = NULL;  /* device buffers */
+    int i, failed, mismatches = 0;
+
+    a = (float*) malloc(bytes);
+    b = (float*) malloc(bytes);
+    c = (float*) malloc(bytes);
+    failed = (a == NULL || b == NULL || c == NULL);
+    if (failed) fprintf(stderr, "host allocation of 3 x %lu bytes failed\n", (unsigned long) bytes);
+    /* || short-circuits, so each step below runs only if all earlier ones succeeded. */
+    failed = failed || cuda_failed(cudaMalloc((void **) &a_d, bytes), "cudaMalloc(a_d)")
+                    || cuda_failed(cudaMalloc((void **) &b_d, bytes), "cudaMalloc(b_d)")
+                    || cuda_failed(cudaMalloc((void **) &c_d, bytes), "cudaMalloc(c_d)");
+    if (!failed) {
+        for (i = 0; i < N; i++) { a[i] = (float) i; b[i] = (float) -i; }
+        failed = cuda_failed(cudaMemcpy(a_d, a, bytes, cudaMemcpyHostToDevice), "copy of a to device")
+              || cuda_failed(cudaMemcpy(b_d, b, bytes, cudaMemcpyHostToDevice), "copy of b to device");
+    }
+    if (!failed) {
+        add_arrays_gpu<<<grid_size, block_size>>>(a_d, b_d, c_d, N);
+        /* Bad launch configurations show up in cudaGetLastError(); the blocking copy
+         * back waits for the kernel and reports errors raised while it ran. */
+        failed = cuda_failed(cudaGetLastError(), "kernel launch")
+              || cuda_failed(cudaMemcpy(c, c_d, bytes, cudaMemcpyDeviceToHost), "copy of c to host");
+    }
+    if (!failed) {
+        /* (float) i and (float) -i are exact negatives, so every sum must be exactly 0. */
+        for (i = 0; i < N; i++)
+            if (c[i] != 0.0f) mismatches++;
+        printf("%d of %d elements are wrong\n", mismatches, N);
+    }
+
+    free(a); free(b); free(c);
+    cudaFree(a_d); cudaFree(b_d); cudaFree(c_d);
+    return (failed || mismatches != 0) ? 1 : 0;
+}