#include <cstdio>
#include <iostream>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
# define n 10
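// The element count is defined as a macro rather than as a global variable,
// because (per the original author) casually defining a global variable can lead to an
// "expected an expression" error when the kernel is invoked.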
// Element-wise vector addition on the GPU: c[i] = a[i] + b[i].
//
// Launch layout: 1D grid of 1D blocks. Each thread processes the element at
// its flattened global index (blockIdx.x * blockDim.x + threadIdx.x). For a
// launch with one thread per block this index is simply blockIdx.x.
//
// Threads whose index is not below the element-count macro defined near the top of this
// file do nothing, so a launch that provides more threads than elements
// cannot read or write out of bounds.
//
// Parameters:
//   a, b - input arrays; must be device-accessible and hold at least as many
//          ints as the element-count macro specifies
//   c    - output array with the same requirements
__global__ void add(int *a , int *b , int *c)
{
    // Flattened global thread index; works for any 1D block size.
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
    {
        c[i] = a[i] + b[i];
    }
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when status is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Adds two small host arrays on the GPU and prints the element-wise sums.
//
// Steps: fill the host inputs, allocate device buffers, copy the inputs to
// the device, launch the add kernel, copy the result back, print it, and
// release the device buffers.
//
// Returns 0 on success and 1 if any CUDA call or the kernel launch fails.
int main()
{
    int a[n], b[n], c[n];
    // Initialised to null so the cudaFree calls below are safe even when an
    // allocation was never reached or failed.
    int *dev_a = nullptr, *dev_b = nullptr, *dev_c = nullptr;
    const size_t bytes = n * sizeof(int);

    // Fill the two host input arrays.
    for (int i = 0; i < n; i++)
    {
        a[i] = i;
        b[i] = i + 1;
    }

    // Allocate device memory, then copy the host inputs to the device.
    // The && chain stops at the first failing call.
    bool ok = cudaOk(cudaMalloc((void**)&dev_a, bytes), "cudaMalloc(dev_a)")
           && cudaOk(cudaMalloc((void**)&dev_b, bytes), "cudaMalloc(dev_b)")
           && cudaOk(cudaMalloc((void**)&dev_c, bytes), "cudaMalloc(dev_c)")
           && cudaOk(cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(a -> dev_a)")
           && cudaOk(cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(b -> dev_b)");

    if (ok)
    {
        // One block per element, one thread per block.
        add<<<n, 1>>>(dev_a, dev_b, dev_c);
        // A kernel launch returns no status itself; launch errors are read
        // back with cudaGetLastError. The blocking cudaMemcpy that follows
        // copies the result to the host and reports execution errors.
        ok = cudaOk(cudaGetLastError(), "add kernel launch")
          && cudaOk(cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(dev_c -> c)");
    }

    if (ok)
    {
        for (int i = 0; i < n; i++) printf("%d ", c[i]);
    }

    // Release the device memory on every path.
    cudaFree(dev_a);
    cudaFree(dev_b);
    cudaFree(dev_c);
    return ok ? 0 : 1;
}
// Tags: int, addition, vector, dev, cpu, cuda, gpu, sizeof, void
// From: https://www.cnblogs.com/algoshimo/p/18063325