// Body fragment of the forward average-pooling kernel.
// NOTE(review): the leading integers (1, 12, 20, ...) look like line numbers
// carried over from the original file, and the gaps between them mean some
// statements are not shown here -- the declarations of k, b and i and the
// closing braces are all absent. Restore them from the upstream file before
// attempting to build this.
//
// Flat thread index: the block position is linearised over a 2D grid
// (blockIdx.x + blockIdx.y*gridDim.x), scaled by blockDim.x, then offset by
// threadIdx.x.
1 #include "cuda_runtime.h" 12 int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
// One output slot per (k, b) pair, laid out with k varying fastest.
20 int out_index = (k + c*b);
// Clear the slot, then sum the w*h consecutive inputs that start at offset
// h*w*(k + b*c) into it.
21 output[out_index] = 0;
22 for(i = 0; i < w*h; ++i){
23 int in_index = i + h*w*(k + b*c);
24 output[out_index] += input[in_index];
// Divide the accumulated sum by the element count w*h, turning it into a mean.
26 output[out_index] /= w*h;
// Body fragment of the backward average-pooling kernel.
// NOTE(review): the leading integers (31, 39, ...) look like line numbers from
// the original file; the statements that declare k, b and i, and the closing
// braces, are not shown in this excerpt.
//
// Flat thread index: the block position is linearised over a 2D grid
// (blockIdx.x + blockIdx.y*gridDim.x), scaled by blockDim.x, then offset by
// threadIdx.x.
31 int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
// Index of the single out_delta value that this (k, b) pair reads.
39 int out_index = (k + c*b);
// Add out_delta[out_index] / (w*h) to each of the w*h consecutive in_delta
// entries starting at offset h*w*(k + b*c). The update is an accumulation
// (+=), so whatever in_delta already holds is kept.
40 for(i = 0; i < w*h; ++i){
41 int in_index = i + h*w*(k + b*c);
42 in_delta[in_index] += out_delta[out_index] / (w*h);
// Fragment of the host-side forward launcher.
// NOTE(review): the leading integers (48, 50, 51) look like line numbers from
// the original file; the enclosing function header and braces are not part of
// this excerpt.
//
// Element count handed to the kernel: layer.c * layer.batch.
// NOTE(review): n is size_t here, while the kernel signature declares its
// first parameter as int, so the value is narrowed at the call.
48 size_t n = layer.
c*layer.
batch;
// Launch with a grid computed by cuda_gridsize(n) and BLOCK threads per block
// (both defined outside this excerpt). The kernel reads net.input_gpu and
// writes layer.output_gpu.
50 forward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.
w, layer.
h, layer.
c, net.input_gpu, layer.output_gpu);
// Hand the most recent CUDA error code to check_error; cudaPeekAtLastError
// reads the code without clearing it.
51 check_error(cudaPeekAtLastError());
// Fragment of the host-side backward launcher.
// NOTE(review): the leading integers (56, 58, 59) look like line numbers from
// the original file; the enclosing function header and braces are not part of
// this excerpt.
//
// Element count handed to the kernel: layer.c * layer.batch.
// NOTE(review): n is size_t here, while the kernel signature declares its
// first parameter as int, so the value is narrowed at the call.
56 size_t n = layer.
c*layer.
batch;
// Launch with a grid computed by cuda_gridsize(n) and BLOCK threads per block
// (both defined outside this excerpt). net.delta_gpu is passed as the kernel's
// in_delta (accumulated into) and layer.delta_gpu as its out_delta (read).
58 backward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.
w, layer.
h, layer.
c, net.delta_gpu, layer.delta_gpu);
// Hand the most recent CUDA error code to check_error; cudaPeekAtLastError
// reads the code without clearing it.
59 check_error(cudaPeekAtLastError());
// NOTE(review): the four lines below are bare function signatures with no
// bodies and no terminating semicolons; in this excerpt they are detached from
// the statements that presumably belong to them. Re-attach each to its body
// when restoring the file.

// Forward kernel. Called in this file with n = layer.c*layer.batch and
// w, h, c taken from layer.w, layer.h, layer.c; input is read and output is
// written.
__global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output)
// Backward kernel. Called in this file with the same n, w, h, c arguments;
// in_delta is accumulated into and out_delta is read.
__global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float *out_delta)
// Host entry point for the forward pass; takes the layer and network by value.
void forward_avgpool_layer_gpu(avgpool_layer layer, network net)
// Host entry point for the backward pass; takes the layer and network by value.
void backward_avgpool_layer_gpu(avgpool_layer layer, network net)