1 #include "cuda_runtime.h" 10 __global__
void forward_maxpool_layer_kernel(
int n,
int in_h,
int in_w,
int in_c,
int stride,
int size,
int pad,
float *input,
float *output,
int *indexes)
12 int h = (in_h + pad - size)/stride + 1;
13 int w = (in_w + pad - size)/stride + 1;
16 int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
27 int w_offset = -pad/2;
28 int h_offset = -pad/2;
30 int out_index = j + w*(i + h*(k + c*b));
31 float max = -INFINITY;
34 for(l = 0; l < size; ++l){
35 for(m = 0; m < size; ++m){
36 int cur_h = h_offset + i*stride + l;
37 int cur_w = w_offset + j*stride + m;
38 int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c));
39 int valid = (cur_h >= 0 && cur_h < in_h &&
40 cur_w >= 0 && cur_w < in_w);
41 float val = (valid != 0) ? input[index] : -INFINITY;
42 max_i = (val > max) ? index : max_i;
43 max = (val > max) ? val : max;
46 output[out_index] = max;
47 indexes[out_index] = max_i;
50 __global__
void backward_maxpool_layer_kernel(
int n,
int in_h,
int in_w,
int in_c,
int stride,
int size,
int pad,
float *delta,
float *prev_delta,
int *indexes)
52 int h = (in_h + pad - size)/stride + 1;
53 int w = (in_w + pad - size)/stride + 1;
55 int area = (size-1)/stride;
57 int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
69 int w_offset = -pad/2;
70 int h_offset = -pad/2;
74 for(l = -area; l < area+1; ++l){
75 for(m = -area; m < area+1; ++m){
76 int out_w = (j-w_offset)/stride + m;
77 int out_h = (i-h_offset)/stride + l;
78 int out_index = out_w + w*(out_h + h*(k + c*b));
79 int valid = (out_w >= 0 && out_w < w &&
80 out_h >= 0 && out_h < h);
81 d += (valid && indexes[out_index] == index) ? delta[out_index] : 0;
84 prev_delta[index] += d;
93 size_t n = h*w*c*layer.
batch;
95 forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.
h, layer.
w, layer.
c, layer.
stride, layer.
size, layer.
pad, net.input_gpu, layer.output_gpu, layer.indexes_gpu);
96 check_error(cudaPeekAtLastError());
101 size_t n = layer.
h*layer.
w*layer.
c*layer.
batch;
103 backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.
h, layer.
w, layer.
c, layer.
stride, layer.
size, layer.
pad, layer.delta_gpu, net.delta_gpu, layer.indexes_gpu);
104 check_error(cudaPeekAtLastError());
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
void forward_maxpool_layer_gpu(maxpool_layer layer, network net)
__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes)
void backward_maxpool_layer_gpu(maxpool_layer layer, network net)