darknet  v3
maxpool_layer_kernels.cu
Go to the documentation of this file.
1 #include "cuda_runtime.h"
2 #include "curand.h"
3 #include "cublas_v2.h"
4 
5 extern "C" {
6 #include "maxpool_layer.h"
7 #include "cuda.h"
8 }
9 
10 __global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
11 {
12  int h = (in_h + pad - size)/stride + 1;
13  int w = (in_w + pad - size)/stride + 1;
14  int c = in_c;
15 
16  int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
17  if(id >= n) return;
18 
19  int j = id % w;
20  id /= w;
21  int i = id % h;
22  id /= h;
23  int k = id % c;
24  id /= c;
25  int b = id;
26 
27  int w_offset = -pad/2;
28  int h_offset = -pad/2;
29 
30  int out_index = j + w*(i + h*(k + c*b));
31  float max = -INFINITY;
32  int max_i = -1;
33  int l, m;
34  for(l = 0; l < size; ++l){
35  for(m = 0; m < size; ++m){
36  int cur_h = h_offset + i*stride + l;
37  int cur_w = w_offset + j*stride + m;
38  int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c));
39  int valid = (cur_h >= 0 && cur_h < in_h &&
40  cur_w >= 0 && cur_w < in_w);
41  float val = (valid != 0) ? input[index] : -INFINITY;
42  max_i = (val > max) ? index : max_i;
43  max = (val > max) ? val : max;
44  }
45  }
46  output[out_index] = max;
47  indexes[out_index] = max_i;
48 }
49 
50 __global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes)
51 {
52  int h = (in_h + pad - size)/stride + 1;
53  int w = (in_w + pad - size)/stride + 1;
54  int c = in_c;
55  int area = (size-1)/stride;
56 
57  int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
58  if(id >= n) return;
59 
60  int index = id;
61  int j = id % in_w;
62  id /= in_w;
63  int i = id % in_h;
64  id /= in_h;
65  int k = id % in_c;
66  id /= in_c;
67  int b = id;
68 
69  int w_offset = -pad/2;
70  int h_offset = -pad/2;
71 
72  float d = 0;
73  int l, m;
74  for(l = -area; l < area+1; ++l){
75  for(m = -area; m < area+1; ++m){
76  int out_w = (j-w_offset)/stride + m;
77  int out_h = (i-h_offset)/stride + l;
78  int out_index = out_w + w*(out_h + h*(k + c*b));
79  int valid = (out_w >= 0 && out_w < w &&
80  out_h >= 0 && out_h < h);
81  d += (valid && indexes[out_index] == index) ? delta[out_index] : 0;
82  }
83  }
84  prev_delta[index] += d;
85 }
86 
88 {
89  int h = layer.out_h;
90  int w = layer.out_w;
91  int c = layer.c;
92 
93  size_t n = h*w*c*layer.batch;
94 
95  forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, net.input_gpu, layer.output_gpu, layer.indexes_gpu);
96  check_error(cudaPeekAtLastError());
97 }
98 
100 {
101  size_t n = layer.h*layer.w*layer.c*layer.batch;
102 
103  backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, net.delta_gpu, layer.indexes_gpu);
104  check_error(cudaPeekAtLastError());
105 }
106 
int w
Definition: darknet.h:140
int pad
Definition: darknet.h:151
int out_w
Definition: darknet.h:141
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
int size
Definition: darknet.h:145
int h
Definition: darknet.h:140
int out_h
Definition: darknet.h:141
int batch
Definition: darknet.h:131
void forward_maxpool_layer_gpu(maxpool_layer layer, network net)
__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes)
int stride
Definition: darknet.h:147
int c
Definition: darknet.h:140
void backward_maxpool_layer_gpu(maxpool_layer layer, network net)
Definition: darknet.h:119