darknet  v3
batchnorm_layer.c
#include "convolutional_layer.h"
#include "batchnorm_layer.h"
#include "blas.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

layer make_batchnorm_layer(int batch, int w, int h, int c)
{
    fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c);
    layer l = {0};
    l.type = BATCHNORM;
    l.batch = batch;
    l.h = l.out_h = h;
    l.w = l.out_w = w;
    l.c = l.out_c = c;
    l.output = calloc(h * w * c * batch, sizeof(float));
    l.delta = calloc(h * w * c * batch, sizeof(float));
    l.inputs = w*h*c;
    l.outputs = l.inputs;

    l.scales = calloc(c, sizeof(float));
    l.scale_updates = calloc(c, sizeof(float));
    l.biases = calloc(c, sizeof(float));
    l.bias_updates = calloc(c, sizeof(float));
    int i;
    for(i = 0; i < c; ++i){
        l.scales[i] = 1;
    }

    l.mean = calloc(c, sizeof(float));
    l.variance = calloc(c, sizeof(float));

    l.rolling_mean = calloc(c, sizeof(float));
    l.rolling_variance = calloc(c, sizeof(float));

    l.forward = forward_batchnorm_layer;
    l.backward = backward_batchnorm_layer;
#ifdef GPU
    l.forward_gpu = forward_batchnorm_layer_gpu;
    l.backward_gpu = backward_batchnorm_layer_gpu;

    l.output_gpu = cuda_make_array(l.output, h * w * c * batch);
    l.delta_gpu = cuda_make_array(l.delta, h * w * c * batch);

    l.biases_gpu = cuda_make_array(l.biases, c);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, c);

    l.scales_gpu = cuda_make_array(l.scales, c);
    l.scale_updates_gpu = cuda_make_array(l.scale_updates, c);

    l.mean_gpu = cuda_make_array(l.mean, c);
    l.variance_gpu = cuda_make_array(l.variance, c);

    l.rolling_mean_gpu = cuda_make_array(l.mean, c);
    l.rolling_variance_gpu = cuda_make_array(l.variance, c);

    l.mean_delta_gpu = cuda_make_array(l.mean, c);
    l.variance_delta_gpu = cuda_make_array(l.variance, c);

    l.x_gpu = cuda_make_array(l.output, l.batch*l.outputs);
    l.x_norm_gpu = cuda_make_array(l.output, l.batch*l.outputs);
#ifdef CUDNN
    cudnnCreateTensorDescriptor(&l.normTensorDesc);
    cudnnCreateTensorDescriptor(&l.dstTensorDesc);
    cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
    cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1);

#endif
#endif
    return l;
}

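All of the loops in this file assume the NCHW memory layout darknet uses for layer buffers: for batch item b, channel f, and spatial offset i (i = y*w + x), the flat index is i + spatial*(f + channels*b). A minimal stand-alone sketch of that addressing, independent of the rest of the file (the helper name is illustrative only):

/* Hypothetical helper illustrating the NCHW flat index used throughout this file. */
static inline int nchw_index(int b, int f, int y, int x,
                             int channels, int h, int w)
{
    int spatial = h*w;          /* size of one channel plane       */
    int i = y*w + x;            /* offset inside the channel plane */
    return i + spatial*(f + channels*b);
}
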
void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates)
{
    int i,b,f;
    for(f = 0; f < n; ++f){
        float sum = 0;
        for(b = 0; b < batch; ++b){
            for(i = 0; i < size; ++i){
                int index = i + size*(f + n*b);
                sum += delta[index] * x_norm[index];
            }
        }
        scale_updates[f] += sum;
    }
}

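backward_scale_cpu accumulates (+=) the gradient of the loss with respect to the per-channel scale, using the incoming gradient delta and the normalized activations x_norm saved during the forward pass:

\[
\frac{\partial L}{\partial \gamma_f} = \sum_{b=1}^{B} \sum_{i=1}^{HW} \delta_{b,f,i}\,\hat{x}_{b,f,i}
\]
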
void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
{
    int i,j,k;
    for(i = 0; i < filters; ++i){
        mean_delta[i] = 0;
        for (j = 0; j < batch; ++j) {
            for (k = 0; k < spatial; ++k) {
                int index = j*filters*spatial + i*spatial + k;
                mean_delta[i] += delta[index];
            }
        }
        mean_delta[i] *= (-1./sqrt(variance[i] + .00001f));
    }
}
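mean_delta_cpu computes the gradient of the loss with respect to each per-channel batch mean:

\[
\frac{\partial L}{\partial \mu_f} = \frac{-1}{\sqrt{\sigma_f^2 + \varepsilon}} \sum_{b,i} \delta_{b,f,i}, \qquad \varepsilon = 10^{-5}
\]

The extra term involving \(\partial L/\partial \sigma_f^2\) that appears in the full batch-norm derivation is proportional to \(\sum_{b,i}(x_{b,f,i}-\mu_f)\), which is exactly zero when \(\mu_f\) is the batch mean, so omitting it here loses nothing.
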
void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
{
    int i,j,k;
    for(i = 0; i < filters; ++i){
        variance_delta[i] = 0;
        for(j = 0; j < batch; ++j){
            for(k = 0; k < spatial; ++k){
                int index = j*filters*spatial + i*spatial + k;
                variance_delta[i] += delta[index]*(x[index] - mean[i]);
            }
        }
        variance_delta[i] *= -.5 * pow(variance[i] + .00001f, (float)(-3./2.));
    }
}
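variance_delta_cpu is the corresponding gradient with respect to each per-channel batch variance:

\[
\frac{\partial L}{\partial \sigma_f^2} = -\frac{1}{2}\,(\sigma_f^2 + \varepsilon)^{-3/2} \sum_{b,i} \delta_{b,f,i}\,(x_{b,f,i} - \mu_f)
\]
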
void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
{
    int f, j, k;
    for(j = 0; j < batch; ++j){
        for(f = 0; f < filters; ++f){
            for(k = 0; k < spatial; ++k){
                int index = j*filters*spatial + f*spatial + k;
                delta[index] = delta[index] * 1./(sqrt(variance[f] + .00001f)) + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch);
            }
        }
    }
}

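normalize_delta_cpu combines the three pieces above into the gradient with respect to the pre-normalization activations. With m = spatial*batch, the number of values contributing to each channel statistic:

\[
\frac{\partial L}{\partial x_{b,f,i}} =
\frac{\delta_{b,f,i}}{\sqrt{\sigma_f^2+\varepsilon}}
+ \frac{\partial L}{\partial \sigma_f^2}\,\frac{2\,(x_{b,f,i}-\mu_f)}{m}
+ \frac{1}{m}\,\frac{\partial L}{\partial \mu_f}
\]

Note that delta is overwritten in place, so the incoming gradient must already have been consumed for the bias and scale updates before this call (see backward_batchnorm_layer below).
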
void resize_batchnorm_layer(layer *layer, int w, int h)
{
    fprintf(stderr, "Not implemented\n");
}

void forward_batchnorm_layer(layer l, network net)
{
    if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, net.input, 1, l.output, 1);
    copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1);
    if(net.train){
        mean_cpu(l.output, l.batch, l.out_c, l.out_h*l.out_w, l.mean);
        variance_cpu(l.output, l.mean, l.batch, l.out_c, l.out_h*l.out_w, l.variance);

        scal_cpu(l.out_c, .99, l.rolling_mean, 1);
        axpy_cpu(l.out_c, .01, l.mean, 1, l.rolling_mean, 1);
        scal_cpu(l.out_c, .99, l.rolling_variance, 1);
        axpy_cpu(l.out_c, .01, l.variance, 1, l.rolling_variance, 1);

        normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, l.out_h*l.out_w);
        copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1);
    } else {
        normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, l.out_h*l.out_w);
    }
    scale_bias(l.output, l.scales, l.batch, l.out_c, l.out_h*l.out_w);
    add_bias(l.output, l.biases, l.batch, l.out_c, l.out_h*l.out_w);
}

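In equation form, training mode normalizes with the batch statistics and updates the rolling statistics with a 0.99/0.01 exponential moving average, while the else branch (inference) normalizes with the rolling statistics instead:

\[
y_{b,f,i} = \gamma_f\,\frac{x_{b,f,i}-\mu_f}{\sqrt{\sigma_f^2+\varepsilon}} + \beta_f,
\qquad
\mu_f^{\text{roll}} \leftarrow 0.99\,\mu_f^{\text{roll}} + 0.01\,\mu_f
\]

A minimal stand-alone sketch of the normalize/scale/bias sequence for one channel, with the statistics assumed to be already computed (illustrative code, not part of darknet; assumes <math.h>):

/* Mirrors normalize_cpu -> scale_bias -> add_bias for a single channel:
 * x_hat = (x - mean)/sqrt(var + eps), then y = gamma*x_hat + beta. */
static void bn_channel(float *x, int n, float mean, float var,
                       float gamma, float beta)
{
    float eps = .00001f;
    int i;
    for(i = 0; i < n; ++i){
        float x_hat = (x[i] - mean) / sqrtf(var + eps);
        x[i] = gamma*x_hat + beta;
    }
}
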
void backward_batchnorm_layer(layer l, network net)
{
    if(!net.train){
        l.mean = l.rolling_mean;
        l.variance = l.rolling_variance;
    }
    backward_bias(l.bias_updates, l.delta, l.batch, l.out_c, l.out_w*l.out_h);
    backward_scale_cpu(l.x_norm, l.delta, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates);

    scale_bias(l.delta, l.scales, l.batch, l.out_c, l.out_h*l.out_w);

    mean_delta_cpu(l.delta, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta);
    variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta);
    normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.out_c, l.out_w*l.out_h, l.delta);
    if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, l.delta, 1, net.delta, 1);
}

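Reading the calls in order: the incoming gradient is first used as-is for the bias gradient and, together with x_norm, for the scale gradient; scale_bias on l.delta then multiplies it by gamma, converting dL/dy into dL/dx_hat, which the three *_delta_cpu helpers turn into dL/dx in place:

\[
\frac{\partial L}{\partial \beta_f} = \sum_{b,i}\delta_{b,f,i},\qquad
\frac{\partial L}{\partial \gamma_f} = \sum_{b,i}\delta_{b,f,i}\,\hat{x}_{b,f,i},\qquad
\frac{\partial L}{\partial \hat{x}_{b,f,i}} = \gamma_f\,\delta_{b,f,i}
\]
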
#ifdef GPU

void pull_batchnorm_layer(layer l)
{
    cuda_pull_array(l.scales_gpu, l.scales, l.c);
    cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.c);
    cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.c);
}
void push_batchnorm_layer(layer l)
{
    cuda_push_array(l.scales_gpu, l.scales, l.c);
    cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.c);
    cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.c);
}

void forward_batchnorm_layer_gpu(layer l, network net)
{
    if(l.type == BATCHNORM) copy_gpu(l.outputs*l.batch, net.input_gpu, 1, l.output_gpu, 1);
    copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1);
    if (net.train) {
#ifdef CUDNN
        float one = 1;
        float zero = 0;
        cudnnBatchNormalizationForwardTraining(cudnn_handle(),
                CUDNN_BATCHNORM_SPATIAL,
                &one,
                &zero,
                l.dstTensorDesc,
                l.x_gpu,
                l.dstTensorDesc,
                l.output_gpu,
                l.normTensorDesc,
                l.scales_gpu,
                l.biases_gpu,
                .01,
                l.rolling_mean_gpu,
                l.rolling_variance_gpu,
                .00001,
                l.mean_gpu,
                l.variance_gpu);
#else
        fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu);
        fast_variance_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.variance_gpu);

        scal_gpu(l.out_c, .99, l.rolling_mean_gpu, 1);
        axpy_gpu(l.out_c, .01, l.mean_gpu, 1, l.rolling_mean_gpu, 1);
        scal_gpu(l.out_c, .99, l.rolling_variance_gpu, 1);
        axpy_gpu(l.out_c, .01, l.variance_gpu, 1, l.rolling_variance_gpu, 1);

        copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1);
        normalize_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_h*l.out_w);
        copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.x_norm_gpu, 1);

        scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w);
        add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h);
#endif
    } else {
        normalize_gpu(l.output_gpu, l.rolling_mean_gpu, l.rolling_variance_gpu, l.batch, l.out_c, l.out_h*l.out_w);
        scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w);
        add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h);
    }
}

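In the cuDNN branch, the exponential average factor of .01 and the epsilon of .00001 are chosen to match the hand-rolled #else path. cuDNN's documented running-statistics update with factor alpha is

\[
\mu^{\text{roll}} \leftarrow (1-\alpha)\,\mu^{\text{roll}} + \alpha\,\mu_{\text{batch}},
\]

which for alpha = .01 is exactly the scal_gpu(.99)/axpy_gpu(.01) pair in the non-cuDNN path, and the same epsilon appears under the square root in the CPU helpers above.
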
void backward_batchnorm_layer_gpu(layer l, network net)
{
    if(!net.train){
        l.mean_gpu = l.rolling_mean_gpu;
        l.variance_gpu = l.rolling_variance_gpu;
    }
#ifdef CUDNN
    float one = 1;
    float zero = 0;
    cudnnBatchNormalizationBackward(cudnn_handle(),
            CUDNN_BATCHNORM_SPATIAL,
            &one,
            &zero,
            &one,
            &one,
            l.dstTensorDesc,
            l.x_gpu,
            l.dstTensorDesc,
            l.delta_gpu,
            l.dstTensorDesc,
            l.x_norm_gpu,
            l.normTensorDesc,
            l.scales_gpu,
            l.scale_updates_gpu,
            l.bias_updates_gpu,
            .00001,
            l.mean_gpu,
            l.variance_gpu);
    /* cuDNN writes dL/dx into l.x_norm_gpu (reused here as scratch), so copy it back into l.delta_gpu. */
    copy_gpu(l.outputs*l.batch, l.x_norm_gpu, 1, l.delta_gpu, 1);
#else
    backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h);
    backward_scale_gpu(l.x_norm_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates_gpu);

    scale_bias_gpu(l.delta_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w);

    fast_mean_delta_gpu(l.delta_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta_gpu);
    fast_variance_delta_gpu(l.x_gpu, l.delta_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta_gpu);
    normalize_delta_gpu(l.x_gpu, l.mean_gpu, l.variance_gpu, l.mean_delta_gpu, l.variance_delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu);
#endif
    if(l.type == BATCHNORM) copy_gpu(l.outputs*l.batch, l.delta_gpu, 1, net.delta_gpu, 1);
}
#endif