darknet  v3
connected_layer.c
#include "connected_layer.h"
#include "convolutional_layer.h"
#include "batchnorm_layer.h"
#include "utils.h"
#include "cuda.h"
#include "blas.h"
#include "gemm.h"

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam)
{
    int i;
    layer l = {0};
    l.learning_rate_scale = 1;
    l.type = CONNECTED;

    l.inputs = inputs;
    l.outputs = outputs;
    l.batch = batch;
    l.batch_normalize = batch_normalize;
    l.h = 1;
    l.w = 1;
    l.c = inputs;
    l.out_h = 1;
    l.out_w = 1;
    l.out_c = outputs;

    l.output = calloc(batch*outputs, sizeof(float));
    l.delta = calloc(batch*outputs, sizeof(float));

    l.weight_updates = calloc(inputs*outputs, sizeof(float));
    l.bias_updates = calloc(outputs, sizeof(float));

    l.weights = calloc(outputs*inputs, sizeof(float));
    l.biases = calloc(outputs, sizeof(float));

    l.forward = forward_connected_layer;
    l.backward = backward_connected_layer;
    l.update = update_connected_layer;

    // He-style initialization: scale uniform noise by sqrt(2/fan_in)
    //float scale = 1./sqrt(inputs);
    float scale = sqrt(2./inputs);
    for(i = 0; i < outputs*inputs; ++i){
        l.weights[i] = scale*rand_uniform(-1, 1);
    }

    for(i = 0; i < outputs; ++i){
        l.biases[i] = 0;
    }

    if(adam){
        l.m = calloc(l.inputs*l.outputs, sizeof(float));
        l.v = calloc(l.inputs*l.outputs, sizeof(float));
        l.bias_m = calloc(l.outputs, sizeof(float));
        l.scale_m = calloc(l.outputs, sizeof(float));
        l.bias_v = calloc(l.outputs, sizeof(float));
        l.scale_v = calloc(l.outputs, sizeof(float));
    }
    if(batch_normalize){
        l.scales = calloc(outputs, sizeof(float));
        l.scale_updates = calloc(outputs, sizeof(float));
        for(i = 0; i < outputs; ++i){
            l.scales[i] = 1;
        }

        l.mean = calloc(outputs, sizeof(float));
        l.mean_delta = calloc(outputs, sizeof(float));
        l.variance = calloc(outputs, sizeof(float));
        l.variance_delta = calloc(outputs, sizeof(float));

        l.rolling_mean = calloc(outputs, sizeof(float));
        l.rolling_variance = calloc(outputs, sizeof(float));

        l.x = calloc(batch*outputs, sizeof(float));
        l.x_norm = calloc(batch*outputs, sizeof(float));
    }

#ifdef GPU
    l.forward_gpu = forward_connected_layer_gpu;
    l.backward_gpu = backward_connected_layer_gpu;
    l.update_gpu = update_connected_layer_gpu;

    l.weights_gpu = cuda_make_array(l.weights, outputs*inputs);
    l.biases_gpu = cuda_make_array(l.biases, outputs);

    l.weight_updates_gpu = cuda_make_array(l.weight_updates, outputs*inputs);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, outputs);

    l.output_gpu = cuda_make_array(l.output, outputs*batch);
    l.delta_gpu = cuda_make_array(l.delta, outputs*batch);
    if (adam) {
        l.m_gpu = cuda_make_array(0, inputs*outputs);
        l.v_gpu = cuda_make_array(0, inputs*outputs);
        l.bias_m_gpu = cuda_make_array(0, outputs);
        l.bias_v_gpu = cuda_make_array(0, outputs);
        l.scale_m_gpu = cuda_make_array(0, outputs);
        l.scale_v_gpu = cuda_make_array(0, outputs);
    }

    if(batch_normalize){
        l.mean_gpu = cuda_make_array(l.mean, outputs);
        l.variance_gpu = cuda_make_array(l.variance, outputs);

        l.rolling_mean_gpu = cuda_make_array(l.mean, outputs);
        l.rolling_variance_gpu = cuda_make_array(l.variance, outputs);

        l.mean_delta_gpu = cuda_make_array(l.mean, outputs);
        l.variance_delta_gpu = cuda_make_array(l.variance, outputs);

        l.scales_gpu = cuda_make_array(l.scales, outputs);
        l.scale_updates_gpu = cuda_make_array(l.scale_updates, outputs);

        l.x_gpu = cuda_make_array(l.output, l.batch*outputs);
        l.x_norm_gpu = cuda_make_array(l.output, l.batch*outputs);
#ifdef CUDNN
        cudnnCreateTensorDescriptor(&l.normTensorDesc);
        cudnnCreateTensorDescriptor(&l.dstTensorDesc);
        cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
        cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1);
#endif
    }
#endif
    l.activation = activation;
    fprintf(stderr, "connected %4d -> %4d\n", inputs, outputs);
    return l;
}
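
/* A minimal usage sketch, not part of darknet itself: how a caller could
   build a connected layer and run one forward pass on the CPU. The sizes
   (784 inputs, 100 outputs) and the RELU activation are hypothetical;
   in practice parser.c constructs layers from a .cfg file. */
#if 0
layer l = make_connected_layer(1, 784, 100, RELU, 0, 0); // batch=1, no BN, no Adam
network net = {0};
net.input = calloc(784, sizeof(float));   // one flattened input vector
l.forward(l, net);                        // l.output now holds 100 activations
#endif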

void update_connected_layer(layer l, update_args a)
{
    float learning_rate = a.learning_rate*l.learning_rate_scale;
    float momentum = a.momentum;
    float decay = a.decay;
    int batch = a.batch;
    axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.outputs, momentum, l.bias_updates, 1);

    if(l.batch_normalize){
        axpy_cpu(l.outputs, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
        scal_cpu(l.outputs, momentum, l.scale_updates, 1);
    }

    axpy_cpu(l.inputs*l.outputs, -decay*batch, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
    scal_cpu(l.inputs*l.outputs, momentum, l.weight_updates, 1);
}
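
/* Worked form of the SGD step above (momentum + L2 weight decay). The
   *_updates buffers accumulate gradients summed over the batch, so the
   learning rate is divided by batch; decay is pre-multiplied by batch so
   the two factors cancel and the penalty applies at full strength:

       weight_updates += -decay*batch * weights
       weights        += (learning_rate/batch) * weight_updates
       weight_updates *= momentum        // carried into the next step

   Biases and batch-norm scales follow the same rule without the decay term. */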

void forward_connected_layer(layer l, network net)
{
    fill_cpu(l.outputs*l.batch, 0, l.output, 1);
    int m = l.batch;
    int k = l.inputs;
    int n = l.outputs;
    float *a = net.input;
    float *b = l.weights;
    float *c = l.output;
    gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
    if(l.batch_normalize){
        forward_batchnorm_layer(l, net);
    } else {
        add_bias(l.output, l.biases, l.batch, l.outputs, 1);
    }
    activate_array(l.output, l.outputs*l.batch, l.activation);
}
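
/* Shape check for the gemm call above: with row-major storage,
   output[batch x outputs] += input[batch x inputs] * weights[outputs x inputs]^T,
   i.e. gemm(TA=0, TB=1, M=batch, N=outputs, K=inputs). An equivalent naive
   loop, shown only as a sketch: */
#if 0
for(int b = 0; b < l.batch; ++b){
    for(int o = 0; o < l.outputs; ++o){
        float sum = 0;
        for(int i = 0; i < l.inputs; ++i){
            sum += net.input[b*l.inputs + i] * l.weights[o*l.inputs + i];
        }
        l.output[b*l.outputs + o] += sum;
    }
}
#endif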

void backward_connected_layer(layer l, network net)
{
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    if(l.batch_normalize){
        backward_batchnorm_layer(l, net);
    } else {
        backward_bias(l.bias_updates, l.delta, l.batch, l.outputs, 1);
    }

    int m = l.outputs;
    int k = l.batch;
    int n = l.inputs;
    float *a = l.delta;
    float *b = net.input;
    float *c = l.weight_updates;
    gemm(1,0,m,n,k,1,a,m,b,n,1,c,n);

    m = l.batch;
    k = l.outputs;
    n = l.inputs;

    a = l.delta;
    b = l.weights;
    c = net.delta;

    if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
}
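
/* In matrix form, the two gemm calls above compute:

       weight_updates[outputs x inputs] += delta[batch x outputs]^T * input[batch x inputs]
       net.delta[batch x inputs]        += delta[batch x outputs]   * weights[outputs x inputs]

   The second product propagates the error to the previous layer; it is
   skipped when net.delta is NULL (i.e. for the first layer of the network). */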


void denormalize_connected_layer(layer l)
{
    int i, j;
    for(i = 0; i < l.outputs; ++i){
        float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .000001);
        for(j = 0; j < l.inputs; ++j){
            l.weights[i*l.inputs + j] *= scale;
        }
        l.biases[i] -= l.rolling_mean[i] * scale;
        l.scales[i] = 1;
        l.rolling_mean[i] = 0;
        l.rolling_variance[i] = 1;
    }
}
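
/* Denormalization folds the batch-norm statistics into the raw weights.
   For each output unit i, with s = scales[i]/sqrt(rolling_variance[i] + eps):

       w'[i] = s * w[i]
       b'[i] = biases[i] - s * rolling_mean[i]

   so w'x + b' equals the normalized output scales[i]*(wx - mean)/sqrt(var + eps) + biases[i],
   and the statistics can be reset to the identity (scale 1, mean 0, variance 1). */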


void statistics_connected_layer(layer l)
{
    if(l.batch_normalize){
        printf("Scales ");
        print_statistics(l.scales, l.outputs);
        /*
        printf("Rolling Mean ");
        print_statistics(l.rolling_mean, l.outputs);
        printf("Rolling Variance ");
        print_statistics(l.rolling_variance, l.outputs);
        */
    }
    printf("Biases ");
    print_statistics(l.biases, l.outputs);
    printf("Weights ");
    print_statistics(l.weights, l.outputs);
}

#ifdef GPU

void pull_connected_layer(layer l)
{
    cuda_pull_array(l.weights_gpu, l.weights, l.inputs*l.outputs);
    cuda_pull_array(l.biases_gpu, l.biases, l.outputs);
    cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs);
    cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs);
    if (l.batch_normalize){
        cuda_pull_array(l.scales_gpu, l.scales, l.outputs);
        cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs);
        cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs);
    }
}

void push_connected_layer(layer l)
{
    cuda_push_array(l.weights_gpu, l.weights, l.inputs*l.outputs);
    cuda_push_array(l.biases_gpu, l.biases, l.outputs);
    cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs);
    cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.outputs);
    if (l.batch_normalize){
        cuda_push_array(l.scales_gpu, l.scales, l.outputs);
        cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs);
        cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs);
    }
}

void update_connected_layer_gpu(layer l, update_args a)
{
    float learning_rate = a.learning_rate*l.learning_rate_scale;
    float momentum = a.momentum;
    float decay = a.decay;
    int batch = a.batch;
    if(a.adam){
        adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.inputs*l.outputs, batch, a.t);
        adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.outputs, batch, a.t);
        if(l.scales_gpu){
            adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.outputs, batch, a.t);
        }
    }else{
        axpy_gpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
        scal_gpu(l.outputs, momentum, l.bias_updates_gpu, 1);

        if(l.batch_normalize){
            axpy_gpu(l.outputs, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1);
            scal_gpu(l.outputs, momentum, l.scale_updates_gpu, 1);
        }

        axpy_gpu(l.inputs*l.outputs, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
        axpy_gpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
        scal_gpu(l.inputs*l.outputs, momentum, l.weight_updates_gpu, 1);
    }
}
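
/* The Adam branch above delegates to adam_update_gpu, which is roughly the
   standard update (Kingma & Ba, 2015) applied to the decay-adjusted,
   batch-accumulated update g:

       m <- B1*m + (1 - B1)*g
       v <- B2*v + (1 - B2)*g^2
       w <- w + rate * m_hat / (sqrt(v_hat) + eps)

   where m_hat and v_hat are the bias-corrected moments. The + sign matches
   darknet's convention of accumulating negative gradients in the *_updates
   buffers. */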

void forward_connected_layer_gpu(layer l, network net)
{
    fill_gpu(l.outputs*l.batch, 0, l.output_gpu, 1);

    int m = l.batch;
    int k = l.inputs;
    int n = l.outputs;
    float * a = net.input_gpu;
    float * b = l.weights_gpu;
    float * c = l.output_gpu;
    gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n);

    if (l.batch_normalize) {
        forward_batchnorm_layer_gpu(l, net);
    } else {
        add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.outputs, 1);
    }
    activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation);
}

void backward_connected_layer_gpu(layer l, network net)
{
    // clamp the incoming deltas to [-1, 1] before backpropagating
    constrain_gpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
    gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
    if(l.batch_normalize){
        backward_batchnorm_layer_gpu(l, net);
    } else {
        backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.outputs, 1);
    }

    int m = l.outputs;
    int k = l.batch;
    int n = l.inputs;
    float * a = l.delta_gpu;
    float * b = net.input_gpu;
    float * c = l.weight_updates_gpu;
    gemm_gpu(1,0,m,n,k,1,a,m,b,n,1,c,n);

    m = l.batch;
    k = l.outputs;
    n = l.inputs;

    a = l.delta_gpu;
    b = l.weights_gpu;
    c = net.delta_gpu;

    if(c) gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
}
#endif