darknet  v3
local_layer.c
#include "local_layer.h"
#include "utils.h"
#include "im2col.h"
#include "col2im.h"
#include "blas.h"
#include "gemm.h"
#include <stdio.h>
#include <time.h>

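// A local (locally connected) layer is convolution with untied weights:
// every output location gets its own filters. Output spatial dims follow
// the usual sliding-window arithmetic; `pad` acts as a flag here, e.g.
// h = 28, size = 3, stride = 1, pad = 0 gives out_h = (28 - 3)/1 + 1 = 26.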
int local_out_height(local_layer l)
{
    int h = l.h;
    if (!l.pad) h -= l.size;
    else h -= 1;
    return h/l.stride + 1;
}

int local_out_width(local_layer l)
{
    int w = l.w;
    if (!l.pad) w -= l.size;
    else w -= 1;
    return w/l.stride + 1;
}

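// Because weights are untied, the weight buffer holds one n x (size*size*c)
// filter matrix per output location: c*n*size*size*locations floats total.
// Biases are likewise per output element (out_h*out_w*n), not per filter.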
local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation)
{
    int i;
    local_layer l = {0};
    l.type = LOCAL;

    l.h = h;
    l.w = w;
    l.c = c;
    l.n = n;
    l.batch = batch;
    l.stride = stride;
    l.size = size;
    l.pad = pad;

    int out_h = local_out_height(l);
    int out_w = local_out_width(l);
    int locations = out_h*out_w;
    l.out_h = out_h;
    l.out_w = out_w;
    l.out_c = n;
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = l.w * l.h * l.c;

    l.weights = calloc(c*n*size*size*locations, sizeof(float));
    l.weight_updates = calloc(c*n*size*size*locations, sizeof(float));

    l.biases = calloc(l.outputs, sizeof(float));
    l.bias_updates = calloc(l.outputs, sizeof(float));

    // float scale = 1./sqrt(size*size*c);
    float scale = sqrt(2./(size*size*c));
    // Every output location has its own filter bank, so initialize all of
    // them; the loop bound includes the `locations` factor to match the
    // allocation above.
    for(i = 0; i < c*n*size*size*locations; ++i) l.weights[i] = scale*rand_uniform(-1,1);

    l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
    l.delta  = calloc(l.batch*out_h * out_w * n, sizeof(float));

    l.workspace_size = out_h*out_w*size*size*c;

    l.forward = forward_local_layer;
    l.backward = backward_local_layer;
    l.update = update_local_layer;

#ifdef GPU
    l.forward_gpu = forward_local_layer_gpu;
    l.backward_gpu = backward_local_layer_gpu;
    l.update_gpu = update_local_layer_gpu;

    l.weights_gpu = cuda_make_array(l.weights, c*n*size*size*locations);
    l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size*locations);

    l.biases_gpu = cuda_make_array(l.biases, l.outputs);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, l.outputs);

    l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
    l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);

#endif
    l.activation = activation;

    fprintf(stderr, "Local Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);

    return l;
}

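// Forward pass: copy biases into the output, unroll the input with im2col,
// then run one small GEMM per output location against that location's
// private filter matrix.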
void forward_local_layer(const local_layer l, network net)
{
    int out_h = local_out_height(l);
    int out_w = local_out_width(l);
    int i, j;
    int locations = out_h * out_w;

    for(i = 0; i < l.batch; ++i){
        copy_cpu(l.outputs, l.biases, 1, l.output + i*l.outputs, 1);
    }

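    // im2col lays the workspace out as (size*size*c) x locations, so column j
    // is the receptive field of location j; each GEMM below is an
    // (l.n x k) * (k x 1) product read and written with stride `locations`.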
102  for(i = 0; i < l.batch; ++i){
103  float *input = net.input + i*l.w*l.h*l.c;
104  im2col_cpu(input, l.c, l.h, l.w,
105  l.size, l.stride, l.pad, net.workspace);
106  float *output = l.output + i*l.outputs;
107  for(j = 0; j < locations; ++j){
108  float *a = l.weights + j*l.size*l.size*l.c*l.n;
109  float *b = net.workspace + j;
110  float *c = output + j;
111 
112  int m = l.n;
113  int n = 1;
114  int k = l.size*l.size*l.c;
115 
116  gemm(0,0,m,n,k,1,a,k,b,locations,1,c,locations);
117  }
118  }
120 }
121 
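// Backward pass: fold the activation gradient into l.delta, accumulate bias
// updates, then per image compute per-location weight gradients and, if the
// previous layer needs deltas, push gradients back through the transposed
// filters and col2im.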
void backward_local_layer(local_layer l, network net)
{
    int i, j;
    int locations = l.out_w*l.out_h;

    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);

    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1);
    }

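    // The weight gradient at location j is the outer product of its delta
    // column (l.n x 1) and its im2col column (size*size*c x 1);
    // gemm(0,1,...) transposes the second operand to form it.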
133  for(i = 0; i < l.batch; ++i){
134  float *input = net.input + i*l.w*l.h*l.c;
135  im2col_cpu(input, l.c, l.h, l.w,
136  l.size, l.stride, l.pad, net.workspace);
137 
138  for(j = 0; j < locations; ++j){
139  float *a = l.delta + i*l.outputs + j;
140  float *b = net.workspace + j;
141  float *c = l.weight_updates + j*l.size*l.size*l.c*l.n;
142  int m = l.n;
143  int n = l.size*l.size*l.c;
144  int k = 1;
145 
146  gemm(0,1,m,n,k,1,a,locations,b,locations,1,c,n);
147  }
148 
149  if(net.delta){
150  for(j = 0; j < locations; ++j){
151  float *a = l.weights + j*l.size*l.size*l.c*l.n;
152  float *b = l.delta + i*l.outputs + j;
153  float *c = net.workspace + j;
154 
155  int m = l.size*l.size*l.c;
156  int n = 1;
157  int k = l.n;
158 
159  gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations);
160  }
161 
162  col2im_cpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta+i*l.c*l.h*l.w);
163  }
164  }
165 }
166 
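// SGD with momentum and L2 weight decay over every location's filters:
//   biases  += lr/batch * bias_updates
//   weight_updates -= decay*batch * weights
//   weights += lr/batch * weight_updates
// and both update buffers are then scaled by momentum.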
void update_local_layer(local_layer l, update_args a)
{
    float learning_rate = a.learning_rate*l.learning_rate_scale;
    float momentum = a.momentum;
    float decay = a.decay;
    int batch = a.batch;

    int locations = l.out_w*l.out_h;
    int size = l.size*l.size*l.c*l.n*locations;
    axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.outputs, momentum, l.bias_updates, 1);

    axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
    scal_cpu(size, momentum, l.weight_updates, 1);
}

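// The GPU path mirrors the CPU implementation one-for-one, swapping in the
// CUDA BLAS/im2col helpers and adding push/pull routines to move weights
// and biases between host and device.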
#ifdef GPU

void forward_local_layer_gpu(const local_layer l, network net)
{
    int out_h = local_out_height(l);
    int out_w = local_out_width(l);
    int i, j;
    int locations = out_h * out_w;

    for(i = 0; i < l.batch; ++i){
        copy_gpu(l.outputs, l.biases_gpu, 1, l.output_gpu + i*l.outputs, 1);
    }

    for(i = 0; i < l.batch; ++i){
        float *input = net.input_gpu + i*l.w*l.h*l.c;
        im2col_gpu(input, l.c, l.h, l.w,
                l.size, l.stride, l.pad, net.workspace);
        float *output = l.output_gpu + i*l.outputs;
        for(j = 0; j < locations; ++j){
            float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n;
            float *b = net.workspace + j;
            float *c = output + j;

            int m = l.n;
            int n = 1;
            int k = l.size*l.size*l.c;

            gemm_gpu(0,0,m,n,k,1,a,k,b,locations,1,c,locations);
        }
    }
    activate_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation);
}

void backward_local_layer_gpu(local_layer l, network net)
{
    int i, j;
    int locations = l.out_w*l.out_h;

    gradient_array_gpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
    for(i = 0; i < l.batch; ++i){
        axpy_gpu(l.outputs, 1, l.delta_gpu + i*l.outputs, 1, l.bias_updates_gpu, 1);
    }

    for(i = 0; i < l.batch; ++i){
        float *input = net.input_gpu + i*l.w*l.h*l.c;
        im2col_gpu(input, l.c, l.h, l.w,
                l.size, l.stride, l.pad, net.workspace);

        for(j = 0; j < locations; ++j){
            float *a = l.delta_gpu + i*l.outputs + j;
            float *b = net.workspace + j;
            float *c = l.weight_updates_gpu + j*l.size*l.size*l.c*l.n;
            int m = l.n;
            int n = l.size*l.size*l.c;
            int k = 1;

            gemm_gpu(0,1,m,n,k,1,a,locations,b,locations,1,c,n);
        }

        if(net.delta_gpu){
            for(j = 0; j < locations; ++j){
                float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n;
                float *b = l.delta_gpu + i*l.outputs + j;
                float *c = net.workspace + j;

                int m = l.size*l.size*l.c;
                int n = 1;
                int k = l.n;

                gemm_gpu(1,0,m,n,k,1,a,m,b,locations,0,c,locations);
            }

            col2im_gpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta_gpu+i*l.c*l.h*l.w);
        }
    }
}

void update_local_layer_gpu(local_layer l, update_args a)
{
    float learning_rate = a.learning_rate*l.learning_rate_scale;
    float momentum = a.momentum;
    float decay = a.decay;
    int batch = a.batch;

    int locations = l.out_w*l.out_h;
    int size = l.size*l.size*l.c*l.n*locations;
    axpy_gpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
    scal_gpu(l.outputs, momentum, l.bias_updates_gpu, 1);

    axpy_gpu(size, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
    axpy_gpu(size, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
    scal_gpu(size, momentum, l.weight_updates_gpu, 1);
}

void pull_local_layer(local_layer l)
{
    int locations = l.out_w*l.out_h;
    int size = l.size*l.size*l.c*l.n*locations;
    cuda_pull_array(l.weights_gpu, l.weights, size);
    cuda_pull_array(l.biases_gpu, l.biases, l.outputs);
}

void push_local_layer(local_layer l)
{
    int locations = l.out_w*l.out_h;
    int size = l.size*l.size*l.c*l.n*locations;
    cuda_push_array(l.weights_gpu, l.weights, size);
    cuda_push_array(l.biases_gpu, l.biases, l.outputs);
}
#endif