darknet  v3
deconvolutional_layer.c
Go to the documentation of this file.
#include "deconvolutional_layer.h"
#include "convolutional_layer.h"
#include "batchnorm_layer.h"
#include "utils.h"
#include "im2col.h"
#include "col2im.h"
#include "blas.h"
#include "gemm.h"

#include <stdio.h>
#include <time.h>
12 
13 
14 static size_t get_workspace_size(layer l){
15  return (size_t)l.h*l.w*l.size*l.size*l.n*sizeof(float);
16 }
17 
19 {
20  int i,j,f;
21  float center = (l.size-1) / 2.;
22  for(f = 0; f < l.n; ++f){
23  for(j = 0; j < l.size; ++j){
24  for(i = 0; i < l.size; ++i){
25  float val = (1 - fabs(i - center)) * (1 - fabs(j - center));
26  int c = f%l.c;
27  int ind = f*l.size*l.size*l.c + c*l.size*l.size + j*l.size + i;
28  l.weights[ind] = val;
29  }
30  }
31  }
32 }
33 
34 
35 layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam)
36 {
37  int i;
38  layer l = {0};
40 
41  l.h = h;
42  l.w = w;
43  l.c = c;
44  l.n = n;
45  l.batch = batch;
46  l.stride = stride;
47  l.size = size;
48 
49  l.nweights = c*n*size*size;
50  l.nbiases = n;
51 
52  l.weights = calloc(c*n*size*size, sizeof(float));
53  l.weight_updates = calloc(c*n*size*size, sizeof(float));
54 
55  l.biases = calloc(n, sizeof(float));
56  l.bias_updates = calloc(n, sizeof(float));
57  //float scale = n/(size*size*c);
58  //printf("scale: %f\n", scale);
59  float scale = .02;
60  for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal();
61  //bilinear_init(l);
62  for(i = 0; i < n; ++i){
63  l.biases[i] = 0;
64  }
65  l.pad = padding;
66 
67  l.out_h = (l.h - 1) * l.stride + l.size - 2*l.pad;
68  l.out_w = (l.w - 1) * l.stride + l.size - 2*l.pad;
69  l.out_c = n;
70  l.outputs = l.out_w * l.out_h * l.out_c;
71  l.inputs = l.w * l.h * l.c;
72 
73  scal_cpu(l.nweights, (float)l.out_w*l.out_h/(l.w*l.h), l.weights, 1);
74 
75  l.output = calloc(l.batch*l.outputs, sizeof(float));
76  l.delta = calloc(l.batch*l.outputs, sizeof(float));
77 
81 
82  l.batch_normalize = batch_normalize;
83 
84  if(batch_normalize){
85  l.scales = calloc(n, sizeof(float));
86  l.scale_updates = calloc(n, sizeof(float));
87  for(i = 0; i < n; ++i){
88  l.scales[i] = 1;
89  }
90 
91  l.mean = calloc(n, sizeof(float));
92  l.variance = calloc(n, sizeof(float));
93 
94  l.mean_delta = calloc(n, sizeof(float));
95  l.variance_delta = calloc(n, sizeof(float));
96 
97  l.rolling_mean = calloc(n, sizeof(float));
98  l.rolling_variance = calloc(n, sizeof(float));
99  l.x = calloc(l.batch*l.outputs, sizeof(float));
100  l.x_norm = calloc(l.batch*l.outputs, sizeof(float));
101  }
102  if(adam){
103  l.m = calloc(c*n*size*size, sizeof(float));
104  l.v = calloc(c*n*size*size, sizeof(float));
105  l.bias_m = calloc(n, sizeof(float));
106  l.scale_m = calloc(n, sizeof(float));
107  l.bias_v = calloc(n, sizeof(float));
108  l.scale_v = calloc(n, sizeof(float));
109  }
110 
111 #ifdef GPU
115 
116  if(gpu_index >= 0){
117 
118  if (adam) {
119  l.m_gpu = cuda_make_array(l.m, c*n*size*size);
120  l.v_gpu = cuda_make_array(l.v, c*n*size*size);
121  l.bias_m_gpu = cuda_make_array(l.bias_m, n);
122  l.bias_v_gpu = cuda_make_array(l.bias_v, n);
123  l.scale_m_gpu = cuda_make_array(l.scale_m, n);
124  l.scale_v_gpu = cuda_make_array(l.scale_v, n);
125  }
126  l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
127  l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
128 
129  l.biases_gpu = cuda_make_array(l.biases, n);
130  l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
131 
132  l.delta_gpu = cuda_make_array(l.delta, l.batch*l.out_h*l.out_w*n);
133  l.output_gpu = cuda_make_array(l.output, l.batch*l.out_h*l.out_w*n);
134 
135  if(batch_normalize){
136  l.mean_gpu = cuda_make_array(0, n);
137  l.variance_gpu = cuda_make_array(0, n);
138 
139  l.rolling_mean_gpu = cuda_make_array(0, n);
140  l.rolling_variance_gpu = cuda_make_array(0, n);
141 
142  l.mean_delta_gpu = cuda_make_array(0, n);
143  l.variance_delta_gpu = cuda_make_array(0, n);
144 
145  l.scales_gpu = cuda_make_array(l.scales, n);
146  l.scale_updates_gpu = cuda_make_array(0, n);
147 
148  l.x_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n);
149  l.x_norm_gpu = cuda_make_array(0, l.batch*l.out_h*l.out_w*n);
150  }
151  }
152  #ifdef CUDNN
153  cudnnCreateTensorDescriptor(&l.dstTensorDesc);
154  cudnnCreateTensorDescriptor(&l.normTensorDesc);
155  cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
156  cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1);
157  #endif
158 #endif
159 
160  l.activation = activation;
161  l.workspace_size = get_workspace_size(l);
162 
163  fprintf(stderr, "deconv%5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
164 
165  return l;
166 }
167 
169 {
170  int i, j;
171  for(i = 0; i < l.n; ++i){
172  float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001);
173  for(j = 0; j < l.c*l.size*l.size; ++j){
174  l.weights[i*l.c*l.size*l.size + j] *= scale;
175  }
176  l.biases[i] -= l.rolling_mean[i] * scale;
177  l.scales[i] = 1;
178  l.rolling_mean[i] = 0;
179  l.rolling_variance[i] = 1;
180  }
181 }
182 
183 void resize_deconvolutional_layer(layer *l, int h, int w)
184 {
185  l->h = h;
186  l->w = w;
187  l->out_h = (l->h - 1) * l->stride + l->size - 2*l->pad;
188  l->out_w = (l->w - 1) * l->stride + l->size - 2*l->pad;
189 
190  l->outputs = l->out_h * l->out_w * l->out_c;
191  l->inputs = l->w * l->h * l->c;
192 
193  l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
194  l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float));
195  if(l->batch_normalize){
196  l->x = realloc(l->x, l->batch*l->outputs*sizeof(float));
197  l->x_norm = realloc(l->x_norm, l->batch*l->outputs*sizeof(float));
198  }
199 
200 #ifdef GPU
201  cuda_free(l->delta_gpu);
202  cuda_free(l->output_gpu);
203 
204  l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs);
205  l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs);
206 
207  if(l->batch_normalize){
208  cuda_free(l->x_gpu);
209  cuda_free(l->x_norm_gpu);
210 
211  l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs);
212  l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs);
213  }
214  #ifdef CUDNN
215  cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
216  cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1);
217  #endif
218 #endif
219  l->workspace_size = get_workspace_size(*l);
220 }
221 
223 {
224  int i;
225 
226  int m = l.size*l.size*l.n;
227  int n = l.h*l.w;
228  int k = l.c;
229 
230  fill_cpu(l.outputs*l.batch, 0, l.output, 1);
231 
232  for(i = 0; i < l.batch; ++i){
233  float *a = l.weights;
234  float *b = net.input + i*l.c*l.h*l.w;
235  float *c = net.workspace;
236 
237  gemm_cpu(1,0,m,n,k,1,a,m,b,n,0,c,n);
238 
239  col2im_cpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride, l.pad, l.output+i*l.outputs);
240  }
241  if (l.batch_normalize) {
242  forward_batchnorm_layer(l, net);
243  } else {
244  add_bias(l.output, l.biases, l.batch, l.n, l.out_w*l.out_h);
245  }
247 }
248 
250 {
251  int i;
252 
254 
255  if(l.batch_normalize){
256  backward_batchnorm_layer(l, net);
257  } else {
258  backward_bias(l.bias_updates, l.delta, l.batch, l.n, l.out_w*l.out_h);
259  }
260 
261  //if(net.delta) memset(net.delta, 0, l.batch*l.h*l.w*l.c*sizeof(float));
262 
263  for(i = 0; i < l.batch; ++i){
264  int m = l.c;
265  int n = l.size*l.size*l.n;
266  int k = l.h*l.w;
267 
268  float *a = net.input + i*m*k;
269  float *b = net.workspace;
270  float *c = l.weight_updates;
271 
272  im2col_cpu(l.delta + i*l.outputs, l.out_c, l.out_h, l.out_w,
273  l.size, l.stride, l.pad, b);
274  gemm_cpu(0,1,m,n,k,1,a,k,b,k,1,c,n);
275 
276  if(net.delta){
277  int m = l.c;
278  int n = l.h*l.w;
279  int k = l.size*l.size*l.n;
280 
281  float *a = l.weights;
282  float *b = net.workspace;
283  float *c = net.delta + i*n*m;
284 
285  gemm_cpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
286  }
287  }
288 }
289 
291 {
292  float learning_rate = a.learning_rate*l.learning_rate_scale;
293  float momentum = a.momentum;
294  float decay = a.decay;
295  int batch = a.batch;
296 
297  int size = l.size*l.size*l.c*l.n;
298  axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
299  scal_cpu(l.n, momentum, l.bias_updates, 1);
300 
301  if(l.scales){
302  axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
303  scal_cpu(l.n, momentum, l.scale_updates, 1);
304  }
305 
306  axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1);
307  axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
308  scal_cpu(size, momentum, l.weight_updates, 1);
309 }
310 
311 
312 
void backward_bias(float *bias_updates, float *delta, int batch, int n, int size)
size_t workspace_size
Definition: darknet.h:336
float momentum
Definition: darknet.h:104
ACTIVATION activation
Definition: darknet.h:121
ACTIVATION
Definition: darknet.h:56
float * scales
Definition: darknet.h:239
float * mean
Definition: darknet.h:252
float * biases
Definition: darknet.h:236
float * weight_updates
Definition: darknet.h:243
int w
Definition: darknet.h:140
void update_deconvolutional_layer(layer l, update_args a)
int pad
Definition: darknet.h:151
int n
Definition: darknet.h:142
void(* update)(struct layer, update_args)
Definition: darknet.h:125
void(* forward_gpu)(struct layer, struct network)
Definition: darknet.h:126
float * scale_v
Definition: darknet.h:270
float learning_rate
Definition: darknet.h:103
float * rolling_variance
Definition: darknet.h:259
void denormalize_deconvolutional_layer(layer l)
void forward_deconvolutional_layer(const layer l, network net)
void add_bias(float *output, float *biases, int batch, int n, int size)
void(* backward_gpu)(struct layer, struct network)
Definition: darknet.h:127
float * x
Definition: darknet.h:261
void(* update_gpu)(struct layer, update_args)
Definition: darknet.h:128
float decay
Definition: darknet.h:105
void(* forward)(struct layer, struct network)
Definition: darknet.h:123
int out_w
Definition: darknet.h:141
void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta)
Definition: activations.c:143
float * delta
Definition: darknet.h:486
int nweights
Definition: darknet.h:136
int out_c
Definition: darknet.h:141
float * workspace
Definition: darknet.h:487
float * variance_delta
Definition: darknet.h:256
void backward_deconvolutional_layer_gpu(layer l, network net)
float * v
Definition: darknet.h:265
void update_deconvolutional_layer_gpu(layer l, update_args a)
int batch_normalize
Definition: darknet.h:129
void fill_cpu(int N, float ALPHA, float *X, int INCX)
Definition: blas.c:190
int size
Definition: darknet.h:145
int batch
Definition: darknet.h:102
float * bias_m
Definition: darknet.h:267
int h
Definition: darknet.h:140
float * delta
Definition: darknet.h:245
int out_h
Definition: darknet.h:141
int inputs
Definition: darknet.h:134
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
Definition: blas.c:178
void(* backward)(struct layer, struct network)
Definition: darknet.h:124
float * x_norm
Definition: darknet.h:262
int batch
Definition: darknet.h:131
void bilinear_init(layer l)
float * output
Definition: darknet.h:246
void scal_cpu(int N, float ALPHA, float *X, int INCX)
Definition: blas.c:184
float * rolling_mean
Definition: darknet.h:258
float * mean_delta
Definition: darknet.h:255
void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA, float *A, int lda, float *B, int ldb, float BETA, float *C, int ldc)
Definition: gemm.c:145
layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int adam)
void im2col_cpu(float *data_im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col)
Definition: im2col.c:16
float rand_normal()
Definition: utils.c:654
float * bias_updates
Definition: darknet.h:237
void resize_deconvolutional_layer(layer *l, int h, int w)
float learning_rate_scale
Definition: darknet.h:168
int stride
Definition: darknet.h:147
int c
Definition: darknet.h:140
void activate_array(float *x, const int n, const ACTIVATION a)
Definition: activations.c:100
void forward_deconvolutional_layer_gpu(layer l, network net)
void backward_deconvolutional_layer(layer l, network net)
int gpu_index
Definition: cuda.c:1
LAYER_TYPE type
Definition: darknet.h:120
float * scale_m
Definition: darknet.h:269
float * input
Definition: darknet.h:484
float * scale_updates
Definition: darknet.h:240
void forward_batchnorm_layer(layer l, network net)
int outputs
Definition: darknet.h:135
int nbiases
Definition: darknet.h:137
float * m
Definition: darknet.h:264
float * variance
Definition: darknet.h:253
void col2im_cpu(float *data_col, int channels, int height, int width, int ksize, int stride, int pad, float *data_im)
Definition: col2im.c:14
void backward_batchnorm_layer(layer l, network net)
float * bias_v
Definition: darknet.h:268
Definition: darknet.h:119
float * weights
Definition: darknet.h:242