darknet  v3
rnn_layer.c
Go to the documentation of this file.
1 #include "rnn_layer.h"
2 #include "connected_layer.h"
3 #include "utils.h"
4 #include "cuda.h"
5 #include "blas.h"
6 #include "gemm.h"
7 
8 #include <math.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 
13 static void increment_layer(layer *l, int steps)
14 {
15  int num = l->outputs*l->batch*steps;
16  l->output += num;
17  l->delta += num;
18  l->x += num;
19  l->x_norm += num;
20 
21 #ifdef GPU
22  l->output_gpu += num;
23  l->delta_gpu += num;
24  l->x_gpu += num;
25  l->x_norm_gpu += num;
26 #endif
27 }
28 
29 layer make_rnn_layer(int batch, int inputs, int outputs, int steps, ACTIVATION activation, int batch_normalize, int adam)
30 {
31  fprintf(stderr, "RNN Layer: %d inputs, %d outputs\n", inputs, outputs);
32  batch = batch / steps;
33  layer l = {0};
34  l.batch = batch;
35  l.type = RNN;
36  l.steps = steps;
37  l.inputs = inputs;
38 
39  l.state = calloc(batch*outputs, sizeof(float));
40  l.prev_state = calloc(batch*outputs, sizeof(float));
41 
42  l.input_layer = malloc(sizeof(layer));
43  fprintf(stderr, "\t\t");
44  *(l.input_layer) = make_connected_layer(batch*steps, inputs, outputs, activation, batch_normalize, adam);
45  l.input_layer->batch = batch;
46 
47  l.self_layer = malloc(sizeof(layer));
48  fprintf(stderr, "\t\t");
49  *(l.self_layer) = make_connected_layer(batch*steps, outputs, outputs, activation, batch_normalize, adam);
50  l.self_layer->batch = batch;
51 
52  l.output_layer = malloc(sizeof(layer));
53  fprintf(stderr, "\t\t");
54  *(l.output_layer) = make_connected_layer(batch*steps, outputs, outputs, activation, batch_normalize, adam);
55  l.output_layer->batch = batch;
56 
57  l.outputs = outputs;
58  l.output = l.output_layer->output;
59  l.delta = l.output_layer->delta;
60 
64 #ifdef GPU
65  l.forward_gpu = forward_rnn_layer_gpu;
66  l.backward_gpu = backward_rnn_layer_gpu;
67  l.update_gpu = update_rnn_layer_gpu;
68  l.state_gpu = cuda_make_array(0, batch*outputs);
69  l.prev_state_gpu = cuda_make_array(0, batch*outputs);
70  l.output_gpu = l.output_layer->output_gpu;
71  l.delta_gpu = l.output_layer->delta_gpu;
72 #ifdef CUDNN
73  cudnnSetTensor4dDescriptor(l.input_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.input_layer->out_c, l.input_layer->out_h, l.input_layer->out_w);
74  cudnnSetTensor4dDescriptor(l.self_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.self_layer->out_c, l.self_layer->out_h, l.self_layer->out_w);
75  cudnnSetTensor4dDescriptor(l.output_layer->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.output_layer->out_c, l.output_layer->out_h, l.output_layer->out_w);
76 #endif
77 #endif
78 
79  return l;
80 }
81 
83 {
87 }
88 
90 {
91  network s = net;
92  s.train = net.train;
93  int i;
94  layer input_layer = *(l.input_layer);
95  layer self_layer = *(l.self_layer);
96  layer output_layer = *(l.output_layer);
97 
98  fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1);
99  fill_cpu(l.outputs * l.batch * l.steps, 0, self_layer.delta, 1);
100  fill_cpu(l.outputs * l.batch * l.steps, 0, input_layer.delta, 1);
101  if(net.train) fill_cpu(l.outputs * l.batch, 0, l.state, 1);
102 
103  for (i = 0; i < l.steps; ++i) {
104  s.input = net.input;
105  forward_connected_layer(input_layer, s);
106 
107  s.input = l.state;
108  forward_connected_layer(self_layer, s);
109 
110  float *old_state = l.state;
111  if(net.train) l.state += l.outputs*l.batch;
112  if(l.shortcut){
113  copy_cpu(l.outputs * l.batch, old_state, 1, l.state, 1);
114  }else{
115  fill_cpu(l.outputs * l.batch, 0, l.state, 1);
116  }
117  axpy_cpu(l.outputs * l.batch, 1, input_layer.output, 1, l.state, 1);
118  axpy_cpu(l.outputs * l.batch, 1, self_layer.output, 1, l.state, 1);
119 
120  s.input = l.state;
121  forward_connected_layer(output_layer, s);
122 
123  net.input += l.inputs*l.batch;
124  increment_layer(&input_layer, 1);
125  increment_layer(&self_layer, 1);
126  increment_layer(&output_layer, 1);
127  }
128 }
129 
/*
 * CPU backward pass for the RNN layer: backpropagation through time,
 * walking the steps from last to first.
 *
 * For each step i:
 *   1. rebuild the hidden state that fed output_layer at step i
 *      (sum of input_layer and self_layer outputs for that step),
 *   2. backprop output_layer, accumulating the state gradient into
 *      self_layer.delta,
 *   3. backprop self_layer, routing its input gradient into step i-1's
 *      self_layer.delta (no recipient exists at i == 0),
 *   4. backprop input_layer into net.delta for step i.
 *
 * Relies on forward_rnn_layer (training mode) having stored one state
 * slot per step, starting at l.state.
 */
void backward_rnn_layer(layer l, network net)
{
    network s = net;
    s.train = net.train;    /* redundant: the struct copy already set it */
    int i;
    layer input_layer = *(l.input_layer);
    layer self_layer = *(l.self_layer);
    layer output_layer = *(l.output_layer);

    /* position the sub-layers at the final time step */
    increment_layer(&input_layer, l.steps-1);
    increment_layer(&self_layer, l.steps-1);
    increment_layer(&output_layer, l.steps-1);

    /* l.state now points at the slot written by the last forward step */
    l.state += l.outputs*l.batch*l.steps;
    for (i = l.steps-1; i >= 0; --i) {
        /* recompute step i's pre-output hidden state (ignores l.shortcut;
         * NOTE(review): presumably acceptable since the residual term is
         * handled via the delta accumulation below — confirm) */
        copy_cpu(l.outputs * l.batch, input_layer.output, 1, l.state, 1);
        axpy_cpu(l.outputs * l.batch, 1, self_layer.output, 1, l.state, 1);

        s.input = l.state;
        s.delta = self_layer.delta;    /* state gradient accumulates here */
        backward_connected_layer(output_layer, s);

        /* rewind to the state that fed this step's input/self layers */
        l.state -= l.outputs*l.batch;
        /* dead alternative kept from upstream: recompute the previous state
         * instead of rewinding the pointer
        if(i > 0){
        copy_cpu(l.outputs * l.batch, input_layer.output - l.outputs*l.batch, 1, l.state, 1);
        axpy_cpu(l.outputs * l.batch, 1, self_layer.output - l.outputs*l.batch, 1, l.state, 1);
        }else{
        fill_cpu(l.outputs * l.batch, 0, l.state, 1);
        }
        */

        s.input = l.state;
        s.delta = self_layer.delta - l.outputs*l.batch;  /* step i-1's delta */
        if (i == 0) s.delta = 0;    /* no earlier step to receive gradient */
        backward_connected_layer(self_layer, s);

        /* input_layer received the same upstream gradient as self_layer */
        copy_cpu(l.outputs*l.batch, self_layer.delta, 1, input_layer.delta, 1);
        /* shortcut: the residual connection also carries gradient back */
        if (i > 0 && l.shortcut) axpy_cpu(l.outputs*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.outputs*l.batch, 1);
        s.input = net.input + i*l.inputs*l.batch;
        if(net.delta) s.delta = net.delta + i*l.inputs*l.batch;
        else s.delta = 0;
        backward_connected_layer(input_layer, s);

        /* step all sub-layers back one time step */
        increment_layer(&input_layer, -1);
        increment_layer(&self_layer, -1);
        increment_layer(&output_layer, -1);
    }
}
179 
180 #ifdef GPU
181 
/* Copy the weights of all three connected sub-layers from GPU to host. */
void pull_rnn_layer(layer l)
{
    pull_connected_layer(*(l.input_layer));
    pull_connected_layer(*(l.self_layer));
    pull_connected_layer(*(l.output_layer));
}
188 
/* Copy the weights of all three connected sub-layers from host to GPU. */
void push_rnn_layer(layer l)
{
    push_connected_layer(*(l.input_layer));
    push_connected_layer(*(l.self_layer));
    push_connected_layer(*(l.output_layer));
}
195 
/* Apply a GPU weight update (args in `a`) to each connected sub-layer. */
void update_rnn_layer_gpu(layer l, update_args a)
{
    update_connected_layer_gpu(*(l.input_layer), a);
    update_connected_layer_gpu(*(l.self_layer), a);
    update_connected_layer_gpu(*(l.output_layer), a);
}
202 
/*
 * GPU forward pass for the RNN layer.
 *
 * Per step: state = input_layer(x_t) + self_layer(state_{t-1});
 * output_t = output_layer(state).  Unlike the CPU path, state_gpu is a
 * single slot that is overwritten in place each step, and l.shortcut is
 * not applied here.
 */
void forward_rnn_layer_gpu(layer l, network net)
{
    network s = {0};        /* fresh view; only the fields set below are used */
    s.train = net.train;
    int i;
    layer input_layer = *(l.input_layer);
    layer self_layer = *(l.self_layer);
    layer output_layer = *(l.output_layer);

    /* clear all sub-layer gradients for the whole sequence */
    fill_gpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1);
    fill_gpu(l.outputs * l.batch * l.steps, 0, self_layer.delta_gpu, 1);
    fill_gpu(l.outputs * l.batch * l.steps, 0, input_layer.delta_gpu, 1);

    if(net.train) {
        fill_gpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1);
        /* snapshot the incoming state so backward can restore step 0's input */
        copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.prev_state_gpu, 1);
    }

    for (i = 0; i < l.steps; ++i) {
        s.input_gpu = net.input_gpu;
        forward_connected_layer_gpu(input_layer, s);

        s.input_gpu = l.state_gpu;
        forward_connected_layer_gpu(self_layer, s);

        /* state = input_layer output + self_layer output */
        fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1);
        axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1);
        axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1);

        s.input_gpu = l.state_gpu;
        forward_connected_layer_gpu(output_layer, s);

        /* advance to the next time step's input and sub-layer buffers */
        net.input_gpu += l.inputs*l.batch;
        increment_layer(&input_layer, 1);
        increment_layer(&self_layer, 1);
        increment_layer(&output_layer, 1);
    }
}
241 
242 void backward_rnn_layer_gpu(layer l, network net)
243 {
244  network s = {0};
245  s.train = net.train;
246  int i;
247  layer input_layer = *(l.input_layer);
248  layer self_layer = *(l.self_layer);
249  layer output_layer = *(l.output_layer);
250  increment_layer(&input_layer, l.steps - 1);
251  increment_layer(&self_layer, l.steps - 1);
252  increment_layer(&output_layer, l.steps - 1);
253  float *last_input = input_layer.output_gpu;
254  float *last_self = self_layer.output_gpu;
255  for (i = l.steps-1; i >= 0; --i) {
256  fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1);
257  axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1);
258  axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1);
259 
260  s.input_gpu = l.state_gpu;
261  s.delta_gpu = self_layer.delta_gpu;
262  backward_connected_layer_gpu(output_layer, s);
263 
264  if(i != 0) {
265  fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1);
266  axpy_gpu(l.outputs * l.batch, 1, input_layer.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1);
267  axpy_gpu(l.outputs * l.batch, 1, self_layer.output_gpu - l.outputs*l.batch, 1, l.state_gpu, 1);
268  }else {
269  copy_gpu(l.outputs*l.batch, l.prev_state_gpu, 1, l.state_gpu, 1);
270  }
271 
272  copy_gpu(l.outputs*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
273 
274  s.input_gpu = l.state_gpu;
275  s.delta_gpu = (i > 0) ? self_layer.delta_gpu - l.outputs*l.batch : 0;
276  if (i == 0) s.delta_gpu = 0;
277  backward_connected_layer_gpu(self_layer, s);
278 
279  s.input_gpu = net.input_gpu + i*l.inputs*l.batch;
280  if(net.delta_gpu) s.delta_gpu = net.delta_gpu + i*l.inputs*l.batch;
281  else s.delta_gpu = 0;
282  backward_connected_layer_gpu(input_layer, s);
283 
284  increment_layer(&input_layer, -1);
285  increment_layer(&self_layer, -1);
286  increment_layer(&output_layer, -1);
287  }
288  fill_gpu(l.outputs * l.batch, 0, l.state_gpu, 1);
289  axpy_gpu(l.outputs * l.batch, 1, last_input, 1, l.state_gpu, 1);
290  axpy_gpu(l.outputs * l.batch, 1, last_self, 1, l.state_gpu, 1);
291 }
292 #endif
int steps
Definition: darknet.h:157
void update_connected_layer(layer l, update_args a)
ACTIVATION
Definition: darknet.h:56
struct layer * output_layer
Definition: darknet.h:297
layer make_rnn_layer(int batch, int inputs, int outputs, int steps, ACTIVATION activation, int batch_normalize, int adam)
Definition: rnn_layer.c:29
void(* update)(struct layer, update_args)
Definition: darknet.h:125
void(* forward_gpu)(struct layer, struct network)
Definition: darknet.h:126
void(* backward_gpu)(struct layer, struct network)
Definition: darknet.h:127
float * x
Definition: darknet.h:261
void axpy_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
void(* update_gpu)(struct layer, update_args)
Definition: darknet.h:128
void update_rnn_layer(layer l, update_args a)
Definition: rnn_layer.c:82
void(* forward)(struct layer, struct network)
Definition: darknet.h:123
int out_w
Definition: darknet.h:141
float * delta
Definition: darknet.h:486
int out_c
Definition: darknet.h:141
void fill_gpu(int N, float ALPHA, float *X, int INCX)
void forward_connected_layer(layer l, network net)
void fill_cpu(int N, float ALPHA, float *X, int INCX)
Definition: blas.c:190
float * state
Definition: darknet.h:223
int train
Definition: darknet.h:488
int shortcut
Definition: darknet.h:130
void forward_rnn_layer(layer l, network net)
Definition: rnn_layer.c:89
float * delta
Definition: darknet.h:245
int out_h
Definition: darknet.h:141
Definition: darknet.h:80
int inputs
Definition: darknet.h:134
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
Definition: blas.c:178
void(* backward)(struct layer, struct network)
Definition: darknet.h:124
float * x_norm
Definition: darknet.h:262
int batch
Definition: darknet.h:131
float * output
Definition: darknet.h:246
struct layer * input_layer
Definition: darknet.h:295
void backward_rnn_layer(layer l, network net)
Definition: rnn_layer.c:130
void copy_gpu(int N, float *X, int INCX, float *Y, int INCY)
void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
Definition: blas.c:226
layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam)
LAYER_TYPE type
Definition: darknet.h:120
float * input
Definition: darknet.h:484
float * prev_state
Definition: darknet.h:224
int outputs
Definition: darknet.h:135
void backward_connected_layer(layer l, network net)
struct layer * self_layer
Definition: darknet.h:296
Definition: darknet.h:119