static void increment_layer(layer *l, int steps)
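{
    /* Body sketched from how increment_layer is used in this file: re-aim the
     * sub-layer's per-step buffers at the slice `steps` time steps away. The
     * upstream version presumably also advances batch-norm scratch pointers;
     * treat this as a reconstruction, not the verbatim implementation. */
    int num = l->outputs*l->batch*steps;
    l->output += num;
    l->delta += num;
#ifdef GPU
    l->output_gpu += num;
    l->delta_gpu += num;
#endif
}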
fprintf(stderr, "GRU Layer: %d inputs, %d outputs\n", inputs, outputs);
batch = batch / steps;   /* the caller passes batch*steps; recover the per-step batch */
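/* Example with hypothetical numbers: if the net was built with batch = 128 and
 * steps = 4, the layer steps through 4 slices of 32 sequences each, so the
 * per-step buffers below hold outputs*32 floats and the full-sequence
 * output/delta buffers hold outputs*32*4. */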
/* Six indented banners, presumably one per gate sub-layer built here
 * (uz, ur, uh, wz, wr, wh). */
fprintf(stderr, "\t\t");
fprintf(stderr, "\t\t");
fprintf(stderr, "\t\t");
fprintf(stderr, "\t\t");
fprintf(stderr, "\t\t");
fprintf(stderr, "\t\t");
l.output = calloc(outputs*batch*steps, sizeof(float));
l.delta = calloc(outputs*batch*steps, sizeof(float));
l.state = calloc(outputs*batch, sizeof(float));
l.prev_state = calloc(outputs*batch, sizeof(float));

l.r_cpu = calloc(outputs*batch, sizeof(float));
l.z_cpu = calloc(outputs*batch, sizeof(float));
l.h_cpu = calloc(outputs*batch, sizeof(float));
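/* For orientation (an interpretation of the code, not stated in the file):
 * with the weighted_sum convention c = s*a + (1-s)*b declared below, z acts
 * as the keep-gate on the previous state:
 *
 *     z_t  = logistic(uz(x_t) + wz(h_{t-1}))
 *     r_t  = logistic(ur(x_t) + wr(h_{t-1}))
 *     h~_t = tanh(uh(x_t) + wh(r_t * h_{t-1}))       (elementwise *)
 *     h_t  = z_t * h_{t-1} + (1 - z_t) * h~_t
 *
 * r_cpu/z_cpu/h_cpu hold r_t, z_t, h~_t for one step's batch, and the
 * forgot_state buffers below hold r_t * h_{t-1}. */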
l.forgot_state_gpu = cuda_make_array(0, batch*outputs);
l.forgot_delta_gpu = cuda_make_array(0, batch*outputs);
l.prev_state_gpu = cuda_make_array(0, batch*outputs);
l.state_gpu = cuda_make_array(0, batch*outputs);
l.output_gpu = cuda_make_array(0, batch*outputs*steps);
l.delta_gpu = cuda_make_array(0, batch*outputs*steps);
l.r_gpu = cuda_make_array(0, batch*outputs);
l.z_gpu = cuda_make_array(0, batch*outputs);
l.h_gpu = cuda_make_array(0, batch*outputs);
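cuda_make_array(0, n) is darknet's device-allocation helper; passing a null host pointer yields a zero-initialized device buffer. A minimal sketch of its presumed behavior (error handling omitted; the real helper also routes failures through darknet's error checks):

float *cuda_make_array(float *x, size_t n)
{
    float *x_gpu;
    cudaMalloc((void **)&x_gpu, sizeof(float)*n);
    if (x) cudaMemcpy(x_gpu, x, sizeof(float)*n, cudaMemcpyHostToDevice);
    else   fill_gpu(n, 0, x_gpu, 1);   /* fill_gpu is declared at the bottom of this page */
    return x_gpu;
}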
cudnnSetTensor4dDescriptor(l.uz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uz->out_c, l.uz->out_h, l.uz->out_w);
cudnnSetTensor4dDescriptor(l.uh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uh->out_c, l.uh->out_h, l.uh->out_w);
cudnnSetTensor4dDescriptor(l.ur->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ur->out_c, l.ur->out_h, l.ur->out_w);
cudnnSetTensor4dDescriptor(l.wz->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wz->out_c, l.wz->out_h, l.wz->out_w);
cudnnSetTensor4dDescriptor(l.wh->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wh->out_c, l.wh->out_h, l.wh->out_w);
cudnnSetTensor4dDescriptor(l.wr->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wr->out_c, l.wr->out_h, l.wr->out_w);
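The six descriptor calls differ only in the sub-layer they touch. An equivalent loop (a refactor sketch, not what the file actually does) makes that explicit:

/* Equivalent refactor, assuming the same six sub-layer pointers: */
layer *gates[6] = { l.uz, l.uh, l.ur, l.wz, l.wh, l.wr };
for (int g = 0; g < 6; ++g) {
    cudnnSetTensor4dDescriptor(gates[g]->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
                               batch, gates[g]->out_c, gates[g]->out_h, gates[g]->out_w);
}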
for (i = 0; i < l.steps; ++i) {
    increment_layer(&uz, 1);
    increment_layer(&ur, 1);
    increment_layer(&uh, 1);

    increment_layer(&wz, 1);
    increment_layer(&wr, 1);
    increment_layer(&wh, 1);
void pull_gru_layer(layer l)
void push_gru_layer(layer l)
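In the upstream gru_layer.c both of these appear to be empty stubs (an observation about the source, not shown in this excerpt):

void pull_gru_layer(layer l) {}   /* presumed no-op */
void push_gru_layer(layer l) {}   /* presumed no-op */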
update_connected_layer_gpu(*(l.ur), a);
update_connected_layer_gpu(*(l.uz), a);
update_connected_layer_gpu(*(l.uh), a);
update_connected_layer_gpu(*(l.wr), a);
update_connected_layer_gpu(*(l.wz), a);
update_connected_layer_gpu(*(l.wh), a);
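All trainable weights live in the six connected sub-layers, so updating the GRU is six delegated updates. update_gru_layer (declared in the list at the bottom) presumably mirrors this on the CPU via update_connected_layer; a sketch under that assumption:

void update_gru_layer(layer l, update_args a)
{
    /* Presumed CPU counterpart of the GPU calls above. */
    update_connected_layer(*(l.ur), a);
    update_connected_layer(*(l.uz), a);
    update_connected_layer(*(l.uh), a);
    update_connected_layer(*(l.wr), a);
    update_connected_layer(*(l.wz), a);
    update_connected_layer(*(l.wh), a);
}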
for (i = 0; i < l.steps; ++i) {
    s.input_gpu = l.state_gpu;       /* recurrent contributions: state -> z, r */
    forward_connected_layer_gpu(wz, s);
    forward_connected_layer_gpu(wr, s);

    s.input_gpu = net.input_gpu;     /* input contributions: x -> z, r, h */
    forward_connected_layer_gpu(uz, s);
    forward_connected_layer_gpu(ur, s);
    forward_connected_layer_gpu(uh, s);
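    /* Elided here, presumably: sum the two contributions per gate, squash
     * them, and build forgot_state = r * state as the recurrent input for the
     * wh pass below. A hedged reconstruction using the helpers declared at
     * the bottom of this page (not the verbatim file):                      */
    copy_gpu(l.outputs*l.batch, uz.output_gpu, 1, l.z_gpu, 1);
    axpy_gpu(l.outputs*l.batch, 1, wz.output_gpu, 1, l.z_gpu, 1);
    copy_gpu(l.outputs*l.batch, ur.output_gpu, 1, l.r_gpu, 1);
    axpy_gpu(l.outputs*l.batch, 1, wr.output_gpu, 1, l.r_gpu, 1);
    activate_array_gpu(l.z_gpu, l.outputs*l.batch, LOGISTIC);
    activate_array_gpu(l.r_gpu, l.outputs*l.batch, LOGISTIC);
    copy_gpu(l.outputs*l.batch, l.state_gpu, 1, l.forgot_state_gpu, 1);
    mul_gpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1);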
    s.input_gpu = l.forgot_state_gpu;    /* r-gated previous state feeds the candidate */
    forward_connected_layer_gpu(wh, s);
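    /* Presumed continuation: form the candidate h, then blend it with the old
     * state (weighted_sum: out = z*state + (1-z)*h) and carry the result
     * forward as the next state. A sketch; the activation choice (TANH vs
     * LOGISTIC) is an assumption.                                           */
    copy_gpu(l.outputs*l.batch, uh.output_gpu, 1, l.h_gpu, 1);
    axpy_gpu(l.outputs*l.batch, 1, wh.output_gpu, 1, l.h_gpu, 1);
    activate_array_gpu(l.h_gpu, l.outputs*l.batch, TANH);
    weighted_sum_gpu(l.state_gpu, l.h_gpu, l.z_gpu, l.outputs*l.batch, l.output_gpu);
    copy_gpu(l.outputs*l.batch, l.output_gpu, 1, l.state_gpu, 1);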
    increment_layer(&uz, 1);
    increment_layer(&ur, 1);
    increment_layer(&uh, 1);

    increment_layer(&wz, 1);
    increment_layer(&wr, 1);
    increment_layer(&wh, 1);
/* Jump each sub-layer to the last time step before iterating backward. */
increment_layer(&uz, l.steps - 1);
increment_layer(&ur, l.steps - 1);
increment_layer(&uh, l.steps - 1);

increment_layer(&wz, l.steps - 1);
increment_layer(&wr, l.steps - 1);
increment_layer(&wh, l.steps - 1);
float *end_state = l.output_gpu;   /* presumably advanced to the final step's slice by now */
for (i = l.steps-1; i >= 0; --i) {
    /* delta buffer of the previous time step; step 0 has no predecessor */
    float *prev_delta_gpu = (i == 0) ? 0 : l.delta_gpu - l.outputs*l.batch;
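    /* Elided between here and the wh backward pass below, presumably: the
     * step's gate activations are recomputed, then the incoming delta is
     * split across the blend out = z*state + (1-z)*h. A hedged sketch of that
     * split, using weighted_delta_gpu as declared at the bottom of this page
     * (argument order inferred from the forward weighted_sum call):         */
    weighted_delta_gpu(l.state_gpu, l.h_gpu, l.z_gpu,
                       prev_delta_gpu, uh.delta_gpu, uz.delta_gpu,
                       l.outputs*l.batch, l.delta_gpu);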
    s.input_gpu = l.forgot_state_gpu;    /* candidate path: gradients w.r.t. r*state */
    s.delta_gpu = l.forgot_delta_gpu;
    backward_connected_layer_gpu(wh, s);

    s.input_gpu = l.state_gpu;           /* recurrent z/r paths feed the previous step's delta */
    s.delta_gpu = prev_delta_gpu;
    backward_connected_layer_gpu(wr, s);
    backward_connected_layer_gpu(wz, s);
    s.input_gpu = net.input_gpu;         /* input paths: gradients into this step's x slice */
    s.delta_gpu = net.delta_gpu;
    backward_connected_layer_gpu(uh, s);
    backward_connected_layer_gpu(ur, s);
    backward_connected_layer_gpu(uz, s);

    if(net.delta_gpu) net.delta_gpu -= l.inputs*l.batch;   /* step back one input slice */
    increment_layer(&uz, -1);
    increment_layer(&ur, -1);
    increment_layer(&uh, -1);

    increment_layer(&wz, -1);
    increment_layer(&wr, -1);
    increment_layer(&wh, -1);
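Once the loop has walked back to step 0, the final hidden state captured in end_state is presumably restored so a subsequent call can continue from it. A one-line sketch using copy_gpu as declared below:

copy_gpu(l.outputs*l.batch, end_state, 1, l.state_gpu, 1);   /* presumed post-loop restore */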
void update_connected_layer(layer l, update_args a)
void (*update)(struct layer, update_args)
void (*forward_gpu)(struct layer, struct network)
void (*backward_gpu)(struct layer, struct network)
void axpy_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
void (*update_gpu)(struct layer, update_args)
void mul_cpu(int N, float *X, int INCX, float *Y, int INCY)
void (*forward)(struct layer, struct network)
void mul_gpu(int N, float *X, int INCX, float *Y, int INCY)
void fill_gpu(int N, float ALPHA, float *X, int INCX)
layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize, int adam)
void forward_connected_layer(layer l, network net)
void fill_cpu(int N, float ALPHA, float *X, int INCX)
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
void (*backward)(struct layer, struct network)
void mult_add_into_gpu(int num, float *a, float *b, float *c)
void update_gru_layer(layer l, update_args a)
void copy_gpu(int N, float *X, int INCX, float *Y, int INCY)
void backward_gru_layer(layer l, network net)
void activate_array(float *x, const int n, const ACTIVATION a)
void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam)
void activate_array_gpu(float *x, int n, ACTIVATION a)
void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c)
void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta)
void forward_gru_layer(layer l, network net)
void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c)
void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc)
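The forward and backward passes above hinge on the weighted_sum/weighted_delta pair. A sketch of their presumed CPU semantics, consistent with the blend h_t = z*state + (1-z)*h used above (weighted_delta_cpu is written here as the assumed CPU twin of the listed weighted_delta_gpu; neither body is copied from the file):

/* Presumed semantics: c = s*a + (1-s)*b, and the matching gradient split. */
void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c)
{
    for (int i = 0; i < n; ++i) {
        c[i] = s[i]*a[i] + (1 - s[i])*(b ? b[i] : 0);
    }
}

void weighted_delta_cpu(float *a, float *b, float *s,
                        float *da, float *db, float *ds, int n, float *dc)
{
    for (int i = 0; i < n; ++i) {
        if (da) da[i] += dc[i] * s[i];          /* d/da of s*a     */
        if (db) db[i] += dc[i] * (1 - s[i]);    /* d/db of (1-s)*b */
        ds[i] += dc[i] * (a[i] - b[i]);         /* d/ds of the blend */
    }
}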