l.output = calloc(batch*outputs, sizeof(float));
l.delta = calloc(batch*outputs, sizeof(float));

l.weights = calloc(outputs*inputs, sizeof(float));
l.biases = calloc(outputs, sizeof(float));

float scale = sqrt(2./inputs);
for(i = 0; i < outputs*inputs; ++i){
    l.weights[i] = scale*rand_uniform(-1, 1);
}
for(i = 0; i < outputs; ++i){
    l.biases[i] = 0;
}
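/* sqrt(2./inputs) is a He-style scale for ReLU-family activations: the
   weights are drawn uniformly from [-scale, scale] via rand_uniform(-1, 1),
   which keeps the activation variance from growing or shrinking with the
   number of inputs, while the biases start at zero. */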
l.scales = calloc(outputs, sizeof(float));
for(i = 0; i < outputs; ++i){
    l.scales[i] = 1;
}
l.mean = calloc(outputs, sizeof(float));
l.variance = calloc(outputs, sizeof(float));

l.x = calloc(batch*outputs, sizeof(float));
l.x_norm = calloc(batch*outputs, sizeof(float));
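/* Batch-norm bookkeeping: mean and variance hold the per-output statistics
   of the current minibatch, while x and x_norm cache the pre- and
   post-normalization activations that the backward pass needs. */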
l.weights_gpu = cuda_make_array(l.weights, outputs*inputs);
l.biases_gpu = cuda_make_array(l.biases, outputs);

l.weight_updates_gpu = cuda_make_array(l.weight_updates, outputs*inputs);
l.bias_updates_gpu = cuda_make_array(l.bias_updates, outputs);

l.output_gpu = cuda_make_array(l.output, outputs*batch);
l.delta_gpu = cuda_make_array(l.delta, outputs*batch);

l.m_gpu = cuda_make_array(0, inputs*outputs);
l.v_gpu = cuda_make_array(0, inputs*outputs);
l.bias_m_gpu = cuda_make_array(0, outputs);
l.bias_v_gpu = cuda_make_array(0, outputs);
l.scale_m_gpu = cuda_make_array(0, outputs);
l.scale_v_gpu = cuda_make_array(0, outputs);
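/* Adam optimizer state: first-moment (m) and second-moment (v) estimates
   for the weights, biases and batch-norm scales. Passing a null host
   pointer to cuda_make_array simply allocates a zero-filled device buffer
   of the requested length. */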
l.mean_gpu = cuda_make_array(l.mean, outputs);
l.variance_gpu = cuda_make_array(l.variance, outputs);

l.rolling_mean_gpu = cuda_make_array(l.mean, outputs);
l.rolling_variance_gpu = cuda_make_array(l.variance, outputs);

l.mean_delta_gpu = cuda_make_array(l.mean, outputs);
l.variance_delta_gpu = cuda_make_array(l.variance, outputs);

l.scales_gpu = cuda_make_array(l.scales, outputs);
l.scale_updates_gpu = cuda_make_array(l.scale_updates, outputs);

l.x_gpu = cuda_make_array(l.output, l.batch*outputs);
l.x_norm_gpu = cuda_make_array(l.output, l.batch*outputs);
cudnnCreateTensorDescriptor(&l.normTensorDesc);
cudnnCreateTensorDescriptor(&l.dstTensorDesc);
cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w);
cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l.out_c, 1, 1);
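/* dstTensorDesc describes the layer output as an NCHW tensor of shape
   batch x out_c x out_h x out_w (out_h = out_w = 1 for a connected layer);
   normTensorDesc is the matching 1 x out_c x 1 x 1 per-channel descriptor
   used for the batch-norm scale, bias, mean and variance parameters. */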
fprintf(stderr, "connected %4d -> %4d\n", inputs, outputs);
float decay = a.decay;
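/* Only the a.decay read of update_connected_layer is excerpted above. The
   update itself is built from the axpy_cpu (y += ALPHA*x) and scal_cpu
   (x *= ALPHA) helpers listed at the end of this section. A minimal
   self-contained sketch of the SGD-with-momentum-and-weight-decay rule
   those calls express for the weights (an illustration of the arithmetic,
   not the exact Darknet source): */
void sgd_step_sketch(float *w, float *dw, int n,
                     float learning_rate, float momentum, float decay, int batch)
{
    int i;
    for(i = 0; i < n; ++i){
        dw[i] -= decay*batch*w[i];              /* fold L2 weight decay into the gradient   */
        w[i]  += learning_rate/batch*dw[i];     /* take the batch-averaged gradient step    */
        dw[i] *= momentum;                      /* keep a momentum trace for the next step  */
    }
}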
float *a = net.input;
gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
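/* In forward_connected_layer, m = l.batch, k = l.inputs, n = l.outputs,
   b = l.weights and c = l.output, so the gemm(0,1,...) call above computes
   output += input * weights^T, with weights stored row-major as
   outputs x inputs. A self-contained loop equivalent of that single call
   (illustrative; the real code then adds biases or runs batch-norm and
   applies the activation): */
void connected_forward_sketch(const float *input, const float *weights,
                              float *output, int batch, int inputs, int outputs)
{
    int b, o, i;
    for(b = 0; b < batch; ++b){
        for(o = 0; o < outputs; ++o){
            float sum = output[b*outputs + o];  /* gemm accumulates into c (BETA = 1) */
            for(i = 0; i < inputs; ++i){
                sum += input[b*inputs + i] * weights[o*inputs + i];
            }
            output[b*outputs + o] = sum;
        }
    }
}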
float *b = net.input;
gemm(1,0,m,n,k,1,a,m,b,n,1,c,n);
if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
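/* In backward_connected_layer the gemm(1,0,...) call accumulates the weight
   gradient delta^T * input into l.weight_updates; the pointers are then
   re-aimed (in lines not excerpted here) at l.delta, l.weights and
   net.delta, so the guarded if(c) gemm(0,0,...) call propagates the error
   to the previous layer when its delta buffer exists. A self-contained loop
   equivalent of the two products (illustrative): */
void connected_backward_sketch(const float *delta, const float *input,
                               const float *weights, float *weight_updates,
                               float *prev_delta, int batch, int inputs, int outputs)
{
    int b, o, i;
    for(o = 0; o < outputs; ++o){
        for(i = 0; i < inputs; ++i){
            for(b = 0; b < batch; ++b){
                weight_updates[o*inputs + i] += delta[b*outputs + o] * input[b*inputs + i];
            }
        }
    }
    if(prev_delta){  /* corresponds to the if(c) guard above */
        for(b = 0; b < batch; ++b){
            for(o = 0; o < outputs; ++o){
                for(i = 0; i < inputs; ++i){
                    prev_delta[b*inputs + i] += delta[b*outputs + o] * weights[o*inputs + i];
                }
            }
        }
    }
}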
for(i = 0; i < l.outputs; ++i){
    for(j = 0; j < l.inputs; ++j){
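/* denormalize_connected_layer folds the rolling batch-norm statistics into
   the raw parameters: each output row of the weights is multiplied by
   scales[i]/sqrt(rolling_variance[i] + epsilon) in the inner j loop, and
   the corresponding bias is shifted by -rolling_mean[i] times the same
   factor, so the layer gives the same result with batch-norm removed. */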
void pull_connected_layer(layer l)
void push_connected_layer(layer l)
float decay = a.decay;

adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.inputs*l.outputs, batch, a.t);
adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.outputs, batch, a.t);
adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.outputs, batch, a.t);

axpy_gpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
axpy_gpu(l.outputs, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1);
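/* The adam_update_gpu kernel itself is not shown; its B1, B2, eps,
   learning_rate and a.t arguments correspond to the standard Adam rule.
   A scalar, self-contained sketch of one Adam step for a single parameter
   (an illustration of the rule, not the Darknet kernel): */
#include <math.h>

float adam_step_sketch(float w, float g, float *m, float *v,
                       float B1, float B2, float eps, float rate, int t)
{
    *m = B1*(*m) + (1 - B1)*g;             /* update first-moment estimate  */
    *v = B2*(*v) + (1 - B2)*g*g;           /* update second-moment estimate */
    float mhat = *m / (1 - powf(B1, t));   /* bias-correct both moments     */
    float vhat = *v / (1 - powf(B2, t));
    return w - rate * mhat / (sqrtf(vhat) + eps);   /* corrected step */
}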
float * a = net.input_gpu;
float * b = l.weights_gpu;
float * c = l.output_gpu;
gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n);

forward_batchnorm_layer_gpu(l, net);
backward_batchnorm_layer_gpu(l, net);

float * a = l.delta_gpu;
float * b = net.input_gpu;
float * c = l.weight_updates_gpu;
gemm_gpu(1,0,m,n,k,1,a,m,b,n,1,c,n);

if(c) gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
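/* As in the CPU path, b and c are re-pointed between the two gemm_gpu calls
   (to l.weights_gpu and net.delta_gpu, in lines not excerpted here), so the
   final guarded call writes the gradient with respect to the layer input
   into the previous layer's delta buffer on the GPU. */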
void backward_bias(float *bias_updates, float *delta, int batch, int n, int size)
void update_connected_layer(layer l, update_args a)
void statistics_connected_layer(layer l)
void add_bias_gpu(float *output, float *biases, int batch, int n, int size)
void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size)
void(* update)(struct layer, update_args)
void(* forward_gpu)(struct layer, struct network)
void add_bias(float *output, float *biases, int batch, int n, int size)
void(* backward_gpu)(struct layer, struct network)
void axpy_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
void(* update_gpu)(struct layer, update_args)
void(* forward)(struct layer, struct network)
void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta)
void fill_gpu(int N, float ALPHA, float *X, int INCX)
void print_statistics(float *a, int n)
void constrain_gpu(int N, float ALPHA, float *X, int INCX)
void forward_connected_layer(layer l, network net)
void fill_cpu(int N, float ALPHA, float *X, int INCX)
void scal_gpu(int N, float ALPHA, float *X, int INCX)
void gemm(int TA, int TB, int M, int N, int K, float ALPHA, float *A, int lda, float *B, int ldb, float BETA, float *C, int ldc)
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
void(* backward)(struct layer, struct network)
void scal_cpu(int N, float ALPHA, float *X, int INCX)
float learning_rate_scale
void activate_array(float *x, const int n, const ACTIVATION a)
layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize, int adam)
void activate_array_gpu(float *x, int n, ACTIVATION a)
void forward_batchnorm_layer(layer l, network net)
void backward_batchnorm_layer(layer l, network net)
float rand_uniform(float min, float max)
void denormalize_connected_layer(layer l)
void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t)
void backward_connected_layer(layer l, network net)
void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta)
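Taken together, the prototypes above describe the connected-layer API. A
hedged construction example (argument order from the make_connected_layer
prototype listed above; the sizes and the RELU activation value are purely
illustrative, and the Darknet headers are assumed to be available):

layer make_example_fc(void)
{
    /* batch=32, inputs=784, outputs=100, ReLU activation,
       batch_normalize=1, adam=0 */
    return make_connected_layer(32, 784, 100, RELU, 1, 0);
}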