1 #include "cuda_runtime.h" 27 for(i = 0; i < l.
batch; ++i){
28 float *a = l.weights_gpu;
29 float *b = net.input_gpu + i*l.
c*l.
h*l.
w;
32 gemm_gpu(1,0,m,n,k,1,a,m,b,n,0,c,n);
37 forward_batchnorm_layer_gpu(l, net);
52 backward_batchnorm_layer_gpu(l, net);
59 for(i = 0; i < l.
batch; ++i){
64 float *a = net.input_gpu + i*m*k;
66 float *c = l.weight_updates_gpu;
70 gemm_gpu(0,1,m,n,k,1,a,k,b,k,1,c,n);
77 float *a = l.weights_gpu;
79 float *c = net.delta_gpu + i*n*m;
81 gemm_gpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
89 cuda_pull_array(l.biases_gpu, l.
biases, l.
n);
93 cuda_pull_array(l.scales_gpu, l.
scales, l.
n);
102 cuda_push_array(l.biases_gpu, l.
biases, l.
n);
106 cuda_push_array(l.scales_gpu, l.
scales, l.
n);
116 float decay = a.
decay;
120 adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.
B1, a.
B2, a.
eps, decay, learning_rate, l.
nweights, batch, a.
t);
121 adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, a.
B1, a.
B2, a.
eps, decay, learning_rate, l.
n, batch, a.
t);
123 adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, a.
B1, a.
B2, a.
eps, decay, learning_rate, l.
n, batch, a.
t);
126 axpy_gpu(l.
nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
127 axpy_gpu(l.
nweights, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
130 axpy_gpu(l.
n, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
131 scal_gpu(l.
n, momentum, l.bias_updates_gpu, 1);
134 axpy_gpu(l.
n, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1);
135 scal_gpu(l.
n, momentum, l.scale_updates_gpu, 1);
void add_bias_gpu(float *output, float *biases, int batch, int n, int size)
void pull_deconvolutional_layer(layer l)
void col2im_gpu(float *data_col, int channels, int height, int width, int ksize, int stride, int pad, float *data_im)
void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size)
void axpy_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
void im2col_gpu(float *im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col)
void push_deconvolutional_layer(layer l)
void fill_gpu(int N, float ALPHA, float *X, int INCX)
void backward_deconvolutional_layer_gpu(layer l, network net)
void update_deconvolutional_layer_gpu(layer l, update_args a)
void scal_gpu(int N, float ALPHA, float *X, int INCX)
float learning_rate_scale
void forward_deconvolutional_layer_gpu(layer l, network net)
void activate_array_gpu(float *x, int n, ACTIVATION a)
void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t)
void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta)