layer make_batchnorm_layer(int batch, int w, int h, int c)
{
    fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w, h, c);
    ...
    l.output = calloc(h * w * c * batch, sizeof(float));
    l.delta  = calloc(h * w * c * batch, sizeof(float));
    ...
    l.scales = calloc(c, sizeof(float));
    l.biases = calloc(c, sizeof(float));
    ...
    for(i = 0; i < c; ++i){
        l.scales[i] = 1;
    }

    l.mean = calloc(c, sizeof(float));
    l.variance = calloc(c, sizeof(float));
    ...
#ifdef GPU
    l.output_gpu = cuda_make_array(l.output, h * w * c * batch);
    l.delta_gpu  = cuda_make_array(l.delta, h * w * c * batch);

    l.biases_gpu = cuda_make_array(l.biases, c);
    l.scales_gpu = cuda_make_array(l.scales, c);

    l.mean_gpu     = cuda_make_array(l.mean, c);
    l.variance_gpu = cuda_make_array(l.variance, c);

    l.rolling_mean_gpu     = cuda_make_array(l.mean, c);
    l.rolling_variance_gpu = cuda_make_array(l.variance, c);

    l.mean_delta_gpu     = cuda_make_array(l.mean, c);
    l.variance_delta_gpu = cuda_make_array(l.variance, c);
#ifdef CUDNN
    cudnnCreateTensorDescriptor(&l.normTensorDesc);
    cudnnCreateTensorDescriptor(&l.dstTensorDesc);
    cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
                               l.batch, l.out_c, l.out_h, l.out_w);
    cudnnSetTensor4dDescriptor(l.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
                               1, l.out_c, 1, 1);
#endif
#endif
    return l;
}
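For orientation, a minimal usage sketch (not part of this file) that builds a standalone batch-norm layer and releases it. The 4x3x416x416 shape is illustrative, and it assumes darknet's batchnorm_layer.h and layer.h are on the include path:

    #include <stdio.h>
    #include "batchnorm_layer.h"   /* declares make_batchnorm_layer */
    #include "layer.h"             /* declares free_layer */

    int main(void)
    {
        /* batch of 4, 416x416 spatial, 3 channels */
        layer l = make_batchnorm_layer(4, 416, 416, 3);
        /* scales, biases, mean, and variance are per-channel: l.c floats each */
        printf("channels: %d, activations per batch: %d\n",
               l.c, l.batch * l.out_w * l.out_h * l.out_c);
        free_layer(l);
        return 0;
    }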
void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates)
{
    int i,b,f;
    for(f = 0; f < n; ++f){
        float sum = 0;
        for(b = 0; b < batch; ++b){
            for(i = 0; i < size; ++i){
                int index = i + size*(f + n*b);
                sum += delta[index] * x_norm[index];
            }
        }
        scale_updates[f] += sum;
    }
}
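In batch-norm notation, with \hat{x} the normalized activations and \delta the incoming gradient, the loop accumulates the gradient of the loss with respect to the per-filter scale \gamma_f over the batch B and the spatial size S:

    \frac{\partial L}{\partial \gamma_f} \;{+}{=}\; \sum_{b=1}^{B} \sum_{i=1}^{S} \delta_{b,f,i}\,\hat{x}_{b,f,i}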
void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
{
    int i,j,k;
    for(i = 0; i < filters; ++i){
        mean_delta[i] = 0;
        for (j = 0; j < batch; ++j) {
            for (k = 0; k < spatial; ++k) {
                int index = j*filters*spatial + i*spatial + k;
                mean_delta[i] += delta[index];
            }
        }
        mean_delta[i] *= (-1./sqrt(variance[i] + .00001f));
    }
}
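This is the gradient with respect to the per-filter batch mean; the contribution that flows through the variance vanishes because the residuals x - \mu_f sum to zero over the batch:

    \frac{\partial L}{\partial \mu_f} = -\frac{1}{\sqrt{\sigma_f^2 + \varepsilon}} \sum_{b,i} \delta_{b,f,i}, \qquad \varepsilon = 10^{-5}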
void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
{
    int i,j,k;
    for(i = 0; i < filters; ++i){
        variance_delta[i] = 0;
        for(j = 0; j < batch; ++j){
            for(k = 0; k < spatial; ++k){
                int index = j*filters*spatial + i*spatial + k;
                variance_delta[i] += delta[index]*(x[index] - mean[i]);
            }
        }
        variance_delta[i] *= -.5 * pow(variance[i] + .00001f, (float)(-3./2.));
    }
}
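Equivalently, the gradient with respect to the per-filter batch variance:

    \frac{\partial L}{\partial \sigma_f^2} = -\tfrac{1}{2}\,(\sigma_f^2 + \varepsilon)^{-3/2} \sum_{b,i} \delta_{b,f,i}\,(x_{b,f,i} - \mu_f)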
void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
{
    int f, j, k;
    for(j = 0; j < batch; ++j){
        for(f = 0; f < filters; ++f){
            for(k = 0; k < spatial; ++k){
                int index = j*filters*spatial + f*spatial + k;
                delta[index] = delta[index] * 1./(sqrt(variance[f] + .00001f))
                             + variance_delta[f] * 2. * (x[index] - mean[f]) / (spatial * batch)
                             + mean_delta[f] / (spatial * batch);
            }
        }
    }
}
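Combining the three terms gives the standard batch-normalization backward pass for the inputs, with m = batch * spatial samples per filter:

    \frac{\partial L}{\partial x_{b,f,i}} = \frac{\delta_{b,f,i}}{\sqrt{\sigma_f^2 + \varepsilon}}
        + \frac{\partial L}{\partial \sigma_f^2} \cdot \frac{2\,(x_{b,f,i} - \mu_f)}{m}
        + \frac{1}{m}\,\frac{\partial L}{\partial \mu_f}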
void resize_batchnorm_layer(layer *layer, int w, int h)
{
    fprintf(stderr, "Not implemented\n");
}
void pull_batchnorm_layer(layer l)
{
    cuda_pull_array(l.scales_gpu, l.scales, l.c);
    ...
}

void push_batchnorm_layer(layer l)
{
    cuda_push_array(l.scales_gpu, l.scales, l.c);
    ...
}
        /* cuDNN path: the forward training call also maintains the rolling statistics */
        cudnnBatchNormalizationForwardTraining(cudnn_handle(),
                CUDNN_BATCHNORM_SPATIAL,
                ...
                l.rolling_variance_gpu,
                ...);
        ...
        /* non-cuDNN path: blend the batch statistics into the rolling averages */
        axpy_gpu(l.out_c, .01, l.mean_gpu, 1, l.rolling_mean_gpu, 1);
        ...
        axpy_gpu(l.out_c, .01, l.variance_gpu, 1, l.rolling_variance_gpu, 1);
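The axpy_gpu calls add 1% of the current batch statistic into the rolling buffer; in darknet they are paired with scal_gpu(l.out_c, .99, ...) calls (elided above), so together they form an exponential moving average with momentum 0.99. A standalone sketch of the per-channel update, not darknet code:

    /* rolling <- 0.99 * rolling + 0.01 * batch_stat, one value per channel */
    static void update_rolling_stats(float *rolling, const float *batch_stat, int channels)
    {
        int i;
        for(i = 0; i < channels; ++i){
            rolling[i] = 0.99f * rolling[i] + 0.01f * batch_stat[i];
        }
    }

On the cuDNN path the same running average is kept by cudnnBatchNormalizationForwardTraining itself through its exponentialAverageFactor argument.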
    /* at inference time the batch statistics are replaced by the rolling ones */
    if(!net.train){
        l.mean_gpu = l.rolling_mean_gpu;
        l.variance_gpu = l.rolling_variance_gpu;
    }
    ...
    cudnnBatchNormalizationBackward(cudnn_handle(),
            CUDNN_BATCHNORM_SPATIAL,
            ...
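When net.train is false, the rolling statistics stand in for the batch statistics, which corresponds to the usual inference-time transform y = gamma * (x - mu) / sqrt(sigma^2 + eps) + beta. A standalone sketch (not darknet code) of that transform over an NCHW tensor:

    #include <math.h>

    /* normalize x in place per channel using the stored rolling statistics */
    static void batchnorm_inference(float *x, const float *rolling_mean,
                                    const float *rolling_variance,
                                    const float *scales, const float *biases,
                                    int batch, int filters, int spatial)
    {
        int b, f, i;
        for(b = 0; b < batch; ++b){
            for(f = 0; f < filters; ++f){
                for(i = 0; i < spatial; ++i){
                    int index = (b*filters + f)*spatial + i;
                    float xhat = (x[index] - rolling_mean[f])
                               / sqrtf(rolling_variance[f] + .00001f);
                    x[index] = scales[f]*xhat + biases[f];
                }
            }
        }
    }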
void backward_bias(float *bias_updates, float *delta, int batch, int n, int size)
void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial)
void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
void add_bias_gpu(float *output, float *biases, int batch, int n, int size)
void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size)
void(* forward_gpu)(struct layer, struct network)
void add_bias(float *output, float *biases, int batch, int n, int size)
void(* backward_gpu)(struct layer, struct network)
void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
void axpy_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates)
void(* forward)(struct layer, struct network)
void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates)
void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
void mean_cpu(float *x, int batch, int filters, int spatial, float *mean)
void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
void resize_batchnorm_layer(layer *layer, int w, int h)
void scal_gpu(int N, float ALPHA, float *X, int INCX)
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
void(* backward)(struct layer, struct network)
void scale_bias_gpu(float *output, float *biases, int batch, int n, int size)
void scal_cpu(int N, float ALPHA, float *X, int INCX)
void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial)
void copy_gpu(int N, float *X, int INCX, float *Y, int INCY)
void scale_bias(float *output, float *scales, int batch, int n, int size)
void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean)
void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
void forward_batchnorm_layer(layer l, network net)
void backward_batchnorm_layer(layer l, network net)
void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
layer make_batchnorm_layer(int batch, int w, int h, int c)