8 fprintf(stderr,
"Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size);
19 layer.
output = calloc(h * w * c * batch,
sizeof(
float));
20 layer.
delta = calloc(h * w * c * batch,
sizeof(
float));
21 layer.
squared = calloc(h * w * c * batch,
sizeof(
float));
22 layer.
norms = calloc(h * w * c * batch,
sizeof(
float));
29 layer.
forward_gpu = forward_normalization_layer_gpu;
32 layer.output_gpu = cuda_make_array(layer.
output, h * w * c * batch);
33 layer.delta_gpu = cuda_make_array(layer.
delta, h * w * c * batch);
34 layer.squared_gpu = cuda_make_array(layer.
squared, h * w * c * batch);
35 layer.norms_gpu = cuda_make_array(layer.
norms, h * w * c * batch);
43 int batch = layer->
batch;
50 layer->
output = realloc(layer->
output, h * w * c * batch *
sizeof(
float));
51 layer->
delta = realloc(layer->
delta, h * w * c * batch *
sizeof(
float));
52 layer->
squared = realloc(layer->
squared, h * w * c * batch *
sizeof(
float));
53 layer->
norms = realloc(layer->
norms, h * w * c * batch *
sizeof(
float));
55 cuda_free(layer->output_gpu);
56 cuda_free(layer->delta_gpu);
57 cuda_free(layer->squared_gpu);
58 cuda_free(layer->norms_gpu);
59 layer->output_gpu = cuda_make_array(layer->
output, h * w * c * batch);
60 layer->delta_gpu = cuda_make_array(layer->
delta, h * w * c * batch);
61 layer->squared_gpu = cuda_make_array(layer->
squared, h * w * c * batch);
62 layer->norms_gpu = cuda_make_array(layer->
norms, h * w * c * batch);
74 for(b = 0; b < layer.
batch; ++b){
75 float *squared = layer.
squared + w*h*c*b;
76 float *norms = layer.
norms + w*h*c*b;
77 float *input = net.
input + w*h*c*b;
78 pow_cpu(w*h*c, 2, input, 1, squared, 1);
81 for(k = 0; k < layer.
size/2; ++k){
85 for(k = 1; k < layer.
c; ++k){
86 copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
87 int prev = k - ((layer.
size-1)/2) - 1;
88 int next = k + (layer.
size/2);
89 if(prev >= 0)
axpy_cpu(w*h, -layer.
alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
90 if(next < layer.
c)
axpy_cpu(w*h, layer.
alpha, squared + w*h*next, 1, norms + w*h*k, 1);
118 for(b = 0; b < layer.
batch; ++b){
119 float *squared = layer.squared_gpu + w*h*c*b;
120 float *norms = layer.norms_gpu + w*h*c*b;
121 float *input = net.input_gpu + w*h*c*b;
122 pow_gpu(w*h*c, 2, input, 1, squared, 1);
125 for(k = 0; k < layer.
size/2; ++k){
129 for(k = 1; k < layer.
c; ++k){
130 copy_gpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
131 int prev = k - ((layer.
size-1)/2) - 1;
132 int next = k + (layer.
size/2);
133 if(prev >= 0)
axpy_gpu(w*h, -layer.
alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
134 if(next < layer.
c)
axpy_gpu(w*h, layer.
alpha, squared + w*h*next, 1, norms + w*h*k, 1);
137 pow_gpu(w*h*c*layer.
batch, -layer.
beta, layer.norms_gpu, 1, layer.output_gpu, 1);
138 mul_gpu(w*h*c*layer.
batch, net.input_gpu, 1, layer.output_gpu, 1);
141 void backward_normalization_layer_gpu(
const layer layer,
network net)
148 pow_gpu(w*h*c*layer.
batch, -layer.
beta, layer.norms_gpu, 1, net.delta_gpu, 1);
149 mul_gpu(w*h*c*layer.
batch, layer.delta_gpu, 1, net.delta_gpu, 1);
void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
void resize_normalization_layer(layer *layer, int w, int h)
void(* forward_gpu)(struct layer, struct network)
void(* backward_gpu)(struct layer, struct network)
void axpy_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
void mul_cpu(int N, float *X, int INCX, float *Y, int INCY)
void(* forward)(struct layer, struct network)
void mul_gpu(int N, float *X, int INCX, float *Y, int INCY)
void forward_normalization_layer(const layer layer, network net)
void scal_gpu(int N, float ALPHA, float *X, int INCX)
void backward_normalization_layer(const layer layer, network net)
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
void(* backward)(struct layer, struct network)
void scal_cpu(int N, float ALPHA, float *X, int INCX)
void const_cpu(int N, float ALPHA, float *X, int INCX)
layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa)
void copy_gpu(int N, float *X, int INCX, float *Y, int INCY)
void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
void const_gpu(int N, float ALPHA, float *X, int INCX)
void pow_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)