/* make_local_layer (excerpts): every output location gets its own filter bank */
int locations = out_h*out_w;
l.weights = calloc(c*n*size*size*locations, sizeof(float));
float scale = sqrt(2./(size*size*c));
l.output = calloc(l.batch*out_h*out_w*n, sizeof(float));
l.delta = calloc(l.batch*out_h*out_w*n, sizeof(float));
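The scale computed above is the usual sqrt(2/fan_in) heuristic. A minimal sketch of how the freshly allocated weights might be filled, assuming the initialization loop directly follows the scale computation and uses the rand_uniform helper declared further down:

int i;
for(i = 0; i < c*n*size*size*locations; ++i){
    l.weights[i] = scale*rand_uniform(-1, 1);   /* uniform in [-scale, scale] */
}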
/* make_local_layer (continued): GPU mirrors of the buffers, then the banner */
l.weights_gpu = cuda_make_array(l.weights, c*n*size*size*locations);
l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size*locations);
l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
fprintf(stderr, "Local Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h, w, c, n, out_h, out_w, n);
/* forward_local_layer (excerpts): one small GEMM per output location */
int locations = out_h * out_w;
for(i = 0; i < l.batch; ++i){
for(i = 0; i < l.batch; ++i){
float *input = net.input + i*l.w*l.h*l.c;
for(j = 0; j < locations; ++j){
float *c = output + j;
gemm(0,0,m,n,k,1,a,k,b,locations,1,c,locations);
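Unlike a convolutional layer, the weights are not shared across locations, so the forward pass runs one small GEMM per output location j. A sketch of the surrounding setup, assuming the usual im2col workspace layout where column j of the unrolled input is reached with a stride of locations:

im2col_cpu(input, l.c, l.h, l.w, l.size, l.stride, l.pad, net.workspace);
float *output = l.output + i*l.outputs;
for(j = 0; j < locations; ++j){
    float *a = l.weights + j*l.size*l.size*l.c*l.n;  /* filters for location j */
    float *b = net.workspace + j;                    /* im2col column j        */
    float *c = output + j;                           /* output pixel j         */

    int m = l.n;                 /* number of filters              */
    int n = 1;                   /* one spatial location at a time */
    int k = l.size*l.size*l.c;   /* receptive-field size           */

    gemm(0,0,m,n,k,1,a,k,b,locations,1,c,locations);
}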
/* backward_local_layer (excerpts): weight gradients, then input gradients */
for(i = 0; i < l.batch; ++i){
for(i = 0; i < l.batch; ++i){
float *input = net.input + i*l.w*l.h*l.c;
for(j = 0; j < locations; ++j){
gemm(0,1,m,n,k,1,a,locations,b,locations,1,c,n);
for(j = 0; j < locations; ++j){
gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations);
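The two GEMMs do different jobs: gemm(0,1,...) multiplies each location's delta by its im2col column to accumulate weight gradients, while gemm(1,0,...) multiplies the transposed per-location weights by the delta to rebuild the input gradient, which col2im_cpu then folds back into image layout. A sketch of the dimensions involved, with the pointer setup treated as an assumption:

for(j = 0; j < locations; ++j){
    float *a = l.delta + i*l.outputs + j;                   /* dL/dout, location j */
    float *b = net.workspace + j;                           /* im2col column j     */
    float *c = l.weight_updates + j*l.size*l.size*l.c*l.n;
    int m = l.n;                                            /* filters             */
    int n = l.size*l.size*l.c;                              /* receptive field     */
    int k = 1;                                              /* one location        */
    gemm(0,1,m,n,k,1,a,locations,b,locations,1,c,n);
}
if(net.delta){
    for(j = 0; j < locations; ++j){
        float *a = l.weights + j*l.size*l.size*l.c*l.n;
        float *b = l.delta + i*l.outputs + j;
        float *c = net.workspace + j;
        int m = l.size*l.size*l.c;
        int n = 1;
        int k = l.n;
        gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations);
    }
    col2im_cpu(net.workspace, l.c, l.h, l.w, l.size, l.stride, l.pad, net.delta + i*l.c*l.h*l.w);
}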
/* update_local_layer (excerpt) */
float decay = a.decay;
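Only the decay read survives in this excerpt. A sketch of the full update it belongs to, assuming the standard decay/step/momentum sequence that the GPU update below makes explicit, and using the axpy_cpu and scal_cpu helpers declared further down:

float learning_rate = a.learning_rate*l.learning_rate_scale;
float momentum = a.momentum;
float decay = a.decay;
int batch = a.batch;

int locations = l.out_w*l.out_h;
int size = l.size*l.size*l.c*l.n*locations;                               /* total weight count */

axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
scal_cpu(l.outputs, momentum, l.bias_updates, 1);

axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1);          /* weight decay   */
axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1);   /* gradient step  */
scal_cpu(size, momentum, l.weight_updates, 1);                            /* carry momentum */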
/* GPU forward pass (excerpts): same per-location structure, using gemm_gpu */
int locations = out_h * out_w;
for(i = 0; i < l.batch; ++i){
for(i = 0; i < l.batch; ++i){
float *input = net.input_gpu + i*l.w*l.h*l.c;
float *output = l.output_gpu + i*l.outputs;
for(j = 0; j < locations; ++j){
float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n;
float *c = output + j;
gemm_gpu(0,0,m,n,k,1,a,k,b,locations,1,c,locations);
/* GPU backward pass (excerpts) */
for(i = 0; i < l.batch; ++i){
for(i = 0; i < l.batch; ++i){
float *input = net.input_gpu + i*l.w*l.h*l.c;
for(j = 0; j < locations; ++j){
float *a = l.delta_gpu + i*l.outputs + j;
float *c = l.weight_updates_gpu + j*l.size*l.size*l.c*l.n;
gemm_gpu(0,1,m,n,k,1,a,locations,b,locations,1,c,n);
for(j = 0; j < locations; ++j){
float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n;
float *b = l.delta_gpu + i*l.outputs + j;
gemm_gpu(1,0,m,n,k,1,a,m,b,locations,0,c,locations);
/* GPU update (excerpts): weight decay, gradient step, then momentum scaling */
float decay = a.decay;
axpy_gpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
axpy_gpu(size, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
axpy_gpu(size, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
scal_gpu(size, momentum, l.weight_updates_gpu, 1);
/* pull/push (excerpts): synchronize weights between device and host */
cuda_pull_array(l.weights_gpu, l.weights, size);
cuda_push_array(l.weights_gpu, l.weights, size);
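A sketch of what the full pull/push pair could look like, assuming the biases are synchronized alongside the weights and that size counts every per-location filter:

void pull_local_layer(local_layer l)
{
    int locations = l.out_w*l.out_h;
    int size = l.size*l.size*l.c*l.n*locations;
    cuda_pull_array(l.weights_gpu, l.weights, size);
    cuda_pull_array(l.biases_gpu, l.biases, l.outputs);
}

void push_local_layer(local_layer l)
{
    int locations = l.out_w*l.out_h;
    int size = l.size*l.size*l.c*l.n*locations;
    cuda_push_array(l.weights_gpu, l.weights, size);
    cuda_push_array(l.biases_gpu, l.biases, l.outputs);
}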
void backward_local_layer(local_layer l, network net)
int local_out_width(local_layer l)
void col2im_gpu(float *data_col, int channels, int height, int width, int ksize, int stride, int pad, float *data_im)
void (*update)(struct layer, update_args)
void (*forward_gpu)(struct layer, struct network)
int local_out_height(local_layer l)
void (*backward_gpu)(struct layer, struct network)
void axpy_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
void (*update_gpu)(struct layer, update_args)
void im2col_gpu(float *im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col)
void (*forward)(struct layer, struct network)
void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta)
void scal_gpu(int N, float ALPHA, float *X, int INCX)
void gemm(int TA, int TB, int M, int N, int K, float ALPHA, float *A, int lda, float *B, int ldb, float BETA, float *C, int ldc)
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
void (*backward)(struct layer, struct network)
void scal_cpu(int N, float ALPHA, float *X, int INCX)
void copy_gpu(int N, float *X, int INCX, float *Y, int INCY)
void im2col_cpu(float *data_im, int channels, int height, int width, int ksize, int stride, int pad, float *data_col)
float learning_rate_scale
local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation)
void forward_local_layer(const local_layer l, network net)
void activate_array(float *x, const int n, const ACTIVATION a)
void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
void activate_array_gpu(float *x, int n, ACTIVATION a)
void col2im_cpu(float *data_col, int channels, int height, int width, int ksize, int stride, int pad, float *data_im)
float rand_uniform(float min, float max)
void update_local_layer(local_layer l, update_args a)
void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta)