darknet v3 — network.c: network construction, training, inference, and multi-GPU weight synchronization.
1 #include <stdio.h>
2 #include <time.h>
3 #include <assert.h>
4 #include "network.h"
5 #include "image.h"
6 #include "data.h"
7 #include "utils.h"
8 #include "blas.h"
9 
10 #include "crop_layer.h"
11 #include "connected_layer.h"
12 #include "gru_layer.h"
13 #include "rnn_layer.h"
14 #include "crnn_layer.h"
15 #include "local_layer.h"
16 #include "convolutional_layer.h"
17 #include "activation_layer.h"
18 #include "detection_layer.h"
19 #include "region_layer.h"
20 #include "yolo_layer.h"
21 #include "normalization_layer.h"
22 #include "batchnorm_layer.h"
23 #include "maxpool_layer.h"
24 #include "reorg_layer.h"
25 #include "avgpool_layer.h"
26 #include "cost_layer.h"
27 #include "softmax_layer.h"
28 #include "dropout_layer.h"
29 #include "route_layer.h"
30 #include "upsample_layer.h"
31 #include "shortcut_layer.h"
32 #include "parser.h"
33 #include "data.h"
34 
36 {
37  load_args args = {0};
38  args.w = net->w;
39  args.h = net->h;
40  args.size = net->w;
41 
42  args.min = net->min_crop;
43  args.max = net->max_crop;
44  args.angle = net->angle;
45  args.aspect = net->aspect;
46  args.exposure = net->exposure;
47  args.center = net->center;
48  args.saturation = net->saturation;
49  args.hue = net->hue;
50  return args;
51 }
52 
53 network *load_network(char *cfg, char *weights, int clear)
54 {
56  if(weights && weights[0] != 0){
57  load_weights(net, weights);
58  }
59  if(clear) (*net->seen) = 0;
60  return net;
61 }
62 
64 {
65  size_t batch_num = (*net->seen)/(net->batch*net->subdivisions);
66  return batch_num;
67 }
68 
70 {
71  int i;
72  for (i = 0; i < net->n; ++i) {
73  #ifdef GPU
74  layer l = net->layers[i];
75  if(l.state_gpu){
76  fill_gpu(l.outputs, 0, l.state_gpu + l.outputs*b, 1);
77  }
78  if(l.h_gpu){
79  fill_gpu(l.outputs, 0, l.h_gpu + l.outputs*b, 1);
80  }
81  #endif
82  }
83 }
84 
86 {
87  reset_network_state(net, 0);
88 }
89 
91 {
92  size_t batch_num = get_current_batch(net);
93  int i;
94  float rate;
95  if (batch_num < net->burn_in) return net->learning_rate * pow((float)batch_num / net->burn_in, net->power);
96  switch (net->policy) {
97  case CONSTANT:
98  return net->learning_rate;
99  case STEP:
100  return net->learning_rate * pow(net->scale, batch_num/net->step);
101  case STEPS:
102  rate = net->learning_rate;
103  for(i = 0; i < net->num_steps; ++i){
104  if(net->steps[i] > batch_num) return rate;
105  rate *= net->scales[i];
106  }
107  return rate;
108  case EXP:
109  return net->learning_rate * pow(net->gamma, batch_num);
110  case POLY:
111  return net->learning_rate * pow(1 - (float)batch_num / net->max_batches, net->power);
112  case RANDOM:
113  return net->learning_rate * pow(rand_uniform(0,1), net->power);
114  case SIG:
115  return net->learning_rate * (1./(1.+exp(net->gamma*(batch_num - net->step))));
116  default:
117  fprintf(stderr, "Policy is weird!\n");
118  return net->learning_rate;
119  }
120 }
121 
123 {
124  switch(a){
125  case CONVOLUTIONAL:
126  return "convolutional";
127  case ACTIVE:
128  return "activation";
129  case LOCAL:
130  return "local";
131  case DECONVOLUTIONAL:
132  return "deconvolutional";
133  case CONNECTED:
134  return "connected";
135  case RNN:
136  return "rnn";
137  case GRU:
138  return "gru";
139  case LSTM:
140  return "lstm";
141  case CRNN:
142  return "crnn";
143  case MAXPOOL:
144  return "maxpool";
145  case REORG:
146  return "reorg";
147  case AVGPOOL:
148  return "avgpool";
149  case SOFTMAX:
150  return "softmax";
151  case DETECTION:
152  return "detection";
153  case REGION:
154  return "region";
155  case YOLO:
156  return "yolo";
157  case DROPOUT:
158  return "dropout";
159  case CROP:
160  return "crop";
161  case COST:
162  return "cost";
163  case ROUTE:
164  return "route";
165  case SHORTCUT:
166  return "shortcut";
167  case NORMALIZATION:
168  return "normalization";
169  case BATCHNORM:
170  return "batchnorm";
171  default:
172  break;
173  }
174  return "none";
175 }
176 
178 {
179  network *net = calloc(1, sizeof(network));
180  net->n = n;
181  net->layers = calloc(net->n, sizeof(layer));
182  net->seen = calloc(1, sizeof(size_t));
183  net->t = calloc(1, sizeof(int));
184  net->cost = calloc(1, sizeof(float));
185  return net;
186 }
187 
189 {
190 #ifdef GPU
191  if(netp->gpu_index >= 0){
192  forward_network_gpu(netp);
193  return;
194  }
195 #endif
196  network net = *netp;
197  int i;
198  for(i = 0; i < net.n; ++i){
199  net.index = i;
200  layer l = net.layers[i];
201  if(l.delta){
202  fill_cpu(l.outputs * l.batch, 0, l.delta, 1);
203  }
204  l.forward(l, net);
205  net.input = l.output;
206  if(l.truth) {
207  net.truth = l.output;
208  }
209  }
210  calc_network_cost(netp);
211 }
212 
214 {
215 #ifdef GPU
216  if(netp->gpu_index >= 0){
217  update_network_gpu(netp);
218  return;
219  }
220 #endif
221  network net = *netp;
222  int i;
223  update_args a = {0};
224  a.batch = net.batch*net.subdivisions;
226  a.momentum = net.momentum;
227  a.decay = net.decay;
228  a.adam = net.adam;
229  a.B1 = net.B1;
230  a.B2 = net.B2;
231  a.eps = net.eps;
232  ++*net.t;
233  a.t = *net.t;
234 
235  for(i = 0; i < net.n; ++i){
236  layer l = net.layers[i];
237  if(l.update){
238  l.update(l, a);
239  }
240  }
241 }
242 
244 {
245  network net = *netp;
246  int i;
247  float sum = 0;
248  int count = 0;
249  for(i = 0; i < net.n; ++i){
250  if(net.layers[i].cost){
251  sum += net.layers[i].cost[0];
252  ++count;
253  }
254  }
255  *net.cost = sum/count;
256 }
257 
259 {
260  return max_index(net->output, net->outputs);
261 }
262 
264 {
265 #ifdef GPU
266  if(netp->gpu_index >= 0){
267  backward_network_gpu(netp);
268  return;
269  }
270 #endif
271  network net = *netp;
272  int i;
273  network orig = net;
274  for(i = net.n-1; i >= 0; --i){
275  layer l = net.layers[i];
276  if(l.stopbackward) break;
277  if(i == 0){
278  net = orig;
279  }else{
280  layer prev = net.layers[i-1];
281  net.input = prev.output;
282  net.delta = prev.delta;
283  }
284  net.index = i;
285  l.backward(l, net);
286  }
287 }
288 
290 {
291  *net->seen += net->batch;
292  net->train = 1;
293  forward_network(net);
294  backward_network(net);
295  float error = *net->cost;
296  if(((*net->seen)/net->batch)%net->subdivisions == 0) update_network(net);
297  return error;
298 }
299 
301 {
302  int batch = net->batch;
303 
304  int i;
305  float sum = 0;
306  for(i = 0; i < n; ++i){
307  get_random_batch(d, batch, net->input, net->truth);
308  float err = train_network_datum(net);
309  sum += err;
310  }
311  return (float)sum/(n*batch);
312 }
313 
315 {
316  assert(d.X.rows % net->batch == 0);
317  int batch = net->batch;
318  int n = d.X.rows / batch;
319 
320  int i;
321  float sum = 0;
322  for(i = 0; i < n; ++i){
323  get_next_batch(d, batch, i*batch, net->input, net->truth);
324  float err = train_network_datum(net);
325  sum += err;
326  }
327  return (float)sum/(n*batch);
328 }
329 
331 {
332  int i;
333  for(i = 0; i < net->n; ++i){
334  net->layers[i].temperature = t;
335  }
336 }
337 
338 
340 {
341  net->batch = b;
342  int i;
343  for(i = 0; i < net->n; ++i){
344  net->layers[i].batch = b;
345 #ifdef CUDNN
346  if(net->layers[i].type == CONVOLUTIONAL){
347  cudnn_convolutional_setup(net->layers + i);
348  }
349  if(net->layers[i].type == DECONVOLUTIONAL){
350  layer *l = net->layers + i;
351  cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, l->out_h, l->out_w);
352  cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1);
353  }
354 #endif
355  }
356 }
357 
358 int resize_network(network *net, int w, int h)
359 {
360 #ifdef GPU
362  cuda_free(net->workspace);
363 #endif
364  int i;
365  //if(w == net->w && h == net->h) return 0;
366  net->w = w;
367  net->h = h;
368  int inputs = 0;
369  size_t workspace_size = 0;
370  //fprintf(stderr, "Resizing to %d x %d...\n", w, h);
371  //fflush(stderr);
372  for (i = 0; i < net->n; ++i){
373  layer l = net->layers[i];
374  if(l.type == CONVOLUTIONAL){
375  resize_convolutional_layer(&l, w, h);
376  }else if(l.type == CROP){
377  resize_crop_layer(&l, w, h);
378  }else if(l.type == MAXPOOL){
379  resize_maxpool_layer(&l, w, h);
380  }else if(l.type == REGION){
381  resize_region_layer(&l, w, h);
382  }else if(l.type == YOLO){
383  resize_yolo_layer(&l, w, h);
384  }else if(l.type == ROUTE){
385  resize_route_layer(&l, net);
386  }else if(l.type == SHORTCUT){
387  resize_shortcut_layer(&l, w, h);
388  }else if(l.type == UPSAMPLE){
389  resize_upsample_layer(&l, w, h);
390  }else if(l.type == REORG){
391  resize_reorg_layer(&l, w, h);
392  }else if(l.type == AVGPOOL){
393  resize_avgpool_layer(&l, w, h);
394  }else if(l.type == NORMALIZATION){
395  resize_normalization_layer(&l, w, h);
396  }else if(l.type == COST){
397  resize_cost_layer(&l, inputs);
398  }else{
399  error("Cannot resize this type of layer");
400  }
401  if(l.workspace_size > workspace_size) workspace_size = l.workspace_size;
402  if(l.workspace_size > 2000000000) assert(0);
403  inputs = l.outputs;
404  net->layers[i] = l;
405  w = l.out_w;
406  h = l.out_h;
407  if(l.type == AVGPOOL) break;
408  }
409  layer out = get_network_output_layer(net);
410  net->inputs = net->layers[0].inputs;
411  net->outputs = out.outputs;
412  net->truths = out.outputs;
413  if(net->layers[net->n-1].truths) net->truths = net->layers[net->n-1].truths;
414  net->output = out.output;
415  free(net->input);
416  free(net->truth);
417  net->input = calloc(net->inputs*net->batch, sizeof(float));
418  net->truth = calloc(net->truths*net->batch, sizeof(float));
419 #ifdef GPU
420  if(gpu_index >= 0){
421  cuda_free(net->input_gpu);
422  cuda_free(net->truth_gpu);
423  net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch);
424  net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch);
425  if(workspace_size){
426  net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1);
427  }
428  }else {
429  free(net->workspace);
430  net->workspace = calloc(1, workspace_size);
431  }
432 #else
433  free(net->workspace);
434  net->workspace = calloc(1, workspace_size);
435 #endif
436  //fprintf(stderr, " Done!\n");
437  return 0;
438 }
439 
441 {
442  int i;
443  for(i = 0; i < net->n; ++i){
444  if(net->layers[i].type == DETECTION){
445  return net->layers[i];
446  }
447  }
448  fprintf(stderr, "Detection layer not found!!\n");
449  layer l = {0};
450  return l;
451 }
452 
454 {
455  layer l = net->layers[i];
456 #ifdef GPU
457  //cuda_pull_array(l.output_gpu, l.output, l.outputs);
458 #endif
459  if (l.out_w && l.out_h && l.out_c){
460  return float_to_image(l.out_w, l.out_h, l.out_c, l.output);
461  }
462  image def = {0};
463  return def;
464 }
465 
467 {
468  int i;
469  for(i = net->n-1; i >= 0; --i){
470  image m = get_network_image_layer(net, i);
471  if(m.h != 0) return m;
472  }
473  image def = {0};
474  return def;
475 }
476 
478 {
479  image *prev = 0;
480  int i;
481  char buff[256];
482  for(i = 0; i < net->n; ++i){
483  sprintf(buff, "Layer %d", i);
484  layer l = net->layers[i];
485  if(l.type == CONVOLUTIONAL){
486  prev = visualize_convolutional_layer(l, buff, prev);
487  }
488  }
489 }
490 
/* Write the indices of the k highest-scoring outputs into index[0..k-1]. */
void top_predictions(network *net, int k, int *index)
{
    top_k(net->output, net->outputs, k, index);
}
495 
496 
497 float *network_predict(network *net, float *input)
498 {
499  network orig = *net;
500  net->input = input;
501  net->truth = 0;
502  net->train = 0;
503  net->delta = 0;
504  forward_network(net);
505  float *out = net->output;
506  *net = orig;
507  return out;
508 }
509 
510 int num_detections(network *net, float thresh)
511 {
512  int i;
513  int s = 0;
514  for(i = 0; i < net->n; ++i){
515  layer l = net->layers[i];
516  if(l.type == YOLO){
517  s += yolo_num_detections(l, thresh);
518  }
519  if(l.type == DETECTION || l.type == REGION){
520  s += l.w*l.h*l.n;
521  }
522  }
523  return s;
524 }
525 
526 detection *make_network_boxes(network *net, float thresh, int *num)
527 {
528  layer l = net->layers[net->n - 1];
529  int i;
530  int nboxes = num_detections(net, thresh);
531  if(num) *num = nboxes;
532  detection *dets = calloc(nboxes, sizeof(detection));
533  for(i = 0; i < nboxes; ++i){
534  dets[i].prob = calloc(l.classes, sizeof(float));
535  if(l.coords > 4){
536  dets[i].mask = calloc(l.coords-4, sizeof(float));
537  }
538  }
539  return dets;
540 }
541 
542 void fill_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, detection *dets)
543 {
544  int j;
545  for(j = 0; j < net->n; ++j){
546  layer l = net->layers[j];
547  if(l.type == YOLO){
548  int count = get_yolo_detections(l, w, h, net->w, net->h, thresh, map, relative, dets);
549  dets += count;
550  }
551  if(l.type == REGION){
552  get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets);
553  dets += l.w*l.h*l.n;
554  }
555  if(l.type == DETECTION){
556  get_detection_detections(l, w, h, thresh, dets);
557  dets += l.w*l.h*l.n;
558  }
559  }
560 }
561 
562 detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num)
563 {
564  detection *dets = make_network_boxes(net, thresh, num);
565  fill_network_boxes(net, w, h, thresh, hier, map, relative, dets);
566  return dets;
567 }
568 
569 void free_detections(detection *dets, int n)
570 {
571  int i;
572  for(i = 0; i < n; ++i){
573  free(dets[i].prob);
574  if(dets[i].mask) free(dets[i].mask);
575  }
576  free(dets);
577 }
578 
580 {
581  image imr = letterbox_image(im, net->w, net->h);
582  set_batch_network(net, 1);
583  float *p = network_predict(net, imr.data);
584  free_image(imr);
585  return p;
586 }
587 
/* Network input width in pixels. */
int network_width(network *net){return net->w;}
/* Network input height in pixels. */
int network_height(network *net){return net->h;}
590 
592 {
593  int i,j,b,m;
594  int k = net->outputs;
595  matrix pred = make_matrix(test.X.rows, k);
596  float *X = calloc(net->batch*test.X.rows, sizeof(float));
597  for(i = 0; i < test.X.rows; i += net->batch){
598  for(b = 0; b < net->batch; ++b){
599  if(i+b == test.X.rows) break;
600  memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float));
601  }
602  for(m = 0; m < n; ++m){
603  float *out = network_predict(net, X);
604  for(b = 0; b < net->batch; ++b){
605  if(i+b == test.X.rows) break;
606  for(j = 0; j < k; ++j){
607  pred.vals[i+b][j] += out[j+b*k]/n;
608  }
609  }
610  }
611  }
612  free(X);
613  return pred;
614 }
615 
617 {
618  int i,j,b;
619  int k = net->outputs;
620  matrix pred = make_matrix(test.X.rows, k);
621  float *X = calloc(net->batch*test.X.cols, sizeof(float));
622  for(i = 0; i < test.X.rows; i += net->batch){
623  for(b = 0; b < net->batch; ++b){
624  if(i+b == test.X.rows) break;
625  memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float));
626  }
627  float *out = network_predict(net, X);
628  for(b = 0; b < net->batch; ++b){
629  if(i+b == test.X.rows) break;
630  for(j = 0; j < k; ++j){
631  pred.vals[i+b][j] = out[j+b*k];
632  }
633  }
634  }
635  free(X);
636  return pred;
637 }
638 
640 {
641  int i,j;
642  for(i = 0; i < net->n; ++i){
643  layer l = net->layers[i];
644  float *output = l.output;
645  int n = l.outputs;
646  float mean = mean_array(output, n);
647  float vari = variance_array(output, n);
648  fprintf(stderr, "Layer %d - Mean: %f, Variance: %f\n",i,mean, vari);
649  if(n > 100) n = 100;
650  for(j = 0; j < n; ++j) fprintf(stderr, "%f, ", output[j]);
651  if(n == 100)fprintf(stderr,".....\n");
652  fprintf(stderr, "\n");
653  }
654 }
655 
/* Compare two networks on the same test set via a 2x2 contingency table:
 * a = both wrong, b = only n2 right, c = only n1 right, d = both right.
 * The final printed value is a McNemar-style statistic
 * (|b-c|-1)^2 / (b+c) on the discordant pairs. */
void compare_networks(network *n1, network *n2, data test)
{
    matrix g1 = network_predict_data(n1, test);
    matrix g2 = network_predict_data(n2, test);
    int i;
    int a,b,c,d;
    a = b = c = d = 0;
    for(i = 0; i < g1.rows; ++i){
        int truth = max_index(test.y.vals[i], test.y.cols);
        int p1 = max_index(g1.vals[i], g1.cols);
        int p2 = max_index(g2.vals[i], g2.cols);
        if(p1 == truth){
            if(p2 == truth) ++d;
            else ++c;
        }else{
            if(p2 == truth) ++b;
            else ++a;
        }
    }
    printf("%5d %5d\n%5d %5d\n", a, b, c, d);
    float num = pow((abs(b - c) - 1.), 2.);
    float den = b + c;
    printf("%f\n", num/den);
}
680 
682 {
683  matrix guess = network_predict_data(net, d);
684  float acc = matrix_topk_accuracy(d.y, guess,1);
685  free_matrix(guess);
686  return acc;
687 }
688 
689 float *network_accuracies(network *net, data d, int n)
690 {
691  static float acc[2];
692  matrix guess = network_predict_data(net, d);
693  acc[0] = matrix_topk_accuracy(d.y, guess, 1);
694  acc[1] = matrix_topk_accuracy(d.y, guess, n);
695  free_matrix(guess);
696  return acc;
697 }
698 
700 {
701  int i;
702  for(i = net->n - 1; i >= 0; --i){
703  if(net->layers[i].type != COST) break;
704  }
705  return net->layers[i];
706 }
707 
709 {
710  matrix guess = network_predict_data_multi(net, d, n);
711  float acc = matrix_topk_accuracy(d.y, guess,1);
712  free_matrix(guess);
713  return acc;
714 }
715 
717 {
718  int i;
719  for(i = 0; i < net->n; ++i){
720  free_layer(net->layers[i]);
721  }
722  free(net->layers);
723  if(net->input) free(net->input);
724  if(net->truth) free(net->truth);
725 #ifdef GPU
726  if(net->input_gpu) cuda_free(net->input_gpu);
727  if(net->truth_gpu) cuda_free(net->truth_gpu);
728 #endif
729  free(net);
730 }
731 
732 // Some day...
733 // ^ What the hell is this comment for?
734 
735 
737 {
738  int i;
739  for(i = net->n - 1; i >= 0; --i){
740  if(net->layers[i].type != COST) break;
741  }
742  return net->layers[i];
743 }
744 
746 {
747  return net->layers[0].inputs;
748 }
749 
751 {
752  return network_output_layer(net).outputs;
753 }
754 
756 {
757  return network_output_layer(net).output;
758 }
759 
760 #ifdef GPU
761 
762 void forward_network_gpu(network *netp)
763 {
764  network net = *netp;
766  cuda_push_array(net.input_gpu, net.input, net.inputs*net.batch);
767  if(net.truth){
768  cuda_push_array(net.truth_gpu, net.truth, net.truths*net.batch);
769  }
770 
771  int i;
772  for(i = 0; i < net.n; ++i){
773  net.index = i;
774  layer l = net.layers[i];
775  if(l.delta_gpu){
776  fill_gpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
777  }
778  l.forward_gpu(l, net);
779  net.input_gpu = l.output_gpu;
780  net.input = l.output;
781  if(l.truth) {
782  net.truth_gpu = l.output_gpu;
783  net.truth = l.output;
784  }
785  }
786  pull_network_output(netp);
787  calc_network_cost(netp);
788 }
789 
790 void backward_network_gpu(network *netp)
791 {
792  int i;
793  network net = *netp;
794  network orig = net;
796  for(i = net.n-1; i >= 0; --i){
797  layer l = net.layers[i];
798  if(l.stopbackward) break;
799  if(i == 0){
800  net = orig;
801  }else{
802  layer prev = net.layers[i-1];
803  net.input = prev.output;
804  net.delta = prev.delta;
805  net.input_gpu = prev.output_gpu;
806  net.delta_gpu = prev.delta_gpu;
807  }
808  net.index = i;
809  l.backward_gpu(l, net);
810  }
811 }
812 
813 void update_network_gpu(network *netp)
814 {
815  network net = *netp;
817  int i;
818  update_args a = {0};
819  a.batch = net.batch*net.subdivisions;
821  a.momentum = net.momentum;
822  a.decay = net.decay;
823  a.adam = net.adam;
824  a.B1 = net.B1;
825  a.B2 = net.B2;
826  a.eps = net.eps;
827  ++*net.t;
828  a.t = (*net.t);
829 
830  for(i = 0; i < net.n; ++i){
831  layer l = net.layers[i];
832  if(l.update_gpu){
833  l.update_gpu(l, a);
834  }
835  }
836 }
837 
838 void harmless_update_network_gpu(network *netp)
839 {
840  network net = *netp;
842  int i;
843  for(i = 0; i < net.n; ++i){
844  layer l = net.layers[i];
845  if(l.weight_updates_gpu) fill_gpu(l.nweights, 0, l.weight_updates_gpu, 1);
846  if(l.bias_updates_gpu) fill_gpu(l.nbiases, 0, l.bias_updates_gpu, 1);
847  if(l.scale_updates_gpu) fill_gpu(l.nbiases, 0, l.scale_updates_gpu, 1);
848  }
849 }
850 
/* Arguments handed to a training worker thread (heap-allocated by
 * train_network_in_thread; freed by the thread). */
typedef struct {
    network *net;   /* network to train (per-GPU instance) */
    data d;         /* the data shard this thread trains on */
    float *err;     /* out: training error is written here */
} train_args;
856 
/* Worker-thread entry: copy the heap-allocated args, free them, bind the
 * thread to the network's GPU, then train and report the error through
 * args.err.  (The copy-then-free order matters: args are owned here.) */
void *train_thread(void *ptr)
{
    train_args args = *(train_args*)ptr;
    free(ptr);
    cuda_set_device(args.net->gpu_index);
    *args.err = train_network(args.net, args.d);
    return 0;
}
865 
866 pthread_t train_network_in_thread(network *net, data d, float *err)
867 {
868  pthread_t thread;
869  train_args *ptr = (train_args *)calloc(1, sizeof(train_args));
870  ptr->net = net;
871  ptr->d = d;
872  ptr->err = err;
873  if(pthread_create(&thread, 0, train_thread, ptr)) error("Thread creation failed");
874  return thread;
875 }
876 
/* Accumulate one replica's pulled weights into the base layer for
 * multi-GPU averaging.  Note: l's *_updates buffers hold *weights* here —
 * pull_weights() reuses them as host-side scratch for the device weights. */
void merge_weights(layer l, layer base)
{
    if (l.type == CONVOLUTIONAL) {
        axpy_cpu(l.n, 1, l.bias_updates, 1, base.biases, 1);
        axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weights, 1);
        if (l.scales) {
            axpy_cpu(l.n, 1, l.scale_updates, 1, base.scales, 1);
        }
    } else if(l.type == CONNECTED) {
        axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.biases, 1);
        axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weights, 1);
    }
}
890 
891 void scale_weights(layer l, float s)
892 {
893  if (l.type == CONVOLUTIONAL) {
894  scal_cpu(l.n, s, l.biases, 1);
895  scal_cpu(l.nweights, s, l.weights, 1);
896  if (l.scales) {
897  scal_cpu(l.n, s, l.scales, 1);
898  }
899  } else if(l.type == CONNECTED) {
900  scal_cpu(l.outputs, s, l.biases, 1);
901  scal_cpu(l.outputs*l.inputs, s, l.weights, 1);
902  }
903 }
904 
905 
/* Copy a layer's device weights/biases to the host.  Deliberately lands
 * them in the *_updates host buffers, which sync_layer() uses as scratch
 * so the real host weight buffers stay untouched. */
void pull_weights(layer l)
{
    if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){
        cuda_pull_array(l.biases_gpu, l.bias_updates, l.n);
        cuda_pull_array(l.weights_gpu, l.weight_updates, l.nweights);
        if(l.scales) cuda_pull_array(l.scales_gpu, l.scale_updates, l.n);
    } else if(l.type == CONNECTED){
        cuda_pull_array(l.biases_gpu, l.bias_updates, l.outputs);
        cuda_pull_array(l.weights_gpu, l.weight_updates, l.outputs*l.inputs);
    }
}
917 
918 void push_weights(layer l)
919 {
920  if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){
921  cuda_push_array(l.biases_gpu, l.biases, l.n);
922  cuda_push_array(l.weights_gpu, l.weights, l.nweights);
923  if(l.scales) cuda_push_array(l.scales_gpu, l.scales, l.n);
924  } else if(l.type == CONNECTED){
925  cuda_push_array(l.biases_gpu, l.biases, l.outputs);
926  cuda_push_array(l.weights_gpu, l.weights, l.outputs*l.inputs);
927  }
928 }
929 
930 void distribute_weights(layer l, layer base)
931 {
932  if (l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL) {
933  cuda_push_array(l.biases_gpu, base.biases, l.n);
934  cuda_push_array(l.weights_gpu, base.weights, l.nweights);
935  if (base.scales) cuda_push_array(l.scales_gpu, base.scales, l.n);
936  } else if (l.type == CONNECTED) {
937  cuda_push_array(l.biases_gpu, base.biases, l.outputs);
938  cuda_push_array(l.weights_gpu, base.weights, l.outputs*l.inputs);
939  }
940 }
941 
942 
943 /*
944 
945  void pull_updates(layer l)
946  {
947  if(l.type == CONVOLUTIONAL){
948  cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n);
949  cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.nweights);
950  if(l.scale_updates) cuda_pull_array(l.scale_updates_gpu, l.scale_updates, l.n);
951  } else if(l.type == CONNECTED){
952  cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs);
953  cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs);
954  }
955  }
956 
957  void push_updates(layer l)
958  {
959  if(l.type == CONVOLUTIONAL){
960  cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n);
961  cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights);
962  if(l.scale_updates) cuda_push_array(l.scale_updates_gpu, l.scale_updates, l.n);
963  } else if(l.type == CONNECTED){
964  cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.outputs);
965  cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs);
966  }
967  }
968 
969  void update_layer(layer l, network net)
970  {
971  int update_batch = net.batch*net.subdivisions;
972  float rate = get_current_rate(net);
973  l.t = get_current_batch(net);
974  if(l.update_gpu){
975  l.update_gpu(l, update_batch, rate*l.learning_rate_scale, net.momentum, net.decay);
976  }
977  }
978  void merge_updates(layer l, layer base)
979  {
980  if (l.type == CONVOLUTIONAL) {
981  axpy_cpu(l.n, 1, l.bias_updates, 1, base.bias_updates, 1);
982  axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weight_updates, 1);
983  if (l.scale_updates) {
984  axpy_cpu(l.n, 1, l.scale_updates, 1, base.scale_updates, 1);
985  }
986  } else if(l.type == CONNECTED) {
987  axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.bias_updates, 1);
988  axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weight_updates, 1);
989  }
990  }
991 
992  void distribute_updates(layer l, layer base)
993  {
994  if(l.type == CONVOLUTIONAL || l.type == DECONVOLUTIONAL){
995  cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.n);
996  cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.nweights);
997  if(base.scale_updates) cuda_push_array(l.scale_updates_gpu, base.scale_updates, l.n);
998  } else if(l.type == CONNECTED){
999  cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.outputs);
1000  cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.outputs*l.inputs);
1001  }
1002  }
1003  */
1004 
1005 /*
1006  void sync_layer(network *nets, int n, int j)
1007  {
1008  int i;
1009  network net = nets[0];
1010  layer base = net.layers[j];
1011  scale_weights(base, 0);
1012  for (i = 0; i < n; ++i) {
1013  cuda_set_device(nets[i].gpu_index);
1014  layer l = nets[i].layers[j];
1015  pull_weights(l);
1016  merge_weights(l, base);
1017  }
1018  scale_weights(base, 1./n);
1019  for (i = 0; i < n; ++i) {
1020  cuda_set_device(nets[i].gpu_index);
1021  layer l = nets[i].layers[j];
1022  distribute_weights(l, base);
1023  }
1024  }
1025  */
1026 
/* Average layer j's weights across n replicas: zero the base copy
 * (scale by 0), pull-and-accumulate each replica's weights, rescale by
 * 1/n, then push the average back to every device.  cuda_set_device must
 * precede each replica's transfer so it targets the right GPU. */
void sync_layer(network **nets, int n, int j)
{
    int i;
    network *net = nets[0];
    layer base = net->layers[j];
    scale_weights(base, 0);
    for (i = 0; i < n; ++i) {
        cuda_set_device(nets[i]->gpu_index);
        layer l = nets[i]->layers[j];
        pull_weights(l);
        merge_weights(l, base);
    }
    scale_weights(base, 1./n);
    for (i = 0; i < n; ++i) {
        cuda_set_device(nets[i]->gpu_index);
        layer l = nets[i]->layers[j];
        distribute_weights(l, base);
    }
}
1046 
/* Arguments for a per-layer synchronization worker thread
 * (heap-allocated by sync_layer_in_thread; freed by the thread). */
typedef struct{
    network **nets;   /* all replica networks */
    int n;            /* number of replicas */
    int j;            /* layer index to synchronize */
} sync_args;
1052 
1053 void *sync_layer_thread(void *ptr)
1054 {
1055  sync_args args = *(sync_args*)ptr;
1056  sync_layer(args.nets, args.n, args.j);
1057  free(ptr);
1058  return 0;
1059 }
1060 
1061 pthread_t sync_layer_in_thread(network **nets, int n, int j)
1062 {
1063  pthread_t thread;
1064  sync_args *ptr = (sync_args *)calloc(1, sizeof(sync_args));
1065  ptr->nets = nets;
1066  ptr->n = n;
1067  ptr->j = j;
1068  if(pthread_create(&thread, 0, sync_layer_thread, ptr)) error("Thread creation failed");
1069  return thread;
1070 }
1071 
/* Synchronize all replica networks: bump net 0's seen-counter to account
 * for the work done on the other n-1 replicas since the last sync, copy
 * that counter to every replica, then average every layer's weights in
 * parallel (one thread per layer). */
void sync_nets(network **nets, int n, int interval)
{
    int j;
    int layers = nets[0]->n;
    pthread_t *threads = (pthread_t *) calloc(layers, sizeof(pthread_t));

    *(nets[0]->seen) += interval * (n-1) * nets[0]->batch * nets[0]->subdivisions;
    for (j = 0; j < n; ++j){
        *(nets[j]->seen) = *(nets[0]->seen);
    }
    for (j = 0; j < layers; ++j) {
        threads[j] = sync_layer_in_thread(nets, n, j);
    }
    for (j = 0; j < layers; ++j) {
        pthread_join(threads[j], 0);
    }
    free(threads);
}
1090 
/* Train n replica networks in parallel, one thread per replica, each on
 * an equal shard of d (d.X.rows must equal batch*subdivisions*n).  Every
 * `interval` batches the replicas' weights are averaged via sync_nets().
 * Returns the mean of the per-replica errors. */
float train_networks(network **nets, int n, data d, int interval)
{
    int i;
    int batch = nets[0]->batch;
    int subdivisions = nets[0]->subdivisions;
    assert(batch * subdivisions * n == d.X.rows);
    pthread_t *threads = (pthread_t *) calloc(n, sizeof(pthread_t));
    float *errors = (float *) calloc(n, sizeof(float));

    float sum = 0;
    for(i = 0; i < n; ++i){
        data p = get_data_part(d, i, n);
        threads[i] = train_network_in_thread(nets[i], p, errors + i);
    }
    for(i = 0; i < n; ++i){
        pthread_join(threads[i], 0);
        //printf("%f\n", errors[i]);
        sum += errors[i];
    }
    //cudaDeviceSynchronize();
    if (get_current_batch(nets[0]) % interval == 0) {
        printf("Syncing... ");
        fflush(stdout);
        sync_nets(nets, n, interval);
        printf("Done!\n");
    }
    //cudaDeviceSynchronize();
    free(threads);
    free(errors);
    return (float)sum/(n);
}
1122 
1123 void pull_network_output(network *net)
1124 {
1126  cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch);
1127 }
1128 
1129 #endif
Definition: darknet.h:82
size_t workspace_size
Definition: darknet.h:336
float momentum
Definition: darknet.h:104
void resize_normalization_layer(layer *layer, int w, int h)
float hue
Definition: darknet.h:576
int min_crop
Definition: darknet.h:470
float * scales
Definition: darknet.h:239
float decay
Definition: darknet.h:447
image * visualize_convolutional_layer(convolutional_layer l, char *window, image *prev_weights)
void get_random_batch(data d, int n, float *X, float *y)
Definition: data.c:1449
float * biases
Definition: darknet.h:236
float train_network_sgd(network *net, data d, int n)
Definition: network.c:300
int batch
Definition: darknet.h:436
void visualize_network(network *net)
Definition: network.c:477
float temperature
Definition: darknet.h:210
void reset_rnn(network *net)
Definition: network.c:85
int rows
Definition: darknet.h:533
float * weight_updates
Definition: darknet.h:243
float variance_array(float *a, int n)
Definition: utils.c:514
int w
Definition: darknet.h:140
int network_width(network *net)
Definition: network.c:588
Definition: darknet.h:87
matrix network_predict_data_multi(network *net, data test, int n)
Definition: network.c:591
int n
Definition: darknet.h:142
int truths
Definition: darknet.h:139
int w
Definition: darknet.h:559
float learning_rate
Definition: darknet.h:445
float get_current_rate(network *net)
Definition: network.c:90
int cols
Definition: darknet.h:533
void resize_shortcut_layer(layer *l, int w, int h)
int max_index(float *a, int n)
Definition: utils.c:619
float hue
Definition: darknet.h:478
float momentum
Definition: darknet.h:446
void(* update)(struct layer, update_args)
Definition: darknet.h:125
void(* forward_gpu)(struct layer, struct network)
Definition: darknet.h:126
void free_network(network *net)
Definition: network.c:716
image get_network_image(network *net)
Definition: network.c:466
int network_inputs(network *net)
Definition: network.c:745
float * network_predict(network *net, float *input)
Definition: network.c:497
void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets)
Definition: region_layer.c:364
int max
Definition: darknet.h:565
float learning_rate
Definition: darknet.h:103
int step
Definition: darknet.h:452
float * truth
Definition: darknet.h:485
float aspect
Definition: darknet.h:573
Definition: darknet.h:73
matrix network_predict_data(network *net, data test)
Definition: network.c:616
void free_detections(detection *dets, int n)
Definition: network.c:569
void(* backward_gpu)(struct layer, struct network)
Definition: darknet.h:127
int yolo_num_detections(layer l, float thresh)
Definition: yolo_layer.c:275
size_t * seen
Definition: darknet.h:437
int gpu_index
Definition: darknet.h:481
int stopbackward
Definition: darknet.h:204
void print_network(network *net)
Definition: network.c:639
void top_k(float *a, int n, int k, int *index)
Definition: utils.c:237
void resize_reorg_layer(layer *l, int w, int h)
Definition: reorg_layer.c:58
void(* update_gpu)(struct layer, update_args)
Definition: darknet.h:128
int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets)
Definition: yolo_layer.c:316
float decay
Definition: darknet.h:105
float scale
Definition: darknet.h:449
Definition: darknet.h:512
void resize_avgpool_layer(avgpool_layer *l, int w, int h)
Definition: avgpool_layer.c:33
layer get_network_detection_layer(network *net)
Definition: network.c:440
void(* forward)(struct layer, struct network)
Definition: darknet.h:123
int out_w
Definition: darknet.h:141
int h
Definition: darknet.h:558
int max_crop
Definition: darknet.h:469
image float_to_image(int w, int h, int c, float *data)
Definition: image.c:774
void fill_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, detection *dets)
Definition: network.c:542
int size
Definition: darknet.h:565
network * parse_network_cfg(char *filename)
Definition: parser.c:742
float * delta
Definition: darknet.h:486
detection * get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num)
Definition: network.c:562
float B1
Definition: darknet.h:107
int nweights
Definition: darknet.h:136
int burn_in
Definition: darknet.h:457
LAYER_TYPE
Definition: darknet.h:64
int out_c
Definition: darknet.h:141
float exposure
Definition: darknet.h:575
Definition: darknet.h:79
void fill_gpu(int N, float ALPHA, float *X, int INCX)
int * steps
Definition: darknet.h:455
size_t get_current_batch(network *net)
Definition: network.c:63
void get_detection_detections(layer l, int w, int h, float thresh, detection *dets)
int h
Definition: darknet.h:514
int * t
Definition: darknet.h:438
int max_batches
Definition: darknet.h:453
int center
Definition: darknet.h:569
float * workspace
Definition: darknet.h:487
image get_network_image_layer(network *net, int i)
Definition: network.c:453
void update_network(network *netp)
Definition: network.c:213
letterbox_image
Definition: darknet.py:98
float network_accuracy(network *net, data d)
Definition: network.c:681
Definition: darknet.h:431
Definition: darknet.h:74
float * network_predict_image(network *net, image im)
Definition: network.c:579
int resize_network(network *net, int w, int h)
Definition: network.c:358
void backward_network(network *netp)
Definition: network.c:263
float aspect
Definition: darknet.h:475
void fill_cpu(int N, float ALPHA, float *X, int INCX)
Definition: blas.c:190
int batch
Definition: darknet.h:102
int center
Definition: darknet.h:473
layer * layers
Definition: darknet.h:441
float B1
Definition: darknet.h:460
void resize_yolo_layer(layer *l, int w, int h)
Definition: yolo_layer.c:63
float eps
Definition: darknet.h:462
int train
Definition: darknet.h:488
Definition: darknet.h:431
free_image
Definition: darknet.py:95
float * cost
Definition: darknet.h:490
float mean_array(float *a, int n)
Definition: utils.c:487
network * load_network(char *cfg, char *weights, int clear)
Definition: network.c:53
int subdivisions
Definition: darknet.h:440
int h
Definition: darknet.h:140
Definition: darknet.h:431
float * delta
Definition: darknet.h:245
int out_h
Definition: darknet.h:141
void resize_maxpool_layer(maxpool_layer *l, int w, int h)
Definition: maxpool_layer.c:54
Definition: darknet.h:80
float * scales
Definition: darknet.h:454
void resize_region_layer(layer *l, int w, int h)
Definition: region_layer.c:56
int inputs
Definition: darknet.h:134
void get_next_batch(data d, int n, int offset, float *X, float *y)
Definition: data.c:1459
int adam
Definition: darknet.h:106
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
Definition: blas.c:178
void(* backward)(struct layer, struct network)
Definition: darknet.h:124
int network_outputs(network *net)
Definition: network.c:750
void resize_cost_layer(cost_layer *l, int inputs)
Definition: cost_layer.c:68
Definition: darknet.h:77
float * prob
Definition: darknet.h:526
int batch
Definition: darknet.h:131
float B2
Definition: darknet.h:461
int num_detections(network *net, float thresh)
Definition: network.c:510
float * output
Definition: darknet.h:246
void scal_cpu(int N, float ALPHA, float *X, int INCX)
Definition: blas.c:184
float saturation
Definition: darknet.h:477
int index
Definition: darknet.h:489
void resize_crop_layer(layer *l, int w, int h)
Definition: crop_layer.c:48
Definition: darknet.h:88
void resize_convolutional_layer(convolutional_layer *l, int w, int h)
float saturation
Definition: darknet.h:574
void top_predictions(network *net, int k, int *index)
Definition: network.c:491
Definition: darknet.h:431
data get_data_part(data d, int part, int total)
Definition: data.c:1613
int truth
Definition: darknet.h:159
layer network_output_layer(network *net)
Definition: network.c:736
Definition: darknet.h:83
float * bias_updates
Definition: darknet.h:237
void free_matrix(matrix m)
Definition: matrix.c:10
int truths
Definition: darknet.h:466
float train_network(network *net, data d)
Definition: network.c:314
void free_layer(layer)
Definition: layer.c:6
void cuda_set_device(int n)
Definition: cuda.c:176
void reset_network_state(network *net, int b)
Definition: network.c:69
void set_batch_network(network *net, int b)
Definition: network.c:339
int coords
Definition: darknet.h:173
int min
Definition: darknet.h:565
float eps
Definition: darknet.h:109
int get_predicted_class_network(network *net)
Definition: network.c:258
matrix X
Definition: darknet.h:540
int num_steps
Definition: darknet.h:456
Definition: darknet.h:90
void resize_upsample_layer(layer *l, int w, int h)
float B2
Definition: darknet.h:108
load_args get_base_args(network *net)
Definition: network.c:35
int classes
Definition: darknet.h:172
float * mask
Definition: darknet.h:527
int network_height(network *net)
Definition: network.c:589
int gpu_index
Definition: cuda.c:1
int outputs
Definition: darknet.h:465
LAYER_TYPE type
Definition: darknet.h:120
float * input
Definition: darknet.h:484
float * scale_updates
Definition: darknet.h:240
float ** vals
Definition: darknet.h:534
void compare_networks(network *n1, network *n2, data test)
Definition: network.c:656
layer get_network_output_layer(network *net)
Definition: network.c:699
matrix make_matrix(int rows, int cols)
Definition: matrix.c:91
int h
Definition: darknet.h:468
int n
Definition: darknet.h:435
float * network_output(network *net)
Definition: network.c:755
detection * make_network_boxes(network *net, float thresh, int *num)
Definition: network.c:526
network * make_network(int n)
Definition: network.c:177
float angle
Definition: darknet.h:572
float matrix_topk_accuracy(matrix truth, matrix guess, int k)
Definition: matrix.c:17
float * output
Definition: darknet.h:442
Definition: darknet.h:81
int outputs
Definition: darknet.h:135
int nbiases
Definition: darknet.h:137
float network_accuracy_multi(network *net, data d, int n)
Definition: network.c:708
void set_temp_network(network *net, float t)
Definition: network.c:330
int adam
Definition: darknet.h:459
void resize_route_layer(route_layer *l, network *net)
Definition: route_layer.c:40
Definition: darknet.h:72
float rand_uniform(float min, float max)
Definition: utils.c:698
void calc_network_cost(network *netp)
Definition: network.c:243
float train_network_datum(network *net)
Definition: network.c:289
float * network_accuracies(network *net, data d, int n)
Definition: network.c:689
Definition: darknet.h:431
void error(const char *s)
Definition: utils.c:253
learning_rate_policy policy
Definition: darknet.h:443
float power
Definition: darknet.h:450
void load_weights(network *net, char *filename)
Definition: parser.c:1308
int w
Definition: darknet.h:468
Definition: darknet.h:538
float * cost
Definition: darknet.h:222
int inputs
Definition: darknet.h:464
Definition: darknet.h:119
char * get_layer_string(LAYER_TYPE a)
Definition: network.c:122
float * data
Definition: darknet.h:516
matrix y
Definition: darknet.h:541
float exposure
Definition: darknet.h:476
float * weights
Definition: darknet.h:242
float gamma
Definition: darknet.h:448
void forward_network(network *netp)
Definition: network.c:188
float angle
Definition: darknet.h:474