darknet  v3
attention.c
Go to the documentation of this file.
1 #include "darknet.h"
2 
3 #include <sys/time.h>
4 #include <assert.h>
5 
6 void extend_data_truth(data *d, int n, float val)
7 {
8  int i, j;
9  for(i = 0; i < d->y.rows; ++i){
10  d->y.vals[i] = realloc(d->y.vals[i], (d->y.cols+n)*sizeof(float));
11  for(j = 0; j < n; ++j){
12  d->y.vals[i][d->y.cols + j] = val;
13  }
14  }
15  d->y.cols += n;
16 }
17 
19 {
20  int i,b;
21  int k = 1;
22  matrix pred = make_matrix(test.X.rows, k);
23  float *X = calloc(net->batch*test.X.cols, sizeof(float));
24  float *y = calloc(net->batch*test.y.cols, sizeof(float));
25  for(i = 0; i < test.X.rows; i += net->batch){
26  for(b = 0; b < net->batch; ++b){
27  if(i+b == test.X.rows) break;
28  memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float));
29  memcpy(y+b*test.y.cols, test.y.vals[i+b], test.y.cols*sizeof(float));
30  }
31 
32  network orig = *net;
33  net->input = X;
34  net->truth = y;
35  net->train = 0;
36  net->delta = 0;
37  forward_network(net);
38  *net = orig;
39 
40  float *delta = net->layers[net->n-1].output;
41  for(b = 0; b < net->batch; ++b){
42  if(i+b == test.X.rows) break;
43  int t = max_index(y + b*test.y.cols, 1000);
44  float err = sum_array(delta + b*net->outputs, net->outputs);
45  pred.vals[i+b][0] = -err;
46  //pred.vals[i+b][0] = 1-delta[b*net->outputs + t];
47  }
48  }
49  free(X);
50  free(y);
51  return pred;
52 }
53 
54 void train_attention(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
55 {
56  int i, j;
57 
58  float avg_cls_loss = -1;
59  float avg_att_loss = -1;
60  char *base = basecfg(cfgfile);
61  printf("%s\n", base);
62  printf("%d\n", ngpus);
63  network **nets = calloc(ngpus, sizeof(network*));
64 
65  srand(time(0));
66  int seed = rand();
67  for(i = 0; i < ngpus; ++i){
68  srand(seed);
69 #ifdef GPU
70  cuda_set_device(gpus[i]);
71 #endif
72  nets[i] = load_network(cfgfile, weightfile, clear);
73  nets[i]->learning_rate *= ngpus;
74  }
75  srand(time(0));
76  network *net = nets[0];
77 
78  int imgs = net->batch * net->subdivisions * ngpus;
79 
80  printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
81  list *options = read_data_cfg(datacfg);
82 
83  char *backup_directory = option_find_str(options, "backup", "/backup/");
84  char *label_list = option_find_str(options, "labels", "data/labels.list");
85  char *train_list = option_find_str(options, "train", "data/train.list");
86  int classes = option_find_int(options, "classes", 2);
87 
88  char **labels = get_labels(label_list);
89  list *plist = get_paths(train_list);
90  char **paths = (char **)list_to_array(plist);
91  printf("%d\n", plist->size);
92  int N = plist->size;
93  double time;
94 
95  int divs=3;
96  int size=2;
97 
98  load_args args = {0};
99  args.w = divs*net->w/size;
100  args.h = divs*net->h/size;
101  args.size = divs*net->w/size;
102  args.threads = 32;
103  args.hierarchy = net->hierarchy;
104 
105  args.min = net->min_ratio*args.w;
106  args.max = net->max_ratio*args.w;
107  args.angle = net->angle;
108  args.aspect = net->aspect;
109  args.exposure = net->exposure;
110  args.saturation = net->saturation;
111  args.hue = net->hue;
112 
113  args.paths = paths;
114  args.classes = classes;
115  args.n = imgs;
116  args.m = N;
117  args.labels = labels;
118  args.type = CLASSIFICATION_DATA;
119 
120  data train;
121  data buffer;
122  pthread_t load_thread;
123  args.d = &buffer;
124  load_thread = load_data(args);
125 
126  int epoch = (*net->seen)/N;
127  while(get_current_batch(net) < net->max_batches || net->max_batches == 0){
128  time = what_time_is_it_now();
129 
130  pthread_join(load_thread, 0);
131  train = buffer;
132  load_thread = load_data(args);
133  data resized = resize_data(train, net->w, net->h);
134  extend_data_truth(&resized, divs*divs, 0);
135  data *tiles = tile_data(train, divs, size);
136 
137  printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);
138  time = what_time_is_it_now();
139 
140  float aloss = 0;
141  float closs = 0;
142  int z;
143  for (i = 0; i < divs*divs/ngpus; ++i) {
144 #pragma omp parallel for
145  for(j = 0; j < ngpus; ++j){
146  int index = i*ngpus + j;
147  extend_data_truth(tiles+index, divs*divs, SECRET_NUM);
148  matrix deltas = network_loss_data(nets[j], tiles[index]);
149  for(z = 0; z < resized.y.rows; ++z){
150  resized.y.vals[z][train.y.cols + index] = deltas.vals[z][0];
151  }
152  free_matrix(deltas);
153  }
154  }
155  int *inds = calloc(resized.y.rows, sizeof(int));
156  for(z = 0; z < resized.y.rows; ++z){
157  int index = max_index(resized.y.vals[z] + train.y.cols, divs*divs);
158  inds[z] = index;
159  for(i = 0; i < divs*divs; ++i){
160  resized.y.vals[z][train.y.cols + i] = (i == index)? 1 : 0;
161  }
162  }
163  data best = select_data(tiles, inds);
164  free(inds);
165  #ifdef GPU
166  if (ngpus == 1) {
167  closs = train_network(net, best);
168  } else {
169  closs = train_networks(nets, ngpus, best, 4);
170  }
171  #endif
172  for (i = 0; i < divs*divs; ++i) {
173  printf("%.2f ", resized.y.vals[0][train.y.cols + i]);
174  if((i+1)%divs == 0) printf("\n");
175  free_data(tiles[i]);
176  }
177  free_data(best);
178  printf("\n");
179  image im = float_to_image(64,64,3,resized.X.vals[0]);
180  //show_image(im, "orig");
181  //cvWaitKey(100);
182  /*
183  image im1 = float_to_image(64,64,3,tiles[i].X.vals[0]);
184  image im2 = float_to_image(64,64,3,resized.X.vals[0]);
185  show_image(im1, "tile");
186  show_image(im2, "res");
187  */
188 #ifdef GPU
189  if (ngpus == 1) {
190  aloss = train_network(net, resized);
191  } else {
192  aloss = train_networks(nets, ngpus, resized, 4);
193  }
194 #endif
195  for(i = 0; i < divs*divs; ++i){
196  printf("%f ", nets[0]->output[1000 + i]);
197  if ((i+1) % divs == 0) printf("\n");
198  }
199  printf("\n");
200 
201  free_data(resized);
202  free_data(train);
203  if(avg_cls_loss == -1) avg_cls_loss = closs;
204  if(avg_att_loss == -1) avg_att_loss = aloss;
205  avg_cls_loss = avg_cls_loss*.9 + closs*.1;
206  avg_att_loss = avg_att_loss*.9 + aloss*.1;
207 
208  printf("%ld, %.3f: Att: %f, %f avg, Class: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net->seen)/N, aloss, avg_att_loss, closs, avg_cls_loss, get_current_rate(net), what_time_is_it_now()-time, *net->seen);
209  if(*net->seen/N > epoch){
210  epoch = *net->seen/N;
211  char buff[256];
212  sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
213  save_weights(net, buff);
214  }
215  if(get_current_batch(net)%1000 == 0){
216  char buff[256];
217  sprintf(buff, "%s/%s.backup",backup_directory,base);
218  save_weights(net, buff);
219  }
220  }
221  char buff[256];
222  sprintf(buff, "%s/%s.weights", backup_directory, base);
223  save_weights(net, buff);
224  pthread_join(load_thread, 0);
225 
226  free_network(net);
227  free_ptrs((void**)labels, classes);
228  free_ptrs((void**)paths, plist->size);
229  free_list(plist);
230  free(base);
231 }
232 
233 void validate_attention_single(char *datacfg, char *filename, char *weightfile)
234 {
235  int i, j;
236  network *net = load_network(filename, weightfile, 0);
237  set_batch_network(net, 1);
238  srand(time(0));
239 
240  list *options = read_data_cfg(datacfg);
241 
242  char *label_list = option_find_str(options, "labels", "data/labels.list");
243  char *leaf_list = option_find_str(options, "leaves", 0);
244  if(leaf_list) change_leaves(net->hierarchy, leaf_list);
245  char *valid_list = option_find_str(options, "valid", "data/train.list");
246  int classes = option_find_int(options, "classes", 2);
247  int topk = option_find_int(options, "top", 1);
248 
249  char **labels = get_labels(label_list);
250  list *plist = get_paths(valid_list);
251 
252  char **paths = (char **)list_to_array(plist);
253  int m = plist->size;
254  free_list(plist);
255 
256  float avg_acc = 0;
257  float avg_topk = 0;
258  int *indexes = calloc(topk, sizeof(int));
259  int divs = 4;
260  int size = 2;
261  int extra = 0;
262  float *avgs = calloc(classes, sizeof(float));
263  int *inds = calloc(divs*divs, sizeof(int));
264 
265  for(i = 0; i < m; ++i){
266  int class = -1;
267  char *path = paths[i];
268  for(j = 0; j < classes; ++j){
269  if(strstr(path, labels[j])){
270  class = j;
271  break;
272  }
273  }
274  image im = load_image_color(paths[i], 0, 0);
275  image resized = resize_min(im, net->w*divs/size);
276  image crop = crop_image(resized, (resized.w - net->w*divs/size)/2, (resized.h - net->h*divs/size)/2, net->w*divs/size, net->h*divs/size);
277  image rcrop = resize_image(crop, net->w, net->h);
278  //show_image(im, "orig");
279  //show_image(crop, "cropped");
280  //cvWaitKey(0);
281  float *pred = network_predict(net, rcrop.data);
282  //pred[classes + 56] = 0;
283  for(j = 0; j < divs*divs; ++j){
284  printf("%.2f ", pred[classes + j]);
285  if((j+1)%divs == 0) printf("\n");
286  }
287  printf("\n");
288  copy_cpu(classes, pred, 1, avgs, 1);
289  top_k(pred + classes, divs*divs, divs*divs, inds);
290  show_image(crop, "crop");
291  for(j = 0; j < extra; ++j){
292  int index = inds[j];
293  int row = index / divs;
294  int col = index % divs;
295  int y = row * crop.h / divs - (net->h - crop.h/divs)/2;
296  int x = col * crop.w / divs - (net->w - crop.w/divs)/2;
297  printf("%d %d %d %d\n", row, col, y, x);
298  image tile = crop_image(crop, x, y, net->w, net->h);
299  float *pred = network_predict(net, tile.data);
300  axpy_cpu(classes, 1., pred, 1, avgs, 1);
301  show_image(tile, "tile");
302  //cvWaitKey(10);
303  }
304  if(net->hierarchy) hierarchy_predictions(pred, net->outputs, net->hierarchy, 1, 1);
305 
306  if(rcrop.data != resized.data) free_image(rcrop);
307  if(resized.data != im.data) free_image(resized);
308  free_image(im);
309  free_image(crop);
310  top_k(pred, classes, topk, indexes);
311 
312  if(indexes[0] == class) avg_acc += 1;
313  for(j = 0; j < topk; ++j){
314  if(indexes[j] == class) avg_topk += 1;
315  }
316 
317  printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1));
318  }
319 }
320 
321 void validate_attention_multi(char *datacfg, char *filename, char *weightfile)
322 {
323  int i, j;
324  network *net = load_network(filename, weightfile, 0);
325  set_batch_network(net, 1);
326  srand(time(0));
327 
328  list *options = read_data_cfg(datacfg);
329 
330  char *label_list = option_find_str(options, "labels", "data/labels.list");
331  char *valid_list = option_find_str(options, "valid", "data/train.list");
332  int classes = option_find_int(options, "classes", 2);
333  int topk = option_find_int(options, "top", 1);
334 
335  char **labels = get_labels(label_list);
336  list *plist = get_paths(valid_list);
337  int scales[] = {224, 288, 320, 352, 384};
338  int nscales = sizeof(scales)/sizeof(scales[0]);
339 
340  char **paths = (char **)list_to_array(plist);
341  int m = plist->size;
342  free_list(plist);
343 
344  float avg_acc = 0;
345  float avg_topk = 0;
346  int *indexes = calloc(topk, sizeof(int));
347 
348  for(i = 0; i < m; ++i){
349  int class = -1;
350  char *path = paths[i];
351  for(j = 0; j < classes; ++j){
352  if(strstr(path, labels[j])){
353  class = j;
354  break;
355  }
356  }
357  float *pred = calloc(classes, sizeof(float));
358  image im = load_image_color(paths[i], 0, 0);
359  for(j = 0; j < nscales; ++j){
360  image r = resize_min(im, scales[j]);
361  resize_network(net, r.w, r.h);
362  float *p = network_predict(net, r.data);
363  if(net->hierarchy) hierarchy_predictions(p, net->outputs, net->hierarchy, 1 , 1);
364  axpy_cpu(classes, 1, p, 1, pred, 1);
365  flip_image(r);
366  p = network_predict(net, r.data);
367  axpy_cpu(classes, 1, p, 1, pred, 1);
368  if(r.data != im.data) free_image(r);
369  }
370  free_image(im);
371  top_k(pred, classes, topk, indexes);
372  free(pred);
373  if(indexes[0] == class) avg_acc += 1;
374  for(j = 0; j < topk; ++j){
375  if(indexes[j] == class) avg_topk += 1;
376  }
377 
378  printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1));
379  }
380 }
381 
382 void predict_attention(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top)
383 {
384  network *net = load_network(cfgfile, weightfile, 0);
385  set_batch_network(net, 1);
386  srand(2222222);
387 
388  list *options = read_data_cfg(datacfg);
389 
390  char *name_list = option_find_str(options, "names", 0);
391  if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list");
392  if(top == 0) top = option_find_int(options, "top", 1);
393 
394  int i = 0;
395  char **names = get_labels(name_list);
396  clock_t time;
397  int *indexes = calloc(top, sizeof(int));
398  char buff[256];
399  char *input = buff;
400  while(1){
401  if(filename){
402  strncpy(input, filename, 256);
403  }else{
404  printf("Enter Image Path: ");
405  fflush(stdout);
406  input = fgets(input, 256, stdin);
407  if(!input) return;
408  strtok(input, "\n");
409  }
410  image im = load_image_color(input, 0, 0);
411  image r = letterbox_image(im, net->w, net->h);
412  //resize_network(&net, r.w, r.h);
413  //printf("%d %d\n", r.w, r.h);
414 
415  float *X = r.data;
416  time=clock();
417  float *predictions = network_predict(net, X);
418  if(net->hierarchy) hierarchy_predictions(predictions, net->outputs, net->hierarchy, 1, 1);
419  top_k(predictions, net->outputs, top, indexes);
420  fprintf(stderr, "%s: Predicted in %f seconds.\n", input, sec(clock()-time));
421  for(i = 0; i < top; ++i){
422  int index = indexes[i];
423  //if(net->hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net->hierarchy->parent[index] >= 0) ? names[net->hierarchy->parent[index]] : "Root");
424  //else printf("%s: %f\n",names[index], predictions[index]);
425  printf("%5.2f%%: %s\n", predictions[index]*100, names[index]);
426  }
427  if(r.data != im.data) free_image(r);
428  free_image(im);
429  if (filename) break;
430  }
431 }
432 
433 
434 void run_attention(int argc, char **argv)
435 {
436  if(argc < 4){
437  fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
438  return;
439  }
440 
441  char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
442  int ngpus;
443  int *gpus = read_intlist(gpu_list, &ngpus, gpu_index);
444 
445 
446  int top = find_int_arg(argc, argv, "-t", 0);
447  int clear = find_arg(argc, argv, "-clear");
448  char *data = argv[3];
449  char *cfg = argv[4];
450  char *weights = (argc > 5) ? argv[5] : 0;
451  char *filename = (argc > 6) ? argv[6]: 0;
452  char *layer_s = (argc > 7) ? argv[7]: 0;
453  if(0==strcmp(argv[2], "predict")) predict_attention(data, cfg, weights, filename, top);
454  else if(0==strcmp(argv[2], "train")) train_attention(data, cfg, weights, gpus, ngpus, clear);
455  else if(0==strcmp(argv[2], "valid")) validate_attention_single(data, cfg, weights);
456  else if(0==strcmp(argv[2], "validmulti")) validate_attention_multi(data, cfg, weights);
457 }
458 
459 
void train_attention(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
Definition: attention.c:54
float min_ratio
Definition: darknet.h:472
data select_data(data *orig, int *inds)
Definition: data.c:1270
float hue
Definition: darknet.h:576
float decay
Definition: darknet.h:447
data * tile_data(data orig, int divs, int size)
Definition: data.c:1293
char ** paths
Definition: darknet.h:553
image resize_image(image im, int w, int h)
Definition: image.c:1351
int batch
Definition: darknet.h:436
int rows
Definition: darknet.h:533
int find_arg(int argc, char *argv[], char *arg)
Definition: utils.c:120
data resize_data(data orig, int w, int h)
Definition: data.c:1320
void set_batch_network(network *net, int b)
Definition: network.c:339
int w
Definition: darknet.h:559
float learning_rate
Definition: darknet.h:445
int cols
Definition: darknet.h:533
int max_index(float *a, int n)
Definition: utils.c:619
float hue
Definition: darknet.h:478
float momentum
Definition: darknet.h:446
void free_data(data d)
Definition: data.c:665
matrix network_loss_data(network *net, data test)
Definition: attention.c:18
int max
Definition: darknet.h:565
float * truth
Definition: darknet.h:485
int show_image(image p, const char *name, int ms)
Definition: image.c:575
char * find_char_arg(int argc, char **argv, char *arg, char *def)
Definition: utils.c:163
float aspect
Definition: darknet.h:573
char * basecfg(char *cfgfile)
Definition: utils.c:179
void change_leaves(tree *t, char *leaf_list)
Definition: tree.c:7
void ** list_to_array(list *l)
Definition: list.c:82
size_t * seen
Definition: darknet.h:437
char * option_find_str(list *l, char *key, char *def)
Definition: option_list.c:104
float train_network(network *net, data d)
Definition: network.c:314
void top_k(float *a, int n, int k, int *index)
Definition: utils.c:237
int size
Definition: darknet.h:603
void free_list(list *l)
Definition: list.c:67
Definition: darknet.h:512
float max_ratio
Definition: darknet.h:471
int h
Definition: darknet.h:558
void free_network(network *net)
Definition: network.c:716
void flip_image(image a)
Definition: image.c:349
data_type type
Definition: darknet.h:580
image float_to_image(int w, int h, int c, float *data)
Definition: image.c:774
void save_weights(network *net, char *filename)
Definition: parser.c:1080
int size
Definition: darknet.h:565
float * delta
Definition: darknet.h:486
network_predict
Definition: darknet.py:79
float exposure
Definition: darknet.h:575
int h
Definition: darknet.h:514
int max_batches
Definition: darknet.h:453
void run_attention(int argc, char **argv)
Definition: attention.c:434
int resize_network(network *net, int w, int h)
Definition: network.c:358
image resize_min(image im, int min)
Definition: image.c:1001
letterbox_image
Definition: darknet.py:98
float aspect
Definition: darknet.h:475
int threads
Definition: darknet.h:552
int m
Definition: darknet.h:556
layer * layers
Definition: darknet.h:441
int * read_intlist(char *s, int *n, int d)
Definition: utils.c:36
int train
Definition: darknet.h:488
data * d
Definition: darknet.h:577
free_image
Definition: darknet.py:95
int subdivisions
Definition: darknet.h:440
void forward_network(network *net)
Definition: network.c:188
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
Definition: blas.c:178
image load_image_color(char *filename, int w, int h)
Definition: image.c:1486
int classes
Definition: darknet.h:566
float get_current_rate(network *net)
Definition: network.c:90
tree * hierarchy
Definition: darknet.h:581
float * output
Definition: darknet.h:246
void extend_data_truth(data *d, int n, float val)
Definition: attention.c:6
float saturation
Definition: darknet.h:477
float sum_array(float *a, int n)
Definition: utils.c:479
float sec(clock_t clocks)
Definition: utils.c:232
float saturation
Definition: darknet.h:574
int find_int_arg(int argc, char **argv, char *arg, int def)
Definition: utils.c:133
network * load_network(char *cfg, char *weights, int clear)
Definition: network.c:53
char ** get_labels(char *filename)
Definition: data.c:657
int n
Definition: darknet.h:555
image crop_image(image im, int dx, int dy, int w, int h)
Definition: image.c:861
void predict_attention(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top)
Definition: attention.c:382
void free_matrix(matrix m)
Definition: matrix.c:10
void * load_thread(void *ptr)
Definition: data.c:1090
int w
Definition: darknet.h:513
void cuda_set_device(int n)
Definition: cuda.c:176
Definition: darknet.h:602
int min
Definition: darknet.h:565
list * read_data_cfg(char *filename)
Definition: option_list.c:7
void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
Definition: blas.c:226
char ** labels
Definition: darknet.h:557
matrix X
Definition: darknet.h:540
tree * hierarchy
Definition: darknet.h:482
int gpu_index
Definition: cuda.c:1
int outputs
Definition: darknet.h:465
#define SECRET_NUM
Definition: darknet.h:8
float * input
Definition: darknet.h:484
size_t get_current_batch(network *net)
Definition: network.c:63
float ** vals
Definition: darknet.h:534
matrix make_matrix(int rows, int cols)
Definition: matrix.c:91
int h
Definition: darknet.h:468
int n
Definition: darknet.h:435
float angle
Definition: darknet.h:572
list * get_paths(char *filename)
Definition: data.c:12
free_ptrs
Definition: darknet.py:76
pthread_t load_data(load_args args)
Definition: data.c:1180
int option_find_int(list *l, char *key, int def)
Definition: option_list.c:112
void validate_attention_multi(char *datacfg, char *filename, char *weightfile)
Definition: attention.c:321
int w
Definition: darknet.h:468
list classes
Definition: voc_label.py:9
Definition: darknet.h:538
void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride)
Definition: tree.c:37
double what_time_is_it_now()
Definition: utils.c:27
float * data
Definition: darknet.h:516
float exposure
Definition: darknet.h:476
matrix y
Definition: darknet.h:541
void validate_attention_single(char *datacfg, char *filename, char *weightfile)
Definition: attention.c:233
float angle
Definition: darknet.h:474