darknet  v3
yolo_layer.c
Go to the documentation of this file.
1 #include "yolo_layer.h"
2 #include "activations.h"
3 #include "blas.h"
4 #include "box.h"
5 #include "cuda.h"
6 #include "utils.h"
7 
8 #include <stdio.h>
9 #include <assert.h>
10 #include <string.h>
11 #include <stdlib.h>
12 
13 layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes)
14 {
15  int i;
16  layer l = {0};
17  l.type = YOLO;
18 
19  l.n = n;
20  l.total = total;
21  l.batch = batch;
22  l.h = h;
23  l.w = w;
24  l.c = n*(classes + 4 + 1);
25  l.out_w = l.w;
26  l.out_h = l.h;
27  l.out_c = l.c;
28  l.classes = classes;
29  l.cost = calloc(1, sizeof(float));
30  l.biases = calloc(total*2, sizeof(float));
31  if(mask) l.mask = mask;
32  else{
33  l.mask = calloc(n, sizeof(int));
34  for(i = 0; i < n; ++i){
35  l.mask[i] = i;
36  }
37  }
38  l.bias_updates = calloc(n*2, sizeof(float));
39  l.outputs = h*w*n*(classes + 4 + 1);
40  l.inputs = l.outputs;
41  l.truths = 90*(4 + 1);
42  l.delta = calloc(batch*l.outputs, sizeof(float));
43  l.output = calloc(batch*l.outputs, sizeof(float));
44  for(i = 0; i < total*2; ++i){
45  l.biases[i] = .5;
46  }
47 
50 #ifdef GPU
51  l.forward_gpu = forward_yolo_layer_gpu;
52  l.backward_gpu = backward_yolo_layer_gpu;
53  l.output_gpu = cuda_make_array(l.output, batch*l.outputs);
54  l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs);
55 #endif
56 
57  fprintf(stderr, "yolo\n");
58  srand(0);
59 
60  return l;
61 }
62 
63 void resize_yolo_layer(layer *l, int w, int h)
64 {
65  l->w = w;
66  l->h = h;
67 
68  l->outputs = h*w*l->n*(l->classes + 4 + 1);
69  l->inputs = l->outputs;
70 
71  l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
72  l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float));
73 
74 #ifdef GPU
75  cuda_free(l->delta_gpu);
76  cuda_free(l->output_gpu);
77 
78  l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs);
79  l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs);
80 #endif
81 }
82 
83 box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride)
84 {
85  box b;
86  b.x = (i + x[index + 0*stride]) / lw;
87  b.y = (j + x[index + 1*stride]) / lh;
88  b.w = exp(x[index + 2*stride]) * biases[2*n] / w;
89  b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h;
90  return b;
91 }
92 
93 float delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride)
94 {
95  box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride);
96  float iou = box_iou(pred, truth);
97 
98  float tx = (truth.x*lw - i);
99  float ty = (truth.y*lh - j);
100  float tw = log(truth.w*w / biases[2*n]);
101  float th = log(truth.h*h / biases[2*n + 1]);
102 
103  delta[index + 0*stride] = scale * (tx - x[index + 0*stride]);
104  delta[index + 1*stride] = scale * (ty - x[index + 1*stride]);
105  delta[index + 2*stride] = scale * (tw - x[index + 2*stride]);
106  delta[index + 3*stride] = scale * (th - x[index + 3*stride]);
107  return iou;
108 }
109 
110 
/*
 * Write the classification error for one prediction into delta.
 *
 * Class scores are `stride` floats apart, starting at `index`.
 *
 * If delta[index] is already non-zero, only the entry for `class` is
 * overwritten with (1 - output); every other class entry is left alone.
 * Otherwise every class gets (target - output), with target 1 for `class`
 * and 0 for the rest.
 *
 * When avg_cat is non-NULL the output score of `class` is added to it.
 */
void delta_yolo_class(float *output, float *delta, int index, int class, int classes, int stride, float *avg_cat)
{
    const int hit = index + stride*class;
    int k;

    if (delta[index] != 0) {
        delta[hit] = 1 - output[hit];
        if (avg_cat) {
            *avg_cat += output[hit];
        }
        return;
    }

    for (k = 0; k < classes; ++k) {
        const int at = index + stride*k;
        if (k == class) {
            delta[at] = 1 - output[at];
            if (avg_cat) {
                *avg_cat += output[at];
            }
        } else {
            delta[at] = 0 - output[at];
        }
    }
}
124 
125 static int entry_index(layer l, int batch, int location, int entry)
126 {
127  int n = location / (l.w*l.h);
128  int loc = location % (l.w*l.h);
129  return batch*l.outputs + n*l.w*l.h*(4+l.classes+1) + entry*l.w*l.h + loc;
130 }
131 
133 {
134  int i,j,b,t,n;
135  memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float));
136 
137 #ifndef GPU
138  for (b = 0; b < l.batch; ++b){
139  for(n = 0; n < l.n; ++n){
140  int index = entry_index(l, b, n*l.w*l.h, 0);
141  activate_array(l.output + index, 2*l.w*l.h, LOGISTIC);
142  index = entry_index(l, b, n*l.w*l.h, 4);
143  activate_array(l.output + index, (1+l.classes)*l.w*l.h, LOGISTIC);
144  }
145  }
146 #endif
147 
148  memset(l.delta, 0, l.outputs * l.batch * sizeof(float));
149  if(!net.train) return;
150  float avg_iou = 0;
151  float recall = 0;
152  float recall75 = 0;
153  float avg_cat = 0;
154  float avg_obj = 0;
155  float avg_anyobj = 0;
156  int count = 0;
157  int class_count = 0;
158  *(l.cost) = 0;
159  for (b = 0; b < l.batch; ++b) {
160  for (j = 0; j < l.h; ++j) {
161  for (i = 0; i < l.w; ++i) {
162  for (n = 0; n < l.n; ++n) {
163  int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0);
164  box pred = get_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.w*l.h);
165  float best_iou = 0;
166  int best_t = 0;
167  for(t = 0; t < l.max_boxes; ++t){
168  box truth = float_to_box(net.truth + t*(4 + 1) + b*l.truths, 1);
169  if(!truth.x) break;
170  float iou = box_iou(pred, truth);
171  if (iou > best_iou) {
172  best_iou = iou;
173  best_t = t;
174  }
175  }
176  int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4);
177  avg_anyobj += l.output[obj_index];
178  l.delta[obj_index] = 0 - l.output[obj_index];
179  if (best_iou > l.ignore_thresh) {
180  l.delta[obj_index] = 0;
181  }
182  if (best_iou > l.truth_thresh) {
183  l.delta[obj_index] = 1 - l.output[obj_index];
184 
185  int class = net.truth[best_t*(4 + 1) + b*l.truths + 4];
186  if (l.map) class = l.map[class];
187  int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1);
188  delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, 0);
189  box truth = float_to_box(net.truth + best_t*(4 + 1) + b*l.truths, 1);
190  delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h);
191  }
192  }
193  }
194  }
195  for(t = 0; t < l.max_boxes; ++t){
196  box truth = float_to_box(net.truth + t*(4 + 1) + b*l.truths, 1);
197 
198  if(!truth.x) break;
199  float best_iou = 0;
200  int best_n = 0;
201  i = (truth.x * l.w);
202  j = (truth.y * l.h);
203  box truth_shift = truth;
204  truth_shift.x = truth_shift.y = 0;
205  for(n = 0; n < l.total; ++n){
206  box pred = {0};
207  pred.w = l.biases[2*n]/net.w;
208  pred.h = l.biases[2*n+1]/net.h;
209  float iou = box_iou(pred, truth_shift);
210  if (iou > best_iou){
211  best_iou = iou;
212  best_n = n;
213  }
214  }
215 
216  int mask_n = int_index(l.mask, best_n, l.n);
217  if(mask_n >= 0){
218  int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
219  float iou = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, net.w, net.h, l.delta, (2-truth.w*truth.h), l.w*l.h);
220 
221  int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4);
222  avg_obj += l.output[obj_index];
223  l.delta[obj_index] = 1 - l.output[obj_index];
224 
225  int class = net.truth[t*(4 + 1) + b*l.truths + 4];
226  if (l.map) class = l.map[class];
227  int class_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4 + 1);
228  delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, &avg_cat);
229 
230  ++count;
231  ++class_count;
232  if(iou > .5) recall += 1;
233  if(iou > .75) recall75 += 1;
234  avg_iou += iou;
235  }
236  }
237  }
238  *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
239  printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", net.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count);
240 }
241 
243 {
244  axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, net.delta, 1);
245 }
246 
247 void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative)
248 {
249  int i;
250  int new_w=0;
251  int new_h=0;
252  if (((float)netw/w) < ((float)neth/h)) {
253  new_w = netw;
254  new_h = (h * netw)/w;
255  } else {
256  new_h = neth;
257  new_w = (w * neth)/h;
258  }
259  for (i = 0; i < n; ++i){
260  box b = dets[i].bbox;
261  b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw);
262  b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth);
263  b.w *= (float)netw/new_w;
264  b.h *= (float)neth/new_h;
265  if(!relative){
266  b.x *= w;
267  b.w *= w;
268  b.y *= h;
269  b.h *= h;
270  }
271  dets[i].bbox = b;
272  }
273 }
274 
275 int yolo_num_detections(layer l, float thresh)
276 {
277  int i, n;
278  int count = 0;
279  for (i = 0; i < l.w*l.h; ++i){
280  for(n = 0; n < l.n; ++n){
281  int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4);
282  if(l.output[obj_index] > thresh){
283  ++count;
284  }
285  }
286  }
287  return count;
288 }
289 
291 {
292  int i,j,n,z;
293  float *flip = l.output + l.outputs;
294  for (j = 0; j < l.h; ++j) {
295  for (i = 0; i < l.w/2; ++i) {
296  for (n = 0; n < l.n; ++n) {
297  for(z = 0; z < l.classes + 4 + 1; ++z){
298  int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i;
299  int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1);
300  float swap = flip[i1];
301  flip[i1] = flip[i2];
302  flip[i2] = swap;
303  if(z == 0){
304  flip[i1] = -flip[i1];
305  flip[i2] = -flip[i2];
306  }
307  }
308  }
309  }
310  }
311  for(i = 0; i < l.outputs; ++i){
312  l.output[i] = (l.output[i] + flip[i])/2.;
313  }
314 }
315 
316 int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets)
317 {
318  int i,j,n;
319  float *predictions = l.output;
320  if (l.batch == 2) avg_flipped_yolo(l);
321  int count = 0;
322  for (i = 0; i < l.w*l.h; ++i){
323  int row = i / l.w;
324  int col = i % l.w;
325  for(n = 0; n < l.n; ++n){
326  int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4);
327  float objectness = predictions[obj_index];
328  if(objectness <= thresh) continue;
329  int box_index = entry_index(l, 0, n*l.w*l.h + i, 0);
330  dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h);
331  dets[count].objectness = objectness;
332  dets[count].classes = l.classes;
333  for(j = 0; j < l.classes; ++j){
334  int class_index = entry_index(l, 0, n*l.w*l.h + i, 4 + 1 + j);
335  float prob = objectness*predictions[class_index];
336  dets[count].prob[j] = (prob > thresh) ? prob : 0;
337  }
338  ++count;
339  }
340  }
341  correct_yolo_boxes(dets, count, w, h, netw, neth, relative);
342  return count;
343 }
344 
345 #ifdef GPU
346 
347 void forward_yolo_layer_gpu(const layer l, network net)
348 {
349  copy_gpu(l.batch*l.inputs, net.input_gpu, 1, l.output_gpu, 1);
350  int b, n;
351  for (b = 0; b < l.batch; ++b){
352  for(n = 0; n < l.n; ++n){
353  int index = entry_index(l, b, n*l.w*l.h, 0);
354  activate_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC);
355  index = entry_index(l, b, n*l.w*l.h, 4);
356  activate_array_gpu(l.output_gpu + index, (1+l.classes)*l.w*l.h, LOGISTIC);
357  }
358  }
359  if(!net.train || l.onlyforward){
360  cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs);
361  return;
362  }
363 
364  cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs);
365  forward_yolo_layer(l, net);
366  cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs);
367 }
368 
369 void backward_yolo_layer_gpu(const layer l, network net)
370 {
371  axpy_gpu(l.batch*l.inputs, 1, l.delta_gpu, 1, net.delta_gpu, 1);
372 }
373 #endif
374 
float ignore_thresh
Definition: darknet.h:196
int max_boxes
Definition: darknet.h:143
void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative)
Definition: yolo_layer.c:247
float * biases
Definition: darknet.h:236
int * map
Definition: darknet.h:218
int w
Definition: darknet.h:140
int n
Definition: darknet.h:142
int truths
Definition: darknet.h:139
float h
Definition: darknet.h:520
void(* forward_gpu)(struct layer, struct network)
Definition: darknet.h:126
int int_index(int *a, int val, int n)
Definition: utils.c:633
float * truth
Definition: darknet.h:485
void(* backward_gpu)(struct layer, struct network)
Definition: darknet.h:127
int yolo_num_detections(layer l, float thresh)
Definition: yolo_layer.c:275
layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes)
Definition: yolo_layer.c:13
void axpy_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
void(* forward)(struct layer, struct network)
Definition: darknet.h:123
int out_w
Definition: darknet.h:141
float * delta
Definition: darknet.h:486
int classes
Definition: darknet.h:525
int out_c
Definition: darknet.h:141
int entry_index(layer l, int batch, int location, int entry)
Definition: region_layer.c:151
float w
Definition: darknet.h:520
float x
Definition: darknet.h:520
float truth_thresh
Definition: darknet.h:197
int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets)
Definition: yolo_layer.c:316
void resize_yolo_layer(layer *l, int w, int h)
Definition: yolo_layer.c:63
int train
Definition: darknet.h:488
int h
Definition: darknet.h:140
int * mask
Definition: darknet.h:182
float * delta
Definition: darknet.h:245
int out_h
Definition: darknet.h:141
int inputs
Definition: darknet.h:134
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
Definition: blas.c:178
void(* backward)(struct layer, struct network)
Definition: darknet.h:124
void avg_flipped_yolo(layer l)
Definition: yolo_layer.c:290
float * prob
Definition: darknet.h:526
float objectness
Definition: darknet.h:528
int batch
Definition: darknet.h:131
float * output
Definition: darknet.h:246
int onlyforward
Definition: darknet.h:203
void delta_yolo_class(float *output, float *delta, int index, int class, int classes, int stride, float *avg_cat)
Definition: yolo_layer.c:111
void forward_yolo_layer(const layer l, network net)
Definition: yolo_layer.c:132
int index
Definition: darknet.h:489
Definition: darknet.h:88
void copy_gpu(int N, float *X, int INCX, float *Y, int INCY)
box bbox
Definition: darknet.h:524
float * bias_updates
Definition: darknet.h:237
float box_iou(box a, box b)
Definition: box.c:179
int c
Definition: darknet.h:140
void activate_array(float *x, const int n, const ACTIVATION a)
Definition: activations.c:100
int classes
Definition: darknet.h:172
LAYER_TYPE type
Definition: darknet.h:120
float * input
Definition: darknet.h:484
void activate_array_gpu(float *x, int n, ACTIVATION a)
int h
Definition: darknet.h:468
float y
Definition: darknet.h:520
int total
Definition: darknet.h:183
box float_to_box(float *f, int stride)
Definition: box.c:91
int outputs
Definition: darknet.h:135
float mag_array(float *a, int n)
Definition: utils.c:574
void backward_yolo_layer(const layer l, network net)
Definition: yolo_layer.c:242
int w
Definition: darknet.h:468
list classes
Definition: voc_label.py:9
float delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride)
Definition: yolo_layer.c:93
box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride)
Definition: yolo_layer.c:83
float * cost
Definition: darknet.h:222
Definition: darknet.h:519
Definition: darknet.h:119