darknet  v3
rnn_vid.c
Go to the documentation of this file.
1 #include "darknet.h"
2 
3 #ifdef OPENCV
4 image get_image_from_stream(CvCapture *cap);
5 image ipl_to_image(IplImage* src);
6 
7 void reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters);
8 
9 
/*
 * Pair of float pointers returned by get_rnn_vid_data().
 * `x` is the start of the feature buffer; `y` points into that same buffer,
 * offset by one time step, so only `x` is ever passed to free().
 */
typedef struct {
    float *x;
    float *y;
} float_pair;
14 
15 float_pair get_rnn_vid_data(network net, char **files, int n, int batch, int steps)
16 {
17  int b;
18  assert(net.batch == steps + 1);
19  image out_im = get_network_image(net);
20  int output_size = out_im.w*out_im.h*out_im.c;
21  printf("%d %d %d\n", out_im.w, out_im.h, out_im.c);
22  float *feats = calloc(net.batch*batch*output_size, sizeof(float));
23  for(b = 0; b < batch; ++b){
24  int input_size = net.w*net.h*net.c;
25  float *input = calloc(input_size*net.batch, sizeof(float));
26  char *filename = files[rand()%n];
27  CvCapture *cap = cvCaptureFromFile(filename);
28  int frames = cvGetCaptureProperty(cap, CV_CAP_PROP_FRAME_COUNT);
29  int index = rand() % (frames - steps - 2);
30  if (frames < (steps + 4)){
31  --b;
32  free(input);
33  continue;
34  }
35 
36  printf("frames: %d, index: %d\n", frames, index);
37  cvSetCaptureProperty(cap, CV_CAP_PROP_POS_FRAMES, index);
38 
39  int i;
40  for(i = 0; i < net.batch; ++i){
41  IplImage* src = cvQueryFrame(cap);
42  image im = ipl_to_image(src);
43  rgbgr_image(im);
44  image re = resize_image(im, net.w, net.h);
45  //show_image(re, "loaded");
46  //cvWaitKey(10);
47  memcpy(input + i*input_size, re.data, input_size*sizeof(float));
48  free_image(im);
49  free_image(re);
50  }
51  float *output = network_predict(net, input);
52 
53  free(input);
54 
55  for(i = 0; i < net.batch; ++i){
56  memcpy(feats + (b + i*batch)*output_size, output + i*output_size, output_size*sizeof(float));
57  }
58 
59  cvReleaseCapture(&cap);
60  }
61 
62  //printf("%d %d %d\n", out_im.w, out_im.h, out_im.c);
63  float_pair p = {0};
64  p.x = feats;
65  p.y = feats + output_size*batch; //+ out_im.w*out_im.h*out_im.c;
66 
67  return p;
68 }
69 
70 
71 void train_vid_rnn(char *cfgfile, char *weightfile)
72 {
73  char *train_videos = "data/vid/train.txt";
74  char *backup_directory = "/home/pjreddie/backup/";
75  srand(time(0));
76  char *base = basecfg(cfgfile);
77  printf("%s\n", base);
78  float avg_loss = -1;
79  network net = parse_network_cfg(cfgfile);
80  if(weightfile){
81  load_weights(&net, weightfile);
82  }
83  printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
84  int imgs = net.batch*net.subdivisions;
85  int i = *net.seen/imgs;
86 
87  list *plist = get_paths(train_videos);
88  int N = plist->size;
89  char **paths = (char **)list_to_array(plist);
90  clock_t time;
91  int steps = net.time_steps;
92  int batch = net.batch / net.time_steps;
93 
94  network extractor = parse_network_cfg("cfg/extractor.cfg");
95  load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv");
96 
97  while(get_current_batch(net) < net.max_batches){
98  i += 1;
99  time=clock();
100  float_pair p = get_rnn_vid_data(extractor, paths, N, batch, steps);
101 
102  copy_cpu(net.inputs*net.batch, p.x, 1, net.input, 1);
103  copy_cpu(net.truths*net.batch, p.y, 1, net.truth, 1);
104  float loss = train_network_datum(net) / (net.batch);
105 
106 
107  free(p.x);
108  if (avg_loss < 0) avg_loss = loss;
109  avg_loss = avg_loss*.9 + loss*.1;
110 
111  fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time));
112  if(i%100==0){
113  char buff[256];
114  sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
115  save_weights(net, buff);
116  }
117  if(i%10==0){
118  char buff[256];
119  sprintf(buff, "%s/%s.backup", backup_directory, base);
120  save_weights(net, buff);
121  }
122  }
123  char buff[256];
124  sprintf(buff, "%s/%s_final.weights", backup_directory, base);
125  save_weights(net, buff);
126 }
127 
128 
129 image save_reconstruction(network net, image *init, float *feat, char *name, int i)
130 {
131  image recon;
132  if (init) {
133  recon = copy_image(*init);
134  } else {
135  recon = make_random_image(net.w, net.h, 3);
136  }
137 
138  image update = make_image(net.w, net.h, 3);
139  reconstruct_picture(net, feat, recon, update, .01, .9, .1, 2, 50);
140  char buff[256];
141  sprintf(buff, "%s%d", name, i);
142  save_image(recon, buff);
143  free_image(update);
144  return recon;
145 }
146 
147 void generate_vid_rnn(char *cfgfile, char *weightfile)
148 {
149  network extractor = parse_network_cfg("cfg/extractor.recon.cfg");
150  load_weights(&extractor, "/home/pjreddie/trained/yolo-coco.conv");
151 
152  network net = parse_network_cfg(cfgfile);
153  if(weightfile){
154  load_weights(&net, weightfile);
155  }
156  set_batch_network(&extractor, 1);
157  set_batch_network(&net, 1);
158 
159  int i;
160  CvCapture *cap = cvCaptureFromFile("/extra/vid/ILSVRC2015/Data/VID/snippets/val/ILSVRC2015_val_00007030.mp4");
161  float *feat;
162  float *next;
163  image last;
164  for(i = 0; i < 25; ++i){
165  image im = get_image_from_stream(cap);
166  image re = resize_image(im, extractor.w, extractor.h);
167  feat = network_predict(extractor, re.data);
168  if(i > 0){
169  printf("%f %f\n", mean_array(feat, 14*14*512), variance_array(feat, 14*14*512));
170  printf("%f %f\n", mean_array(next, 14*14*512), variance_array(next, 14*14*512));
171  printf("%f\n", mse_array(feat, 14*14*512));
172  axpy_cpu(14*14*512, -1, feat, 1, next, 1);
173  printf("%f\n", mse_array(next, 14*14*512));
174  }
175  next = network_predict(net, feat);
176 
177  free_image(im);
178 
179  free_image(save_reconstruction(extractor, 0, feat, "feat", i));
180  free_image(save_reconstruction(extractor, 0, next, "next", i));
181  if (i==24) last = copy_image(re);
182  free_image(re);
183  }
184  for(i = 0; i < 30; ++i){
185  next = network_predict(net, next);
186  image new = save_reconstruction(extractor, &last, next, "new", i);
187  free_image(last);
188  last = new;
189  }
190 }
191 
/*
 * Command-line entry point for the video RNN example.
 *
 * Expects argv[2] to be the sub-command ("train" or "generate"), argv[3] the
 * network cfg file and argv[4] an optional weights file. Prints a usage line
 * to stderr and returns when fewer than four arguments are given. Any other
 * sub-command is ignored.
 */
void run_vid_rnn(int argc, char **argv)
{
    if(argc < 4){
        /* List the sub-commands that are actually dispatched below. */
        fprintf(stderr, "usage: %s %s [train/generate] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }

    char *cfg = argv[3];
    char *weights = (argc > 4) ? argv[4] : 0;
    if(0==strcmp(argv[2], "train")) train_vid_rnn(cfg, weights);
    else if(0==strcmp(argv[2], "generate")) generate_vid_rnn(cfg, weights);
}
205 #else
/* Stub used when built without OPENCV: the video RNN example does nothing. */
void run_vid_rnn(int argc, char **argv)
{
    (void)argc;
    (void)argv;
}
207 #endif
208 
float decay
Definition: darknet.h:447
image resize_image(image im, int w, int h)
Definition: image.c:1351
image copy_image(image p)
Definition: image.c:519
int batch
Definition: darknet.h:436
make_image
Definition: darknet.py:61
float variance_array(float *a, int n)
Definition: utils.c:514
void set_batch_network(network *net, int b)
Definition: network.c:339
float * y
Definition: rnn.c:7
float learning_rate
Definition: darknet.h:445
float momentum
Definition: darknet.h:446
float * truth
Definition: darknet.h:485
char * basecfg(char *cfgfile)
Definition: utils.c:179
float mse_array(float *a, int n)
Definition: utils.c:546
void ** list_to_array(list *l)
Definition: list.c:82
size_t * seen
Definition: darknet.h:437
void reconstruct_picture(network *net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters)
Definition: nightmare.c:135
int size
Definition: darknet.h:603
Definition: darknet.h:512
void save_image(image p, const char *name)
Definition: image.c:717
void save_weights(network *net, char *filename)
Definition: parser.c:1080
network * parse_network_cfg(char *filename)
Definition: parser.c:742
rgbgr_image
Definition: darknet.py:110
network_predict
Definition: darknet.py:79
image make_random_image(int w, int h, int c)
Definition: image.c:763
image get_network_image(network *net)
Definition: network.c:466
int h
Definition: darknet.h:514
int max_batches
Definition: darknet.h:453
free_image
Definition: darknet.py:95
float mean_array(float *a, int n)
Definition: utils.c:487
int subdivisions
Definition: darknet.h:440
void run_vid_rnn(int argc, char **argv)
Definition: rnn_vid.c:206
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
Definition: blas.c:178
float get_current_rate(network *net)
Definition: network.c:90
float sec(clock_t clocks)
Definition: utils.c:232
int truths
Definition: darknet.h:466
int c
Definition: darknet.h:515
int w
Definition: darknet.h:513
Definition: darknet.h:602
void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
Definition: blas.c:226
int c
Definition: darknet.h:468
float * input
Definition: darknet.h:484
size_t get_current_batch(network *net)
Definition: network.c:63
int h
Definition: darknet.h:468
int time_steps
Definition: darknet.h:451
float * x
Definition: rnn.c:6
list * get_paths(char *filename)
Definition: data.c:12
float train_network_datum(network *net)
Definition: network.c:289
void load_weights(network *net, char *filename)
Definition: parser.c:1308
int w
Definition: darknet.h:468
Definition: rnn.c:5
int inputs
Definition: darknet.h:464
float * data
Definition: darknet.h:516