darknet  v3
blas.h
Go to the documentation of this file.
1 #ifndef BLAS_H
2 #define BLAS_H
3 #include "darknet.h"
4 
5 void flatten(float *x, int size, int layers, int batch, int forward);
6 void pm(int M, int N, float *A);
7 float *random_matrix(int rows, int cols);
8 void time_random_matrix(int TA, int TB, int m, int k, int n);
9 void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out);
10 
11 void test_blas();
12 
13 void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT);
14 void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT);
15 void mult_add_into_cpu(int N, float *X, float *Y, float *Z);
16 
17 void const_cpu(int N, float ALPHA, float *X, int INCX);
18 void constrain_gpu(int N, float ALPHA, float * X, int INCX);
19 void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);
20 void mul_cpu(int N, float *X, int INCX, float *Y, int INCY);
21 
22 int test_gpu_blas();
23 void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out);
24 
25 void mean_cpu(float *x, int batch, int filters, int spatial, float *mean);
26 void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
27 
28 void scale_bias(float *output, float *scales, int batch, int n, int size);
29 void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates);
30 void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta);
31 void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta);
32 void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta);
33 void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial);
34 
35 void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error);
36 void l2_cpu(int n, float *pred, float *truth, float *delta, float *error);
37 void l1_cpu(int n, float *pred, float *truth, float *delta, float *error);
38 void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error);
39 void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error);
40 void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c);
41 void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc);
42 
43 void softmax(float *input, int n, float temp, int stride, float *output);
44 void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output);
45 void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out);
46 
47 #ifdef GPU
48 #include "cuda.h"
49 #include "tree.h"
50 
51 void axpy_gpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY);
52 void axpy_gpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
53 void copy_gpu(int N, float * X, int INCX, float * Y, int INCY);
54 void copy_gpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
55 void add_gpu(int N, float ALPHA, float * X, int INCX);
56 void supp_gpu(int N, float ALPHA, float * X, int INCX);
57 void mask_gpu(int N, float * X, float mask_num, float * mask, float val);
58 void scale_mask_gpu(int N, float * X, float mask_num, float * mask, float scale);
59 void const_gpu(int N, float ALPHA, float *X, int INCX);
60 void pow_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);
61 void mul_gpu(int N, float *X, int INCX, float *Y, int INCY);
62 
63 void mean_gpu(float *x, int batch, int filters, int spatial, float *mean);
64 void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
65 void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial);
66 void l2normalize_gpu(float *x, float *dx, int batch, int filters, int spatial);
67 
68 void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta);
69 
70 void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta);
71 void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta);
72 
73 void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
74 void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean);
75 void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out);
76 void scale_bias_gpu(float *output, float *biases, int batch, int n, int size);
77 void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates);
78 void scale_bias_gpu(float *output, float *biases, int batch, int n, int size);
79 void add_bias_gpu(float *output, float *biases, int batch, int n, int size);
80 void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size);
81 
82 void logistic_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error);
83 void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error);
84 void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error);
85 void l2_gpu(int n, float *pred, float *truth, float *delta, float *error);
86 void l1_gpu(int n, float *pred, float *truth, float *delta, float *error);
87 void wgan_gpu(int n, float *pred, float *truth, float *delta, float *error);
88 void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc);
89 void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c);
90 void mult_add_into_gpu(int num, float *a, float *b, float *c);
91 void inter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT);
92 void deinter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT);
93 
94 void reorg_gpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out);
95 
96 void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output);
97 void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t);
98 void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t);
99 
100 void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out);
101 void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier);
102 void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out);
103 
104 #endif
105 #endif
void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
Definition: blas.c:324
void mul_cpu(int N, float *X, int INCX, float *Y, int INCY)
Definition: blas.c:166
void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial)
void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out)
Definition: blas.c:68
void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates)
void add_bias_gpu(float *output, float *biases, int batch, int n, int size)
Definition: blas_kernels.cu:69
void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c)
Definition: blas.c:50
void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
Definition: blas.c:238
void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size)
void l2normalize_gpu(float *x, float *dx, int batch, int filters, int spatial)
void l2_cpu(int n, float *pred, float *truth, float *delta, float *error)
Definition: blas.c:287
void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
Definition: blas.c:196
void axpy_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates)
Definition: blas_kernels.cu:50
void pm(int M, int N, float *A)
Definition: utils.c:203
void l2normalize_cpu(float *x, float *dx, int batch, int filters, int spatial)
Definition: blas.c:126
void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
void supp_gpu(int N, float ALPHA, float *X, int INCX)
void flatten_gpu(float *x, int spatial, int layers, int batch, int forward, float *out)
void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out)
void mul_gpu(int N, float *X, int INCX, float *Y, int INCY)
void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error)
Definition: blas.c:265
void flatten(float *x, int size, int layers, int batch, int forward)
Definition: blas.c:32
void copy_gpu_offset(int N, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error)
Definition: blas.c:276
void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error)
void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
Definition: blas.c:172
tree
Definition: darknet.h:42
void constrain_gpu(int N, float ALPHA, float *X, int INCX)
void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error)
void axpy_gpu_offset(int N, float ALPHA, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
void deinter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
void mult_add_into_cpu(int N, float *X, float *Y, float *Z)
Definition: blas.c:232
void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
void scale_mask_gpu(int N, float *X, float mask_num, float *mask, float scale)
void time_random_matrix(int TA, int TB, int m, int k, int n)
Definition: gemm.c:40
int test_gpu_blas()
void mult_add_into_gpu(int num, float *a, float *b, float *c)
void scale_bias_gpu(float *output, float *biases, int batch, int n, int size)
Definition: blas_kernels.cu:21
void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out)
Definition: blas.c:334
void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc)
Definition: blas.c:58
void mean_cpu(float *x, int batch, int filters, int spatial, float *mean)
Definition: blas.c:94
void test_blas()
void add_gpu(int N, float ALPHA, float *X, int INCX)
void l1_gpu(int n, float *pred, float *truth, float *delta, float *error)
void softmax(float *input, int n, float temp, int stride, float *output)
Definition: blas.c:305
void logistic_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error)
void copy_gpu(int N, float *X, int INCX, float *Y, int INCY)
void const_cpu(int N, float ALPHA, float *X, int INCX)
Definition: blas.c:160
void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
Definition: blas.c:9
void wgan_gpu(int n, float *pred, float *truth, float *delta, float *error)
void mask_gpu(int N, float *X, float mask_num, float *mask, float val)
void scale_bias(float *output, float *scales, int batch, int n, int size)
void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out)
void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
void softmax_gpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean)
void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
Definition: blas.c:212
void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier)
void inter_gpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t)
void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
Definition: blas.c:110
void mean_gpu(float *x, int batch, int filters, int spatial, float *mean)
void l2_gpu(int n, float *pred, float *truth, float *delta, float *error)
void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
float * random_matrix(int rows, int cols)
Definition: gemm.c:30
void const_gpu(int N, float ALPHA, float *X, int INCX)
void pow_gpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
void error(const char *s)
Definition: utils.c:253
void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t)
void reorg_gpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
void l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
Definition: blas.c:255
void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c)
void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc)