darknet v3
#include "cuda_runtime.h"
#include "curand.h"
#include "cublas_v2.h"
#include <assert.h>
#include "blas.h"
#include "cuda.h"
#include "utils.h"
Go to the source code of this file.
Functions
__global__ void | scale_bias_kernel (float *output, float *biases, int n, int size) |
void | scale_bias_gpu (float *output, float *biases, int batch, int n, int size) |
__global__ void | backward_scale_kernel (float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) |
void | backward_scale_gpu (float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) |
__global__ void | add_bias_kernel (float *output, float *biases, int batch, int n, int size) |
void | add_bias_gpu (float *output, float *biases, int batch, int n, int size) |
__global__ void | backward_bias_conn_kernel (float *bias_updates, float *delta, int batch, int n) |
__global__ void | backward_bias_kernel (float *bias_updates, float *delta, int batch, int n, int size) |
void | backward_bias_gpu (float *bias_updates, float *delta, int batch, int n, int size) |
__global__ void | adam_kernel (int N, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t) |
void | adam_gpu (int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t) |
void | adam_update_gpu (float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t) |
__global__ void | normalize_kernel (int N, float *x, float *mean, float *variance, int batch, int filters, int spatial) |
__global__ void | normalize_delta_kernel (int N, float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) |
void | normalize_delta_gpu (float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) |
__global__ void | variance_delta_kernel (float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) |
__global__ void | accumulate_kernel (float *x, int n, int groups, float *sum) |
__global__ void | fast_mean_delta_kernel (float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) |
__global__ void | fast_variance_delta_kernel (float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) |
__global__ void | mean_delta_kernel (float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) |
void | mean_delta_gpu (float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) |
void | fast_mean_delta_gpu (float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) |
void | fast_variance_delta_gpu (float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) |
__global__ void | mean_kernel (float *x, int batch, int filters, int spatial, float *mean) |
__global__ void | variance_kernel (float *x, float *mean, int batch, int filters, int spatial, float *variance) |
__global__ void | reorg_kernel (int N, float *x, int w, int h, int c, int batch, int stride, int forward, float *out) |
__global__ void | axpy_kernel (int N, float ALPHA, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY) |
__global__ void | pow_kernel (int N, float ALPHA, float *X, int INCX, float *Y, int INCY) |
__global__ void | const_kernel (int N, float ALPHA, float *X, int INCX) |
__global__ void | constrain_kernel (int N, float ALPHA, float *X, int INCX) |
__global__ void | supp_kernel (int N, float ALPHA, float *X, int INCX) |
__global__ void | add_kernel (int N, float ALPHA, float *X, int INCX) |
__global__ void | scal_kernel (int N, float ALPHA, float *X, int INCX) |
__global__ void | fill_kernel (int N, float ALPHA, float *X, int INCX) |
__global__ void | copy_kernel (int N, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY) |
__global__ void | mul_kernel (int N, float *X, int INCX, float *Y, int INCY) |
void | normalize_gpu (float *x, float *mean, float *variance, int batch, int filters, int spatial) |
__global__ void | l2norm_kernel (int N, float *x, float *dx, int batch, int filters, int spatial) |
void | l2normalize_gpu (float *x, float *dx, int batch, int filters, int spatial) |
__global__ void | fast_mean_kernel (float *x, int batch, int filters, int spatial, float *mean) |
__global__ void | fast_variance_kernel (float *x, float *mean, int batch, int filters, int spatial, float *variance) |
void | fast_mean_gpu (float *x, int batch, int filters, int spatial, float *mean) |
void | fast_variance_gpu (float *x, float *mean, int batch, int filters, int spatial, float *variance) |
void | mean_gpu (float *x, int batch, int filters, int spatial, float *mean) |
void | variance_gpu (float *x, float *mean, int batch, int filters, int spatial, float *variance) |
void | axpy_gpu (int N, float ALPHA, float *X, int INCX, float *Y, int INCY) |
void | pow_gpu (int N, float ALPHA, float *X, int INCX, float *Y, int INCY) |
void | axpy_gpu_offset (int N, float ALPHA, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY) |
void | copy_gpu (int N, float *X, int INCX, float *Y, int INCY) |
void | mul_gpu (int N, float *X, int INCX, float *Y, int INCY) |
void | copy_gpu_offset (int N, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY) |
__global__ void | flatten_kernel (int N, float *x, int spatial, int layers, int batch, int forward, float *out) |
void | flatten_gpu (float *x, int spatial, int layers, int batch, int forward, float *out) |
void | reorg_gpu (float *x, int w, int h, int c, int batch, int stride, int forward, float *out) |
__global__ void | mask_kernel (int n, float *x, float mask_num, float *mask, float val) |
void | mask_gpu (int N, float *X, float mask_num, float *mask, float val) |
__global__ void | scale_mask_kernel (int n, float *x, float mask_num, float *mask, float scale) |
void | scale_mask_gpu (int N, float *X, float mask_num, float *mask, float scale) |
void | const_gpu (int N, float ALPHA, float *X, int INCX) |
void | constrain_gpu (int N, float ALPHA, float *X, int INCX) |
void | add_gpu (int N, float ALPHA, float *X, int INCX) |
void | scal_gpu (int N, float ALPHA, float *X, int INCX) |
void | supp_gpu (int N, float ALPHA, float *X, int INCX) |
void | fill_gpu (int N, float ALPHA, float *X, int INCX) |
__global__ void | shortcut_kernel (int size, int minw, int minh, int minc, int stride, int sample, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) |
void | shortcut_gpu (int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out) |
__global__ void | smooth_l1_kernel (int n, float *pred, float *truth, float *delta, float *error) |
void | smooth_l1_gpu (int n, float *pred, float *truth, float *delta, float *error) |
__global__ void | softmax_x_ent_kernel (int n, float *pred, float *truth, float *delta, float *error) |
void | softmax_x_ent_gpu (int n, float *pred, float *truth, float *delta, float *error) |
__global__ void | logistic_x_ent_kernel (int n, float *pred, float *truth, float *delta, float *error) |
void | logistic_x_ent_gpu (int n, float *pred, float *truth, float *delta, float *error) |
__global__ void | l2_kernel (int n, float *pred, float *truth, float *delta, float *error) |
void | l2_gpu (int n, float *pred, float *truth, float *delta, float *error) |
__global__ void | l1_kernel (int n, float *pred, float *truth, float *delta, float *error) |
void | l1_gpu (int n, float *pred, float *truth, float *delta, float *error) |
__global__ void | wgan_kernel (int n, float *pred, float *truth, float *delta, float *error) |
void | wgan_gpu (int n, float *pred, float *truth, float *delta, float *error) |
__global__ void | weighted_sum_kernel (int n, float *a, float *b, float *s, float *c) |
__global__ void | deinter_kernel (int NX, float *X, int NY, float *Y, int B, float *OUT) |
void | deinter_gpu (int NX, float *X, int NY, float *Y, int B, float *OUT) |
__global__ void | inter_kernel (int NX, float *X, int NY, float *Y, int B, float *OUT) |
void | inter_gpu (int NX, float *X, int NY, float *Y, int B, float *OUT) |
void | weighted_sum_gpu (float *a, float *b, float *s, int num, float *c) |
__global__ void | weighted_delta_kernel (int n, float *a, float *b, float *s, float *da, float *db, float *ds, float *dc) |
void | weighted_delta_gpu (float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc) |
__global__ void | mult_add_into_kernel (int n, float *a, float *b, float *c) |
void | mult_add_into_gpu (int num, float *a, float *b, float *c) |
__device__ void | softmax_device (float *input, int n, float temp, int stride, float *output) |
__global__ void | softmax_tree_kernel (float *input, int spatial, int batch, int stride, float temp, float *output, int groups, int *group_size, int *group_offset) |
void | softmax_tree (float *input, int spatial, int batch, int stride, float temp, float *output, tree hier) |
__global__ void | softmax_kernel (float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) |
void | softmax_gpu (float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) |
__global__ void | upsample_kernel (size_t N, float *x, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) |
void | upsample_gpu (float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) |
__global__ void accumulate_kernel | ( | float * | x, |
int | n, | ||
int | groups, | ||
float * | sum | ||
) |
Definition at line 234 of file blas_kernels.cu.
void adam_gpu | ( | int | n, |
float * | x, | ||
float * | m, | ||
float * | v, | ||
float | B1, | ||
float | B2, | ||
float | rate, | ||
float | eps, | ||
int | t | ||
) |
Definition at line 174 of file blas_kernels.cu.
__global__ void adam_kernel | ( | int | N, |
float * | x, | ||
float * | m, | ||
float * | v, | ||
float | B1, | ||
float | B2, | ||
float | rate, | ||
float | eps, | ||
int | t | ||
) |
Definition at line 163 of file blas_kernels.cu.
void adam_update_gpu | ( | float * | w, |
float * | d, | ||
float * | m, | ||
float * | v, | ||
float | B1, | ||
float | B2, | ||
float | eps, | ||
float | decay, | ||
float | rate, | ||
int | n, | ||
int | batch, | ||
int | t | ||
) |
Definition at line 180 of file blas_kernels.cu.
void add_bias_gpu | ( | float * | output, |
float * | biases, | ||
int | batch, | ||
int | n, | ||
int | size | ||
) |
Definition at line 69 of file blas_kernels.cu.
__global__ void add_bias_kernel | ( | float * | output, |
float * | biases, | ||
int | batch, | ||
int | n, | ||
int | size | ||
) |
Definition at line 56 of file blas_kernels.cu.
void add_gpu | ( | int | N, |
float | ALPHA, | ||
float * | X, | ||
int | INCX | ||
) |
Definition at line 687 of file blas_kernels.cu.
__global__ void add_kernel | ( | int | N, |
float | ALPHA, | ||
float * | X, | ||
int | INCX | ||
) |
Definition at line 434 of file blas_kernels.cu.
void axpy_gpu | ( | int | N, |
float | ALPHA, | ||
float * | X, | ||
int | INCX, | ||
float * | Y, | ||
int | INCY | ||
) |
Definition at line 585 of file blas_kernels.cu.
void axpy_gpu_offset | ( | int | N, |
float | ALPHA, | ||
float * | X, | ||
int | OFFX, | ||
int | INCX, | ||
float * | Y, | ||
int | OFFY, | ||
int | INCY | ||
) |
Definition at line 596 of file blas_kernels.cu.
__global__ void axpy_kernel | ( | int | N, |
float | ALPHA, | ||
float * | X, | ||
int | OFFX, | ||
int | INCX, | ||
float * | Y, | ||
int | OFFY, | ||
int | INCY | ||
) |
Definition at line 402 of file blas_kernels.cu.
__global__ void backward_bias_conn_kernel | ( | float * | bias_updates, |
float * | delta, | ||
int | batch, | ||
int | n | ||
) |
Definition at line 77 of file blas_kernels.cu.
void backward_bias_gpu | ( | float * | bias_updates, |
float * | delta, | ||
int | batch, | ||
int | n, | ||
int | size | ||
) |
Definition at line 110 of file blas_kernels.cu.
__global__ void backward_bias_kernel | ( | float * | bias_updates, |
float * | delta, | ||
int | batch, | ||
int | n, | ||
int | size | ||
) |
Definition at line 90 of file blas_kernels.cu.
void backward_scale_gpu | ( | float * | x_norm, |
float * | delta, | ||
int | batch, | ||
int | n, | ||
int | size, | ||
float * | scale_updates | ||
) |
Definition at line 50 of file blas_kernels.cu.
__global__ void backward_scale_kernel | ( | float * | x_norm, |
float * | delta, | ||
int | batch, | ||
int | n, | ||
int | size, | ||
float * | scale_updates | ||
) |
Definition at line 30 of file blas_kernels.cu.
void const_gpu | ( | int | N, |
float | ALPHA, | ||
float * | X, | ||
int | INCX | ||
) |
Definition at line 674 of file blas_kernels.cu.
__global__ void const_kernel | ( | int | N, |
float | ALPHA, | ||
float * | X, | ||
int | INCX | ||
) |
Definition at line 414 of file blas_kernels.cu.
void constrain_gpu | ( | int | N, |
float | ALPHA, | ||
float * | X, | ||
int | INCX | ||
) |
Definition at line 680 of file blas_kernels.cu.
__global__ void constrain_kernel | ( | int | N, |
float | ALPHA, | ||
float * | X, | ||
int | INCX | ||
) |
Definition at line 420 of file blas_kernels.cu.
void copy_gpu | ( | int | N, |
float * | X, | ||
int | INCX, | ||
float * | Y, | ||
int | INCY | ||
) |
Definition at line 602 of file blas_kernels.cu.
void copy_gpu_offset | ( | int | N, |
float * | X, | ||
int | OFFX, | ||
int | INCX, | ||
float * | Y, | ||
int | OFFY, | ||
int | INCY | ||
) |
Definition at line 613 of file blas_kernels.cu.
__global__ void copy_kernel | ( | int | N, |
float * | X, | ||
int | OFFX, | ||
int | INCX, | ||
float * | Y, | ||
int | OFFY, | ||
int | INCY | ||
) |
Definition at line 452 of file blas_kernels.cu.
void deinter_gpu | ( | int | NX, |
float * | X, | ||
int | NY, | ||
float * | Y, | ||
int | B, | ||
float * | OUT | ||
) |
Definition at line 876 of file blas_kernels.cu.
__global__ void deinter_kernel | ( | int | NX, |
float * | X, | ||
int | NY, | ||
float * | Y, | ||
int | B, | ||
float * | OUT | ||
) |
Definition at line 862 of file blas_kernels.cu.
void fast_mean_delta_gpu | ( | float * | delta, |
float * | variance, | ||
int | batch, | ||
int | filters, | ||
int | spatial, | ||
float * | mean_delta | ||
) |
Definition at line 326 of file blas_kernels.cu.
__global__ void fast_mean_delta_kernel | ( | float * | delta, |
float * | variance, | ||
int | batch, | ||
int | filters, | ||
int | spatial, | ||
float * | mean_delta | ||
) |
Definition at line 245 of file blas_kernels.cu.
void fast_mean_gpu | ( | float * | x, |
int | batch, | ||
int | filters, | ||
int | spatial, | ||
float * | mean | ||
) |
Definition at line 560 of file blas_kernels.cu.
__global__ void fast_mean_kernel | ( | float * | x, |
int | batch, | ||
int | filters, | ||
int | spatial, | ||
float * | mean | ||
) |
Definition at line 501 of file blas_kernels.cu.
void fast_variance_delta_gpu | ( | float * | x, |
float * | delta, | ||
float * | mean, | ||
float * | variance, | ||
int | batch, | ||
int | filters, | ||
int | spatial, | ||
float * | variance_delta | ||
) |
Definition at line 332 of file blas_kernels.cu.
__global__ void fast_variance_delta_kernel | ( | float * | x, |
float * | delta, | ||
float * | mean, | ||
float * | variance, | ||
int | batch, | ||
int | filters, | ||
int | spatial, | ||
float * | variance_delta | ||
) |
Definition at line 274 of file blas_kernels.cu.
void fast_variance_gpu | ( | float * | x, |
float * | mean, | ||
int | batch, | ||
int | filters, | ||
int | spatial, | ||
float * | variance | ||
) |
Definition at line 566 of file blas_kernels.cu.
__global__ void fast_variance_kernel | ( | float * | x, |
float * | mean, | ||
int | batch, | ||
int | filters, | ||
int | spatial, | ||
float * | variance | ||
) |
Definition at line 530 of file blas_kernels.cu.
void fill_gpu | ( | int | N, |
float | ALPHA, | ||
float * | X, | ||
int | INCX | ||
) |
Definition at line 705 of file blas_kernels.cu.
__global__ void fill_kernel | ( | int | N, |
float | ALPHA, | ||
float * | X, | ||
int | INCX | ||
) |
Definition at line 446 of file blas_kernels.cu.
void flatten_gpu | ( | float * | x, |
int | spatial, | ||
int | layers, | ||
int | batch, | ||
int | forward, | ||
float * | out | ||
) |
Definition at line 636 of file blas_kernels.cu.
__global__ void flatten_kernel | ( | int | N, |
float * | x, | ||
int | spatial, | ||
int | layers, | ||
int | batch, | ||
int | forward, | ||
float * | out | ||
) |
Definition at line 619 of file blas_kernels.cu.
void inter_gpu | ( | int | NX, |
float * | X, | ||
int | NY, | ||
float * | Y, | ||
int | B, | ||
float * | OUT | ||
) |
Definition at line 896 of file blas_kernels.cu.
__global__ void inter_kernel | ( | int | NX, |
float * | X, | ||
int | NY, | ||
float * | Y, | ||
int | B, | ||
float * | OUT | ||
) |
Definition at line 882 of file blas_kernels.cu.
void l1_gpu | ( | int | n, |
float * | pred, | ||
float * | truth, | ||
float * | delta, | ||
float * | error | ||
) |
Definition at line 830 of file blas_kernels.cu.
__global__ void l1_kernel | ( | int | n, |
float * | pred, | ||
float * | truth, | ||
float * | delta, | ||
float * | error | ||
) |
Definition at line 820 of file blas_kernels.cu.
void l2_gpu | ( | int | n, |
float * | pred, | ||
float * | truth, | ||
float * | delta, | ||
float * | error | ||
) |
Definition at line 814 of file blas_kernels.cu.
__global__ void l2_kernel | ( | int | n, |
float * | pred, | ||
float * | truth, | ||
float * | delta, | ||
float * | error | ||
) |
Definition at line 804 of file blas_kernels.cu.
__global__ void l2norm_kernel | ( | int | N, |
float * | x, | ||
float * | dx, | ||
int | batch, | ||
int | filters, | ||
int | spatial | ||
) |
Definition at line 472 of file blas_kernels.cu.
void l2normalize_gpu | ( | float * | x, |
float * | dx, | ||
int | batch, | ||
int | filters, | ||
int | spatial | ||
) |
Definition at line 494 of file blas_kernels.cu.
void logistic_x_ent_gpu | ( | int | n, |
float * | pred, | ||
float * | truth, | ||
float * | delta, | ||
float * | error | ||
) |
Definition at line 798 of file blas_kernels.cu.
__global__ void logistic_x_ent_kernel | ( | int | n, |
float * | pred, | ||
float * | truth, | ||
float * | delta, | ||
float * | error | ||
) |
Definition at line 787 of file blas_kernels.cu.
void mask_gpu | ( | int | N, |
float * | X, | ||
float | mask_num, | ||
float * | mask, | ||
float | val | ||
) |
Definition at line 656 of file blas_kernels.cu.
__global__ void mask_kernel | ( | int | n, |
float * | x, | ||
float | mask_num, | ||
float * | mask, | ||
float | val | ||
) |
Definition at line 650 of file blas_kernels.cu.
void mean_delta_gpu | ( | float * | delta, |
float * | variance, | ||
int | batch, | ||
int | filters, | ||
int | spatial, | ||
float * | mean_delta | ||
) |
Definition at line 320 of file blas_kernels.cu.
__global__ void mean_delta_kernel | ( | float * | delta, |
float * | variance, | ||
int | batch, | ||
int | filters, | ||
int | spatial, | ||
float * | mean_delta | ||
) |
Definition at line 305 of file blas_kernels.cu.
void mean_gpu | ( | float * | x, |
int | batch, | ||
int | filters, | ||
int | spatial, | ||
float * | mean | ||
) |
Definition at line 573 of file blas_kernels.cu.
__global__ void mean_kernel | ( | float * | x, |
int | batch, | ||
int | filters, | ||
int | spatial, | ||
float * | mean | ||
) |
Definition at line 338 of file blas_kernels.cu.
void mul_gpu | ( | int | N, |
float * | X, | ||
int | INCX, | ||
float * | Y, | ||
int | INCY | ||
) |
Definition at line 607 of file blas_kernels.cu.
__global__ void mul_kernel | ( | int | N, |
float * | X, | ||
int | INCX, | ||
float * | Y, | ||
int | INCY | ||
) |
Definition at line 458 of file blas_kernels.cu.
void mult_add_into_gpu | ( | int | num, |
float * | a, | ||
float * | b, | ||
float * | c | ||
) |
Definition at line 932 of file blas_kernels.cu.
__global__ void mult_add_into_kernel | ( | int | n, |
float * | a, | ||
float * | b, | ||
float * | c | ||
) |
Definition at line 924 of file blas_kernels.cu.
void normalize_delta_gpu | ( | float * | x, |
float * | mean, | ||
float * | variance, | ||
float * | mean_delta, | ||
float * | variance_delta, | ||
int | batch, | ||
int | filters, | ||
int | spatial, | ||
float * | delta | ||
) |
Definition at line 212 of file blas_kernels.cu.
__global__ void normalize_delta_kernel | ( | int | N, |
float * | x, | ||
float * | mean, | ||
float * | variance, | ||
float * | mean_delta, | ||
float * | variance_delta, | ||
int | batch, | ||
int | filters, | ||
int | spatial, | ||
float * | delta | ||
) |
Definition at line 203 of file blas_kernels.cu.
void normalize_gpu | ( | float * | x, |
float * | mean, | ||
float * | variance, | ||
int | batch, | ||
int | filters, | ||
int | spatial | ||
) |
Definition at line 465 of file blas_kernels.cu.
__global__ void normalize_kernel | ( | int | N, |
float * | x, | ||
float * | mean, | ||
float * | variance, | ||
int | batch, | ||
int | filters, | ||
int | spatial | ||
) |
Definition at line 194 of file blas_kernels.cu.
void pow_gpu | ( | int | N, |
float | ALPHA, | ||
float * | X, | ||
int | INCX, | ||
float * | Y, | ||
int | INCY | ||
) |
Definition at line 590 of file blas_kernels.cu.
__global__ void pow_kernel | ( | int | N, |
float | ALPHA, | ||
float * | X, | ||
int | INCX, | ||
float * | Y, | ||
int | INCY | ||
) |
Definition at line 408 of file blas_kernels.cu.
void reorg_gpu | ( | float * | x, |
int | w, | ||
int | h, | ||
int | c, | ||
int | batch, | ||
int | stride, | ||
int | forward, | ||
float * | out | ||
) |
Definition at line 643 of file blas_kernels.cu.
__global__ void reorg_kernel | ( | int | N, |
float * | x, | ||
int | w, | ||
int | h, | ||
int | c, | ||
int | batch, | ||
int | stride, | ||
int | forward, | ||
float * | out | ||
) |
Definition at line 370 of file blas_kernels.cu.
void scal_gpu | ( | int | N, |
float | ALPHA, | ||
float * | X, | ||
int | INCX | ||
) |
Definition at line 693 of file blas_kernels.cu.
__global__ void scal_kernel | ( | int | N, |
float | ALPHA, | ||
float * | X, | ||
int | INCX | ||
) |
Definition at line 440 of file blas_kernels.cu.
void scale_bias_gpu | ( | float * | output, |
float * | biases, | ||
int | batch, | ||
int | n, | ||
int | size | ||
) |
Definition at line 21 of file blas_kernels.cu.
__global__ void scale_bias_kernel | ( | float * | output, |
float * | biases, | ||
int | n, | ||
int | size | ||
) |
Definition at line 12 of file blas_kernels.cu.
void scale_mask_gpu | ( | int | N, |
float * | X, | ||
float | mask_num, | ||
float * | mask, | ||
float | scale | ||
) |
Definition at line 668 of file blas_kernels.cu.
__global__ void scale_mask_kernel | ( | int | n, |
float * | x, | ||
float | mask_num, | ||
float * | mask, | ||
float | scale | ||
) |
Definition at line 662 of file blas_kernels.cu.
void shortcut_gpu | ( | int | batch, |
int | w1, | ||
int | h1, | ||
int | c1, | ||
float * | add, | ||
int | w2, | ||
int | h2, | ||
int | c2, | ||
float | s1, | ||
float | s2, | ||
float * | out | ||
) |
Definition at line 729 of file blas_kernels.cu.
__global__ void shortcut_kernel | ( | int | size, |
int | minw, | ||
int | minh, | ||
int | minc, | ||
int | stride, | ||
int | sample, | ||
int | batch, | ||
int | w1, | ||
int | h1, | ||
int | c1, | ||
float * | add, | ||
int | w2, | ||
int | h2, | ||
int | c2, | ||
float | s1, | ||
float | s2, | ||
float * | out | ||
) |
Definition at line 711 of file blas_kernels.cu.
void smooth_l1_gpu | ( | int | n, |
float * | pred, | ||
float * | truth, | ||
float * | delta, | ||
float * | error | ||
) |
Definition at line 764 of file blas_kernels.cu.
__global__ void smooth_l1_kernel | ( | int | n, |
float * | pred, | ||
float * | truth, | ||
float * | delta, | ||
float * | error | ||
) |
Definition at line 747 of file blas_kernels.cu.
__device__ void softmax_device | ( | float * | input, |
int | n, | ||
float | temp, | ||
int | stride, | ||
float * | output | ||
) |
Definition at line 939 of file blas_kernels.cu.
void softmax_gpu | ( | float * | input, |
int | n, | ||
int | batch, | ||
int | batch_offset, | ||
int | groups, | ||
int | group_offset, | ||
int | stride, | ||
float | temp, | ||
float * | output | ||
) |
Definition at line 1000 of file blas_kernels.cu.
__global__ void softmax_kernel | ( | float * | input, |
int | n, | ||
int | batch, | ||
int | batch_offset, | ||
int | groups, | ||
int | group_offset, | ||
int | stride, | ||
float | temp, | ||
float * | output | ||
) |
Definition at line 991 of file blas_kernels.cu.
void softmax_tree | ( | float * | input, |
int | spatial, | ||
int | batch, | ||
int | stride, | ||
float | temp, | ||
float * | output, | ||
tree | hier | ||
) |
Definition at line 972 of file blas_kernels.cu.
__global__ void softmax_tree_kernel | ( | float * | input, |
int | spatial, | ||
int | batch, | ||
int | stride, | ||
float | temp, | ||
float * | output, | ||
int | groups, | ||
int * | group_size, | ||
int * | group_offset | ||
) |
Definition at line 959 of file blas_kernels.cu.
void softmax_x_ent_gpu | ( | int | n, |
float * | pred, | ||
float * | truth, | ||
float * | delta, | ||
float * | error | ||
) |
Definition at line 781 of file blas_kernels.cu.
__global__ void softmax_x_ent_kernel | ( | int | n, |
float * | pred, | ||
float * | truth, | ||
float * | delta, | ||
float * | error | ||
) |
Definition at line 770 of file blas_kernels.cu.
void supp_gpu | ( | int | N, |
float | ALPHA, | ||
float * | X, | ||
int | INCX | ||
) |
Definition at line 699 of file blas_kernels.cu.
__global__ void supp_kernel | ( | int | N, |
float | ALPHA, | ||
float * | X, | ||
int | INCX | ||
) |
Definition at line 426 of file blas_kernels.cu.
void upsample_gpu | ( | float * | in, |
int | w, | ||
int | h, | ||
int | c, | ||
int | batch, | ||
int | stride, | ||
int | forward, | ||
float | scale, | ||
float * | out | ||
) |
Definition at line 1030 of file blas_kernels.cu.
__global__ void upsample_kernel | ( | size_t | N, |
float * | x, | ||
int | w, | ||
int | h, | ||
int | c, | ||
int | batch, | ||
int | stride, | ||
int | forward, | ||
float | scale, | ||
float * | out | ||
) |
Definition at line 1007 of file blas_kernels.cu.
__global__ void variance_delta_kernel | ( | float * | x, |
float * | delta, | ||
float * | mean, | ||
float * | variance, | ||
int | batch, | ||
int | filters, | ||
int | spatial, | ||
float * | variance_delta | ||
) |
Definition at line 219 of file blas_kernels.cu.
void variance_gpu | ( | float * | x, |
float * | mean, | ||
int | batch, | ||
int | filters, | ||
int | spatial, | ||
float * | variance | ||
) |
Definition at line 579 of file blas_kernels.cu.
__global__ void variance_kernel | ( | float * | x, |
float * | mean, | ||
int | batch, | ||
int | filters, | ||
int | spatial, | ||
float * | variance | ||
) |
Definition at line 354 of file blas_kernels.cu.
void weighted_delta_gpu | ( | float * | a, |
float * | b, | ||
float * | s, | ||
float * | da, | ||
float * | db, | ||
float * | ds, | ||
int | num, | ||
float * | dc | ||
) |
Definition at line 918 of file blas_kernels.cu.
__global__ void weighted_delta_kernel | ( | int | n, |
float * | a, | ||
float * | b, | ||
float * | s, | ||
float * | da, | ||
float * | db, | ||
float * | ds, | ||
float * | dc | ||
) |
Definition at line 908 of file blas_kernels.cu.
void weighted_sum_gpu | ( | float * | a, |
float * | b, | ||
float * | s, | ||
int | num, | ||
float * | c | ||
) |
Definition at line 902 of file blas_kernels.cu.
__global__ void weighted_sum_kernel | ( | int | n, |
float * | a, | ||
float * | b, | ||
float * | s, | ||
float * | c | ||
) |
Definition at line 854 of file blas_kernels.cu.
void wgan_gpu | ( | int | n, |
float * | pred, | ||
float * | truth, | ||
float * | delta, | ||
float * | error | ||
) |
Definition at line 845 of file blas_kernels.cu.
__global__ void wgan_kernel | ( | int | n, |
float * | pred, | ||
float * | truth, | ||
float * | delta, | ||
float * | error | ||
) |
Definition at line 836 of file blas_kernels.cu.