darknet v3
Functions
blas_kernels.cu File Reference
#include "cuda_runtime.h"
#include "curand.h"
#include "cublas_v2.h"
#include <assert.h>
#include "blas.h"
#include "cuda.h"
#include "utils.h"
Include dependency graph for blas_kernels.cu (graph image not reproduced in this text version; the direct includes are listed above).

Go to the source code of this file.

Functions

__global__ void scale_bias_kernel (float *output, float *biases, int n, int size)
 
void scale_bias_gpu (float *output, float *biases, int batch, int n, int size)
 
__global__ void backward_scale_kernel (float *x_norm, float *delta, int batch, int n, int size, float *scale_updates)
 
void backward_scale_gpu (float *x_norm, float *delta, int batch, int n, int size, float *scale_updates)
 
__global__ void add_bias_kernel (float *output, float *biases, int batch, int n, int size)
 
void add_bias_gpu (float *output, float *biases, int batch, int n, int size)
 
__global__ void backward_bias_conn_kernel (float *bias_updates, float *delta, int batch, int n)
 
__global__ void backward_bias_kernel (float *bias_updates, float *delta, int batch, int n, int size)
 
void backward_bias_gpu (float *bias_updates, float *delta, int batch, int n, int size)
 
__global__ void adam_kernel (int N, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t)
 
void adam_gpu (int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t)
 
void adam_update_gpu (float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t)
 
__global__ void normalize_kernel (int N, float *x, float *mean, float *variance, int batch, int filters, int spatial)
 
__global__ void normalize_delta_kernel (int N, float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
 
void normalize_delta_gpu (float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta)
 
__global__ void variance_delta_kernel (float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
 
__global__ void accumulate_kernel (float *x, int n, int groups, float *sum)
 
__global__ void fast_mean_delta_kernel (float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
 
__global__ void fast_variance_delta_kernel (float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
 
__global__ void mean_delta_kernel (float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
 
void mean_delta_gpu (float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
 
void fast_mean_delta_gpu (float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta)
 
void fast_variance_delta_gpu (float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta)
 
__global__ void mean_kernel (float *x, int batch, int filters, int spatial, float *mean)
 
__global__ void variance_kernel (float *x, float *mean, int batch, int filters, int spatial, float *variance)
 
__global__ void reorg_kernel (int N, float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
 
__global__ void axpy_kernel (int N, float ALPHA, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
 
__global__ void pow_kernel (int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
 
__global__ void const_kernel (int N, float ALPHA, float *X, int INCX)
 
__global__ void constrain_kernel (int N, float ALPHA, float *X, int INCX)
 
__global__ void supp_kernel (int N, float ALPHA, float *X, int INCX)
 
__global__ void add_kernel (int N, float ALPHA, float *X, int INCX)
 
__global__ void scal_kernel (int N, float ALPHA, float *X, int INCX)
 
__global__ void fill_kernel (int N, float ALPHA, float *X, int INCX)
 
__global__ void copy_kernel (int N, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
 
__global__ void mul_kernel (int N, float *X, int INCX, float *Y, int INCY)
 
void normalize_gpu (float *x, float *mean, float *variance, int batch, int filters, int spatial)
 
__global__ void l2norm_kernel (int N, float *x, float *dx, int batch, int filters, int spatial)
 
void l2normalize_gpu (float *x, float *dx, int batch, int filters, int spatial)
 
__global__ void fast_mean_kernel (float *x, int batch, int filters, int spatial, float *mean)
 
__global__ void fast_variance_kernel (float *x, float *mean, int batch, int filters, int spatial, float *variance)
 
void fast_mean_gpu (float *x, int batch, int filters, int spatial, float *mean)
 
void fast_variance_gpu (float *x, float *mean, int batch, int filters, int spatial, float *variance)
 
void mean_gpu (float *x, int batch, int filters, int spatial, float *mean)
 
void variance_gpu (float *x, float *mean, int batch, int filters, int spatial, float *variance)
 
void axpy_gpu (int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
 
void pow_gpu (int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
 
void axpy_gpu_offset (int N, float ALPHA, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
 
void copy_gpu (int N, float *X, int INCX, float *Y, int INCY)
 
void mul_gpu (int N, float *X, int INCX, float *Y, int INCY)
 
void copy_gpu_offset (int N, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
 
__global__ void flatten_kernel (int N, float *x, int spatial, int layers, int batch, int forward, float *out)
 
void flatten_gpu (float *x, int spatial, int layers, int batch, int forward, float *out)
 
void reorg_gpu (float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
 
__global__ void mask_kernel (int n, float *x, float mask_num, float *mask, float val)
 
void mask_gpu (int N, float *X, float mask_num, float *mask, float val)
 
__global__ void scale_mask_kernel (int n, float *x, float mask_num, float *mask, float scale)
 
void scale_mask_gpu (int N, float *X, float mask_num, float *mask, float scale)
 
void const_gpu (int N, float ALPHA, float *X, int INCX)
 
void constrain_gpu (int N, float ALPHA, float *X, int INCX)
 
void add_gpu (int N, float ALPHA, float *X, int INCX)
 
void scal_gpu (int N, float ALPHA, float *X, int INCX)
 
void supp_gpu (int N, float ALPHA, float *X, int INCX)
 
void fill_gpu (int N, float ALPHA, float *X, int INCX)
 
__global__ void shortcut_kernel (int size, int minw, int minh, int minc, int stride, int sample, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out)
 
void shortcut_gpu (int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float s1, float s2, float *out)
 
__global__ void smooth_l1_kernel (int n, float *pred, float *truth, float *delta, float *error)
 
void smooth_l1_gpu (int n, float *pred, float *truth, float *delta, float *error)
 
__global__ void softmax_x_ent_kernel (int n, float *pred, float *truth, float *delta, float *error)
 
void softmax_x_ent_gpu (int n, float *pred, float *truth, float *delta, float *error)
 
__global__ void logistic_x_ent_kernel (int n, float *pred, float *truth, float *delta, float *error)
 
void logistic_x_ent_gpu (int n, float *pred, float *truth, float *delta, float *error)
 
__global__ void l2_kernel (int n, float *pred, float *truth, float *delta, float *error)
 
void l2_gpu (int n, float *pred, float *truth, float *delta, float *error)
 
__global__ void l1_kernel (int n, float *pred, float *truth, float *delta, float *error)
 
void l1_gpu (int n, float *pred, float *truth, float *delta, float *error)
 
__global__ void wgan_kernel (int n, float *pred, float *truth, float *delta, float *error)
 
void wgan_gpu (int n, float *pred, float *truth, float *delta, float *error)
 
__global__ void weighted_sum_kernel (int n, float *a, float *b, float *s, float *c)
 
__global__ void deinter_kernel (int NX, float *X, int NY, float *Y, int B, float *OUT)
 
void deinter_gpu (int NX, float *X, int NY, float *Y, int B, float *OUT)
 
__global__ void inter_kernel (int NX, float *X, int NY, float *Y, int B, float *OUT)
 
void inter_gpu (int NX, float *X, int NY, float *Y, int B, float *OUT)
 
void weighted_sum_gpu (float *a, float *b, float *s, int num, float *c)
 
__global__ void weighted_delta_kernel (int n, float *a, float *b, float *s, float *da, float *db, float *ds, float *dc)
 
void weighted_delta_gpu (float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc)
 
__global__ void mult_add_into_kernel (int n, float *a, float *b, float *c)
 
void mult_add_into_gpu (int num, float *a, float *b, float *c)
 
__device__ void softmax_device (float *input, int n, float temp, int stride, float *output)
 
__global__ void softmax_tree_kernel (float *input, int spatial, int batch, int stride, float temp, float *output, int groups, int *group_size, int *group_offset)
 
void softmax_tree (float *input, int spatial, int batch, int stride, float temp, float *output, tree hier)
 
__global__ void softmax_kernel (float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
 
void softmax_gpu (float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
 
__global__ void upsample_kernel (size_t N, float *x, int w, int h, int c, int batch, int stride, int forward, float scale, float *out)
 
void upsample_gpu (float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out)
 

Function Documentation

◆ accumulate_kernel()

__global__ void accumulate_kernel ( float *  x,
int  n,
int  groups,
float *  sum 
)

Definition at line 234 of file blas_kernels.cu.

◆ adam_gpu()

void adam_gpu ( int  n,
float *  x,
float *  m,
float *  v,
float  B1,
float  B2,
float  rate,
float  eps,
int  t 
)

Definition at line 174 of file blas_kernels.cu.

◆ adam_kernel()

__global__ void adam_kernel ( int  N,
float *  x,
float *  m,
float *  v,
float  B1,
float  B2,
float  rate,
float  eps,
int  t 
)

Definition at line 163 of file blas_kernels.cu.

◆ adam_update_gpu()

void adam_update_gpu ( float *  w,
float *  d,
float *  m,
float *  v,
float  B1,
float  B2,
float  eps,
float  decay,
float  rate,
int  n,
int  batch,
int  t 
)

Definition at line 180 of file blas_kernels.cu.

◆ add_bias_gpu()

void add_bias_gpu ( float *  output,
float *  biases,
int  batch,
int  n,
int  size 
)

Definition at line 69 of file blas_kernels.cu.

◆ add_bias_kernel()

__global__ void add_bias_kernel ( float *  output,
float *  biases,
int  batch,
int  n,
int  size 
)

Definition at line 56 of file blas_kernels.cu.

◆ add_gpu()

void add_gpu ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)

Definition at line 687 of file blas_kernels.cu.

◆ add_kernel()

__global__ void add_kernel ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)

Definition at line 434 of file blas_kernels.cu.

◆ axpy_gpu()

void axpy_gpu ( int  N,
float  ALPHA,
float *  X,
int  INCX,
float *  Y,
int  INCY 
)

Definition at line 585 of file blas_kernels.cu.

◆ axpy_gpu_offset()

void axpy_gpu_offset ( int  N,
float  ALPHA,
float *  X,
int  OFFX,
int  INCX,
float *  Y,
int  OFFY,
int  INCY 
)

Definition at line 596 of file blas_kernels.cu.

◆ axpy_kernel()

__global__ void axpy_kernel ( int  N,
float  ALPHA,
float *  X,
int  OFFX,
int  INCX,
float *  Y,
int  OFFY,
int  INCY 
)

Definition at line 402 of file blas_kernels.cu.

◆ backward_bias_conn_kernel()

__global__ void backward_bias_conn_kernel ( float *  bias_updates,
float *  delta,
int  batch,
int  n 
)

Definition at line 77 of file blas_kernels.cu.

◆ backward_bias_gpu()

void backward_bias_gpu ( float *  bias_updates,
float *  delta,
int  batch,
int  n,
int  size 
)

Definition at line 110 of file blas_kernels.cu.

◆ backward_bias_kernel()

__global__ void backward_bias_kernel ( float *  bias_updates,
float *  delta,
int  batch,
int  n,
int  size 
)

Definition at line 90 of file blas_kernels.cu.

◆ backward_scale_gpu()

void backward_scale_gpu ( float *  x_norm,
float *  delta,
int  batch,
int  n,
int  size,
float *  scale_updates 
)

Definition at line 50 of file blas_kernels.cu.

◆ backward_scale_kernel()

__global__ void backward_scale_kernel ( float *  x_norm,
float *  delta,
int  batch,
int  n,
int  size,
float *  scale_updates 
)

Definition at line 30 of file blas_kernels.cu.

◆ const_gpu()

void const_gpu ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)

Definition at line 674 of file blas_kernels.cu.

◆ const_kernel()

__global__ void const_kernel ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)

Definition at line 414 of file blas_kernels.cu.

◆ constrain_gpu()

void constrain_gpu ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)

Definition at line 680 of file blas_kernels.cu.

◆ constrain_kernel()

__global__ void constrain_kernel ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)

Definition at line 420 of file blas_kernels.cu.

◆ copy_gpu()

void copy_gpu ( int  N,
float *  X,
int  INCX,
float *  Y,
int  INCY 
)

Definition at line 602 of file blas_kernels.cu.

◆ copy_gpu_offset()

void copy_gpu_offset ( int  N,
float *  X,
int  OFFX,
int  INCX,
float *  Y,
int  OFFY,
int  INCY 
)

Definition at line 613 of file blas_kernels.cu.

◆ copy_kernel()

__global__ void copy_kernel ( int  N,
float *  X,
int  OFFX,
int  INCX,
float *  Y,
int  OFFY,
int  INCY 
)

Definition at line 452 of file blas_kernels.cu.

◆ deinter_gpu()

void deinter_gpu ( int  NX,
float *  X,
int  NY,
float *  Y,
int  B,
float *  OUT 
)

Definition at line 876 of file blas_kernels.cu.

◆ deinter_kernel()

__global__ void deinter_kernel ( int  NX,
float *  X,
int  NY,
float *  Y,
int  B,
float *  OUT 
)

Definition at line 862 of file blas_kernels.cu.

◆ fast_mean_delta_gpu()

void fast_mean_delta_gpu ( float *  delta,
float *  variance,
int  batch,
int  filters,
int  spatial,
float *  mean_delta 
)

Definition at line 326 of file blas_kernels.cu.

◆ fast_mean_delta_kernel()

__global__ void fast_mean_delta_kernel ( float *  delta,
float *  variance,
int  batch,
int  filters,
int  spatial,
float *  mean_delta 
)

Definition at line 245 of file blas_kernels.cu.

◆ fast_mean_gpu()

void fast_mean_gpu ( float *  x,
int  batch,
int  filters,
int  spatial,
float *  mean 
)

Definition at line 560 of file blas_kernels.cu.

◆ fast_mean_kernel()

__global__ void fast_mean_kernel ( float *  x,
int  batch,
int  filters,
int  spatial,
float *  mean 
)

Definition at line 501 of file blas_kernels.cu.

◆ fast_variance_delta_gpu()

void fast_variance_delta_gpu ( float *  x,
float *  delta,
float *  mean,
float *  variance,
int  batch,
int  filters,
int  spatial,
float *  variance_delta 
)

Definition at line 332 of file blas_kernels.cu.

◆ fast_variance_delta_kernel()

__global__ void fast_variance_delta_kernel ( float *  x,
float *  delta,
float *  mean,
float *  variance,
int  batch,
int  filters,
int  spatial,
float *  variance_delta 
)

Definition at line 274 of file blas_kernels.cu.

◆ fast_variance_gpu()

void fast_variance_gpu ( float *  x,
float *  mean,
int  batch,
int  filters,
int  spatial,
float *  variance 
)

Definition at line 566 of file blas_kernels.cu.

◆ fast_variance_kernel()

__global__ void fast_variance_kernel ( float *  x,
float *  mean,
int  batch,
int  filters,
int  spatial,
float *  variance 
)

Definition at line 530 of file blas_kernels.cu.

◆ fill_gpu()

void fill_gpu ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)

Definition at line 705 of file blas_kernels.cu.

◆ fill_kernel()

__global__ void fill_kernel ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)

Definition at line 446 of file blas_kernels.cu.

◆ flatten_gpu()

void flatten_gpu ( float *  x,
int  spatial,
int  layers,
int  batch,
int  forward,
float *  out 
)

Definition at line 636 of file blas_kernels.cu.

◆ flatten_kernel()

__global__ void flatten_kernel ( int  N,
float *  x,
int  spatial,
int  layers,
int  batch,
int  forward,
float *  out 
)

Definition at line 619 of file blas_kernels.cu.

◆ inter_gpu()

void inter_gpu ( int  NX,
float *  X,
int  NY,
float *  Y,
int  B,
float *  OUT 
)

Definition at line 896 of file blas_kernels.cu.

◆ inter_kernel()

__global__ void inter_kernel ( int  NX,
float *  X,
int  NY,
float *  Y,
int  B,
float *  OUT 
)

Definition at line 882 of file blas_kernels.cu.

◆ l1_gpu()

void l1_gpu ( int  n,
float *  pred,
float *  truth,
float *  delta,
float *  error 
)

Definition at line 830 of file blas_kernels.cu.

◆ l1_kernel()

__global__ void l1_kernel ( int  n,
float *  pred,
float *  truth,
float *  delta,
float *  error 
)

Definition at line 820 of file blas_kernels.cu.

◆ l2_gpu()

void l2_gpu ( int  n,
float *  pred,
float *  truth,
float *  delta,
float *  error 
)

Definition at line 814 of file blas_kernels.cu.

◆ l2_kernel()

__global__ void l2_kernel ( int  n,
float *  pred,
float *  truth,
float *  delta,
float *  error 
)

Definition at line 804 of file blas_kernels.cu.

◆ l2norm_kernel()

__global__ void l2norm_kernel ( int  N,
float *  x,
float *  dx,
int  batch,
int  filters,
int  spatial 
)

Definition at line 472 of file blas_kernels.cu.

◆ l2normalize_gpu()

void l2normalize_gpu ( float *  x,
float *  dx,
int  batch,
int  filters,
int  spatial 
)

Definition at line 494 of file blas_kernels.cu.

◆ logistic_x_ent_gpu()

void logistic_x_ent_gpu ( int  n,
float *  pred,
float *  truth,
float *  delta,
float *  error 
)

Definition at line 798 of file blas_kernels.cu.

◆ logistic_x_ent_kernel()

__global__ void logistic_x_ent_kernel ( int  n,
float *  pred,
float *  truth,
float *  delta,
float *  error 
)

Definition at line 787 of file blas_kernels.cu.

◆ mask_gpu()

void mask_gpu ( int  N,
float *  X,
float  mask_num,
float *  mask,
float  val 
)

Definition at line 656 of file blas_kernels.cu.

◆ mask_kernel()

__global__ void mask_kernel ( int  n,
float *  x,
float  mask_num,
float *  mask,
float  val 
)

Definition at line 650 of file blas_kernels.cu.

◆ mean_delta_gpu()

void mean_delta_gpu ( float *  delta,
float *  variance,
int  batch,
int  filters,
int  spatial,
float *  mean_delta 
)

Definition at line 320 of file blas_kernels.cu.

◆ mean_delta_kernel()

__global__ void mean_delta_kernel ( float *  delta,
float *  variance,
int  batch,
int  filters,
int  spatial,
float *  mean_delta 
)

Definition at line 305 of file blas_kernels.cu.

◆ mean_gpu()

void mean_gpu ( float *  x,
int  batch,
int  filters,
int  spatial,
float *  mean 
)

Definition at line 573 of file blas_kernels.cu.

◆ mean_kernel()

__global__ void mean_kernel ( float *  x,
int  batch,
int  filters,
int  spatial,
float *  mean 
)

Definition at line 338 of file blas_kernels.cu.

◆ mul_gpu()

void mul_gpu ( int  N,
float *  X,
int  INCX,
float *  Y,
int  INCY 
)

Definition at line 607 of file blas_kernels.cu.

◆ mul_kernel()

__global__ void mul_kernel ( int  N,
float *  X,
int  INCX,
float *  Y,
int  INCY 
)

Definition at line 458 of file blas_kernels.cu.

◆ mult_add_into_gpu()

void mult_add_into_gpu ( int  num,
float *  a,
float *  b,
float *  c 
)

Definition at line 932 of file blas_kernels.cu.

◆ mult_add_into_kernel()

__global__ void mult_add_into_kernel ( int  n,
float *  a,
float *  b,
float *  c 
)

Definition at line 924 of file blas_kernels.cu.

◆ normalize_delta_gpu()

void normalize_delta_gpu ( float *  x,
float *  mean,
float *  variance,
float *  mean_delta,
float *  variance_delta,
int  batch,
int  filters,
int  spatial,
float *  delta 
)

Definition at line 212 of file blas_kernels.cu.

◆ normalize_delta_kernel()

__global__ void normalize_delta_kernel ( int  N,
float *  x,
float *  mean,
float *  variance,
float *  mean_delta,
float *  variance_delta,
int  batch,
int  filters,
int  spatial,
float *  delta 
)

Definition at line 203 of file blas_kernels.cu.

◆ normalize_gpu()

void normalize_gpu ( float *  x,
float *  mean,
float *  variance,
int  batch,
int  filters,
int  spatial 
)

Definition at line 465 of file blas_kernels.cu.

◆ normalize_kernel()

__global__ void normalize_kernel ( int  N,
float *  x,
float *  mean,
float *  variance,
int  batch,
int  filters,
int  spatial 
)

Definition at line 194 of file blas_kernels.cu.

◆ pow_gpu()

void pow_gpu ( int  N,
float  ALPHA,
float *  X,
int  INCX,
float *  Y,
int  INCY 
)

Definition at line 590 of file blas_kernels.cu.

◆ pow_kernel()

__global__ void pow_kernel ( int  N,
float  ALPHA,
float *  X,
int  INCX,
float *  Y,
int  INCY 
)

Definition at line 408 of file blas_kernels.cu.

◆ reorg_gpu()

void reorg_gpu ( float *  x,
int  w,
int  h,
int  c,
int  batch,
int  stride,
int  forward,
float *  out 
)

Definition at line 643 of file blas_kernels.cu.

◆ reorg_kernel()

__global__ void reorg_kernel ( int  N,
float *  x,
int  w,
int  h,
int  c,
int  batch,
int  stride,
int  forward,
float *  out 
)

Definition at line 370 of file blas_kernels.cu.

◆ scal_gpu()

void scal_gpu ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)

Definition at line 693 of file blas_kernels.cu.

◆ scal_kernel()

__global__ void scal_kernel ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)

Definition at line 440 of file blas_kernels.cu.

◆ scale_bias_gpu()

void scale_bias_gpu ( float *  output,
float *  biases,
int  batch,
int  n,
int  size 
)

Definition at line 21 of file blas_kernels.cu.

◆ scale_bias_kernel()

__global__ void scale_bias_kernel ( float *  output,
float *  biases,
int  n,
int  size 
)

Definition at line 12 of file blas_kernels.cu.

◆ scale_mask_gpu()

void scale_mask_gpu ( int  N,
float *  X,
float  mask_num,
float *  mask,
float  scale 
)

Definition at line 668 of file blas_kernels.cu.

◆ scale_mask_kernel()

__global__ void scale_mask_kernel ( int  n,
float *  x,
float  mask_num,
float *  mask,
float  scale 
)

Definition at line 662 of file blas_kernels.cu.

◆ shortcut_gpu()

void shortcut_gpu ( int  batch,
int  w1,
int  h1,
int  c1,
float *  add,
int  w2,
int  h2,
int  c2,
float  s1,
float  s2,
float *  out 
)

Definition at line 729 of file blas_kernels.cu.

◆ shortcut_kernel()

__global__ void shortcut_kernel ( int  size,
int  minw,
int  minh,
int  minc,
int  stride,
int  sample,
int  batch,
int  w1,
int  h1,
int  c1,
float *  add,
int  w2,
int  h2,
int  c2,
float  s1,
float  s2,
float *  out 
)

Definition at line 711 of file blas_kernels.cu.

◆ smooth_l1_gpu()

void smooth_l1_gpu ( int  n,
float *  pred,
float *  truth,
float *  delta,
float *  error 
)

Definition at line 764 of file blas_kernels.cu.

◆ smooth_l1_kernel()

__global__ void smooth_l1_kernel ( int  n,
float *  pred,
float *  truth,
float *  delta,
float *  error 
)

Definition at line 747 of file blas_kernels.cu.

◆ softmax_device()

__device__ void softmax_device ( float *  input,
int  n,
float  temp,
int  stride,
float *  output 
)

Definition at line 939 of file blas_kernels.cu.

◆ softmax_gpu()

void softmax_gpu ( float *  input,
int  n,
int  batch,
int  batch_offset,
int  groups,
int  group_offset,
int  stride,
float  temp,
float *  output 
)

Definition at line 1000 of file blas_kernels.cu.

◆ softmax_kernel()

__global__ void softmax_kernel ( float *  input,
int  n,
int  batch,
int  batch_offset,
int  groups,
int  group_offset,
int  stride,
float  temp,
float *  output 
)

Definition at line 991 of file blas_kernels.cu.

◆ softmax_tree()

void softmax_tree ( float *  input,
int  spatial,
int  batch,
int  stride,
float  temp,
float *  output,
tree  hier 
)

Definition at line 972 of file blas_kernels.cu.

◆ softmax_tree_kernel()

__global__ void softmax_tree_kernel ( float *  input,
int  spatial,
int  batch,
int  stride,
float  temp,
float *  output,
int  groups,
int *  group_size,
int *  group_offset 
)

Definition at line 959 of file blas_kernels.cu.

◆ softmax_x_ent_gpu()

void softmax_x_ent_gpu ( int  n,
float *  pred,
float *  truth,
float *  delta,
float *  error 
)

Definition at line 781 of file blas_kernels.cu.

◆ softmax_x_ent_kernel()

__global__ void softmax_x_ent_kernel ( int  n,
float *  pred,
float *  truth,
float *  delta,
float *  error 
)

Definition at line 770 of file blas_kernels.cu.

◆ supp_gpu()

void supp_gpu ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)

Definition at line 699 of file blas_kernels.cu.

◆ supp_kernel()

__global__ void supp_kernel ( int  N,
float  ALPHA,
float *  X,
int  INCX 
)

Definition at line 426 of file blas_kernels.cu.

◆ upsample_gpu()

void upsample_gpu ( float *  in,
int  w,
int  h,
int  c,
int  batch,
int  stride,
int  forward,
float  scale,
float *  out 
)

Definition at line 1030 of file blas_kernels.cu.

◆ upsample_kernel()

__global__ void upsample_kernel ( size_t  N,
float *  x,
int  w,
int  h,
int  c,
int  batch,
int  stride,
int  forward,
float  scale,
float *  out 
)

Definition at line 1007 of file blas_kernels.cu.

◆ variance_delta_kernel()

__global__ void variance_delta_kernel ( float *  x,
float *  delta,
float *  mean,
float *  variance,
int  batch,
int  filters,
int  spatial,
float *  variance_delta 
)

Definition at line 219 of file blas_kernels.cu.

◆ variance_gpu()

void variance_gpu ( float *  x,
float *  mean,
int  batch,
int  filters,
int  spatial,
float *  variance 
)

Definition at line 579 of file blas_kernels.cu.

◆ variance_kernel()

__global__ void variance_kernel ( float *  x,
float *  mean,
int  batch,
int  filters,
int  spatial,
float *  variance 
)

Definition at line 354 of file blas_kernels.cu.

◆ weighted_delta_gpu()

void weighted_delta_gpu ( float *  a,
float *  b,
float *  s,
float *  da,
float *  db,
float *  ds,
int  num,
float *  dc 
)

Definition at line 918 of file blas_kernels.cu.

◆ weighted_delta_kernel()

__global__ void weighted_delta_kernel ( int  n,
float *  a,
float *  b,
float *  s,
float *  da,
float *  db,
float *  ds,
float *  dc 
)

Definition at line 908 of file blas_kernels.cu.

◆ weighted_sum_gpu()

void weighted_sum_gpu ( float *  a,
float *  b,
float *  s,
int  num,
float *  c 
)

Definition at line 902 of file blas_kernels.cu.

◆ weighted_sum_kernel()

__global__ void weighted_sum_kernel ( int  n,
float *  a,
float *  b,
float *  s,
float *  c 
)

Definition at line 854 of file blas_kernels.cu.

◆ wgan_gpu()

void wgan_gpu ( int  n,
float *  pred,
float *  truth,
float *  delta,
float *  error 
)

Definition at line 845 of file blas_kernels.cu.

◆ wgan_kernel()

__global__ void wgan_kernel ( int  n,
float *  pred,
float *  truth,
float *  delta,
float *  error 
)

Definition at line 836 of file blas_kernels.cu.