|
template<typename PtrList , typename OpFn > |
void | vectorized_binary_kernel (PtrList ptr_, OpFn fn_, size_t i) |
|
template<typename PtrList , typename OpFn > |
void | binary_kernel_launcher (PtrList ptr_, OpFn fn_, size_t size) |
|
template<template< typename > class OpFunctor, typename... Args> |
void | binary_dispatch_handler (BinaryMetaHandler &meta, Args &&... args) |
|
template void | binary_dispatch_handler< AddFunctor > (BinaryMetaHandler &) |
|
template void | binary_dispatch_handler< SubFunctor > (BinaryMetaHandler &) |
|
template void | binary_dispatch_handler< MulFunctor > (BinaryMetaHandler &) |
|
template void | binary_dispatch_handler< DivFunctor > (BinaryMetaHandler &) |
|
template void | binary_dispatch_handler< PowFunctor > (BinaryMetaHandler &) |
|
template void | binary_dispatch_handler< EqFunctor > (BinaryMetaHandler &) |
|
template void | binary_dispatch_handler< NeFunctor > (BinaryMetaHandler &) |
|
template void | binary_dispatch_handler< GeFunctor > (BinaryMetaHandler &) |
|
template void | binary_dispatch_handler< GtFunctor > (BinaryMetaHandler &) |
|
template void | binary_dispatch_handler< LeFunctor > (BinaryMetaHandler &) |
|
template void | binary_dispatch_handler< LtFunctor > (BinaryMetaHandler &) |
|
template<typename PtrList , typename OpFn > |
void | vectorized_expand_kernel (PtrList ptr_, OpFn fn_, size_t i, const CPUOffsetUtil< kNArgs > *align) |
|
template<typename PtrList , typename OpFn > |
void | expand_kernel_launcher (PtrList ptr_, OpFn fn_, size_t size, const CPUOffsetUtil< kNArgs > *align) |
|
template<template< typename > class OpFunctor, typename... Args> |
void | expand_dispatch_handler (BinaryMetaHandler &meta, Args &&... args) |
|
template void | expand_dispatch_handler< AddFunctor > (BinaryMetaHandler &) |
|
template void | expand_dispatch_handler< SubFunctor > (BinaryMetaHandler &) |
|
template void | expand_dispatch_handler< MulFunctor > (BinaryMetaHandler &) |
|
template void | expand_dispatch_handler< DivFunctor > (BinaryMetaHandler &) |
|
template void | expand_dispatch_handler< PowFunctor > (BinaryMetaHandler &) |
|
template void | expand_dispatch_handler< EqFunctor > (BinaryMetaHandler &) |
|
template void | expand_dispatch_handler< NeFunctor > (BinaryMetaHandler &) |
|
template void | expand_dispatch_handler< GeFunctor > (BinaryMetaHandler &) |
|
template void | expand_dispatch_handler< GtFunctor > (BinaryMetaHandler &) |
|
template void | expand_dispatch_handler< LeFunctor > (BinaryMetaHandler &) |
|
template void | expand_dispatch_handler< LtFunctor > (BinaryMetaHandler &) |
|
| LMP_FOR_EACH_CARTESIAN_PRODUCT (DECLARE_EXPAND_OPS_CPU,((add, AddFunctor),(sub, SubFunctor),(mul, MulFunctor),(div, DivFunctor),(pow, PowFunctor),(eq, EqFunctor),(ne, NeFunctor),(le, LeFunctor),(lt, LtFunctor),(ge, GeFunctor),(gt, GtFunctor),)) |
|
| LMP_FOR_EACH_CARTESIAN_PRODUCT (DECLARE_UNARY_OPS_CPU,((neg, NegFunctor),(log, LogFunctor),(exp, ExpFunctor),(sqrt, SqrtFunctor),(abs, AbsFunctor),(sin, SinFunctor),(cos, CosFunctor),(tan, TanFunctor),)) |
|
TensorImpl | clamp_cpu (const TensorImpl &a, Scalar min_val, Scalar max_val) |
|
TensorImpl | transpose_cpu (const TensorImpl &a) |
|
TensorImpl | matmul_cpu (const TensorImpl &a, const TensorImpl &b) |
|
| LMP_FOR_EACH_CARTESIAN_PRODUCT (DECLARE_REDUCT_OPS_CPU,((sum, SumFunctor),(max, MaxFunctor),(min, MinFunctor),(prod, ProdFunctor),)) |
|
| LMP_REGISTER_DISPATCH (ops::add_stub, DeviceType::CPU, add_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::sub_stub, DeviceType::CPU, sub_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::mul_stub, DeviceType::CPU, mul_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::div_stub, DeviceType::CPU, div_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::pow_stub, DeviceType::CPU, pow_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::eq_stub, DeviceType::CPU, eq_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::ne_stub, DeviceType::CPU, ne_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::le_stub, DeviceType::CPU, le_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::lt_stub, DeviceType::CPU, lt_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::ge_stub, DeviceType::CPU, ge_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::gt_stub, DeviceType::CPU, gt_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::neg_stub, DeviceType::CPU, neg_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::abs_stub, DeviceType::CPU, abs_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::clamp_stub, DeviceType::CPU, clamp_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::cos_stub, DeviceType::CPU, cos_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::exp_stub, DeviceType::CPU, exp_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::log_stub, DeviceType::CPU, log_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::sin_stub, DeviceType::CPU, sin_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::sqrt_stub, DeviceType::CPU, sqrt_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::tan_stub, DeviceType::CPU, tan_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::transpose_stub, DeviceType::CPU, transpose_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::matmul_stub, DeviceType::CPU, matmul_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::sum_stub, DeviceType::CPU, sum_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::max_stub, DeviceType::CPU, max_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::min_stub, DeviceType::CPU, min_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::prod_stub, DeviceType::CPU, prod_cpu) |
|
template<typename U , typename V , typename OutType > |
void | cpuMatmulKernel (const U *A, const V *B, OutType *C, size_t m, size_t n, size_t k, size_t i, size_t j) |
|
template<typename T > |
void | cpuTransposeKernel (const T *in, T *out, size_t m, size_t n, size_t i, size_t j) |
|
template<typename U , typename V , typename OutType > |
void | cpuMatMul (const U *A, const V *B, OutType *C, size_t m, size_t n, size_t k) |
|
template<typename T > |
void | cpuTranspose (const T *in, T *out, size_t m, size_t n) |
|
| LMP_FOR_EACH_CARTESIAN_PRODUCT (INSTANTIATE_MATMUL, LMP_LIST_TYPES, LMP_LIST_TYPES, LMP_LIST_TYPES) |
|
| LMP_FOR_EACH_CARTESIAN_PRODUCT (INSTANTIATE_TRANSPOSE, LMP_LIST_TYPES) |
|
void | fill_cpu (void *ptr, size_t size, Scalar t, DataType type) |
|
void | resize_cpu (DataPtr dptr, size_t old_byte_size, size_t new_byte_size) |
|
DataPtr | empty_cpu (size_t byte_size) |
|
| LMP_REGISTER_DISPATCH (ops::empty_stub, DeviceType::CPU, empty_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::resize_stub, DeviceType::CPU, resize_cpu) |
|
| LMP_REGISTER_DISPATCH (ops::fill_stub, DeviceType::CPU, fill_cpu) |
|
void | copy_cpu (DeviceType to_device, const void *src, void *dest, size_t size, DataType src_dtype, DataType dest_dtype) |
|
template<typename U , typename V > |
void | vecCopy (size_t size, const U *in, V *out) |
| Small parallelized copy function using OpenMP.
|
|
| LMP_REGISTER_DISPATCH (ops::copy_stub, DeviceType::CPU, copy_cpu) |
|
| LMP_REGISTER_DISPATCH (offset_util_stub_2, DeviceType::CPU, offset_util_cpu_2) |
|
| LMP_REGISTER_DISPATCH (offset_util_stub_3, DeviceType::CPU, offset_util_cpu_3) |
|
template<typename PtrList , typename OpFn > |
void | vectorized_reduct_kernel (PtrList ptr_, OpFn fn_, size_t i, size_t axis, const size_t *shape, const stride_t *strides) |
|
template<typename PtrList , typename OpFn > |
void | reduct_kernel_launcher (PtrList ptr_, OpFn fn_, size_t size, size_t axis, const size_t *shape, const stride_t *strides, size_t) |
|
template<template< typename > class OpFunctor, typename... Args> |
void | reduct_dispatch_handler (ReductMetaHandler &meta, size_t axis, Args &&... args) |
|
template void | reduct_dispatch_handler< SumFunctor > (ReductMetaHandler &, size_t) |
|
template void | reduct_dispatch_handler< MaxFunctor > (ReductMetaHandler &, size_t) |
|
template void | reduct_dispatch_handler< MinFunctor > (ReductMetaHandler &, size_t) |
|
template void | reduct_dispatch_handler< ProdFunctor > (ReductMetaHandler &, size_t) |
|
template<typename PtrList , typename OpFn > |
void | vectorized_unary_kernel (PtrList ptr_, OpFn fn_, size_t i) |
|
template<typename PtrList , typename OpFn > |
void | unary_kernel_launcher (PtrList ptr_, OpFn fn_, size_t size) |
|
template<template< typename > class OpFunctor, typename... Args> |
void | unary_dispatch_handler (UnaryMetaHandler &meta, Args &&... args) |
|
template void | unary_dispatch_handler< NegFunctor > (UnaryMetaHandler &) |
|
template void | unary_dispatch_handler< ExpFunctor > (UnaryMetaHandler &) |
|
template void | unary_dispatch_handler< LogFunctor > (UnaryMetaHandler &) |
|
template void | unary_dispatch_handler< SqrtFunctor > (UnaryMetaHandler &) |
|
template void | unary_dispatch_handler< AbsFunctor > (UnaryMetaHandler &) |
|
template void | unary_dispatch_handler< SinFunctor > (UnaryMetaHandler &) |
|
template void | unary_dispatch_handler< CosFunctor > (UnaryMetaHandler &) |
|
template void | unary_dispatch_handler< TanFunctor > (UnaryMetaHandler &) |
|
template void | unary_dispatch_handler< ClampFunctor > (UnaryMetaHandler &, Scalar &, Scalar &) |
|
TensorImpl | add_cpu (const TensorImpl &a, const TensorImpl &b) |
|
TensorImpl | sub_cpu (const TensorImpl &a, const TensorImpl &b) |
|
TensorImpl | mul_cpu (const TensorImpl &a, const TensorImpl &b) |
|
TensorImpl | div_cpu (const TensorImpl &a, const TensorImpl &b) |
|
TensorImpl | pow_cpu (const TensorImpl &a, const TensorImpl &b) |
|
TensorImpl | eq_cpu (const TensorImpl &a, const TensorImpl &b) |
|
TensorImpl | ne_cpu (const TensorImpl &a, const TensorImpl &b) |
|
TensorImpl | le_cpu (const TensorImpl &a, const TensorImpl &b) |
|
TensorImpl | lt_cpu (const TensorImpl &a, const TensorImpl &b) |
|
TensorImpl | ge_cpu (const TensorImpl &a, const TensorImpl &b) |
|
TensorImpl | gt_cpu (const TensorImpl &a, const TensorImpl &b) |
|
TensorImpl | neg_cpu (const TensorImpl &a) |
|
TensorImpl | log_cpu (const TensorImpl &a) |
|
TensorImpl | exp_cpu (const TensorImpl &a) |
|
TensorImpl | sqrt_cpu (const TensorImpl &a) |
|
TensorImpl | abs_cpu (const TensorImpl &a) |
|
TensorImpl | sin_cpu (const TensorImpl &a) |
|
TensorImpl | cos_cpu (const TensorImpl &a) |
|
TensorImpl | tan_cpu (const TensorImpl &a) |
|
TensorImpl | sum_cpu (const TensorImpl &a, size_t axis) |
|
TensorImpl | max_cpu (const TensorImpl &a, size_t axis) |
|
TensorImpl | min_cpu (const TensorImpl &a, size_t axis) |
|
TensorImpl | prod_cpu (const TensorImpl &a, size_t axis) |
|
template<typename U , typename V , typename OutType > |
void | cpuMatmulKernel (const U *A, const V *B, OutType *C, size_t m, size_t n, size_t k) |
|
template<typename T > |
void | cpuTransposeKernel (const T *in, T *out, size_t m, size_t n) |
|
template<size_t NArgs> |
std::unique_ptr< OffsetUtil > | offset_util_cpu (::std::array< const TensorImpl *, NArgs > ins, const TensorImpl &out) |
|