lamppp
Loading...
Searching...
No Matches
Classes | Functions | Variables
lmp::tensor::detail::cpu Namespace Reference

Classes

struct  AbsFunctor
 
struct  AddFunctor
 
struct  ClampFunctor
 
struct  CosFunctor
 
class  CPUOffsetUtil
 
struct  DivFunctor
 
struct  EqFunctor
 
struct  ExpFunctor
 
struct  GeFunctor
 
struct  GtFunctor
 
struct  LeFunctor
 
struct  LogFunctor
 
struct  LtFunctor
 
struct  MaxFunctor
 
struct  MinFunctor
 
struct  MulFunctor
 
struct  NeFunctor
 
struct  NegFunctor
 
struct  PowFunctor
 
struct  ProdFunctor
 
struct  SinFunctor
 
struct  SqrtFunctor
 
struct  SubFunctor
 
struct  SumFunctor
 
struct  TanFunctor
 

Functions

template<typename PtrList , typename OpFn >
void vectorized_binary_kernel (PtrList ptr_, OpFn fn_, size_t i)
 
template<typename PtrList , typename OpFn >
void binary_kernel_launcher (PtrList ptr_, OpFn fn_, size_t size)
 
template<template< typename > class OpFunctor, typename... Args>
void binary_dispatch_handler (BinaryMetaHandler &meta, Args &&... args)
 
template void binary_dispatch_handler< AddFunctor > (BinaryMetaHandler &)
 
template void binary_dispatch_handler< SubFunctor > (BinaryMetaHandler &)
 
template void binary_dispatch_handler< MulFunctor > (BinaryMetaHandler &)
 
template void binary_dispatch_handler< DivFunctor > (BinaryMetaHandler &)
 
template void binary_dispatch_handler< PowFunctor > (BinaryMetaHandler &)
 
template void binary_dispatch_handler< EqFunctor > (BinaryMetaHandler &)
 
template void binary_dispatch_handler< NeFunctor > (BinaryMetaHandler &)
 
template void binary_dispatch_handler< GeFunctor > (BinaryMetaHandler &)
 
template void binary_dispatch_handler< GtFunctor > (BinaryMetaHandler &)
 
template void binary_dispatch_handler< LeFunctor > (BinaryMetaHandler &)
 
template void binary_dispatch_handler< LtFunctor > (BinaryMetaHandler &)
 
template<typename PtrList , typename OpFn >
void vectorized_expand_kernel (PtrList ptr_, OpFn fn_, size_t i, const CPUOffsetUtil< kNArgs > *align)
 
template<typename PtrList , typename OpFn >
void expand_kernel_launcher (PtrList ptr_, OpFn fn_, size_t size, const CPUOffsetUtil< kNArgs > *align)
 
template<template< typename > class OpFunctor, typename... Args>
void expand_dispatch_handler (BinaryMetaHandler &meta, Args &&... args)
 
template void expand_dispatch_handler< AddFunctor > (BinaryMetaHandler &)
 
template void expand_dispatch_handler< SubFunctor > (BinaryMetaHandler &)
 
template void expand_dispatch_handler< MulFunctor > (BinaryMetaHandler &)
 
template void expand_dispatch_handler< DivFunctor > (BinaryMetaHandler &)
 
template void expand_dispatch_handler< PowFunctor > (BinaryMetaHandler &)
 
template void expand_dispatch_handler< EqFunctor > (BinaryMetaHandler &)
 
template void expand_dispatch_handler< NeFunctor > (BinaryMetaHandler &)
 
template void expand_dispatch_handler< GeFunctor > (BinaryMetaHandler &)
 
template void expand_dispatch_handler< GtFunctor > (BinaryMetaHandler &)
 
template void expand_dispatch_handler< LeFunctor > (BinaryMetaHandler &)
 
template void expand_dispatch_handler< LtFunctor > (BinaryMetaHandler &)
 
 LMP_FOR_EACH_CARTESIAN_PRODUCT (DECLARE_EXPAND_OPS_CPU,((add, AddFunctor),(sub, SubFunctor),(mul, MulFunctor),(div, DivFunctor),(pow, PowFunctor),(eq, EqFunctor),(ne, NeFunctor),(le, LeFunctor),(lt, LtFunctor),(ge, GeFunctor),(gt, GtFunctor),))
 
 LMP_FOR_EACH_CARTESIAN_PRODUCT (DECLARE_UNARY_OPS_CPU,((neg, NegFunctor),(log, LogFunctor),(exp, ExpFunctor),(sqrt, SqrtFunctor),(abs, AbsFunctor),(sin, SinFunctor),(cos, CosFunctor),(tan, TanFunctor),))
 
TensorImpl clamp_cpu (const TensorImpl &a, Scalar min_val, Scalar max_val)
 
TensorImpl transpose_cpu (const TensorImpl &a)
 
TensorImpl matmul_cpu (const TensorImpl &a, const TensorImpl &b)
 
 LMP_FOR_EACH_CARTESIAN_PRODUCT (DECLARE_REDUCT_OPS_CPU,((sum, SumFunctor),(max, MaxFunctor),(min, MinFunctor),(prod, ProdFunctor),))
 
 LMP_REGISTER_DISPATCH (ops::add_stub, DeviceType::CPU, add_cpu)
 
 LMP_REGISTER_DISPATCH (ops::sub_stub, DeviceType::CPU, sub_cpu)
 
 LMP_REGISTER_DISPATCH (ops::mul_stub, DeviceType::CPU, mul_cpu)
 
 LMP_REGISTER_DISPATCH (ops::div_stub, DeviceType::CPU, div_cpu)
 
 LMP_REGISTER_DISPATCH (ops::pow_stub, DeviceType::CPU, pow_cpu)
 
 LMP_REGISTER_DISPATCH (ops::eq_stub, DeviceType::CPU, eq_cpu)
 
 LMP_REGISTER_DISPATCH (ops::ne_stub, DeviceType::CPU, ne_cpu)
 
 LMP_REGISTER_DISPATCH (ops::le_stub, DeviceType::CPU, le_cpu)
 
 LMP_REGISTER_DISPATCH (ops::lt_stub, DeviceType::CPU, lt_cpu)
 
 LMP_REGISTER_DISPATCH (ops::ge_stub, DeviceType::CPU, ge_cpu)
 
 LMP_REGISTER_DISPATCH (ops::gt_stub, DeviceType::CPU, gt_cpu)
 
 LMP_REGISTER_DISPATCH (ops::neg_stub, DeviceType::CPU, neg_cpu)
 
 LMP_REGISTER_DISPATCH (ops::abs_stub, DeviceType::CPU, abs_cpu)
 
 LMP_REGISTER_DISPATCH (ops::clamp_stub, DeviceType::CPU, clamp_cpu)
 
 LMP_REGISTER_DISPATCH (ops::cos_stub, DeviceType::CPU, cos_cpu)
 
 LMP_REGISTER_DISPATCH (ops::exp_stub, DeviceType::CPU, exp_cpu)
 
 LMP_REGISTER_DISPATCH (ops::log_stub, DeviceType::CPU, log_cpu)
 
 LMP_REGISTER_DISPATCH (ops::sin_stub, DeviceType::CPU, sin_cpu)
 
 LMP_REGISTER_DISPATCH (ops::sqrt_stub, DeviceType::CPU, sqrt_cpu)
 
 LMP_REGISTER_DISPATCH (ops::tan_stub, DeviceType::CPU, tan_cpu)
 
 LMP_REGISTER_DISPATCH (ops::transpose_stub, DeviceType::CPU, transpose_cpu)
 
 LMP_REGISTER_DISPATCH (ops::matmul_stub, DeviceType::CPU, matmul_cpu)
 
 LMP_REGISTER_DISPATCH (ops::sum_stub, DeviceType::CPU, sum_cpu)
 
 LMP_REGISTER_DISPATCH (ops::max_stub, DeviceType::CPU, max_cpu)
 
 LMP_REGISTER_DISPATCH (ops::min_stub, DeviceType::CPU, min_cpu)
 
 LMP_REGISTER_DISPATCH (ops::prod_stub, DeviceType::CPU, prod_cpu)
 
template<typename U , typename V , typename OutType >
void cpuMatmulKernel (const U *A, const V *B, OutType *C, size_t m, size_t n, size_t k, size_t i, size_t j)
 
template<typename T >
void cpuTransposeKernel (const T *in, T *out, size_t m, size_t n, size_t i, size_t j)
 
template<typename U , typename V , typename OutType >
void cpuMatMul (const U *A, const V *B, OutType *C, size_t m, size_t n, size_t k)
 
template<typename T >
void cpuTranspose (const T *in, T *out, size_t m, size_t n)
 
 LMP_FOR_EACH_CARTESIAN_PRODUCT (INSTANTIATE_MATMUL, LMP_LIST_TYPES, LMP_LIST_TYPES, LMP_LIST_TYPES)
 
 LMP_FOR_EACH_CARTESIAN_PRODUCT (INSTANTIATE_TRANSPOSE, LMP_LIST_TYPES)
 
void fill_cpu (void *ptr, size_t size, Scalar t, DataType type)
 
void resize_cpu (DataPtr dptr, size_t old_byte_size, size_t new_byte_size)
 
DataPtr empty_cpu (size_t byte_size)
 
 LMP_REGISTER_DISPATCH (ops::empty_stub, DeviceType::CPU, empty_cpu)
 
 LMP_REGISTER_DISPATCH (ops::resize_stub, DeviceType::CPU, resize_cpu)
 
 LMP_REGISTER_DISPATCH (ops::fill_stub, DeviceType::CPU, fill_cpu)
 
void copy_cpu (DeviceType to_device, const void *src, void *dest, size_t size, DataType src_dtype, DataType dest_dtype)
 
template<typename U , typename V >
void vecCopy (size_t size, const U *in, V *out)
 Small parallelized copy function using OpenMP.
 
 LMP_REGISTER_DISPATCH (ops::copy_stub, DeviceType::CPU, copy_cpu)
 
 LMP_REGISTER_DISPATCH (offset_util_stub_2, DeviceType::CPU, offset_util_cpu_2)
 
 LMP_REGISTER_DISPATCH (offset_util_stub_3, DeviceType::CPU, offset_util_cpu_3)
 
template<typename PtrList , typename OpFn >
void vectorized_reduct_kernel (PtrList ptr_, OpFn fn_, size_t i, size_t axis, const size_t *shape, const stride_t *strides)
 
template<typename PtrList , typename OpFn >
void reduct_kernel_launcher (PtrList ptr_, OpFn fn_, size_t size, size_t axis, const size_t *shape, const stride_t *strides, size_t)
 
template<template< typename > class OpFunctor, typename... Args>
void reduct_dispatch_handler (ReductMetaHandler &meta, size_t axis, Args &&... args)
 
template void reduct_dispatch_handler< SumFunctor > (ReductMetaHandler &, size_t)
 
template void reduct_dispatch_handler< MaxFunctor > (ReductMetaHandler &, size_t)
 
template void reduct_dispatch_handler< MinFunctor > (ReductMetaHandler &, size_t)
 
template void reduct_dispatch_handler< ProdFunctor > (ReductMetaHandler &, size_t)
 
template<typename PtrList , typename OpFn >
void vectorized_unary_kernel (PtrList ptr_, OpFn fn_, size_t i)
 
template<typename PtrList , typename OpFn >
void unary_kernel_launcher (PtrList ptr_, OpFn fn_, size_t size)
 
template<template< typename > class OpFunctor, typename... Args>
void unary_dispatch_handler (UnaryMetaHandler &meta, Args &&... args)
 
template void unary_dispatch_handler< NegFunctor > (UnaryMetaHandler &)
 
template void unary_dispatch_handler< ExpFunctor > (UnaryMetaHandler &)
 
template void unary_dispatch_handler< LogFunctor > (UnaryMetaHandler &)
 
template void unary_dispatch_handler< SqrtFunctor > (UnaryMetaHandler &)
 
template void unary_dispatch_handler< AbsFunctor > (UnaryMetaHandler &)
 
template void unary_dispatch_handler< SinFunctor > (UnaryMetaHandler &)
 
template void unary_dispatch_handler< CosFunctor > (UnaryMetaHandler &)
 
template void unary_dispatch_handler< TanFunctor > (UnaryMetaHandler &)
 
template void unary_dispatch_handler< ClampFunctor > (UnaryMetaHandler &, Scalar &, Scalar &)
 
TensorImpl add_cpu (const TensorImpl &a, const TensorImpl &b)
 
TensorImpl sub_cpu (const TensorImpl &a, const TensorImpl &b)
 
TensorImpl mul_cpu (const TensorImpl &a, const TensorImpl &b)
 
TensorImpl div_cpu (const TensorImpl &a, const TensorImpl &b)
 
TensorImpl pow_cpu (const TensorImpl &a, const TensorImpl &b)
 
TensorImpl eq_cpu (const TensorImpl &a, const TensorImpl &b)
 
TensorImpl ne_cpu (const TensorImpl &a, const TensorImpl &b)
 
TensorImpl le_cpu (const TensorImpl &a, const TensorImpl &b)
 
TensorImpl lt_cpu (const TensorImpl &a, const TensorImpl &b)
 
TensorImpl ge_cpu (const TensorImpl &a, const TensorImpl &b)
 
TensorImpl gt_cpu (const TensorImpl &a, const TensorImpl &b)
 
TensorImpl neg_cpu (const TensorImpl &a)
 
TensorImpl log_cpu (const TensorImpl &a)
 
TensorImpl exp_cpu (const TensorImpl &a)
 
TensorImpl sqrt_cpu (const TensorImpl &a)
 
TensorImpl abs_cpu (const TensorImpl &a)
 
TensorImpl sin_cpu (const TensorImpl &a)
 
TensorImpl cos_cpu (const TensorImpl &a)
 
TensorImpl tan_cpu (const TensorImpl &a)
 
TensorImpl sum_cpu (const TensorImpl &a, size_t axis)
 
TensorImpl max_cpu (const TensorImpl &a, size_t axis)
 
TensorImpl min_cpu (const TensorImpl &a, size_t axis)
 
TensorImpl prod_cpu (const TensorImpl &a, size_t axis)
 
template<typename U , typename V , typename OutType >
void cpuMatmulKernel (const U *A, const V *B, OutType *C, size_t m, size_t n, size_t k)
 
template<typename T >
void cpuTransposeKernel (const T *in, T *out, size_t m, size_t n)
 
template<size_t NArgs>
std::unique_ptr< OffsetUtil > offset_util_cpu (::std::array< const TensorImpl *, NArgs > ins, const TensorImpl &out)
 

Variables

constexpr size_t kNArgs = BinaryMetaHandler::kNumElem
 

Detailed Description

Function Documentation

◆ vecCopy()

template<typename U , typename V >
void lmp::tensor::detail::cpu::vecCopy ( size_t  size,
const U *  in,
V *  out 
)

Small parallelized copy function using OpenMP.

Template Parameters
U: Input template type
V: Output template type
Parameters
size: Size of the array being used
in: Input array
out: Output array