gko::HipExecutor#
AMD-GPU executor. The HIP / ROCm counterpart of
CudaExecutor: same constructor surface — device id, host
master executor, HIP allocator, optional hipStream_t — but built on
ROCm rather than CUDA. Use this on AMD MI- and Radeon-series GPUs.
-
class HipExecutor #
Inherits from
public gko::detail::ExecutorBase<HipExecutor>
public std::enable_shared_from_this<HipExecutor>
public gko::detail::EnableDeviceReset
This is the Executor subclass which represents the HIP enhanced device.
Public Functions
-
virtual std::shared_ptr<Executor> get_master() noexcept override#
Returns the master OmpExecutor of this Executor.
- Returns:
the master OmpExecutor of this Executor.
- virtual std::shared_ptr<const Executor> get_master() const noexcept override#
Returns the master OmpExecutor of this Executor.
- Returns:
the master OmpExecutor of this Executor.
-
virtual void synchronize() const override#
Synchronize the operations launched on the executor with its master.
-
virtual std::string get_description() const override#
- Returns:
a textual representation of the executor and its device.
-
inline int get_device_id() const noexcept#
Get the HIP device id of the device associated to this executor.
-
inline int get_num_warps_per_sm() const noexcept#
Get the number of warps per SM of this executor.
-
inline int get_num_multiprocessor() const noexcept#
Get the number of multiprocessors of this executor.
-
inline int get_major_version() const noexcept#
Get the major version of compute capability.
-
inline int get_minor_version() const noexcept#
Get the minor version of compute capability.
-
inline int get_num_warps() const noexcept#
Get the number of warps of this executor.
-
inline int get_warp_size() const noexcept#
Get the warp size of this executor.
-
inline hipblasContext *get_hipblas_handle() const#
Get the hipblas handle for this executor
- Returns:
the hipblas handle (hipblasContext*) for this executor
-
inline hipblasContext *get_blas_handle() const#
Get the hipblas handle for this executor
- Returns:
the hipblas handle (hipblasContext*) for this executor
-
inline hipsparseContext *get_hipsparse_handle() const#
Get the hipsparse handle for this executor
- Returns:
the hipsparse handle (hipsparseContext*) for this executor
-
inline hipsparseContext *get_sparselib_handle() const#
Get the hipsparse handle for this executor
- Returns:
the hipsparse handle (hipsparseContext*) for this executor
-
inline int get_closest_numa() const#
Get the closest NUMA node
- Returns:
the NUMA node closest to this device
-
inline std::vector<int> get_closest_pus() const#
Get the closest PUs
- Returns:
the array of PUs closest to this device
-
virtual void run(const Operation &op) const = 0#
Runs the specified Operation using this Executor.
- Parameters:
op – the operation to run
-
template<typename ClosureOmp, typename ClosureCuda, typename ClosureHip, typename ClosureDpcpp>
inline void run(const ClosureOmp &op_omp, const ClosureCuda &op_cuda, const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) const#
Runs one of the passed in functors, depending on the Executor type.
- Template Parameters:
ClosureOmp – type of op_omp
ClosureCuda – type of op_cuda
ClosureHip – type of op_hip
ClosureDpcpp – type of op_dpcpp
- Parameters:
op_omp – functor to run in case of an OmpExecutor or ReferenceExecutor
op_cuda – functor to run in case of a CudaExecutor
op_hip – functor to run in case of a HipExecutor
op_dpcpp – functor to run in case of a DpcppExecutor
-
template<typename ClosureReference, typename ClosureOmp, typename ClosureCuda, typename ClosureHip, typename ClosureDpcpp>
inline void run(std::string name, const ClosureReference &op_ref, const ClosureOmp &op_omp, const ClosureCuda &op_cuda, const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) const#
Runs one of the passed in functors, depending on the Executor type.
- Template Parameters:
ClosureReference – type of op_ref
ClosureOmp – type of op_omp
ClosureCuda – type of op_cuda
ClosureHip – type of op_hip
ClosureDpcpp – type of op_dpcpp
- Parameters:
name – the name of the operation
op_ref – functor to run in case of a ReferenceExecutor
op_omp – functor to run in case of an OmpExecutor
op_cuda – functor to run in case of a CudaExecutor
op_hip – functor to run in case of a HipExecutor
op_dpcpp – functor to run in case of a DpcppExecutor
Public Static Functions
static std::shared_ptr<HipExecutor> create(int device_id, std::shared_ptr<Executor> master, bool device_reset, allocation_mode alloc_mode = default_hip_alloc_mode, GKO_HIP_STREAM_STRUCT *stream = nullptr)#
Creates a new HipExecutor.
- Parameters:
device_id – the HIP device id of this device
master – an executor on the host that is used to invoke the device kernels
device_reset – whether to reset the device after the object exits the scope.
alloc_mode – the allocation mode that the executor should operate on. See allocation_mode for more details.
-
static int get_num_devices()#
Get the number of devices present on the system.