gko::DpcppExecutor#
SYCL / DPC++ executor. Runs on Intel GPUs and CPUs (and on any other SYCL target supported by the Ginkgo build). Instances are created with the static create() function, which takes a device id, a host master executor, a SYCL device type (accelerator, cpu, gpu or all) and, optionally, a queue property; the device id refers to a concrete device among those matching the requested type. Kernels are dispatched through SYCL queues; allocations come from SYCL’s USM (Unified Shared Memory) allocators.
-
class DpcppExecutor #
Inherits from
public gko::detail::ExecutorBase<DpcppExecutor>
public std::enable_shared_from_this<DpcppExecutor>
This is the Executor subclass which represents a DPC++ enhanced device.
Public Functions
-
virtual std::shared_ptr<Executor> get_master() noexcept override#
Returns the master OmpExecutor of this Executor.
- Returns:
the master OmpExecutor of this Executor.
-
virtual std::shared_ptr<const Executor> get_master() const noexcept override#
Returns the master OmpExecutor of this Executor.
- Returns:
the master OmpExecutor of this Executor.
-
virtual void synchronize() const override#
Synchronize the operations launched on the executor with its master.
-
virtual std::string get_description() const override#
- Returns:
a textual representation of the executor and its device.
-
inline int get_device_id() const noexcept#
Get the DPCPP device id of the device associated to this executor.
- Returns:
the DPCPP device id of the device associated to this executor
-
inline const std::vector<int> &get_subgroup_sizes() const noexcept#
Get the available subgroup sizes for this device.
- Returns:
the available subgroup sizes for this device
-
inline int get_num_computing_units() const noexcept#
Get the number of Computing Units of this executor.
- Returns:
the number of Computing Units of this executor
-
inline int get_num_subgroups() const noexcept#
Get the number of subgroups of this executor.
- Returns:
the number of subgroups of this executor
-
inline const std::vector<int> &get_max_workitem_sizes() const noexcept#
Get the maximum work item sizes.
- Returns:
the maximum work item sizes
-
inline int get_max_workgroup_size() const noexcept#
Get the maximum workgroup size.
- Returns:
the maximum workgroup size
-
inline int get_max_subgroup_size() const noexcept#
Get the maximum subgroup size.
- Returns:
the maximum subgroup size
-
inline std::string get_device_type() const noexcept#
Get a string representing the device type.
- Returns:
a string representing the device type
-
virtual void run(const Operation &op) const = 0#
Runs the specified Operation using this Executor.
- Parameters:
op – the operation to run
-
template<typename ClosureOmp, typename ClosureCuda, typename ClosureHip, typename ClosureDpcpp>
inline void run(const ClosureOmp &op_omp, const ClosureCuda &op_cuda, const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) const#
Runs one of the passed in functors, depending on the Executor type.
- Template Parameters:
ClosureOmp – type of op_omp
ClosureCuda – type of op_cuda
ClosureHip – type of op_hip
ClosureDpcpp – type of op_dpcpp
- Parameters:
op_omp – functor to run in case of a OmpExecutor or ReferenceExecutor
op_cuda – functor to run in case of a CudaExecutor
op_hip – functor to run in case of a HipExecutor
op_dpcpp – functor to run in case of a DpcppExecutor
-
template<typename ClosureReference, typename ClosureOmp, typename ClosureCuda, typename ClosureHip, typename ClosureDpcpp>
inline void run(std::string name, const ClosureReference &op_ref, const ClosureOmp &op_omp, const ClosureCuda &op_cuda, const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) const#
Runs one of the passed in functors, depending on the Executor type.
- Template Parameters:
ClosureReference – type of op_ref
ClosureOmp – type of op_omp
ClosureCuda – type of op_cuda
ClosureHip – type of op_hip
ClosureDpcpp – type of op_dpcpp
- Parameters:
name – the name of the operation
op_ref – functor to run in case of a ReferenceExecutor
op_omp – functor to run in case of a OmpExecutor
op_cuda – functor to run in case of a CudaExecutor
op_hip – functor to run in case of a HipExecutor
op_dpcpp – functor to run in case of a DpcppExecutor
Public Static Functions
-
static std::shared_ptr<DpcppExecutor> create(int device_id, std::shared_ptr<Executor> master, std::string device_type = "all", dpcpp_queue_property property = dpcpp_queue_property::in_order)#
Creates a new DpcppExecutor.
- Parameters:
device_id – the DPCPP device id of this device
master – an executor on the host that is used to invoke the device kernels
device_type – a string representing the type of device to consider (accelerator, cpu, gpu or all).
-
static int get_num_devices(std::string device_type)#
Get the number of devices present on the system.
- Parameters:
device_type – a string representing the device type
- Returns:
the number of devices present on the system