// Include guard and header includes for KOKKOS_PARALLEL_HPP, immediately
// followed by the opening of a trait class template.
// Visible facts about that template: it has a `Functor` parameter plus two
// parameters `EnableFunctor` and `EnablePolicy` that both default to void, and
// its only visible member is `execution_space`, an alias for
// Kokkos::DefaultExecutionSpace.
// NOTE(review): the numbers embedded in the text are line numbers of a source
// listing; the lines naming the struct (and, judging by the specializations
// below, a `Policy` parameter) are missing here. Presumably this is
// FunctorPolicyExecutionSpace, which later code queries - confirm against the
// real header.
47 #ifndef KOKKOS_PARALLEL_HPP 48 #define KOKKOS_PARALLEL_HPP 51 #include <Kokkos_Core_fwd.hpp> 52 #include <Kokkos_View.hpp> 53 #include <Kokkos_ExecPolicy.hpp> 55 #ifdef KOKKOSP_ENABLE_PROFILING 56 #include <impl/Kokkos_Profiling_Interface.hpp> 60 #include <impl/Kokkos_AllocationTracker.hpp> 61 #include <impl/Kokkos_Tags.hpp> 62 #include <impl/Kokkos_Traits.hpp> 63 #include <impl/Kokkos_FunctorAdapter.hpp> 65 #ifdef KOKKOS_HAVE_DEBUG 83 template<
class Functor
85 ,
class EnableFunctor = void
86 ,
class EnablePolicy =
void 89 typedef Kokkos::DefaultExecutionSpace execution_space ;
// Partial-specialization fragment keyed on BOTH Functor::device_type and
// Policy::execution_space (each wrapped in enable_if_type).
// When this form is chosen, execution_space is taken from the Policy.
// NOTE(review): assumes enable_if_type is a SFINAE detector that is only
// well-formed when the nested type exists - confirm. The struct header line is
// missing from this listing.
92 template<
class Functor ,
class Policy >
95 , typename enable_if_type< typename Functor::device_type >::type
96 , typename enable_if_type< typename Policy ::execution_space >::type
99 typedef typename Policy ::execution_space execution_space ;
// Partial-specialization fragment keyed on BOTH Functor::execution_space and
// Policy::execution_space (each wrapped in enable_if_type).
// As in the device_type variant, the Policy's execution_space is the one
// exported.
// NOTE(review): struct header line is missing from this listing.
102 template<
class Functor ,
class Policy >
105 , typename enable_if_type< typename Functor::execution_space >::type
106 , typename enable_if_type< typename Policy ::execution_space >::type
109 typedef typename Policy ::execution_space execution_space ;
// Partial-specialization fragment keyed only on Policy::execution_space;
// EnableFunctor stays a free template parameter, so nothing is required of the
// functor here. execution_space is taken from the Policy.
// NOTE(review): struct header line is missing from this listing.
112 template<
class Functor ,
class Policy ,
class EnableFunctor >
116 , typename enable_if_type< typename Policy::execution_space >::type
119 typedef typename Policy ::execution_space execution_space ;
// Partial-specialization fragment keyed only on Functor::device_type;
// EnablePolicy stays a free template parameter. execution_space is an alias
// for the functor's device_type.
// NOTE(review): struct header line is missing from this listing.
122 template<
class Functor ,
class Policy ,
class EnablePolicy >
125 , typename enable_if_type< typename Functor::device_type >::type
129 typedef typename Functor::device_type execution_space ;
// Partial-specialization fragment keyed only on Functor::execution_space;
// EnablePolicy stays a free template parameter. execution_space is an alias
// for the functor's own execution_space.
// NOTE(review): struct header line is missing from this listing.
132 template<
class Functor ,
class Policy ,
class EnablePolicy >
135 , typename enable_if_type< typename Functor::execution_space >::type
139 typedef typename Functor::execution_space execution_space ;
/// Forward declaration of the ParallelFor implementation class template.
///
/// Only the name is introduced here; no primary definition is visible at this
/// point. The listing's own description states that the class has a partial
/// specialization per device.
///
/// @tparam FunctorType  type of the functor to execute.
/// @tparam ExecPolicy   type of the execution policy describing the work.
template< class FunctorType , class ExecPolicy >
class ParallelFor ;
// Fragment of the policy-based parallel_for overload. The listing lines with
// the function name, the `policy` parameter and the dispatch itself are
// missing here.
// Visible contract:
//   functor - functor object, taken by const reference.
//   str     - optional label, defaults to "".
//   trailing unnamed pointer parameter - enable_if on !is_integral<ExecPolicy>,
//             so this overload does not apply to integral first arguments
//             (assuming Impl::enable_if behaves like std::enable_if - confirm).
// Under KOKKOSP_ENABLE_PROFILING, and only if profileLibraryLoaded() is true,
// beginParallelFor is called before and endParallelFor after the missing
// lines. The reported name is typeid(FunctorType).name() when str is empty,
// otherwise str; kpID is filled by the begin call and handed to the end call.
195 template<
class ExecPolicy ,
class FunctorType >
198 ,
const FunctorType & functor
199 ,
const std::string& str =
"" 200 ,
typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type * = 0
203 #ifdef KOKKOSP_ENABLE_PROFILING 205 if(Kokkos::Experimental::profileLibraryLoaded()) {
206 Kokkos::Experimental::beginParallelFor(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
212 #ifdef KOKKOSP_ENABLE_PROFILING 213 if(Kokkos::Experimental::profileLibraryLoaded()) {
214 Kokkos::Experimental::endParallelFor(kpID);
// Fragment of a parallel_for overload templated on the functor only. The first
// parameter is on a line missing from this listing.
// Visible contract:
//   functor - functor object, taken by const reference.
//   str     - optional label, defaults to "".
// The execution space is looked up through
// Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space.
// Profiling hooks mirror the policy-based overload: beginParallelFor and
// endParallelFor run only under KOKKOSP_ENABLE_PROFILING with a loaded
// profiling library, and the name falls back to typeid(FunctorType).name()
// when str is empty.
219 template<
class FunctorType >
222 ,
const FunctorType & functor
223 ,
const std::string& str =
"" 227 Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
231 #ifdef KOKKOSP_ENABLE_PROFILING 233 if(Kokkos::Experimental::profileLibraryLoaded()) {
234 Kokkos::Experimental::beginParallelFor(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
240 #ifdef KOKKOSP_ENABLE_PROFILING 241 if(Kokkos::Experimental::profileLibraryLoaded()) {
242 Kokkos::Experimental::endParallelFor(kpID);
// Fragment of a parallel_for overload whose parameter list ends with
// (policy, functor). The line declaring `str`, which the debug output prints,
// is missing from this listing.
// When KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates to non-zero, a
// "Start" message is written to std::cout before, and an "End" message after,
// the missing lines between them. Each message is flushed via std::endl.
247 template<
class ExecPolicy ,
class FunctorType >
250 ,
const ExecPolicy & policy
251 ,
const FunctorType & functor )
253 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 255 std::cout <<
"KOKKOS_DEBUG Start parallel_for kernel: " << str << std::endl;
260 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 262 std::cout <<
"KOKKOS_DEBUG End parallel_for kernel: " << str << std::endl;
// Fragment of the policy-based parallel_reduce overload without an explicit
// result argument. The name and `policy` parameter lines are missing here.
// Visible contract:
//   functor - functor object, taken by const reference.
//   str     - optional label, defaults to "".
//   trailing unnamed pointer parameter - enable_if on !is_integral<ExecPolicy>.
// ValueTraits is FunctorValueTraits for the functor and the policy's work_tag.
// The if_c expression selects ValueTraits::value_type when StaticValueSize is
// non-zero and ValueTraits::pointer_type otherwise; Kokkos::MemoryUnmanaged
// appears as a further argument of the type being assembled (its first line is
// missing).
// Profiling: beginParallelReduce / endParallelReduce run under
// KOKKOSP_ENABLE_PROFILING when a profiling library is loaded, named by str or
// typeid(FunctorType).name() when str is empty.
304 template<
class ExecPolicy ,
class FunctorType >
307 ,
const FunctorType & functor
308 ,
const std::string& str =
"" 309 ,
typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type * = 0
316 typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag > ValueTraits ;
318 typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
319 ,
typename ValueTraits::value_type
320 ,
typename ValueTraits::pointer_type
325 , Kokkos::MemoryUnmanaged
329 #ifdef KOKKOSP_ENABLE_PROFILING 331 if(Kokkos::Experimental::profileLibraryLoaded()) {
332 Kokkos::Experimental::beginParallelReduce(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
338 #ifdef KOKKOSP_ENABLE_PROFILING 339 if(Kokkos::Experimental::profileLibraryLoaded()) {
340 Kokkos::Experimental::endParallelReduce(kpID);
// Fragment of a parallel_reduce overload templated on the functor only. The
// first parameter is on a line missing from this listing.
// Visible contract:
//   functor - functor object, taken by const reference.
//   str     - optional label, defaults to "".
// The execution space is looked up through
// Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space, and
// ValueTraits is FunctorValueTraits< FunctorType , void > (no work tag).
// The if_c expression picks value_type for a non-zero StaticValueSize and
// pointer_type otherwise; Kokkos::MemoryUnmanaged is part of the same type.
// Profiling hooks: beginParallelReduce / endParallelReduce, named by str or
// typeid(FunctorType).name() when str is empty.
346 template<
class FunctorType >
349 ,
const FunctorType & functor
350 ,
const std::string& str =
"" 354 Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
359 typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
361 typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
362 ,
typename ValueTraits::value_type
363 ,
typename ValueTraits::pointer_type
368 , Kokkos::MemoryUnmanaged
372 #ifdef KOKKOSP_ENABLE_PROFILING 374 if(Kokkos::Experimental::profileLibraryLoaded()) {
375 Kokkos::Experimental::beginParallelReduce(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
381 #ifdef KOKKOSP_ENABLE_PROFILING 382 if(Kokkos::Experimental::profileLibraryLoaded()) {
383 Kokkos::Experimental::endParallelReduce(kpID);
// Fragment of the policy-based parallel_reduce overload that writes into a
// caller-supplied view. The name and `policy` parameter lines are missing.
// Visible contract:
//   functor     - functor object, taken by const reference.
//   result_view - destination, taken by const reference; ViewType must satisfy
//                 Impl::is_view.
//   str         - optional label, defaults to "".
// The enable_if condition also requires ExecPolicy not to be integral and,
// when KOKKOS_HAVE_CUDA is defined, the policy's execution space not to be
// Kokkos::Cuda. The rest of the condition is missing from this listing.
// Profiling hooks: beginParallelReduce / endParallelReduce, named by str or
// typeid(FunctorType).name() when str is empty.
390 template<
class ExecPolicy ,
class FunctorType ,
class ViewType >
393 ,
const FunctorType & functor
394 ,
const ViewType & result_view
395 ,
const std::string& str =
"" 396 ,
typename Impl::enable_if<
397 ( Impl::is_view<ViewType>::value && ! Impl::is_integral< ExecPolicy >::value
398 #ifdef KOKKOS_HAVE_CUDA
399 && ! Impl::is_same<typename ExecPolicy::execution_space,Kokkos::Cuda>::value
404 #ifdef KOKKOSP_ENABLE_PROFILING 406 if(Kokkos::Experimental::profileLibraryLoaded()) {
407 Kokkos::Experimental::beginParallelReduce(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
413 #ifdef KOKKOSP_ENABLE_PROFILING 414 if(Kokkos::Experimental::profileLibraryLoaded()) {
415 Kokkos::Experimental::endParallelReduce(kpID);
// Fragment of the policy-based parallel_reduce overload that reduces into a
// reference (`result_ref`). The name and `policy` parameter lines are missing.
// Two alternative parameter lists are visible:
//   * with KOKKOS_HAVE_CUDA defined: result_ref's type is produced by an
//     enable_if that requires a non-integral ExecPolicy whose execution space
//     is not Kokkos::Cuda, yielding FunctorValueTraits<...>::reference_type.
//     A second, unnamed enable_if pointer parameter repeats the not-Cuda check.
//   * otherwise (listing lines 434-438): the enable_if only requires a
//     non-integral ExecPolicy.
// In both lists `str` is an optional label defaulting to "".
// ValueTraits / ValueOps are the FunctorValueTraits / FunctorValueOps for the
// functor and the policy's work_tag. result_view is constructed from
// ValueOps::pointer( result_ref ) and ValueTraits::value_count( functor ), and
// its type involves Kokkos::MemoryUnmanaged. NOTE(review): presumably this
// wraps the caller's storage without owning it - confirm.
// Profiling hooks: beginParallelReduce / endParallelReduce, named by str or
// typeid(FunctorType).name() when str is empty.
422 template<
class ExecPolicy ,
class FunctorType >
424 ,
const FunctorType & functor
425 #ifdef KOKKOS_HAVE_CUDA
426 ,
typename Impl::enable_if<
427 ( ! Impl::is_integral< ExecPolicy >::value &&
428 ! Impl::is_same<typename ExecPolicy::execution_space,Kokkos::Cuda>::value )
429 ,
typename Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag >::reference_type>::type result_ref
430 ,
const std::string& str =
"" 431 ,
typename Impl::enable_if<! Impl::is_same<typename ExecPolicy::execution_space,Kokkos::Cuda>::value >::type* = 0
434 ,
typename Impl::enable_if<
435 ( ! Impl::is_integral< ExecPolicy >::value)
436 ,
typename Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag >::reference_type
438 ,
const std::string& str =
"" 442 typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag > ValueTraits ;
443 typedef Kokkos::Impl::FunctorValueOps< FunctorType , typename ExecPolicy::work_tag > ValueOps ;
448 typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
449 ,
typename ValueTraits::value_type
450 ,
typename ValueTraits::pointer_type
455 , Kokkos::MemoryUnmanaged
457 result_view( ValueOps::pointer( result_ref )
458 , ValueTraits::value_count( functor )
461 #ifdef KOKKOSP_ENABLE_PROFILING 463 if(Kokkos::Experimental::profileLibraryLoaded()) {
464 Kokkos::Experimental::beginParallelReduce(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
470 #ifdef KOKKOSP_ENABLE_PROFILING 471 if(Kokkos::Experimental::profileLibraryLoaded()) {
472 Kokkos::Experimental::endParallelReduce(kpID);
// Fragment of a parallel_reduce overload templated on functor and view type.
// The first parameter is on a line missing from this listing.
// Visible contract:
//   functor     - functor object, taken by const reference.
//   result_view - destination, taken by const reference; ViewType must satisfy
//                 Impl::is_view.
//   str         - optional label, defaults to "".
// Under KOKKOS_HAVE_CUDA the enable_if condition additionally refers to the
// functor's execution space, obtained via
// FunctorPolicyExecutionSpace< FunctorType , void >. The comparison itself is
// not visible here.
// Profiling hooks: beginParallelReduce / endParallelReduce, named by str or
// typeid(FunctorType).name() when str is empty.
479 template<
class FunctorType ,
class ViewType >
482 ,
const FunctorType & functor
483 ,
const ViewType & result_view
484 ,
const std::string& str =
"" 485 ,
typename Impl::enable_if<( Impl::is_view<ViewType>::value
486 #ifdef KOKKOS_HAVE_CUDA
488 typename Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space,
494 Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
499 #ifdef KOKKOSP_ENABLE_PROFILING 501 if(Kokkos::Experimental::profileLibraryLoaded()) {
502 Kokkos::Experimental::beginParallelReduce(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
508 #ifdef KOKKOSP_ENABLE_PROFILING 509 if(Kokkos::Experimental::profileLibraryLoaded()) {
510 Kokkos::Experimental::endParallelReduce(kpID);
// Fragment of a parallel_reduce overload templated on the functor only, which
// reduces into a reference (`result`). The first parameter is on a line
// missing from this listing.
// Visible contract:
//   functor - functor object, taken by const reference.
//   result  - FunctorValueTraits<X , void>::reference_type, where X is void if
//             FunctorType is an execution policy or an integral type and
//             FunctorType otherwise.
//             NOTE(review): presumably this substitution keeps the signature
//             well-formed when FunctorType is deduced as a policy or integer
//             during overload resolution - confirm.
//   str     - optional label, defaults to "".
// ValueTraits / ValueOps are instantiated with a void work tag. result_view is
// built from ValueOps::pointer( result ) and ValueTraits::value_count(
// functor ), and its type involves Kokkos::MemoryUnmanaged.
// Profiling hooks: beginParallelReduce / endParallelReduce, named by str or
// typeid(FunctorType).name() when str is empty.
517 template<
class FunctorType >
520 ,
const FunctorType & functor
521 ,
typename Kokkos::Impl::FunctorValueTraits<
522 typename Impl::if_c<Impl::is_execution_policy<FunctorType>::value ||
523 Impl::is_integral<FunctorType>::value,
524 void,FunctorType>::type
525 ,
void >::reference_type result
526 ,
const std::string& str =
"" 527 ,
typename Impl::enable_if<
true 528 #ifdef KOKKOS_HAVE_CUDA
530 typename Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space,
535 typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
536 typedef Kokkos::Impl::FunctorValueOps< FunctorType , void > ValueOps ;
539 Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
547 typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0)
548 ,
typename ValueTraits::value_type
549 ,
typename ValueTraits::pointer_type
554 , Kokkos::MemoryUnmanaged
556 result_view( ValueOps::pointer( result )
557 , ValueTraits::value_count( functor )
560 #ifdef KOKKOSP_ENABLE_PROFILING 562 if(Kokkos::Experimental::profileLibraryLoaded()) {
563 Kokkos::Experimental::beginParallelReduce(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
569 #ifdef KOKKOSP_ENABLE_PROFILING 570 if(Kokkos::Experimental::profileLibraryLoaded()) {
571 Kokkos::Experimental::endParallelReduce(kpID);
// Start of a region compiled only when KOKKOS_HAVE_CUDA is NOT defined,
// followed by a fragment of a parallel_reduce overload whose parameter list
// ends with (policy, functor, ResultType * result).
// The line declaring `str`, which the debug output prints, is missing from
// this listing, as is the forwarding call.
// When KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates to non-zero, "Start"
// and "End" messages naming the kernel are written to std::cout around the
// missing lines.
576 #ifndef KOKKOS_HAVE_CUDA 577 template<
class ExecPolicy ,
class FunctorType ,
class ResultType >
580 ,
const ExecPolicy & policy
581 ,
const FunctorType & functor
582 , ResultType * result)
584 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 586 std::cout <<
"KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl;
591 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 593 std::cout <<
"KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl;
// Fragment of a parallel_reduce overload whose parameter list ends with
// (policy, functor, ResultType & result) - the by-reference counterpart of the
// pointer variant. The `str` parameter line and the forwarding call are
// missing from this listing.
// Debug "Start"/"End" messages are printed to std::cout only when
// KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates to non-zero.
598 template<
class ExecPolicy ,
class FunctorType ,
class ResultType >
601 ,
const ExecPolicy & policy
602 ,
const FunctorType & functor
603 , ResultType & result)
605 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 607 std::cout <<
"KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl;
612 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 614 std::cout <<
"KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl;
// Fragment of a parallel_reduce overload whose parameter list ends with
// (policy, functor) and has no explicit result argument. The `str` parameter
// line and the forwarding call are missing from this listing.
// Debug "Start"/"End" messages are printed to std::cout only when
// KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates to non-zero.
619 template<
class ExecPolicy ,
class FunctorType >
622 ,
const ExecPolicy & policy
623 ,
const FunctorType & functor)
625 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 627 std::cout <<
"KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl;
632 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 634 std::cout <<
"KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl;
// Policy-based parallel_scan overload. The signature is intact; the body lines
// between the profiling hooks are missing from this listing.
//   policy  - execution policy, taken by const reference.
//   functor - functor object, taken by const reference.
//   str     - optional label, defaults to "".
//   trailing unnamed pointer parameter - enable_if on
//             !is_integral<ExecutionPolicy>, keeping this overload away from
//             calls whose first argument is an integer count.
// Under KOKKOSP_ENABLE_PROFILING with a loaded profiling library,
// beginParallelScan / endParallelScan surround the missing lines; the name is
// str, or typeid(FunctorType).name() when str is empty.
801 template<
class ExecutionPolicy ,
class FunctorType >
803 void parallel_scan(
const ExecutionPolicy & policy
804 ,
const FunctorType & functor
805 ,
const std::string& str =
"" 806 ,
typename Impl::enable_if< ! Impl::is_integral< ExecutionPolicy >::value >::type * = 0
809 #ifdef KOKKOSP_ENABLE_PROFILING 811 if(Kokkos::Experimental::profileLibraryLoaded()) {
812 Kokkos::Experimental::beginParallelScan(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
818 #ifdef KOKKOSP_ENABLE_PROFILING 819 if(Kokkos::Experimental::profileLibraryLoaded()) {
820 Kokkos::Experimental::endParallelScan(kpID);
// Count-based parallel_scan overload. The signature is intact; the body lines
// between the profiling hooks are missing from this listing.
//   work_count - amount of work, as a size_t.
//   functor    - functor object, taken by const reference.
//   str        - optional label, defaults to "".
// The execution space is looked up through
// Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >.
// Profiling hooks match the policy-based overload: beginParallelScan and
// endParallelScan, named by str or typeid(FunctorType).name() when str is
// empty.
826 template<
class FunctorType >
828 void parallel_scan(
const size_t work_count
829 ,
const FunctorType & functor
830 ,
const std::string& str =
"" )
833 Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
838 #ifdef KOKKOSP_ENABLE_PROFILING 840 if(Kokkos::Experimental::profileLibraryLoaded()) {
841 Kokkos::Experimental::beginParallelScan(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
847 #ifdef KOKKOSP_ENABLE_PROFILING 848 if(Kokkos::Experimental::profileLibraryLoaded()) {
849 Kokkos::Experimental::endParallelScan(kpID);
// Label-first parallel_scan overload: takes (str, policy, functor) and
// forwards to parallel_scan(policy, functor, str).
// When KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates to non-zero, a "Start"
// message is printed to std::cout before the forwarded call and an "End"
// message after it.
// NOTE(review): the braces, the #endif lines and listing lines 862, 869 and
// 872 are missing here, so other statements may surround the call - check the
// real header before editing.
855 template<
class ExecutionPolicy ,
class FunctorType >
857 void parallel_scan(
const std::string& str
858 ,
const ExecutionPolicy & policy
859 ,
const FunctorType & functor)
861 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 863 std::cout <<
"KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
866 parallel_scan(policy,functor,str);
868 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 870 std::cout <<
"KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl;
/// Size query used for functors that expose neither `team_shmem_size` nor
/// `shmem_size`. The partial specializations of this template forward to those
/// members when they exist.
///
/// This primary template ignores both arguments and always reports 0.
///
/// @tparam FunctorType  functor type being queried.
/// @tparam Enable       hook for the enable_if-based specializations;
///                      defaults to void.
template< class FunctorType , class Enable = void >
struct FunctorTeamShmemSize
{
  /// @return always 0; the functor and the team size are not inspected.
  static inline size_t value( const FunctorType & , int ) { return 0 ; }
};
889 template<
class FunctorType >
890 struct FunctorTeamShmemSize< FunctorType , typename Impl::enable_if< 0 < sizeof( & FunctorType::team_shmem_size ) >::type >
892 static inline size_t value(
const FunctorType & f ,
int team_size ) {
return f.team_shmem_size( team_size ) ; }
895 template<
class FunctorType >
896 struct FunctorTeamShmemSize< FunctorType , typename Impl::enable_if< 0 < sizeof( & FunctorType::shmem_size ) >::type >
898 static inline size_t value(
const FunctorType & f ,
int team_size ) {
return f.shmem_size( team_size ) ; }
Implementation detail of parallel_scan.
void parallel_reduce(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< !Impl::is_integral< ExecPolicy >::value >::type *=0)
Parallel reduction.
View to an array of data.
Memory management for host memory.
Implementation of the ParallelFor operator that has a partial specialization for the device...
Given a Functor and Execution Policy query an execution space.
Execution policy for work over a range of an integral type.
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< !Impl::is_integral< ExecPolicy >::value >::type *=0)
Execute functor in parallel according to the execution policy.
Implementation detail of parallel_reduce.