47 #ifndef KOKKOS_SERIAL_HPP 48 #define KOKKOS_SERIAL_HPP 54 #include <Kokkos_HostSpace.hpp> 55 #include <Kokkos_ScratchSpace.hpp> 56 #include <Kokkos_MemoryTraits.hpp> 57 #include <impl/Kokkos_Tags.hpp> 58 #include <impl/Kokkos_FunctorAdapter.hpp> 60 #if defined( KOKKOS_HAVE_SERIAL ) 82 typedef Serial execution_space ;
84 typedef HostSpace::size_type size_type ;
86 typedef HostSpace memory_space ;
91 typedef LayoutRight array_layout ;
94 typedef ScratchMemorySpace< Kokkos::Serial > scratch_memory_space ;
/** \brief  Query used to ask whether execution is currently inside a parallel region.
 *
 *  This implementation performs no check and unconditionally returns 0.
 *  The result is spelled as the integer 0 (rather than the bool literal
 *  `false`) so that it matches the declared `int` return type and the
 *  sibling status queries, which also return integer literals.
 *
 *  \return Always 0.
 */
inline static int in_parallel() { return 0 ; }
// No-op: the body is empty, so calling fence() has no effect.
132 static void fence() {}
// Initialization entry point.
// All four parameters have defaults and are explicitly discarded with (void)
// casts below, so none of them influences the statements visible here.
// NOTE(review): the end of this body is not visible in this listing.
134 static void initialize(
unsigned threads_count = 1 ,
135 unsigned use_numa_count = 0 ,
136 unsigned use_cores_per_numa = 0 ,
137 bool allow_asynchronous_threadpool =
false) {
// Silence unused-parameter warnings.
138 (void) threads_count;
139 (void) use_numa_count;
140 (void) use_cores_per_numa;
141 (void) allow_asynchronous_threadpool;
// Delegate to the host-space lock-array initialization helper.
144 Impl::init_lock_array_host_space();
// Performs no check: unconditionally returns 1.
148 static int is_initialized() {
return 1 ; }
// No-op: both parameters (an output stream and a bool defaulting to false)
// are unnamed and the body is empty, so nothing is written.
154 static void print_configuration( std::ostream & ,
const bool =
false ) {}
// Always returns 1; the unnamed int argument (default 0) is ignored.
158 inline static int thread_pool_size(
int = 0 ) {
return 1 ; }
// Always returns 0.
159 KOKKOS_INLINE_FUNCTION
static int thread_pool_rank() {
return 0 ; }
// Forwards to thread_pool_rank(); the int result is implicitly converted
// to the unsigned return type.
163 KOKKOS_INLINE_FUNCTION
static unsigned hardware_thread_id() {
return thread_pool_rank(); }
// Forwards to thread_pool_size(0); the int result is implicitly converted
// to the unsigned return type.
164 inline static unsigned max_hardware_threads() {
return thread_pool_size(0); }
// Declaration only; the definition is not part of this listing.
// Takes two unsigned sizes (reduce_size, shared_size) and returns a void*.
// Callers in this file cast the returned pointer to a reduction pointer type
// and use it as storage for the reduction value.
// NOTE(review): exact sizing/lifetime semantics of the returned buffer are
// not visible here -- confirm against the definition.
168 static void * scratch_memory_resize(
unsigned reduce_size ,
unsigned shared_size );
182 struct VerifyExecutionCanAccessMemorySpace
183 <
Kokkos::Serial::memory_space
184 , Kokkos::Serial::scratch_memory_space
187 enum { value =
true };
// No-op overload taking no arguments: the body is empty.
188 inline static void verify(
void ) { }
// No-op overload taking an unnamed const void*: the pointer is ignored and
// the body is empty.
189 inline static void verify(
const void * ) { }
192 namespace SerialImpl {
197 unsigned m_reduce_end ;
198 unsigned m_shared_end ;
202 static Sentinel & singleton();
206 unsigned align(
unsigned n );
217 class SerialTeamMember {
220 const scratch_memory_space m_space ;
221 const int m_league_rank ;
222 const int m_league_size ;
224 SerialTeamMember & operator = (
const SerialTeamMember & );
// Returns a const reference to the member m_space.
228 KOKKOS_INLINE_FUNCTION
229 const scratch_memory_space & team_shmem()
const {
return m_space ; }
// Returns the stored m_league_rank.
231 KOKKOS_INLINE_FUNCTION
int league_rank()
const {
return m_league_rank ; }
// Returns the stored m_league_size.
232 KOKKOS_INLINE_FUNCTION
int league_size()
const {
return m_league_size ; }
// Always returns 0.
233 KOKKOS_INLINE_FUNCTION
int team_rank()
const {
return 0 ; }
// Always returns 1.
234 KOKKOS_INLINE_FUNCTION
int team_size()
const {
return 1 ; }
// No-op: the body is empty.
236 KOKKOS_INLINE_FUNCTION
void team_barrier()
const {}
// No-op: both arguments (a value reference and an int reference) are
// unnamed and the body is empty, so the value is left untouched.
238 template<
class ValueType>
239 KOKKOS_INLINE_FUNCTION
240 void team_broadcast(
const ValueType& ,
const int& )
const {}
242 template<
class ValueType,
class JoinOp >
243 KOKKOS_INLINE_FUNCTION
244 ValueType team_reduce(
const ValueType & value ,
const JoinOp & )
const 258 template<
typename Type >
259 KOKKOS_INLINE_FUNCTION Type team_scan(
const Type & value , Type *
const global_accum )
const 261 const Type tmp = global_accum ? *global_accum : Type(0) ;
262 if ( global_accum ) { *global_accum += value ; }
271 template<
typename Type >
272 KOKKOS_INLINE_FUNCTION Type team_scan(
const Type & )
const 278 SerialTeamMember(
int arg_league_rank
279 ,
int arg_league_size
280 ,
int arg_shared_size
292 template<
class Arg0 ,
class Arg1 >
293 class TeamPolicy< Arg0 , Arg1 ,
Kokkos::Serial >
297 const int m_league_size ;
302 typedef TeamPolicy execution_policy ;
305 typedef Kokkos::Serial execution_space ;
308 Impl::if_c< ! Impl::is_same< Kokkos::Serial , Arg0 >::value , Arg0 , Arg1 >::type
// Always returns 1; the functor argument is unnamed and unused.
313 template<
class FunctorType >
315 int team_size_max(
const FunctorType & ) {
return 1 ; }
// Always returns 1; the functor argument is unnamed and unused.
317 template<
class FunctorType >
319 int team_size_recommended(
const FunctorType & ) {
return 1 ; }
// Two-argument overload: always returns 1; both the functor and the int
// argument are unnamed and unused.
321 template<
class FunctorType >
323 int team_size_recommended(
const FunctorType & ,
const int& ) {
return 1 ; }
// Always returns 1.
327 inline int team_size()
const {
return 1 ; }
// Returns the stored m_league_size.
328 inline int league_size()
const {
return m_league_size ; }
331 TeamPolicy( execution_space &
332 ,
int league_size_request
335 : m_league_size( league_size_request )
338 TeamPolicy( execution_space &
339 ,
int league_size_request
340 ,
const Kokkos::AUTO_t &
342 : m_league_size( league_size_request )
345 TeamPolicy(
int league_size_request
348 : m_league_size( league_size_request )
351 TeamPolicy(
int league_size_request
352 ,
const Kokkos::AUTO_t &
354 : m_league_size( league_size_request )
357 typedef Impl::SerialTeamMember member_type ;
371 template<
class FunctorType ,
class Arg0 ,
class Arg1 ,
class Arg2 >
372 class ParallelFor< FunctorType ,
Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > >
380 template<
class PType >
382 ParallelFor(
typename Impl::enable_if<
383 ( Impl::is_same< PType , Policy >::value &&
384 Impl::is_same< typename PType::work_tag , void >::value
385 ),
const FunctorType & >::type functor
386 ,
const PType & policy )
388 const typename PType::member_type e = policy.end();
389 for (
typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
395 template<
class PType >
397 ParallelFor(
typename Impl::enable_if<
398 ( Impl::is_same< PType , Policy >::value &&
399 ! Impl::is_same< typename PType::work_tag , void >::value
400 ),
const FunctorType & >::type functor
401 ,
const PType & policy )
403 const typename PType::member_type e = policy.end();
404 for (
typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
405 functor(
typename PType::work_tag() , i );
410 template<
class FunctorType ,
class Arg0 ,
class Arg1 ,
class Arg2 >
411 class ParallelReduce< FunctorType ,
Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > >
415 typedef typename Policy::work_tag WorkTag ;
416 typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ;
417 typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ;
419 typedef typename ValueTraits::pointer_type pointer_type ;
420 typedef typename ValueTraits::reference_type reference_type ;
423 template<
class ViewType ,
class PType >
424 ParallelReduce(
typename Impl::enable_if<
425 ( Impl::is_view< ViewType >::value &&
426 Impl::is_same< typename ViewType::memory_space , HostSpace >::value &&
427 Impl::is_same< PType , Policy >::value &&
428 Impl::is_same< typename PType::work_tag , void >::value
429 ),
const FunctorType & >::type functor
430 ,
const PType & policy
431 ,
const ViewType & result
434 pointer_type result_ptr = result.ptr_on_device();
436 if ( ! result_ptr ) {
437 result_ptr = (pointer_type)
438 Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( functor ) , 0 );
441 reference_type update = ValueInit::init( functor , result_ptr );
443 const typename PType::member_type e = policy.end();
444 for (
typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
445 functor( i , update );
448 Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( functor , result_ptr );
452 template<
class ViewType ,
class PType >
453 ParallelReduce(
typename Impl::enable_if<
454 ( Impl::is_view< ViewType >::value &&
455 Impl::is_same< typename ViewType::memory_space , HostSpace >::value &&
456 Impl::is_same< PType , Policy >::value &&
457 ! Impl::is_same< typename PType::work_tag , void >::value
458 ),
const FunctorType & >::type functor
459 ,
const PType & policy
460 ,
const ViewType & result
463 pointer_type result_ptr = result.ptr_on_device();
465 if ( ! result_ptr ) {
466 result_ptr = (pointer_type)
467 Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( functor ) , 0 );
470 typename ValueTraits::reference_type update = ValueInit::init( functor , result_ptr );
472 const typename PType::member_type e = policy.end();
473 for (
typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
474 functor(
typename PType::work_tag() , i , update );
477 Kokkos::Impl::FunctorFinal< FunctorType , WorkTag >::final( functor , result_ptr );
481 template<
class FunctorType ,
class Arg0 ,
class Arg1 ,
class Arg2 >
482 class ParallelScan< FunctorType ,
Kokkos::RangePolicy< Arg0 , Arg1 , Arg2 , Kokkos::Serial > >
488 typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename Policy::work_tag > ValueTraits ;
489 typedef Kokkos::Impl::FunctorValueInit< FunctorType , typename Policy::work_tag > ValueInit ;
493 typedef typename ValueTraits::pointer_type pointer_type ;
494 typedef typename ValueTraits::reference_type reference_type ;
497 template<
class PType >
499 ParallelScan(
typename Impl::enable_if<
500 ( Impl::is_same< PType , Policy >::value &&
501 Impl::is_same< typename PType::work_tag , void >::value
502 ),
const FunctorType & >::type functor
503 ,
const PType & policy )
505 pointer_type result_ptr = (pointer_type)
506 Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( functor ) , 0 );
508 reference_type update = ValueInit::init( functor , result_ptr );
510 const typename PType::member_type e = policy.end();
511 for (
typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
512 functor( i , update ,
true );
515 Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( functor , result_ptr );
519 template<
class PType >
521 ParallelScan(
typename Impl::enable_if<
522 ( Impl::is_same< PType , Policy >::value &&
523 ! Impl::is_same< typename PType::work_tag , void >::value
524 ),
const FunctorType & >::type functor
525 ,
const PType & policy )
527 pointer_type result_ptr = (pointer_type)
528 Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( functor ) , 0 );
530 reference_type update = ValueInit::init( functor , result_ptr );
532 const typename PType::member_type e = policy.end();
533 for (
typename PType::member_type i = policy.begin() ; i < e ; ++i ) {
534 functor(
typename PType::work_tag() , i , update ,
true );
537 Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( functor , result_ptr );
550 template<
class FunctorType ,
class Arg0 ,
class Arg1 >
551 class ParallelFor< FunctorType ,
Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Serial > >
// Untagged dispatch: this overload is enabled (via enable_if) only when
// TagType is void, and invokes the functor with the team member alone.
557 template<
class TagType >
558 KOKKOS_FORCEINLINE_FUNCTION
static 559 void driver(
typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
560 const FunctorType & >::type functor
561 ,
const typename Policy::member_type & member )
562 { functor( member ); }
// Tagged dispatch: this overload is enabled (via enable_if) only when
// TagType is not void, and passes a default-constructed TagType as the
// functor's first argument, followed by the team member.
564 template<
class TagType >
565 KOKKOS_FORCEINLINE_FUNCTION
static 566 void driver(
typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
567 const FunctorType & >::type functor
568 ,
const typename Policy::member_type & member )
569 { functor( TagType() , member ); }
573 ParallelFor(
const FunctorType & functor
574 ,
const Policy & policy )
576 const int shared_size = FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() );
578 Kokkos::Serial::scratch_memory_resize( 0 , shared_size );
580 for (
int ileague = 0 ; ileague < policy.league_size() ; ++ileague ) {
581 ParallelFor::template driver< typename Policy::work_tag >
582 ( functor ,
typename Policy::member_type(ileague,policy.league_size(),shared_size) );
588 template<
class FunctorType ,
class Arg0 ,
class Arg1 >
589 class ParallelReduce< FunctorType ,
Kokkos::TeamPolicy< Arg0 , Arg1 , Kokkos::Serial > >
594 typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename Policy::work_tag > ValueTraits ;
595 typedef Kokkos::Impl::FunctorValueInit< FunctorType , typename Policy::work_tag > ValueInit ;
599 typedef typename ValueTraits::pointer_type pointer_type ;
600 typedef typename ValueTraits::reference_type reference_type ;
// Untagged reduction dispatch: enabled (via enable_if) only when TagType is
// void; invokes the functor with the team member and the reduction
// reference `update`.
604 template<
class TagType >
605 KOKKOS_FORCEINLINE_FUNCTION
static 606 void driver(
typename Impl::enable_if< Impl::is_same< TagType , void >::value ,
607 const FunctorType & >::type functor
608 ,
const typename Policy::member_type & member
609 , reference_type update )
610 { functor( member , update ); }
// Tagged reduction dispatch: enabled (via enable_if) only when TagType is
// not void; passes a default-constructed TagType first, then the team
// member and the reduction reference `update`.
612 template<
class TagType >
613 KOKKOS_FORCEINLINE_FUNCTION
static 614 void driver(
typename Impl::enable_if< ! Impl::is_same< TagType , void >::value ,
615 const FunctorType & >::type functor
616 ,
const typename Policy::member_type & member
617 , reference_type update )
618 { functor( TagType() , member , update ); }
622 template<
class ViewType >
623 ParallelReduce(
const FunctorType & functor
624 ,
const Policy & policy
625 ,
const ViewType & result
628 const int reduce_size = ValueTraits::value_size( functor );
629 const int shared_size = FunctorTeamShmemSize< FunctorType >::value( functor , policy.team_size() );
630 void *
const scratch_reduce = Kokkos::Serial::scratch_memory_resize( reduce_size , shared_size );
632 const pointer_type result_ptr =
633 result.ptr_on_device() ? result.ptr_on_device()
634 : (pointer_type) scratch_reduce ;
636 reference_type update = ValueInit::init( functor , result_ptr );
638 for (
int ileague = 0 ; ileague < policy.league_size() ; ++ileague ) {
639 ParallelReduce::template driver< typename Policy::work_tag >
640 ( functor ,
typename Policy::member_type(ileague,policy.league_size(),shared_size) , update );
643 Kokkos::Impl::FunctorFinal< FunctorType , typename Policy::work_tag >::final( functor , result_ptr );
654 template<
typename iType>
655 struct TeamThreadRangeBoundariesStruct<iType,SerialTeamMember> {
656 typedef iType index_type;
659 enum {increment = 1};
660 const SerialTeamMember& thread;
662 KOKKOS_INLINE_FUNCTION
663 TeamThreadRangeBoundariesStruct (
const SerialTeamMember& arg_thread,
const iType& arg_count)
669 KOKKOS_INLINE_FUNCTION
670 TeamThreadRangeBoundariesStruct (
const SerialTeamMember& arg_thread,
const iType& arg_begin,
const iType & arg_end )
673 , thread( arg_thread )
677 template<
typename iType>
678 struct ThreadVectorRangeBoundariesStruct<iType,SerialTeamMember> {
679 typedef iType index_type;
682 enum {increment = 1};
684 KOKKOS_INLINE_FUNCTION
685 ThreadVectorRangeBoundariesStruct (
const SerialTeamMember& thread,
const iType& count):
692 template<
typename iType>
693 KOKKOS_INLINE_FUNCTION
694 Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>
695 TeamThreadRange(
const Impl::SerialTeamMember& thread,
const iType & count )
697 return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>(thread,count);
700 template<
typename iType>
701 KOKKOS_INLINE_FUNCTION
702 Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>
703 TeamThreadRange(
const Impl::SerialTeamMember& thread,
const iType & begin ,
const iType & end )
705 return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>(thread,begin,end);
708 template<
typename iType>
709 KOKKOS_INLINE_FUNCTION
710 Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >
712 return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >(thread,count);
715 KOKKOS_INLINE_FUNCTION
716 Impl::ThreadSingleStruct<Impl::SerialTeamMember> PerTeam(
const Impl::SerialTeamMember& thread) {
717 return Impl::ThreadSingleStruct<Impl::SerialTeamMember>(thread);
720 KOKKOS_INLINE_FUNCTION
721 Impl::VectorSingleStruct<Impl::SerialTeamMember> PerThread(
const Impl::SerialTeamMember& thread) {
722 return Impl::VectorSingleStruct<Impl::SerialTeamMember>(thread);
733 template<
typename iType,
class Lambda>
734 KOKKOS_INLINE_FUNCTION
735 void parallel_for(
const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries,
const Lambda& lambda) {
736 for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment)
744 template<
typename iType,
class Lambda,
typename ValueType >
745 KOKKOS_INLINE_FUNCTION
746 void parallel_reduce(
const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries,
747 const Lambda & lambda, ValueType& result) {
749 result = ValueType();
751 for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) {
752 ValueType tmp = ValueType();
757 result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
760 #ifdef KOKKOS_HAVE_CXX11 769 template<
typename iType,
class Lambda,
typename ValueType,
class JoinType >
770 KOKKOS_INLINE_FUNCTION
771 void parallel_reduce(
const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries,
772 const Lambda & lambda,
const JoinType& join, ValueType& init_result) {
774 ValueType result = init_result;
776 for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) {
777 ValueType tmp = ValueType();
782 init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join));
785 #endif // KOKKOS_HAVE_CXX11 794 template<
typename iType,
class Lambda>
795 KOKKOS_INLINE_FUNCTION
796 void parallel_for(
const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
797 loop_boundaries,
const Lambda& lambda) {
798 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 801 for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
809 template<
typename iType,
class Lambda,
typename ValueType >
810 KOKKOS_INLINE_FUNCTION
811 void parallel_reduce(
const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
812 loop_boundaries,
const Lambda & lambda, ValueType& result) {
813 result = ValueType();
814 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 817 for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
818 ValueType tmp = ValueType();
831 template<
typename iType,
class Lambda,
typename ValueType,
class JoinType >
832 KOKKOS_INLINE_FUNCTION
833 void parallel_reduce(
const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
834 loop_boundaries,
const Lambda & lambda,
const JoinType& join, ValueType& init_result) {
836 ValueType result = init_result;
837 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 840 for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
841 ValueType tmp = ValueType();
845 init_result = result;
858 template<
typename iType,
class FunctorType >
859 KOKKOS_INLINE_FUNCTION
860 void parallel_scan(
const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >&
861 loop_boundaries,
const FunctorType & lambda) {
863 typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
864 typedef typename ValueTraits::value_type value_type ;
866 value_type scan_val = value_type();
868 #ifdef KOKKOS_HAVE_PRAGMA_IVDEP 871 for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
872 lambda(i,scan_val,
true);
880 template<
class FunctorType>
881 KOKKOS_INLINE_FUNCTION
882 void single(
const Impl::VectorSingleStruct<Impl::SerialTeamMember>& ,
const FunctorType& lambda) {
886 template<
class FunctorType>
887 KOKKOS_INLINE_FUNCTION
888 void single(
const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& ,
const FunctorType& lambda) {
892 template<
class FunctorType,
class ValueType>
893 KOKKOS_INLINE_FUNCTION
894 void single(
const Impl::VectorSingleStruct<Impl::SerialTeamMember>& ,
const FunctorType& lambda, ValueType& val) {
898 template<
class FunctorType,
class ValueType>
899 KOKKOS_INLINE_FUNCTION
900 void single(
const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& ,
const FunctorType& lambda, ValueType& val) {
905 #endif // defined( KOKKOS_HAVE_SERIAL ) Scratch memory space associated with an execution space.
KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< iType, TeamMemberType > TeamThreadRange(const TeamMemberType &, const iType &count)
Execution policy for parallel work over the threads within a team.
void parallel_reduce(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< !Impl::is_integral< ExecPolicy >::value >::type *=0)
Parallel reduction.
Memory space for main process and CPU execution spaces.
Declaration of various MemoryLayout options.
Declaration of parallel operators.
void finalize()
Finalize the spaces that were initialized via Kokkos::initialize.
KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< iType, TeamMemberType > ThreadVectorRange(const TeamMemberType &, const iType &count)
Execution policy for a vector parallel loop.
Execution policy for work over a range of an integral type.
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< !Impl::is_integral< ExecPolicy >::value >::type *=0)
Execute functor in parallel according to the execution policy.
Execution policy for parallel work over a league of teams of threads.