10 #ifndef EIGEN_PARALLELIZER_H
11 #define EIGEN_PARALLELIZER_H
13 #if EIGEN_HAS_CXX11_ATOMIC
// Dispatches on `action` to manage the thread-count setting; the value is
// exchanged with the caller through `*v`.
// NOTE(review): the embedded line numbers skip (22, 24, 25, 29, 32, ...), so
// the braces and several statements of this function are not visible in this
// excerpt. Comments below describe only the lines that are shown.
22 inline void manage_multi_threading(Action action,
int* v)
// Function-local static that holds the setting; it starts at -1.
24 static int m_maxThreads = -1;
// Presumably silences unused-variable warnings in configurations that never
// read m_maxThreads -- confirm against the full file.
25 EIGEN_UNUSED_VARIABLE(m_maxThreads)
// First branch (its condition is not visible here): `v` must be non-null.
29 eigen_internal_assert(v!=0);
32 else if(action==GetAction)
// The GetAction branch also requires a non-null output pointer.
34 eigen_internal_assert(v!=0);
35 #ifdef EIGEN_HAS_OPENMP
// With OpenMP available, report the OpenMP runtime's maximum thread count.
// NOTE(review): the condition guarding this assignment is not visible here.
39 *v = omp_get_max_threads();
// Unconditional assertion failure; presumably the fallback for an
// unrecognized `action` -- confirm.
46 eigen_internal_assert(
false);
// NOTE(review): the lines below are bodies of several functions whose headers
// are not visible in this excerpt (the embedded line numbers skip from 58 to 66
// to 74). Comments describe only the calls that are shown.

// Query the thread-count setting into `nbt`, then the three cache-size values
// into l1/l2/l3. The results are not used on any visible line; presumably the
// calls exist to trigger initialization of the underlying statics -- confirm.
56 internal::manage_multi_threading(GetAction, &nbt);
57 std::ptrdiff_t l1, l2, l3;
58 internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
// Read the current thread-count setting into `ret`.
66 internal::manage_multi_threading(GetAction, &ret);
// Store `v` as the new thread-count setting.
74 internal::manage_multi_threading(SetAction, &v);
// NOTE(review): these are member declarations of a type whose header is not
// visible in this excerpt; the matching #else/#endif is not shown either.
// When C++11 atomics are available, both fields are declared as std::atomic.
87 #if EIGEN_HAS_CXX11_ATOMIC
88 std::atomic<Index> sync;
89 std::atomic<int> users;
// Runs the product functor `func` over a rows x cols problem, either in a
// single call or split across OpenMP threads.
//
// Template parameters:
//   Condition - compile-time switch; when false the sequential path is taken.
//   Functor   - callable; must provide Traits::nr, Traits::mr and
//               initParallelSession().
//   Index     - integer type used for sizes and offsets.
// Parameters:
//   func      - the functor to invoke.
//   rows/cols - extent of the problem.
//   depth     - third dimension, used only to estimate the amount of work.
//   transpose - selects which dimension is sliced and the argument order
//               passed to `func`.
//
// NOTE(review): the embedded line numbers skip, so the braces, the #else/#endif
// and the declarations of `threads` and `info` are not visible in this excerpt.
99 template<
bool Condition,
typename Functor,
typename Index>
100 void parallelize_gemm(
const Functor& func,
Index rows,
Index cols,
Index depth,
bool transpose)
// Sequential configuration: no OpenMP, or a BLAS backend is in use, or neither
// C++11 atomics nor an i386/x86_64 target is available.
107 #if (! defined(EIGEN_HAS_OPENMP)) || defined(EIGEN_USE_BLAS) || ((!EIGEN_HAS_CXX11_ATOMIC) && !(EIGEN_ARCH_i386_OR_x86_64))
// depth and transpose are not needed on this path; mark them as used.
112 EIGEN_UNUSED_VARIABLE(depth);
113 EIGEN_UNUSED_VARIABLE(transpose);
// Process the whole problem in one call.
114 func(0,rows, 0,cols);
// Upper bound on useful threads from the sliced dimension: at least 1, at most
// one thread per Traits::nr units of `size`.
125 Index size = transpose ? rows : cols;
126 Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr);
// Estimate the total work as rows*cols*depth (computed in double) and cap the
// thread count so that each thread gets at least kMinTaskSize units of it.
129 double work =
static_cast<double>(rows) *
static_cast<double>(cols) *
130 static_cast<double>(depth);
131 double kMinTaskSize = 50000;
132 pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads,
static_cast<Index>( work / kMinTaskSize ) ));
// Fall back to a single call when parallelism is disabled at compile time,
// only one thread is to be used, or omp_get_num_threads() reports a team of
// more than one thread (i.e. we are already inside a parallel region).
// NOTE(review): `threads` is computed on a line not visible here.
140 if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
141 return func(0,rows, 0,cols);
// Let the functor prepare for a session with `threads` participants.
144 func.initParallelSession(threads);
// NOTE(review): presumably guarded by `if(transpose)` on a line not shown --
// confirm.
147 std::swap(rows,cols);
151 #pragma omp parallel num_threads(threads)
// Index of the calling thread within the team.
153 Index i = omp_get_thread_num();
// Size of the team as reported by the runtime; the partitioning below uses
// this value rather than `threads`.
155 Index actual_threads = omp_get_num_threads();
// Per-thread column count, rounded down to a multiple of 4.
157 Index blockCols = (cols / actual_threads) & ~
Index(0x3);
// Per-thread row count, rounded down to a multiple of Traits::mr.
158 Index blockRows = (rows / actual_threads);
159 blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
// Row range of this thread; the last thread also takes the remainder.
161 Index r0 = i*blockRows;
162 Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
// Column range of this thread; the last thread also takes the remainder.
164 Index c0 = i*blockCols;
165 Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;
// Record this thread's row range in its slot of the shared `info` array.
// NOTE(review): `info` is declared on a line not visible here.
167 info[i].lhs_start = r0;
168 info[i].lhs_length = actualBlockRows;
// Invoke the functor on this thread's column slice; with `transpose` the
// row/column arguments are passed in swapped order.
170 if(transpose) func(c0, actualBlockCols, 0, rows, info);
171 else func(0, rows, c0, actualBlockCols, info);
180 #endif // EIGEN_PARALLELIZER_H