#ifndef EIGEN_BLASUTIL_H
#define EIGEN_BLASUTIL_H

// This file contains many lightweight helper classes used to
// implement and control fast level 2 and level 3 BLAS-like routines.

namespace Eigen {

namespace internal {
// forward declarations of the product kernels
template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper,
         int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false>
struct gebp_kernel;
template<typename Scalar, typename Index, typename DataMapper, int nr, int StorageOrder,
         bool Conjugate = false, bool PanelMode = false>
struct gemm_pack_rhs;
template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, typename Packet,
         int StorageOrder, bool Conjugate = false, bool PanelMode = false>
struct gemm_pack_lhs;
template<typename Index,
         typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
         typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
         int ResStorageOrder, int ResInnerStride>
struct general_matrix_matrix_product;
template<typename Index,
         typename LhsScalar, typename LhsMapper, int LhsStorageOrder, bool ConjugateLhs,
         typename RhsScalar, typename RhsMapper, bool ConjugateRhs,
         int Version=Specialized>
struct general_matrix_vector_product;
template<bool Conjugate> struct conj_if;

template<> struct conj_if<true> {
  template<typename T>
  inline T operator()(const T& x) const { return numext::conj(x); }
  template<typename T>
  inline T pconj(const T& x) const { return internal::pconj(x); }
};

template<> struct conj_if<false> {
  template<typename T>
  inline const T& operator()(const T& x) const { return x; }
  template<typename T>
  inline const T& pconj(const T& x) const { return x; }
};
// Generic conjugate-aware multiply-accumulate helper.
template<typename LhsScalar, typename RhsScalar, bool ConjLhs, bool ConjRhs>
struct conj_helper
{
  typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar>::ReturnType Scalar;

  EIGEN_STRONG_INLINE Scalar pmadd(const LhsScalar& x, const RhsScalar& y, const Scalar& c) const
  { return padd(c, pmul(x,y)); }

  EIGEN_STRONG_INLINE Scalar pmul(const LhsScalar& x, const RhsScalar& y) const
  { return conj_if<ConjLhs>()(x) * conj_if<ConjRhs>()(y); }
};
template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); }
};
// pmul(x,y) computes x * conj(y)
template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, false,true>
{
  typedef std::complex<RealScalar> Scalar;
  EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
  { return c + pmul(x,y); }

  EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
  { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y),
                  numext::imag(x)*numext::real(y) - numext::real(x)*numext::imag(y)); }
};
// pmul(x,y) computes conj(x) * y
template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,false>
{
  typedef std::complex<RealScalar> Scalar;
  EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
  { return c + pmul(x,y); }

  EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
  { return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y),
                  numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
};
// pmul(x,y) computes conj(x) * conj(y)
template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,true>
{
  typedef std::complex<RealScalar> Scalar;
  EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
  { return c + pmul(x,y); }

  EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
  { return Scalar(numext::real(x)*numext::real(y) - numext::imag(x)*numext::imag(y),
                  - numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
};
template<typename RealScalar,bool Conj> struct conj_helper<std::complex<RealScalar>, RealScalar, Conj,false>
{
  typedef std::complex<RealScalar> Scalar;
  EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const RealScalar& y, const Scalar& c) const
  { return padd(c, pmul(x,y)); }
  EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const RealScalar& y) const
  { return conj_if<Conj>()(x) * y; }
};
template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::complex<RealScalar>, false,Conj>
{
  typedef std::complex<RealScalar> Scalar;
  EIGEN_STRONG_INLINE Scalar pmadd(const RealScalar& x, const Scalar& y, const Scalar& c) const
  { return padd(c, pmul(x,y)); }
  EIGEN_STRONG_INLINE Scalar pmul(const RealScalar& x, const Scalar& y) const
  { return x * conj_if<Conj>()(y); }
};
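// Illustrative usage sketch (not part of the original header): the two boolean
// template flags select which operand gets conjugated before the multiply.
// For example, to accumulate x * conj(y):
//
//   std::complex<float> x(1,2), y(3,4), acc(0,0);
//   Eigen::internal::conj_helper<std::complex<float>, std::complex<float>, false, true> cj;
//   acc = cj.pmadd(x, y, acc);   // acc += x * conj(y)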
template<typename From,typename To> struct get_factor {
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE To run(const From& x) { return To(x); }
};
template<typename Scalar, typename Index>
class BlasVectorMapper {
  public:
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
    return m_data[i];
  }
  template <typename Packet, int AlignmentType>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet load(Index i) const {
    return ploadt<Packet, AlignmentType>(m_data + i);
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC bool aligned(Index i) const {
    return (UIntPtr(m_data+i)%sizeof(Packet))==0;
  }

  protected:
  Scalar* m_data;
};
template<typename Scalar, typename Index, int AlignmentType, int Incr=1>
class BlasLinearMapper;
// Unit inner increment: plain contiguous access.
template<typename Scalar, typename Index, int AlignmentType>
class BlasLinearMapper<Scalar,Index,AlignmentType,1>
{
public:
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data, Index incr=1) : m_data(data)
  {
    EIGEN_ONLY_USED_FOR_DEBUG(incr);
    eigen_assert(incr==1);
  }
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
    internal::prefetch(&operator()(i));
  }
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
    return m_data[i];
  }
  template<typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i) const {
    return ploadt<PacketType, AlignmentType>(m_data + i);
  }
  template<typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const {
    pstoret<Scalar, PacketType, AlignmentType>(m_data + i, p);
  }

protected:
  Scalar *m_data;
};
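// Illustrative usage sketch (not part of the original header), assuming an SSE
// build where Packet4f is the 4-wide float packet type: with Incr==1 the linear
// mapper forwards to contiguous packet loads/stores.
//
//   float buf[8] = {0,1,2,3,4,5,6,7};
//   Eigen::internal::BlasLinearMapper<float, Eigen::Index, Eigen::Unaligned> lm(buf);
//   Packet4f p = lm.loadPacket<Packet4f>(2);   // reads buf[2..5]
//   lm.storePacket(2, p);                      // writes them back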
// Lightweight helper class to access matrix coefficients.
template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned, int Incr = 1>
class blas_data_mapper;
// Advanced storePacketBlock interface: recursively stores the n packets of a
// PacketBlock, one column/row offset (idx) at a time.
template<typename Index, typename Scalar, typename Packet, int n, int idx, int StorageOrder>
struct PacketBlockManagement
{
  PacketBlockManagement<Index, Scalar, Packet, n, idx - 1, StorageOrder> pbm;
  EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
    pbm.store(to, stride, i, j, block);
    pstoreu<Scalar>(to + i + (j + idx)*stride, block.packet[idx]);
  }
};
template<typename Index, typename Scalar, typename Packet, int n, int idx>
struct PacketBlockManagement<Index, Scalar, Packet, n, idx, RowMajor>
{
  PacketBlockManagement<Index, Scalar, Packet, n, idx - 1, RowMajor> pbm;
  EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
    pbm.store(to, stride, i, j, block);
    pstoreu<Scalar>(to + j + (i + idx)*stride, block.packet[idx]);
  }
};
// Recursion terminators (idx == -1): nothing left to store.
template<typename Index, typename Scalar, typename Packet, int n, int StorageOrder>
struct PacketBlockManagement<Index, Scalar, Packet, n, -1, StorageOrder>
{
  EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
    EIGEN_UNUSED_VARIABLE(to);
    EIGEN_UNUSED_VARIABLE(stride);
    EIGEN_UNUSED_VARIABLE(i);
    EIGEN_UNUSED_VARIABLE(j);
    EIGEN_UNUSED_VARIABLE(block);
  }
};
template<typename Index, typename Scalar, typename Packet, int n>
struct PacketBlockManagement<Index, Scalar, Packet, n, -1, RowMajor>
{
  EIGEN_ALWAYS_INLINE void store(Scalar *to, const Index stride, Index i, Index j, const PacketBlock<Packet, n> &block) const {
    EIGEN_UNUSED_VARIABLE(to);
    EIGEN_UNUSED_VARIABLE(stride);
    EIGEN_UNUSED_VARIABLE(i);
    EIGEN_UNUSED_VARIABLE(j);
    EIGEN_UNUSED_VARIABLE(block);
  }
};
template<typename Scalar, typename Index, int StorageOrder, int AlignmentType>
class blas_data_mapper<Scalar,Index,StorageOrder,AlignmentType,1>
{
public:
  EIGEN_DEVICE_FUNC blas_data_mapper(Scalar* data, Index stride, Index incr=1)
    : m_data(data), m_stride(stride)
  {
    EIGEN_ONLY_USED_FOR_DEBUG(incr);
    eigen_assert(incr==1);
  }
  EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {
    return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride];
  }
  template<typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i, Index j) const {
    return ploadt<PacketType, AlignmentType>(&operator()(i, j));
  }
  template <typename PacketT, int AlignmentT>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const {
    return ploadt<PacketT, AlignmentT>(&operator()(i, j));
  }
  template<typename SubPacket>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
    pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
  }
  template<typename SubPacket>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
    return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
  }
  EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; }
  EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
  EIGEN_DEVICE_FUNC Index firstAligned(Index size) const {
    if (UIntPtr(m_data)%sizeof(Scalar)) {
      return -1;
    }
    return internal::first_default_aligned(m_data, size);
  }
  template<typename SubPacket, int n>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n> &block) const {
    PacketBlockManagement<Index, Scalar, SubPacket, n, n-1, StorageOrder> pbm;
    pbm.store(m_data, m_stride, i, j, block);
  }

protected:
  Scalar* EIGEN_RESTRICT m_data;
  const Index m_stride;
};
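// Illustrative usage sketch (not part of the original header): operator()(i,j)
// resolves to data[i + j*stride] for a column-major mapper and data[j + i*stride]
// for a row-major one.
//
//   double data[12];   // viewed as a 3x4 column-major matrix
//   Eigen::internal::blas_data_mapper<double, Eigen::Index, Eigen::ColMajor> M(data, 3);
//   M(1, 2) = 42.0;    // writes data[1 + 2*3]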
// Non-unit inner increment (inner stride != 1): element and packet accesses go
// through strided gathers/scatters.
template<typename Scalar, typename Index, int AlignmentType, int Incr>
class BlasLinearMapper
{
public:
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data, Index incr) : m_data(data), m_incr(incr) {}
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
    internal::prefetch(&operator()(i));
  }
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
    return m_data[i*m_incr.value()];
  }
  template<typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i) const {
    return pgather<Scalar,PacketType>(m_data + i*m_incr.value(), m_incr.value());
  }
  template<typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const {
    pscatter<Scalar, PacketType>(m_data + i*m_incr.value(), p, m_incr.value());
  }

protected:
  Scalar *m_data;
  const internal::variable_if_dynamic<Index,Incr> m_incr;
};
template<typename Scalar, typename Index, int StorageOrder, int AlignmentType, int Incr>
class blas_data_mapper
{
public:
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr) : m_data(data), m_stride(stride), m_incr(incr) {}
  EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {
    return m_data[StorageOrder==RowMajor ? j*m_incr.value() + i*m_stride : i*m_incr.value() + j*m_stride];
  }
  template<typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacket(Index i, Index j) const {
    return pgather<Scalar,PacketType>(&operator()(i, j), m_incr.value());
  }
  template <typename PacketT, int AlignmentT>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const {
    return pgather<Scalar,PacketT>(&operator()(i, j), m_incr.value());
  }
  template<typename SubPacket>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
    pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
  }
  template<typename SubPacket>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
    return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
  }
  // Helper storing a PacketBlock element by element; the specializations below
  // handle complex scalars whose packets interleave real and imaginary parts.
  template<typename SubPacket, typename ScalarT, int n, int idx>
  struct storePacketBlock_helper
  {
    storePacketBlock_helper<SubPacket, ScalarT, n, idx-1> spbh;
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
      spbh.store(sup, i, j, block);
      for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)
      {
        ScalarT *v = &sup->operator()(i+l, j+idx);
        *v = block.packet[idx][l];
      }
    }
  };
  template<typename SubPacket, int n, int idx>
  struct storePacketBlock_helper<SubPacket, std::complex<float>, n, idx>
  {
    storePacketBlock_helper<SubPacket, std::complex<float>, n, idx-1> spbh;
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
      spbh.store(sup, i, j, block);
      for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)
      {
        std::complex<float> *v = &sup->operator()(i+l, j+idx);
        v->real(block.packet[idx].v[2*l+0]);
        v->imag(block.packet[idx].v[2*l+1]);
      }
    }
  };
  template<typename SubPacket, int n, int idx>
  struct storePacketBlock_helper<SubPacket, std::complex<double>, n, idx>
  {
    storePacketBlock_helper<SubPacket, std::complex<double>, n, idx-1> spbh;
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper* sup, Index i, Index j, const PacketBlock<SubPacket, n>& block) const {
      spbh.store(sup, i, j, block);
      for(int l = 0; l < unpacket_traits<SubPacket>::size; l++)
      {
        std::complex<double> *v = &sup->operator()(i+l, j+idx);
        v->real(block.packet[idx].v[2*l+0]);
        v->imag(block.packet[idx].v[2*l+1]);
      }
    }
  };
  // Recursion terminators (idx == -1).
  template<typename SubPacket, typename ScalarT, int n>
  struct storePacketBlock_helper<SubPacket, ScalarT, n, -1>
  {
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper*, Index, Index, const PacketBlock<SubPacket, n>& ) const {
    }
  };
  template<typename SubPacket, int n>
  struct storePacketBlock_helper<SubPacket, std::complex<float>, n, -1>
  {
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper*, Index, Index, const PacketBlock<SubPacket, n>& ) const {
    }
  };

  template<typename SubPacket, int n>
  struct storePacketBlock_helper<SubPacket, std::complex<double>, n, -1>
  {
    EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void store(const blas_data_mapper*, Index, Index, const PacketBlock<SubPacket, n>& ) const {
    }
  };
  // Stores a PacketBlock into m_data; with a non-unit increment this is much
  // slower than the Incr==1 path.
  template<typename SubPacket, int n>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketBlock(Index i, Index j, const PacketBlock<SubPacket, n> &block) const {
    storePacketBlock_helper<SubPacket, Scalar, n, n-1> spb;
    spb.store(this, i, j, block);
  }
protected:
  Scalar* EIGEN_RESTRICT m_data;
  const Index m_stride;
  const internal::variable_if_dynamic<Index,Incr> m_incr;
};
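// Illustrative usage sketch (not part of the original header): with a non-unit
// inner increment, element (i,j) of a column-major mapper lives at
// data[i*incr + j*stride], and packet accesses go through pgather/pscatter.
//
//   float buf[16];
//   Eigen::internal::blas_data_mapper<float, Eigen::Index, Eigen::ColMajor,
//                                     Eigen::Unaligned, Eigen::Dynamic> M(buf, 8, 2);
//   float x = M(1, 0);   // reads buf[1*2 + 0*8] == buf[2]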
// const version of blas_data_mapper
template<typename Scalar, typename Index, int StorageOrder>
class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder>
{
public:
  EIGEN_ALWAYS_INLINE const_blas_data_mapper(const Scalar *data, Index stride)
    : blas_data_mapper<const Scalar, Index, StorageOrder>(data, stride) {}
};
/* Helper class to analyze the factors of a Product expression.
 * In particular it allows to pop out operator-, scalar multiples,
 * and conjugate */
template<typename XprType> struct blas_traits
{
  typedef typename traits<XprType>::Scalar Scalar;
  typedef const XprType& ExtractType;
  typedef XprType _ExtractType;
  enum {
    IsComplex = NumTraits<Scalar>::IsComplex,
    IsTransposed = false,
    NeedToConjugate = false,
    HasUsableDirectAccess = (    (int(XprType::Flags)&DirectAccessBit)
                              && (   bool(XprType::IsVectorAtCompileTime)
                                  || int(inner_stride_at_compile_time<XprType>::ret) == 1)
                             ) ?  1 : 0,
    HasScalarFactor = false
  };
  typedef typename conditional<bool(HasUsableDirectAccess),
    ExtractType,
    typename _ExtractType::PlainObject
    >::type DirectLinearAccessType;
  static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return x; }
  static inline EIGEN_DEVICE_FUNC const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
};
// pop conjugate
template<typename Scalar, typename NestedXpr>
struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
 : blas_traits<NestedXpr>
{
  typedef blas_traits<NestedXpr> Base;
  typedef CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> XprType;
  typedef typename Base::ExtractType ExtractType;
  enum {
    IsComplex = NumTraits<Scalar>::IsComplex,
    NeedToConjugate = Base::NeedToConjugate ? 0 : IsComplex
  };
  static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
  static inline Scalar extractScalarFactor(const XprType& x) { return conj(Base::extractScalarFactor(x.nestedExpression())); }
};
// pop scalar multiple
template<typename Scalar, typename NestedXpr, typename Plain>
struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> >
 : blas_traits<NestedXpr>
{
  enum {
    HasScalarFactor = true
  };
  typedef blas_traits<NestedXpr> Base;
  typedef CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> XprType;
  typedef typename Base::ExtractType ExtractType;
  static inline EIGEN_DEVICE_FUNC ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); }
  static inline EIGEN_DEVICE_FUNC Scalar extractScalarFactor(const XprType& x)
  { return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); }
};
template<typename Scalar, typename NestedXpr, typename Plain>
struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > >
 : blas_traits<NestedXpr>
{
  enum {
    HasScalarFactor = true
  };
  typedef blas_traits<NestedXpr> Base;
  typedef CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > XprType;
  typedef typename Base::ExtractType ExtractType;
  static inline ExtractType extract(const XprType& x) { return Base::extract(x.lhs()); }
  static inline Scalar extractScalarFactor(const XprType& x)
  { return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; }
};
template<typename Scalar, typename Plain1, typename Plain2>
struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1>,
                                                            const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain2> > >
 : blas_traits<CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1> >
{};
// pop opposite
template<typename Scalar, typename NestedXpr>
struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> >
 : blas_traits<NestedXpr>
{
  enum {
    HasScalarFactor = true
  };
  typedef blas_traits<NestedXpr> Base;
  typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType;
  typedef typename Base::ExtractType ExtractType;
  static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
  static inline Scalar extractScalarFactor(const XprType& x)
  { return - Base::extractScalarFactor(x.nestedExpression()); }
};
// pop/keep transpose
template<typename NestedXpr>
struct blas_traits<Transpose<NestedXpr> >
 : blas_traits<NestedXpr>
{
  typedef typename NestedXpr::Scalar Scalar;
  typedef blas_traits<NestedXpr> Base;
  typedef Transpose<NestedXpr> XprType;
  typedef Transpose<const typename Base::_ExtractType> ExtractType;
  typedef Transpose<const typename Base::_ExtractType> _ExtractType;
  typedef typename conditional<bool(Base::HasUsableDirectAccess),
    ExtractType,
    typename ExtractType::PlainObject
    >::type DirectLinearAccessType;
  enum {
    IsTransposed = Base::IsTransposed ? 0 : 1
  };
  static inline ExtractType extract(const XprType& x) { return ExtractType(Base::extract(x.nestedExpression())); }
  static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
};
template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectAccess>
struct extract_data_selector {
  static const typename T::Scalar* run(const T& m)
  {
    return blas_traits<T>::extract(m).data();
  }
};

template<typename T>
struct extract_data_selector<T,false> {
  static typename T::Scalar* run(const T&) { return 0; }
};
template<typename T>
const typename T::Scalar* extract_data(const T& m)
{
  return extract_data_selector<T>::run(m);
}
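// Illustrative usage sketch (not part of the original header): blas_traits peels
// scalar factors, conjugation and transposition off an expression so the product
// kernels can operate on the raw, directly-accessible operand. For instance, for
//
//   Eigen::MatrixXcf A(4,4);
//   auto xpr = std::complex<float>(2,0) * A.conjugate();
//
// blas_traits<decltype(xpr)>::extract(xpr) refers to the underlying A,
// extractScalarFactor(xpr) returns the accumulated scalar factor, and
// extract_data(xpr) yields a pointer to A's coefficients when direct access is usable.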
} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_BLASUTIL_H