11 #ifndef EIGEN_COMPLEX_NEON_H
12 #define EIGEN_COMPLEX_NEON_H
// Returns the 128-bit XOR mask {0, 0x80000000, 0, 0x80000000}; XOR-ing a float vector's bits
// with it flips the sign bit of lanes 1 and 3 and leaves lanes 0 and 2 untouched.
// NOTE(review): two alternative ways of building the mask appear back to back (a brace-initialised
// `ret` and a static array loaded with vld1q_u32). The preprocessor guards that presumably select
// between them, and the closing brace, are not visible in this excerpt — confirm against the full file.
18 inline uint32x4_t p4ui_CONJ_XOR() {
21 uint32x4_t ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
// Alternative form: load the same four words from a static constant array.
24 static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
25 return vld1q_u32( conj_XOR_DATA );
// Returns the 64-bit XOR mask {0, 0x80000000}; XOR-ing a two-float vector's bits with it
// flips the sign bit of lane 1 only.
// NOTE(review): the closing brace of this function is not visible in this excerpt.
29 inline uint32x2_t p2ui_CONJ_XOR() {
30 static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };
31 return vld1_u32( conj_XOR_DATA );
// Constructors of Packet2cf.
// NOTE(review): the enclosing struct header and the declaration of the member `v` are not
// visible in this excerpt.
// Default constructor: empty body and no initialiser list, so `v` is not explicitly initialised.
37 EIGEN_STRONG_INLINE Packet2cf() {}
// Wraps an existing Packet4f by copying it into `v`; `explicit` prevents implicit conversion.
38 EIGEN_STRONG_INLINE
explicit Packet2cf(
const Packet4f& a) : v(a) {}
// packet_traits specialisation for std::complex<float>: both the full packet type and the
// half packet type are Packet2cf.
// NOTE(review): the struct braces and any further members (e.g. capability flags) are not
// visible in this excerpt.
42 template<>
struct packet_traits<std::complex<float> > : default_packet_traits
44 typedef Packet2cf type;
45 typedef Packet2cf half;
65 template<>
struct unpacket_traits<Packet2cf> {
typedef std::complex<float> type;
enum {size=2, alignment=
Aligned16};
typedef Packet2cf half; };
// Broadcasts one complex value into both complex slots of a Packet2cf.
// NOTE(review): the function braces and the declaration of `r64` are not visible in this excerpt.
67 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(
const std::complex<float>& from)
// Load the two floats stored at &from into a 64-bit vector; the C-style cast reinterprets the
// complex as floats and drops const.
70 r64 = vld1_f32((
float *)&from);
// Put the same 64-bit pair into the low and the high half of the 128-bit result.
72 return Packet2cf(vcombine_f32(r64, r64));
75 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(padd<Packet4f>(a.v,b.v)); }
76 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b) {
return Packet2cf(psub<Packet4f>(a.v,b.v)); }
77 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(
const Packet2cf& a) {
return Packet2cf(pnegate<Packet4f>(a.v)); }
// Conjugates a Packet2cf: XORs the packet bits with p4ui_CONJ_XOR(), which flips the sign bit
// of lanes 1 and 3 (the lanes pgather fills with the imaginary parts).
// NOTE(review): the function braces are not visible in this excerpt.
78 template<> EIGEN_STRONG_INLINE Packet2cf pconj(
const Packet2cf& a)
// View the float lanes as raw 32-bit words so the XOR can be applied.
80 Packet4ui b = vreinterpretq_u32_f32(a.v);
81 return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
// Complex multiplication of two Packet2cf values.
// NOTE(review): the function braces and the declaration of v1/v2 are not visible in this excerpt.
84 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
// v1 = lane 0 of each half of a, duplicated: | a[0] | a[0] | a[2] | a[2] |
89 v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
// v2 = lane 1 of each half of a, duplicated: | a[1] | a[1] | a[3] | a[3] |
91 v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
// Multiply both broadcasts lane-wise with b.
93 v1 = vmulq_f32(v1, b.v);
95 v2 = vmulq_f32(v2, b.v);
// Flip the sign of lanes 1 and 3 of v2.
97 v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));
// NOTE(review): a complex product needs the two lanes of each pair in v2 swapped before the
// final add (the Packet1cd pmul in this file does so with preverse). No such swap is visible
// here — it may simply be missing from this excerpt; confirm against the full file.
101 return Packet2cf(vaddq_f32(v1, v2));
// Bitwise AND of two Packet2cf values: both float vectors are reinterpreted as 32-bit words,
// AND-ed, and the result is reinterpreted back to floats.
// NOTE(review): the function braces are not visible in this excerpt.
104 template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
106 return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
// Bitwise OR of two Packet2cf values, performed on the 32-bit integer view of the lanes.
// NOTE(review): the function braces are not visible in this excerpt.
108 template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
110 return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
// Bitwise XOR of two Packet2cf values, performed on the 32-bit integer view of the lanes.
// NOTE(review): the function braces are not visible in this excerpt.
112 template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
114 return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
// Bitwise "a AND NOT b" of two Packet2cf values: vbicq_u32 clears in a every bit that is set
// in b, working on the 32-bit integer view of the lanes.
// NOTE(review): the function braces are not visible in this excerpt.
116 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
118 return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
121 template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(
const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD
return Packet2cf(pload<Packet4f>((
const float*)from)); }
122 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(
const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD
return Packet2cf(ploadu<Packet4f>((
const float*)from)); }
124 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(
const std::complex<float>* from) {
return pset1<Packet2cf>(*from); }
126 template<> EIGEN_STRONG_INLINE
void pstore <std::complex<float> >(std::complex<float> * to,
const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((
float*)to, from.v); }
127 template<> EIGEN_STRONG_INLINE
void pstoreu<std::complex<float> >(std::complex<float> * to,
const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((
float*)to, from.v); }
// Gathers two complex<float> values located `stride` elements apart (from[0] and from[stride])
// into one Packet2cf.
// NOTE(review): the function braces are not visible in this excerpt.
129 template<> EIGEN_DEVICE_FUNC
inline Packet2cf pgather<std::complex<float>, Packet2cf>(
const std::complex<float>* from,
Index stride)
// Start from an all-zero vector, then overwrite each lane in turn.
131 Packet4f res = pset1<Packet4f>(0.f);
// Lanes 0/1 receive real/imag of the first element, lanes 2/3 real/imag of the second.
132 res = vsetq_lane_f32(std::real(from[0*stride]), res, 0);
133 res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
134 res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
135 res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
136 return Packet2cf(res);
// Scatters the two complex values of a Packet2cf to to[0] and to[stride].
// NOTE(review): the function braces are not visible in this excerpt.
139 template<> EIGEN_DEVICE_FUNC
inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to,
const Packet2cf& from,
Index stride)
// Lanes 0/1 form the first complex value, lanes 2/3 the second.
141 to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
142 to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
145 template<> EIGEN_STRONG_INLINE
void prefetch<std::complex<float> >(
const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((
float *)addr); }
// Extracts a complex value from a Packet2cf by spilling the whole packet to a local array.
// NOTE(review): the function braces and the return statement are not visible in this excerpt;
// presumably x[0] is returned — confirm against the full file.
147 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(
const Packet2cf& a)
// Two-element scratch buffer (EIGEN_ALIGN16 presumably requests 16-byte alignment — TODO confirm).
149 std::complex<float> EIGEN_ALIGN16 x[2];
// Write all four float lanes of the packet into the buffer.
150 vst1q_f32((
float *)x, a.v);
// Reverses the order of the two complex values in a Packet2cf by swapping its 64-bit halves.
// NOTE(review): the function braces and the declaration of `a_r128` are not visible in this excerpt.
154 template<> EIGEN_STRONG_INLINE Packet2cf preverse(
const Packet2cf& a)
156 float32x2_t a_lo, a_hi;
159 a_lo = vget_low_f32(a.v);
160 a_hi = vget_high_f32(a.v);
// Recombine with the former high half first.
161 a_r128 = vcombine_f32(a_hi, a_lo);
163 return Packet2cf(a_r128);
// Swaps the two float lanes inside each 64-bit pair (vrev64q_f32), i.e. exchanges the
// real and imaginary lanes of both complex values.
// NOTE(review): the function braces are not visible in this excerpt.
166 template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(
const Packet2cf& a)
168 return Packet2cf(vrev64q_f32(a.v));
// Horizontal sum: adds the two complex values held in a Packet2cf.
// NOTE(review): the function braces, the declaration of a1/a2 and the return statement are not
// visible in this excerpt; presumably `s` is returned — confirm against the full file.
171 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(
const Packet2cf& a)
174 std::complex<float> s;
// Split into the low pair (first complex) and the high pair (second complex).
176 a1 = vget_low_f32(a.v);
177 a2 = vget_high_f32(a.v);
// Lane-wise add gives (re0+re1, im0+im1).
178 a2 = vadd_f32(a1, a2);
// Write the two floats into the storage of s.
179 vst1_f32((
float *)&s, a2);
// Reduces two packets at once: slot 0 of the result is the sum of the two complex values of
// vecs[0], slot 1 is the sum of the two complex values of vecs[1].
// NOTE(review): the function braces are not visible in this excerpt.
184 template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(
const Packet2cf* vecs)
186 Packet4f sum1, sum2, sum;
// sum1 = | vecs[0].low | vecs[1].low |, sum2 = | vecs[0].high | vecs[1].high |
189 sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v));
190 sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
191 sum = vaddq_f32(sum1, sum2);
193 return Packet2cf(sum);
// Horizontal product: multiplies the two complex values held in a Packet2cf.
// NOTE(review): the function braces and the return statement are not visible in this excerpt;
// presumably `s` is returned — confirm against the full file.
196 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(
const Packet2cf& a)
198 float32x2_t a1, a2, v1, v2, prod;
199 std::complex<float> s;
// a1 = first complex value (low pair), a2 = second complex value (high pair).
201 a1 = vget_low_f32(a.v);
202 a2 = vget_high_f32(a.v);
// v1 = lane 0 of a1 broadcast, v2 = lane 1 of a1 broadcast.
204 v1 = vdup_lane_f32(a1, 0);
206 v2 = vdup_lane_f32(a1, 1);
// Multiply each broadcast lane-wise with a2.
208 v1 = vmul_f32(v1, a2);
210 v2 = vmul_f32(v2, a2);
// Flip the sign of lane 1 of v2.
212 v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
// NOTE(review): a complex product needs the two lanes of v2 swapped before this add; no swap
// is visible here — it may simply be missing from this excerpt; confirm against the full file.
216 prod = vadd_f32(v1, v2);
// Write the two floats into the storage of s.
218 vst1_f32((
float *)&s, prod);
// palign_impl specialisation for Packet2cf.
// NOTE(review): the template header, the struct/function braces and any condition on Offset
// are not visible in this excerpt.
224 struct palign_impl<Offset,Packet2cf>
226 EIGEN_STRONG_INLINE
static void run(Packet2cf& first,
const Packet2cf& second)
// vextq_f32(first, second, 2) yields | first[2] first[3] second[0] second[1] |: the second
// complex value of `first` followed by the first complex value of `second`.
230 first.v = vextq_f32(first.v, second.v, 2);
// conj_helper specialisation with flags <false, true> for Packet2cf: multiplies a by the
// conjugate of b.
// NOTE(review): the struct braces and the braces of pmul are not visible in this excerpt.
235 template<>
struct conj_helper<Packet2cf, Packet2cf, false,true>
// Multiply-add: this helper's pmul(x, y) plus c.
237 EIGEN_STRONG_INLINE Packet2cf pmadd(
const Packet2cf& x,
const Packet2cf& y,
const Packet2cf& c)
const
238 {
return padd(pmul(x,y),c); }
// a * conj(b), using the free function internal::pmul.
240 EIGEN_STRONG_INLINE Packet2cf pmul(
const Packet2cf& a,
const Packet2cf& b)
const
242 return internal::pmul(a, pconj(b));
// conj_helper specialisation with flags <true, false> for Packet2cf: multiplies the conjugate
// of a by b.
// NOTE(review): the struct braces and the braces of pmul are not visible in this excerpt.
246 template<>
struct conj_helper<Packet2cf, Packet2cf, true,false>
// Multiply-add: this helper's pmul(x, y) plus c.
248 EIGEN_STRONG_INLINE Packet2cf pmadd(
const Packet2cf& x,
const Packet2cf& y,
const Packet2cf& c)
const
249 {
return padd(pmul(x,y),c); }
// conj(a) * b, using the free function internal::pmul.
251 EIGEN_STRONG_INLINE Packet2cf pmul(
const Packet2cf& a,
const Packet2cf& b)
const
253 return internal::pmul(pconj(a), b);
// conj_helper specialisation with flags <true, true> for Packet2cf: conjugates the product
// a * b (equal to conj(a) * conj(b)) with a single pconj.
// NOTE(review): the struct braces and the braces of pmul are not visible in this excerpt.
257 template<>
struct conj_helper<Packet2cf, Packet2cf, true,true>
// Multiply-add: this helper's pmul(x, y) plus c.
259 EIGEN_STRONG_INLINE Packet2cf pmadd(
const Packet2cf& x,
const Packet2cf& y,
const Packet2cf& c)
const
260 {
return padd(pmul(x,y),c); }
// conj(a * b), using the free function internal::pmul.
262 EIGEN_STRONG_INLINE Packet2cf pmul(
const Packet2cf& a,
const Packet2cf& b)
const
264 return pconj(internal::pmul(a, b));
// Complex division a / b, computed as (a * conj(b)) / (re(b)^2 + im(b)^2).
// NOTE(review): the function braces and the declaration of s/rev_s are not visible in this excerpt.
268 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(
const Packet2cf& a,
const Packet2cf& b)
// Numerator: a * conj(b).
271 Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
// s holds the square of every lane of b; rev_s is s with the two lanes of each pair swapped,
// so s + rev_s carries re^2 + im^2 in both lanes of each complex value.
275 s = vmulq_f32(b.v, b.v);
276 rev_s = vrev64q_f32(s);
// Lane-wise division of the numerator by the squared magnitude.
278 return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
// Transposes a 2x2 block of complex<float> values held in two Packet2cf, in place.
// NOTE(review): the closing brace is not visible in this excerpt.
281 EIGEN_DEVICE_FUNC
inline void
282 ptranspose(PacketBlock<Packet2cf,2>& kernel) {
// The new second row (the high halves of both packets) is built first, because the next
// statement overwrites packet[0].
283 Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
// New first row: the low halves of both packets.
284 kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
285 kernel.packet[1].v = tmp;
289 #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
// 128-bit XOR mask {0, 0x8000000000000000}: XOR-ing a two-double vector's bits with it flips
// the sign bit of lane 1 only.
// NOTE(review): p2ul_CONJ_XOR is defined twice here (brace-initialised, and loaded from an
// array with vld1q_u64). The preprocessor guards that presumably select one of the two are not
// visible in this excerpt — confirm against the full file.
293 static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
// Alternative form: load the same two words from a constant array.
295 const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
296 static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
// Constructors of Packet1cd.
// NOTE(review): the enclosing struct header and the declaration of the member `v` are not
// visible in this excerpt.
// Default constructor: empty body and no initialiser list, so `v` is not explicitly initialised.
301 EIGEN_STRONG_INLINE Packet1cd() {}
// Wraps an existing Packet2d by copying it into `v`; `explicit` prevents implicit conversion.
302 EIGEN_STRONG_INLINE
explicit Packet1cd(
const Packet2d& a) : v(a) {}
// packet_traits specialisation for std::complex<double>: both the full packet type and the
// half packet type are Packet1cd.
// NOTE(review): the struct braces and any further members are not visible in this excerpt.
306 template<>
struct packet_traits<std::complex<double> > : default_packet_traits
308 typedef Packet1cd type;
309 typedef Packet1cd half;
329 template<>
struct unpacket_traits<Packet1cd> {
typedef std::complex<double> type;
enum {size=1, alignment=
Aligned16};
typedef Packet1cd half; };
331 template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(
const std::complex<double>* from) { EIGEN_DEBUG_ALIGNED_LOAD
return Packet1cd(pload<Packet2d>((
const double*)from)); }
332 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(
const std::complex<double>* from) { EIGEN_DEBUG_UNALIGNED_LOAD
return Packet1cd(ploadu<Packet2d>((
const double*)from)); }
334 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(
const std::complex<double>& from)
335 {
return ploadu<Packet1cd>(&from); }
337 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b) {
return Packet1cd(padd<Packet2d>(a.v,b.v)); }
338 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b) {
return Packet1cd(psub<Packet2d>(a.v,b.v)); }
339 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(
const Packet1cd& a) {
return Packet1cd(pnegate<Packet2d>(a.v)); }
340 template<> EIGEN_STRONG_INLINE Packet1cd pconj(
const Packet1cd& a) {
return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
// Complex multiplication of two Packet1cd values.
// NOTE(review): the function braces and the declaration of v1/v2 are not visible in this excerpt.
342 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
// v1 = | a.re | a.re |, v2 = | a.im | a.im |
347 v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
349 v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
// v1 = | a.re*b.re | a.re*b.im |, v2 = | a.im*b.re | a.im*b.im |
351 v1 = vmulq_f64(v1, b.v);
353 v2 = vmulq_f64(v2, b.v);
// Flip the sign of lane 1: v2 = | a.im*b.re | -a.im*b.im |
355 v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
// Swap the lanes: v2 = | -a.im*b.im | a.im*b.re |
357 v2 = preverse<Packet2d>(v2);
// Sum: | a.re*b.re - a.im*b.im | a.re*b.im + a.im*b.re |
359 return Packet1cd(vaddq_f64(v1, v2));
// Bitwise AND of two Packet1cd values: both double vectors are reinterpreted as 64-bit words,
// AND-ed, and the result is reinterpreted back to doubles.
// NOTE(review): the function braces are not visible in this excerpt.
362 template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
364 return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
// Bitwise OR of two Packet1cd values, performed on the 64-bit integer view of the lanes.
// NOTE(review): the function braces are not visible in this excerpt.
366 template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
368 return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
// Bitwise XOR of two Packet1cd values, performed on the 64-bit integer view of the lanes.
// NOTE(review): the function braces are not visible in this excerpt.
370 template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
372 return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
// Bitwise "a AND NOT b" of two Packet1cd values: vbicq_u64 clears in a every bit that is set
// in b, working on the 64-bit integer view of the lanes.
// NOTE(review): the function braces are not visible in this excerpt.
374 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
376 return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
379 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(
const std::complex<double>* from) {
return pset1<Packet1cd>(*from); }
381 template<> EIGEN_STRONG_INLINE
void pstore <std::complex<double> >(std::complex<double> * to,
const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((
double*)to, from.v); }
382 template<> EIGEN_STRONG_INLINE
void pstoreu<std::complex<double> >(std::complex<double> * to,
const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((
double*)to, from.v); }
384 template<> EIGEN_STRONG_INLINE
void prefetch<std::complex<double> >(
const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((
double *)addr); }
// Gathers one complex<double> into a Packet1cd. Only from[0] is read: the index 0*stride is
// always 0, so `stride` has no effect on the result.
// NOTE(review): the function braces are not visible in this excerpt.
386 template<> EIGEN_DEVICE_FUNC
inline Packet1cd pgather<std::complex<double>, Packet1cd>(
const std::complex<double>* from,
Index stride)
// Start from an all-zero vector, then set lane 0 to the real part and lane 1 to the imaginary part.
388 Packet2d res = pset1<Packet2d>(0.0);
389 res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
390 res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
391 return Packet1cd(res);
// Scatters the single complex value of a Packet1cd to to[0]. The index stride*0 is always 0,
// so `stride` has no effect.
// NOTE(review): the function braces are not visible in this excerpt.
394 template<> EIGEN_DEVICE_FUNC
inline void pscatter<std::complex<double>, Packet1cd>(std::complex<double>* to,
const Packet1cd& from,
Index stride)
// Lane 0 becomes the real part, lane 1 the imaginary part.
396 to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1));
// Extracts the complex value of a Packet1cd by storing the packet into a local variable.
// NOTE(review): the function braces and the return statement are not visible in this excerpt;
// presumably `res` is returned — confirm against the full file.
400 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(
const Packet1cd& a)
// Scratch value for the store (EIGEN_ALIGN16 presumably requests 16-byte alignment — TODO confirm).
402 std::complex<double> EIGEN_ALIGN16 res;
403 pstore<std::complex<double> >(&res, a);
408 template<> EIGEN_STRONG_INLINE Packet1cd preverse(
const Packet1cd& a) {
return a; }
410 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(
const Packet1cd& a) {
return pfirst(a); }
412 template<> EIGEN_STRONG_INLINE Packet1cd preduxp<Packet1cd>(
const Packet1cd* vecs) {
return vecs[0]; }
414 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(
const Packet1cd& a) {
return pfirst(a); }
// palign_impl specialisation for Packet1cd. Both parameters of run() are unnamed, so the
// visible signature does not use them.
// NOTE(review): the template header, the struct braces and the body of run() are not visible
// in this excerpt.
417 struct palign_impl<Offset,Packet1cd>
419 static EIGEN_STRONG_INLINE
void run(Packet1cd& ,
const Packet1cd& )
// conj_helper specialisation with flags <false, true> for Packet1cd: multiplies a by the
// conjugate of b.
// NOTE(review): the struct braces and the braces of pmul are not visible in this excerpt.
426 template<>
struct conj_helper<Packet1cd, Packet1cd, false,true>
// Multiply-add: this helper's pmul(x, y) plus c.
428 EIGEN_STRONG_INLINE Packet1cd pmadd(
const Packet1cd& x,
const Packet1cd& y,
const Packet1cd& c)
const
429 {
return padd(pmul(x,y),c); }
// a * conj(b), using the free function internal::pmul.
431 EIGEN_STRONG_INLINE Packet1cd pmul(
const Packet1cd& a,
const Packet1cd& b)
const
433 return internal::pmul(a, pconj(b));
// conj_helper specialisation with flags <true, false> for Packet1cd: multiplies the conjugate
// of a by b.
// NOTE(review): the struct braces and the braces of pmul are not visible in this excerpt.
437 template<>
struct conj_helper<Packet1cd, Packet1cd, true,false>
// Multiply-add: this helper's pmul(x, y) plus c.
439 EIGEN_STRONG_INLINE Packet1cd pmadd(
const Packet1cd& x,
const Packet1cd& y,
const Packet1cd& c)
const
440 {
return padd(pmul(x,y),c); }
// conj(a) * b, using the free function internal::pmul.
442 EIGEN_STRONG_INLINE Packet1cd pmul(
const Packet1cd& a,
const Packet1cd& b)
const
444 return internal::pmul(pconj(a), b);
// conj_helper specialisation with flags <true, true> for Packet1cd: conjugates the product
// a * b (equal to conj(a) * conj(b)) with a single pconj.
// NOTE(review): the struct braces and the braces of pmul are not visible in this excerpt.
448 template<>
struct conj_helper<Packet1cd, Packet1cd, true,true>
// Multiply-add: this helper's pmul(x, y) plus c.
450 EIGEN_STRONG_INLINE Packet1cd pmadd(
const Packet1cd& x,
const Packet1cd& y,
const Packet1cd& c)
const
451 {
return padd(pmul(x,y),c); }
// conj(a * b), using the free function internal::pmul.
453 EIGEN_STRONG_INLINE Packet1cd pmul(
const Packet1cd& a,
const Packet1cd& b)
const
455 return pconj(internal::pmul(a, b));
// Complex division a / b, computed as (a * conj(b)) / (re(b)^2 + im(b)^2).
// NOTE(review): the function braces are not visible in this excerpt.
459 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(
const Packet1cd& a,
const Packet1cd& b)
// Numerator: a * conj(b).
462 Packet1cd res = conj_helper<Packet1cd,Packet1cd,false,true>().pmul(a,b);
// s = | re^2 | im^2 |, rev_s = | im^2 | re^2 |, so s + rev_s has re^2 + im^2 in both lanes.
463 Packet2d s = pmul<Packet2d>(b.v, b.v);
464 Packet2d rev_s = preverse<Packet2d>(s);
// Lane-wise division of the numerator by the squared magnitude.
466 return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s)));
// Swaps the real and imaginary lanes of a Packet1cd by reversing the underlying Packet2d.
// Declared as a plain overload: no `template<>` is present on this definition.
// NOTE(review): the function braces are not visible in this excerpt.
469 EIGEN_STRONG_INLINE Packet1cd pcplxflip(
const Packet1cd& x)
471 return Packet1cd(preverse(Packet2d(x.v)));
// In-place transpose of a block of two Packet1cd: afterwards packet[0] holds the low lanes
// of both inputs and packet[1] holds their high lanes.
// NOTE(review): the function braces are not visible in this excerpt.
474 EIGEN_STRONG_INLINE
void ptranspose(PacketBlock<Packet1cd,2>& kernel)
// The high lanes are combined first, because the next statement overwrites packet[0].
476 Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
477 kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
478 kernel.packet[1].v = tmp;
480 #endif // EIGEN_ARCH_ARM64
486 #endif // EIGEN_COMPLEX_NEON_H
Definition: Constants.h:230
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:33