# Backport of: https://gitlab.com/libeigen/eigen/-/commit/4d05765345e7e4a984d600039f797e2fede924f3 --- Eigen/src/Core/util/ConfigureVectorization.h.orig 2021-08-19 04:41:58.000000000 +0800 +++ Eigen/src/Core/util/ConfigureVectorization.h 2023-01-18 02:25:19.000000000 +0800 @@ -363,10 +363,10 @@ #endif } // end extern "C" - #elif defined __VSX__ + #elif defined(__VSX__) && !defined(__APPLE__) #define EIGEN_VECTORIZE - #define EIGEN_VECTORIZE_VSX + #define EIGEN_VECTORIZE_VSX 1 #include // We need to #undef all these ugly tokens defined in // => use __vector instead of vector --- Eigen/src/Core/util/Macros.h.orig 2021-08-19 04:41:58.000000000 +0800 +++ Eigen/src/Core/util/Macros.h 2023-01-18 02:21:23.000000000 +0800 @@ -329,7 +329,7 @@ #endif /// \internal EIGEN_ARCH_PPC set to 1 if the architecture is PowerPC -#if defined(__powerpc__) || defined(__ppc__) || defined(_M_PPC) +#if defined(__powerpc__) || defined(__ppc__) || defined(_M_PPC) || defined(__POWERPC__) #define EIGEN_ARCH_PPC 1 #else #define EIGEN_ARCH_PPC 0 @@ -1127,8 +1127,13 @@ // This seems to be broken on clang. Packet4f is loaded into a single // register rather than a vector, zeroing out some entries. Integer // types also generate a compile error. - // General, Altivec, VSX. - #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+r,v,wa" (X)); + #if EIGEN_OS_MAC + // General, Altivec for Apple (VSX were added in ISA v2.06): + #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+r,v" (X)); + #else + // General, Altivec, VSX otherwise: + #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+r,v,wa" (X)); + #endif #elif EIGEN_ARCH_ARM_OR_ARM64 // General, NEON. #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+g,w" (X)); --- Eigen/src/Core/arch/AltiVec/Complex.h.orig 2021-08-19 04:41:58.000000000 +0800 +++ Eigen/src/Core/arch/AltiVec/Complex.h 2023-01-18 02:27:46.000000000 +0800 @@ -16,7 +16,7 @@ namespace internal { static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 }; -#ifdef __VSX__ +#ifdef EIGEN_VECTORIZE_VSX #if defined(_BIG_ENDIAN) static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 }; @@ -100,7 +100,7 @@ HasAbs2 = 0, HasMin = 0, HasMax = 0, -#ifdef __VSX__ +#ifdef EIGEN_VECTORIZE_VSX HasBlend = 1, #endif HasSetLinear = 0 @@ -130,7 +130,7 @@ EIGEN_STRONG_INLINE Packet2cf pload2(const std::complex* from0, const std::complex* from1) { Packet4f res0, res1; -#ifdef __VSX__ +#ifdef EIGEN_VECTORIZE_VSX __asm__ ("lxsdx %x0,%y1" : "=wa" (res0) : "Z" (*from0)); __asm__ ("lxsdx %x0,%y1" : "=wa" (res1) : "Z" (*from1)); #ifdef _BIG_ENDIAN @@ -233,7 +233,7 @@ return Packet2cf(vec_and(eq, vec_perm(eq, eq, p16uc_COMPLEX32_REV))); } -#ifdef __VSX__ +#ifdef EIGEN_VECTORIZE_VSX template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) { Packet2cf result; result.v = reinterpret_cast(pblend(ifPacket, reinterpret_cast(thenPacket.v), reinterpret_cast(elsePacket.v))); @@ -247,7 +247,7 @@ } //---------- double ---------- -#ifdef __VSX__ +#ifdef EIGEN_VECTORIZE_VSX struct Packet1cd { EIGEN_STRONG_INLINE Packet1cd() {} --- Eigen/src/Core/arch/AltiVec/MathFunctions.h.orig 2021-08-19 04:41:58.000000000 +0800 +++ Eigen/src/Core/arch/AltiVec/MathFunctions.h 2023-01-18 02:28:17.000000000 +0800 @@ -48,7 +48,7 @@ } #endif -#ifdef __VSX__ +#ifdef EIGEN_VECTORIZE_VSX #ifndef EIGEN_COMP_CLANG template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d prsqrt(const Packet2d& x) --- Eigen/src/Core/arch/AltiVec/PacketMath.h.orig 2021-08-19 04:41:58.000000000 +0800 +++ Eigen/src/Core/arch/AltiVec/PacketMath.h 2023-01-18 02:32:28.000000000 +0800 @@ -84,7 +84,7 @@ static _EIGEN_DECLARE_CONST_FAST_Packet8us(ONE,1); //{ 1, 1, 1, 1, 1, 1, 1, 1} static _EIGEN_DECLARE_CONST_FAST_Packet16uc(ONE,1); static Packet4f p4f_MZERO = (Packet4f) vec_sl((Packet4ui)p4i_MINUS1, (Packet4ui)p4i_MINUS1); //{ 0x80000000, 0x80000000, 0x80000000, 0x80000000} -#ifndef __VSX__ +#ifndef EIGEN_VECTORIZE_VSX static Packet4f p4f_ONE = vec_ctf(p4i_ONE, 0); //{ 1.0, 1.0, 1.0, 1.0} #endif @@ -114,7 +114,7 @@ // Define global static constants: #ifdef _BIG_ENDIAN static Packet16uc p16uc_FORWARD = vec_lvsl(0, (float*)0); -#ifdef __VSX__ +#ifdef EIGEN_VECTORIZE_VSX static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; #endif static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; @@ -168,7 +168,7 @@ HasCos = EIGEN_FAST_MATH, HasLog = 1, HasExp = 1, -#ifdef __VSX__ +#ifdef EIGEN_VECTORIZE_VSX HasSqrt = 1, #if !EIGEN_COMP_CLANG HasRsqrt = 1, @@ -210,7 +210,7 @@ HasCos = EIGEN_FAST_MATH, HasLog = 1, HasExp = 1, -#ifdef __VSX__ +#ifdef EIGEN_VECTORIZE_VSX HasSqrt = 1, #if !EIGEN_COMP_CLANG HasRsqrt = 1, @@ -432,7 +432,7 @@ // ignoring these warnings for now. EIGEN_UNUSED_VARIABLE(from); EIGEN_DEBUG_ALIGNED_LOAD -#ifdef __VSX__ +#ifdef EIGEN_VECTORIZE_VSX return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from)); #else return vec_ld(0, from); @@ -481,7 +481,7 @@ // ignoring these warnings for now. EIGEN_UNUSED_VARIABLE(to); EIGEN_DEBUG_ALIGNED_STORE -#ifdef __VSX__ +#ifdef EIGEN_VECTORIZE_VSX vec_xst(from, 0, to); #else vec_st(from, 0, to); @@ -802,7 +802,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pdiv(const Packet4f& a, const Packet4f& b) { -#ifndef __VSX__ // VSX actually provides a div instruction +#ifndef EIGEN_VECTORIZE_VSX // VSX actually provides a div instruction Packet4f t, y_0, y_1; // Altivec does not offer a divide instruction, we have to do a reciprocal approximation @@ -831,7 +831,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) { - #ifdef __VSX__ + #ifdef EIGEN_VECTORIZE_VSX // NOTE: about 10% slower than vec_min, but consistent with std::min and SSE regarding NaN Packet4f ret; __asm__ ("xvcmpgesp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b)); @@ -849,7 +849,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) { - #ifdef __VSX__ + #ifdef EIGEN_VECTORIZE_VSX // NOTE: about 10% slower than vec_max, but consistent with std::max and SSE regarding NaN Packet4f ret; __asm__ ("xvcmpgtsp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b)); @@ -923,7 +923,7 @@ Packet4f t = vec_add(reinterpret_cast(vec_or(vec_and(reinterpret_cast(a), p4ui_SIGN), p4ui_PREV0DOT5)), a); Packet4f res; -#ifdef __VSX__ +#ifdef EIGEN_VECTORIZE_VSX __asm__("xvrspiz %x0, %x1\n\t" : "=&wa" (res) : "wa" (t)); @@ -2252,7 +2252,7 @@ //---------- double ---------- -#ifdef __VSX__ +#ifdef EIGEN_VECTORIZE_VSX typedef __vector double Packet2d; typedef __vector unsigned long long Packet2ul; typedef __vector long long Packet2l;