summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sam James <sam@gentoo.org> 2024-07-17 00:03:41 +0100
committer: Sam James <sam@gentoo.org> 2024-07-17 00:03:41 +0100
commit: c7a9b7d8cdac586964c6f68ba8095ba7471f37b9 (patch)
tree: 52306e23f322a2de23e448ccbd97067b1904a18a /dev-cpp/eigen
parent: dev-libs/boost: fix build on ppc64 (diff)
download: gentoo-c7a9b7d8cdac586964c6f68ba8095ba7471f37b9.tar.gz
gentoo-c7a9b7d8cdac586964c6f68ba8095ba7471f37b9.tar.bz2
gentoo-c7a9b7d8cdac586964c6f68ba8095ba7471f37b9.zip
dev-cpp/eigen: properly guard VSX use
Fixes build on VSX-less ppc* hardware.

Closes: https://bugs.gentoo.org/936107
Thanks-to: jonys <vidra.jonas@seznam.cz>
Signed-off-by: Sam James <sam@gentoo.org>
Diffstat (limited to 'dev-cpp/eigen')
-rw-r--r-- dev-cpp/eigen/eigen-3.4.0-r3.ebuild 458
-rw-r--r-- dev-cpp/eigen/files/eigen-3.4.0-ppc-no-vsx.patch 195
2 files changed, 653 insertions, 0 deletions
diff --git a/dev-cpp/eigen/eigen-3.4.0-r3.ebuild b/dev-cpp/eigen/eigen-3.4.0-r3.ebuild
new file mode 100644
index 000000000000..c39a0ba39812
--- /dev/null
+++ b/dev-cpp/eigen/eigen-3.4.0-r3.ebuild
@@ -0,0 +1,458 @@
+# Copyright 1999-2024 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+FORTRAN_NEEDED="test"
+inherit cmake cuda fortran-2 llvm toolchain-funcs
+
+DESCRIPTION="C++ template library for linear algebra"
+HOMEPAGE="https://eigen.tuxfamily.org/index.php?title=Main_Page"
+
+# Live (*9999*) versions are fetched from the upstream git repository;
+# every other version uses the GitLab release tarball and carries KEYWORDS.
+if [[ ${PV} = *9999* ]] ; then
+ inherit git-r3
+ EGIT_REPO_URI="https://gitlab.com/lib${PN}/${PN}.git"
+ # 3.4.9999* pins EGIT_COMMIT to "3.4" (presumably the upstream 3.4
+ # branch - confirm); other live versions leave it unset.
+ if [[ ${PV} = 3.4.9999* ]] ; then
+ EGIT_COMMIT="3.4"
+ fi
+else
+ # The lapack addons tarball is only fetched with USE="test lapack" and is
+ # renamed to ${PN}-lapack_addons-3.4.1.tgz on download.
+ SRC_URI="
+ https://gitlab.com/lib${PN}/${PN}/-/archive/${PV}/${P}.tar.bz2
+ test? ( lapack? ( https://downloads.tuxfamily.org/${PN}/lapack_addons_3.4.1.tgz -> ${PN}-lapack_addons-3.4.1.tgz ) )
+ "
+ KEYWORDS="~alpha ~amd64 ~arm arm64 ~hppa ~ia64 ~loong ~ppc ~ppc64 ~riscv ~s390 sparc ~x86 ~amd64-linux ~x86-linux ~arm64-macos ~x64-macos"
+fi
+
+LICENSE="MPL-2.0"
+SLOT="3"
+
+# The following lines are shamelessly stolen from ffmpeg-9999.ebuild with modifications
+#
+# Each entry has the form <flag>:<NAME>. Only the <flag> part is consumed in
+# this ebuild: IUSE strips everything from the colon onwards. The <NAME> part
+# is not read anywhere else here; src_configure spells out the
+# -DEIGEN_TEST_* options by hand.
+ARM_CPU_FEATURES=(
+ neon:NEON
+)
+PPC_CPU_FEATURES=(
+ altivec:ALTIVEC
+ vsx:VSX
+)
+X86_CPU_FEATURES=(
+ avx:AVX
+ avx2:AVX2
+ avx512f:AVX512
+ avx512dq:AVX512DQ
+ f16c:FP16C
+ fma3:FMA
+ popcnt:POPCNT
+ sse:SSE
+ sse2:SSE2
+ sse3:SSE3
+ ssse3:SSSE3
+ sse4_1:SSE4_1
+ sse4_2:SSE4_2
+)
+# MIPS and S390 feature lists are kept for reference but disabled.
+# MIPS_CPU_FEATURES=(
+# msa:MSA
+# )
+# S390_CPU_FEATURES=(
+# z13:Z13
+# z14:Z14
+# )
+
+# Flat list of all enabled entries; the ${array[@]/#/prefix} expansion
+# prepends the matching cpu_flags_<arch>_ prefix to every element.
+CPU_FEATURES_MAP=(
+ "${ARM_CPU_FEATURES[@]/#/cpu_flags_arm_}"
+ "${PPC_CPU_FEATURES[@]/#/cpu_flags_ppc_}"
+ "${X86_CPU_FEATURES[@]/#/cpu_flags_x86_}"
+ # "${MIPS_CPU_FEATURES[@]/#/cpu_flags_mips_}"
+ # "${S390_CPU_FEATURES[@]/#/cpu_flags_s390_}"
+)
+
+# USE flags for optional libraries exercised by the test suite. The list is
+# expanded into both IUSE and REQUIRED_USE below.
+IUSE_TEST_BACKENDS=(
+ "adolc"
+ "boost"
+ "cholmod"
+ "fftw"
+ "klu"
+ "opengl"
+ "openmp"
+ "pastix"
+ "sparsehash"
+ "spqr"
+ "superlu"
+ "umfpack"
+)
+
+# ${CPU_FEATURES_MAP[*]%:*} drops the ":NAME" suffix of every map entry,
+# leaving only the cpu_flags_* USE flag names.
+IUSE="${CPU_FEATURES_MAP[*]%:*} clang cuda hip debug doc lapack mathjax test ${IUSE_TEST_BACKENDS[*]}" #zvector
+
+# Tests failing again because of compiler issues
+# NOTE(review): the RESTRICT below only disables tests when USE=test is off,
+# so the comment above may be left over from an unconditional restriction -
+# confirm.
+RESTRICT="!test? ( test )"
+
+BDEPEND="
+ doc? (
+ app-text/doxygen[dot]
+ dev-texlive/texlive-bibtexextra
+ dev-texlive/texlive-fontsextra
+ dev-texlive/texlive-fontutils
+ dev-texlive/texlive-latex
+ dev-texlive/texlive-latexextra
+ mathjax? ( dev-libs/mathjax )
+ )
+ test? ( virtual/pkgconfig )
+"
+
+# METIS
+# MPREAL
+# dev-libs/mpfr:0
+# dev-libs/gmp:0
+
+TEST_BACKENDS="
+ boost? ( dev-libs/boost )
+ adolc? ( sci-libs/adolc[sparse] )
+ cholmod? ( sci-libs/cholmod:= )
+ fftw? ( sci-libs/fftw )
+ spqr? ( sci-libs/spqr )
+ klu? ( sci-libs/klu )
+ opengl? (
+ media-libs/freeglut
+ media-libs/glew
+ media-libs/libglvnd
+ )
+ pastix? ( sci-libs/pastix[-mpi] )
+ sparsehash? (
+ amd64? ( dev-cpp/sparsehash )
+ arm64? ( dev-cpp/sparsehash )
+ ppc64? ( dev-cpp/sparsehash )
+ x86? ( dev-cpp/sparsehash )
+ )
+ superlu? ( sci-libs/superlu )
+ umfpack? ( sci-libs/umfpack )
+"
+DEPEND="
+ test? (
+ cuda? (
+ !clang? (
+ dev-util/nvidia-cuda-toolkit
+ )
+ clang? (
+ sys-devel/clang[llvm_targets_NVPTX]
+ openmp? ( sys-libs/libomp[llvm_targets_NVPTX,offload] )
+ )
+ )
+ hip? ( dev-util/hip )
+ lapack? ( virtual/lapacke )
+ ${TEST_BACKENDS}
+ )
+"
+
+REQUIRED_USE="
+ test? ( !lapack )
+ || ( ${IUSE_TEST_BACKENDS[*]} )
+"
+
+# Patches shipped in FILESDIR. The max-macro patch keeps the 3.3.9 version
+# in its file name; all others are named after 3.4.0.
+PATCHES=(
+ "${FILESDIR}/${PN}-3.3.9-max-macro.patch"
+ "${FILESDIR}/${PN}-3.4.0-doc-nocompress.patch" # bug 830064
+ "${FILESDIR}/${PN}-3.4.0-buildstring.patch"
+ "${FILESDIR}/${PN}-3.4.0-noansi.patch"
+ "${FILESDIR}/${PN}-3.4.0-cxxstandard.patch"
+ "${FILESDIR}/${PN}-3.4.0-ppc-no-vsx.patch" # bug 936107
+)
+
+# TODO should be in cuda.eclass
+cuda_set_CUDAHOSTCXX() {
+ local compiler
+ tc-is-gcc && compiler="gcc"
+ tc-is-clang && compiler="clang"
+ [[ -z "$compiler" ]] && die "no compiler specified"
+
+ local package="sys-devel/${compiler}"
+ local version="${package}"
+ local CUDAHOSTCXX_test
+ while
+ CUDAHOSTCXX="${CUDAHOSTCXX_test}"
+ version=$(best_version "${version}")
+ if [[ -z "${version}" ]]; then
+ if [[ -z "${CUDAHOSTCXX}" ]]; then
+ die "could not find supported version of ${package}"
+ fi
+ break
+ fi
+ CUDAHOSTCXX_test="$(
+ dirname "$(
+ realpath "$(
+ which "${compiler}-$(echo "${version}" | grep -oP "(?<=${package}-)[0-9]*")"
+ )"
+ )"
+ )"
+ version="<${version}"
+ do ! echo "int main(){}" | nvcc "-ccbin ${CUDAHOSTCXX_test}" - -x cu &>/dev/null; done
+
+ export CUDAHOSTCXX
+}
+
+pkg_setup() {
+ use test && use cuda && use clang && llvm_pkg_setup
+}
+
+# Fetch or unpack the sources: git checkout for live versions, release
+# tarball otherwise, plus the LAPACK test addons when they are requested.
+src_unpack() {
+	if [[ ${PV} = *9999* ]] ; then
+		git-r3_src_unpack
+		return
+	fi
+
+	unpack "${P}.tar.bz2"
+
+	# The addons are only needed for USE="test lapack".
+	use test && use lapack || return 0
+
+	# They are extracted inside the lapack/ directory of the source tree.
+	cd "${S}/lapack" || die
+	unpack "${PN}-lapack_addons-3.4.1.tgz"
+}
+
+src_prepare() {
+ cmake_src_prepare
+
+ sed \
+ -e "/add_subdirectory(bench\/spbench/s/^/#DONOTCOMPILE /g" \
+ -e "/add_subdirectory(demos/s/^/#DONOTCOMPILE /g" \
+ -i CMakeLists.txt || die
+
+ if ! use test; then
+ sed \
+ -e "/add_subdirectory(test/s/^/#DONOTCOMPILE /g" \
+ -e "/add_subdirectory(scripts/s/^/#DONOTCOMPILE /g" \
+ -e "/add_subdirectory(failtest/s/^/#DONOTCOMPILE /g" \
+ -e "/add_subdirectory(blas/s/^/#DONOTCOMPILE /g" \
+ -e "/add_subdirectory(lapack/s/^/#DONOTCOMPILE /g" \
+ -i CMakeLists.txt || die
+ fi
+}
+
+src_configure() {
+ local mycmakeargs=(
+ -DBUILD_SHARED_LIBS="yes"
+ -DBUILD_TESTING="$(usex test)"
+
+ -DEIGEN_BUILD_DOC="$(usex doc)" # Enable creation of Eigen documentation
+ -DEIGEN_BUILD_PKGCONFIG="yes" # Build pkg-config .pc file for Eigen
+ )
+ if use doc || use test; then
+ mycmakeargs+=(
+ # needs Qt4
+ -DEIGEN_TEST_NOQT="yes" # Disable Qt support in unit tests
+ )
+ fi
+
+ if use doc; then
+ mycmakeargs+=(
+ -DEIGEN_DOC_USE_MATHJAX="$(usex mathjax)" # Use MathJax for rendering math in HTML docs
+ -DEIGEN_INTERNAL_DOCUMENTATION=no # Build internal documentation
+ )
+ fi
+
+ if use test; then
+ mycmakeargs+=(
+ # the OpenGL testsuite is extremely brittle, bug #712808
+ -DOpenGL_GL_PREFERENCE="GLVND"
+ -DEIGEN_TEST_OPENGL="$(usex opengl)" # Enable OpenGL support in unit tests
+ -DEIGEN_TEST_OPENMP="$(usex openmp)" # Enable/Disable OpenMP in tests/examples
+
+ -DCMAKE_DISABLE_FIND_PACKAGE_MPREAL=ON
+
+ -DEIGEN_TEST_CXX11=yes
+
+ # -DEIGEN_TEST_CUSTOM_CXX_FLAGS= # Additional compiler flags when compiling unit tests.
+ # -DEIGEN_TEST_CUSTOM_LINKER_FLAGS= # Additional linker flags when linking unit tests.
+ # -DEIGEN_TEST_BUILD_FLAGS= # Options passed to the build command of unit tests
+
+ # -DEIGEN_BUILD_BTL=yes # Build benchmark suite
+
+ -DEIGEN_TEST_BUILD_DOCUMENTATION="$(usex doc)" # Test building the doxygen documentation
+
+ # -DEIGEN_COVERAGE_TESTING=no # Enable/disable gcov
+ # -DEIGEN_CTEST_ERROR_EXCEPTION= # Regular expression for build error messages to be filtered out
+ # -DEIGEN_DEBUG_ASSERTS=no # Enable advanced debugging of assertions
+ # -DEIGEN_NO_ASSERTION_CHECKING=no # Disable checking of assertions using exceptions
+ # -DEIGEN_TEST_NO_EXCEPTIONS=no # Disables C++ exceptions
+ # -DEIGEN_TEST_NO_EXPLICIT_ALIGNMENT=no # Disable explicit alignment (hence vectorization) in tests/examples
+ # -DEIGEN_TEST_NO_EXPLICIT_VECTORIZATION=no # Disable explicit vectorization in tests/examples
+
+ # -DEIGEN_DASHBOARD_BUILD_TARGET=buildtests # Target to be built in dashboard mode, default is buildtests
+
+ # -DEIGEN_DEFAULT_TO_ROW_MAJOR=no # Use row-major as default matrix storage order
+
+ # -DEIGEN_TEST_MATRIX_DIR=yes # Enable testing of realword sparse matrices contained in the specified path
+ # -DEIGEN_TEST_MAX_SIZE=320 # Maximal matrix/vector size, default is 320
+ # -DEIGEN_SPLIT_LARGE_TESTS=no # Split large tests into smaller executables
+ )
+
+ use !adolc && mycmakeargs+=( -DCMAKE_DISABLE_FIND_PACKAGE_Adolc="TRUE" )
+ use !boost && mycmakeargs+=( -DCMAKE_DISABLE_FIND_PACKAGE_Boost="TRUE" )
+ use !cholmod && mycmakeargs+=( -DCMAKE_DISABLE_FIND_PACKAGE_CHOLMOD="TRUE" )
+ use !fftw && mycmakeargs+=( -DCMAKE_DISABLE_FIND_PACKAGE_FFTW="TRUE" )
+ use !sparsehash && mycmakeargs+=( -DCMAKE_DISABLE_FIND_PACKAGE_GoogleHash="TRUE" )
+ use !klu && mycmakeargs+=( -DCMAKE_DISABLE_FIND_PACKAGE_KLU="TRUE" )
+ use !opengl && mycmakeargs+=( -DCMAKE_DISABLE_FIND_PACKAGE_OpenGL="TRUE" )
+ use !openmp && mycmakeargs+=( -DCMAKE_DISABLE_FIND_PACKAGE_OpenMP="TRUE" )
+ use !pastix && mycmakeargs+=( -DCMAKE_DISABLE_FIND_PACKAGE_PASTIX="TRUE" )
+ use !spqr && mycmakeargs+=( -DCMAKE_DISABLE_FIND_PACKAGE_SPQR="TRUE" )
+ use !superlu && mycmakeargs+=( -DCMAKE_DISABLE_FIND_PACKAGE_SuperLU="TRUE" )
+ use !umfpack && mycmakeargs+=( -DCMAKE_DISABLE_FIND_PACKAGE_UMFPACK="TRUE" )
+
+ if use lapack; then
+ mycmakeargs+=(
+ -DEIGEN_ENABLE_LAPACK_TESTS=yes
+ -DEIGEN_TEST_EXTERNAL_BLAS=yes # Use external BLAS library for testsuite
+ -DCMAKE_DISABLE_FIND_PACKAGE_SuperLU=ON
+ )
+ fi
+
+ if use arm; then
+ mycmakeargs+=(
+ -DEIGEN_TEST_NEON="$(usex cpu_flags_arm_neon)"
+ )
+ fi
+
+ if use arm64; then
+ mycmakeargs+=(
+ -DEIGEN_TEST_NEON64="$(usex cpu_flags_arm_neon)"
+ )
+ fi
+
+ if use ppc || use ppc64; then
+ mycmakeargs+=(
+ -DEIGEN_TEST_ALTIVEC="$(usex cpu_flags_ppc_altivec)"
+ -DEIGEN_TEST_VSX="$(usex cpu_flags_ppc_vsx)"
+ )
+ fi
+
+ if use amd64 || use x86; then
+ mycmakeargs+=(
+ # -DEIGEN_TEST_32BIT=no # Force generating 32bit code.
+ # -DEIGEN_TEST_X87=no # Force using X87 instructions. Implies no vectorization.
+ -DEIGEN_TEST_SSE2="$(usex cpu_flags_x86_sse2)"
+ -DEIGEN_TEST_SSE3="$(usex cpu_flags_x86_sse3)"
+ -DEIGEN_TEST_SSSE3="$(usex cpu_flags_x86_ssse3)"
+ -DEIGEN_TEST_FMA="$(usex cpu_flags_x86_fma3)"
+ -DEIGEN_TEST_SSE4_1="$(usex cpu_flags_x86_sse4_1)"
+ -DEIGEN_TEST_SSE4_2="$(usex cpu_flags_x86_sse4_2)"
+ -DEIGEN_TEST_AVX="$(usex cpu_flags_x86_avx)"
+ -DEIGEN_TEST_F16C="$(usex cpu_flags_x86_f16c)"
+ -DEIGEN_TEST_AVX2="$(usex cpu_flags_x86_avx2)"
+ -DEIGEN_TEST_AVX512="$(usex cpu_flags_x86_avx512f)"
+ -DEIGEN_TEST_AVX512DQ="$(usex cpu_flags_x86_avx512dq)"
+ )
+ fi
+
+ if use mips; then
+ mycmakeargs+=(
+ # -DEIGEN_TEST_MSA=no # Enable/Disable MSA in tests/examples
+ )
+ fi
+
+ if use s390; then
+ mycmakeargs+=(
+ # -DEIGEN_TEST_Z13=no # Enable/Disable S390X(zEC13) ZVECTOR in tests/examples
+ # -DEIGEN_TEST_Z14=no # Enable/Disable S390X(zEC14) ZVECTOR in tests/examples
+ )
+ fi
+
+ mycmakeargs+=(
+ -DEIGEN_TEST_CUDA="$(usex cuda)" # Enable CUDA support in unit tests
+ -DEIGEN_TEST_CUDA_CLANG="$(usex cuda "$(usex clang)")" # Use clang instead of nvcc to compile the CUDA tests
+
+ -DEIGEN_TEST_HIP="$(usex hip)" # Add HIP support.
+
+ # -DEIGEN_TEST_SYCL=no # Add Sycl support.
+ # -DEIGEN_SYCL_TRISYCL=no # Use the triSYCL Sycl implementation (ComputeCPP by default).
+ )
+
+ if use cuda; then
+ cuda_add_sandbox -w
+ if use clang; then
+ local llvm_prefix
+ llvm_prefix="$(get_llvm_prefix -b)"
+ export CC="${llvm_prefix}/bin/clang"
+ export CXX="${llvm_prefix}/bin/clang++"
+ export LIBRARY_PATH="${ESYSROOT}/usr/$(get_libdir)"
+ else
+ cuda_set_CUDAHOSTCXX
+ mycmakeargs+=(
+ -DCUDA_HOST_COMPILER="${CUDAHOSTCXX}"
+ )
+ fi
+ if [[ "${CUDA_VERBOSE}" == true ]]; then
+ mycmakeargs+=(
+ -DCUDA_VERBOSE_BUILD=yes
+ )
+ NVCCFLAGS+=" -v"
+ fi
+
+ export CUDAFLAGS="${NVCCFLAGS}"
+
+ [[ -z "${CUDAARCHS}" ]] && einfo "trying to determine host CUDAARCHS"
+ : "${CUDAARCHS:=$(__nvcc_device_query)}"
+ export CUDAARCHS
+
+ mycmakeargs+=(
+ -DEIGEN_CUDA_COMPUTE_ARCH="${CUDAARCHS}"
+ )
+ fi
+ fi
+
+ cmake_src_configure
+}
+
+# Build the documentation and/or the test binaries. Without USE=doc and
+# USE=test nothing is compiled.
+src_compile() {
+	local targets=()
+
+	if use doc; then
+		HTML_DOCS=( "${BUILD_DIR}"/doc/html/. )
+		targets+=( doc )
+	fi
+
+	if use test; then
+		# tests generate random data, which
+		# obviously fails for some seeds
+		export EIGEN_SEED=712808
+
+		targets+=( buildtests )
+		use lapack || targets+=( blas )
+	fi
+
+	# The list is non-empty exactly when doc or test is enabled.
+	[[ ${#targets[@]} -eq 0 ]] || cmake_src_compile "${targets[@]}"
+}
+
+# Run the test suite serially, skipping tests recorded as failing.
+src_test() {
+	local myctestargs=(
+		-j1 # otherwise breaks due to cmake reruns
+	)
+
+	# The trailing comment on each entry is the test number and the
+	# recorded failure mode.
+	CMAKE_SKIP_TESTS=(
+		product_small_32 # 143 (Subprocess aborted)
+		product_small_33 # 144 (Subprocess aborted)
+
+		eigensolver_selfadjoint_13 # 452 (Subprocess aborted)
+
+		cholmod_support_21 # 726 (Subprocess aborted)
+		cholmod_support_22 # 727 (Subprocess aborted)
+
+		NonLinearOptimization # 930 (Subprocess aborted)
+		openglsupport # 990 (Failed)
+		levenberg_marquardt # 1020 (Subprocess aborted)
+	)
+
+	if use cuda ; then
+		cuda_add_sandbox -w
+
+		local cuda_skips=(
+			cxx11_tensor_cast_float16_gpu
+			cxx11_tensor_gpu_5
+		)
+		CMAKE_SKIP_TESTS+=( "${cuda_skips[@]}" )
+	fi
+
+	# With USE=lapack every LAPACK-* test is skipped.
+	use lapack && CMAKE_SKIP_TESTS+=( "^LAPACK-.*$" )
+
+	cmake_src_test
+}
diff --git a/dev-cpp/eigen/files/eigen-3.4.0-ppc-no-vsx.patch b/dev-cpp/eigen/files/eigen-3.4.0-ppc-no-vsx.patch
new file mode 100644
index 000000000000..7cfeaaf129ff
--- /dev/null
+++ b/dev-cpp/eigen/files/eigen-3.4.0-ppc-no-vsx.patch
@@ -0,0 +1,195 @@
+https://gitlab.com/libeigen/eigen/-/merge_requests/1028
+https://bugs.gentoo.org/936107
+--- a/Eigen/Core
++++ b/Eigen/Core
+@@ -346,7 +346,7 @@
+ #include "src/Core/CoreIterators.h"
+ #include "src/Core/ConditionEstimator.h"
+
+-#if defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
++#if defined(EIGEN_VECTORIZE_VSX)
+ #include "src/Core/arch/AltiVec/MatrixProduct.h"
+ #elif defined EIGEN_VECTORIZE_NEON
+ #include "src/Core/arch/NEON/GeneralBlockPanelKernel.h"
+--- a/Eigen/src/Core/arch/AltiVec/Complex.h
++++ b/Eigen/src/Core/arch/AltiVec/Complex.h
+@@ -100,6 +100,7 @@
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
++ HasSqrt = 1,
+ #ifdef __VSX__
+ HasBlend = 1,
+ #endif
+@@ -320,6 +321,7 @@
+ HasAbs2 = 0,
+ HasMin = 0,
+ HasMax = 0,
++ HasSqrt = 1,
+ HasSetLinear = 0
+ };
+ };
+--- a/Eigen/src/Core/arch/AltiVec/MathFunctions.h
++++ b/Eigen/src/Core/arch/AltiVec/MathFunctions.h
+@@ -40,16 +40,14 @@
+ return pcos_float(_x);
+ }
+
++#ifdef __VSX__
+ #ifndef EIGEN_COMP_CLANG
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+ Packet4f prsqrt<Packet4f>(const Packet4f& x)
+ {
+ return vec_rsqrt(x);
+ }
+-#endif
+
+-#ifdef __VSX__
+-#ifndef EIGEN_COMP_CLANG
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
+ Packet2d prsqrt<Packet2d>(const Packet2d& x)
+ {
+@@ -74,6 +72,26 @@
+ {
+ return pexp_double(_x);
+ }
++
++template<> EIGEN_STRONG_INLINE Packet8bf psqrt<Packet8bf> (const Packet8bf& a){
++ BF16_TO_F32_UNARY_OP_WRAPPER(psqrt<Packet4f>, a);
++}
++
++#ifndef EIGEN_COMP_CLANG
++template<> EIGEN_STRONG_INLINE Packet8bf prsqrt<Packet8bf> (const Packet8bf& a){
++ BF16_TO_F32_UNARY_OP_WRAPPER(prsqrt<Packet4f>, a);
++}
++#endif
++#else
++template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
++Packet4f psqrt<Packet4f>(const Packet4f& x)
++{
++ Packet4f a;
++ for (Index i = 0; i < packet_traits<float>::size; i++) {
++ a[i] = numext::sqrt(x[i]);
++ }
++ return a;
++}
+ #endif
+
+ // Hyperbolic Tangent function.
+--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
++++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
+@@ -175,16 +175,19 @@
+ #else
+ HasRsqrt = 0,
+ #endif
++ HasTanh = EIGEN_FAST_MATH,
++ HasErf = EIGEN_FAST_MATH,
++ HasRint = 1,
+ #else
+ HasSqrt = 0,
+ HasRsqrt = 0,
+- HasTanh = EIGEN_FAST_MATH,
+- HasErf = EIGEN_FAST_MATH,
++ HasTanh = 0,
++ HasErf = 0,
++ HasRint = 0,
+ #endif
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1,
+- HasRint = 1,
+ HasNegate = 1,
+ HasBlend = 1
+ };
+@@ -217,16 +220,17 @@
+ #else
+ HasRsqrt = 0,
+ #endif
++ HasRint = 1,
+ #else
+ HasSqrt = 0,
+ HasRsqrt = 0,
+- HasTanh = EIGEN_FAST_MATH,
+- HasErf = EIGEN_FAST_MATH,
++ HasRint = 0,
+ #endif
++ HasTanh = 0,
++ HasErf = 0,
+ HasRound = 1,
+ HasFloor = 1,
+ HasCeil = 1,
+- HasRint = 1,
+ HasNegate = 1,
+ HasBlend = 1
+ };
+@@ -872,19 +876,29 @@
+ return vec_nor(c,c);
+ }
+
++#ifdef __VSX__
+ template<> EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) { return reinterpret_cast<Packet4i>(vec_cmple(a,b)); }
++#endif
+ template<> EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i& a, const Packet4i& b) { return reinterpret_cast<Packet4i>(vec_cmplt(a,b)); }
+ template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return reinterpret_cast<Packet4i>(vec_cmpeq(a,b)); }
++#ifdef __VSX__
+ template<> EIGEN_STRONG_INLINE Packet8s pcmp_le(const Packet8s& a, const Packet8s& b) { return reinterpret_cast<Packet8s>(vec_cmple(a,b)); }
++#endif
+ template<> EIGEN_STRONG_INLINE Packet8s pcmp_lt(const Packet8s& a, const Packet8s& b) { return reinterpret_cast<Packet8s>(vec_cmplt(a,b)); }
+ template<> EIGEN_STRONG_INLINE Packet8s pcmp_eq(const Packet8s& a, const Packet8s& b) { return reinterpret_cast<Packet8s>(vec_cmpeq(a,b)); }
++#ifdef __VSX__
+ template<> EIGEN_STRONG_INLINE Packet8us pcmp_le(const Packet8us& a, const Packet8us& b) { return reinterpret_cast<Packet8us>(vec_cmple(a,b)); }
++#endif
+ template<> EIGEN_STRONG_INLINE Packet8us pcmp_lt(const Packet8us& a, const Packet8us& b) { return reinterpret_cast<Packet8us>(vec_cmplt(a,b)); }
+ template<> EIGEN_STRONG_INLINE Packet8us pcmp_eq(const Packet8us& a, const Packet8us& b) { return reinterpret_cast<Packet8us>(vec_cmpeq(a,b)); }
++#ifdef __VSX__
+ template<> EIGEN_STRONG_INLINE Packet16c pcmp_le(const Packet16c& a, const Packet16c& b) { return reinterpret_cast<Packet16c>(vec_cmple(a,b)); }
++#endif
+ template<> EIGEN_STRONG_INLINE Packet16c pcmp_lt(const Packet16c& a, const Packet16c& b) { return reinterpret_cast<Packet16c>(vec_cmplt(a,b)); }
+ template<> EIGEN_STRONG_INLINE Packet16c pcmp_eq(const Packet16c& a, const Packet16c& b) { return reinterpret_cast<Packet16c>(vec_cmpeq(a,b)); }
++#ifdef __VSX__
+ template<> EIGEN_STRONG_INLINE Packet16uc pcmp_le(const Packet16uc& a, const Packet16uc& b) { return reinterpret_cast<Packet16uc>(vec_cmple(a,b)); }
++#endif
+ template<> EIGEN_STRONG_INLINE Packet16uc pcmp_lt(const Packet16uc& a, const Packet16uc& b) { return reinterpret_cast<Packet16uc>(vec_cmplt(a,b)); }
+ template<> EIGEN_STRONG_INLINE Packet16uc pcmp_eq(const Packet16uc& a, const Packet16uc& b) { return reinterpret_cast<Packet16uc>(vec_cmpeq(a,b)); }
+
+@@ -937,6 +951,7 @@
+ }
+ template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return vec_ceil(a); }
+ template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return vec_floor(a); }
++#ifdef __VSX__
+ template<> EIGEN_STRONG_INLINE Packet4f print<Packet4f>(const Packet4f& a)
+ {
+ Packet4f res;
+@@ -947,6 +962,7 @@
+
+ return res;
+ }
++#endif
+
+ template<typename Packet> EIGEN_STRONG_INLINE Packet ploadu_common(const __UNPACK_TYPE__(Packet)* from)
+ {
+@@ -1341,12 +1357,6 @@
+ BF16_TO_F32_BINARY_OP_WRAPPER(psub<Packet4f>, a, b);
+ }
+
+-template<> EIGEN_STRONG_INLINE Packet8bf psqrt<Packet8bf> (const Packet8bf& a){
+- BF16_TO_F32_UNARY_OP_WRAPPER(vec_sqrt, a);
+-}
+-template<> EIGEN_STRONG_INLINE Packet8bf prsqrt<Packet8bf> (const Packet8bf& a){
+- BF16_TO_F32_UNARY_OP_WRAPPER(prsqrt<Packet4f>, a);
+-}
+ template<> EIGEN_STRONG_INLINE Packet8bf pexp<Packet8bf> (const Packet8bf& a){
+ BF16_TO_F32_UNARY_OP_WRAPPER(pexp_float, a);
+ }
+@@ -1390,9 +1400,11 @@
+ template<> EIGEN_STRONG_INLINE Packet8bf pround<Packet8bf> (const Packet8bf& a){
+ BF16_TO_F32_UNARY_OP_WRAPPER(pround<Packet4f>, a);
+ }
++#ifdef __VSX__
+ template<> EIGEN_STRONG_INLINE Packet8bf print<Packet8bf> (const Packet8bf& a){
+ BF16_TO_F32_UNARY_OP_WRAPPER(print<Packet4f>, a);
+ }
++#endif
+ template<> EIGEN_STRONG_INLINE Packet8bf pmadd(const Packet8bf& a, const Packet8bf& b, const Packet8bf& c) {
+ Packet4f a_even = Bf16ToF32Even(a);
+ Packet4f a_odd = Bf16ToF32Odd(a);