summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- dev-libs/apache-arrow/Manifest 1
-rw-r--r-- dev-libs/apache-arrow/apache-arrow-15.0.1.ebuild 131
-rw-r--r-- dev-libs/apache-arrow/files/apache-arrow-15.0.1-32bit.patch 57
3 files changed, 189 insertions, 0 deletions
diff --git a/dev-libs/apache-arrow/Manifest b/dev-libs/apache-arrow/Manifest
index db13cba08b97..4bc33181578d 100644
--- a/dev-libs/apache-arrow/Manifest
+++ b/dev-libs/apache-arrow/Manifest
@@ -1,5 +1,6 @@
DIST apache-arrow-14.0.2.tar.gz 20969982 BLAKE2B 9df119fca564a4140d1143a1ac0614831e4f80846d43439d011b8ca7f5e77461005c96b9430b6dd7ce0ba7496b879a8abc9ffa372e6b455c317abeebb16ae8e4 SHA512 dd6cf6cbb817a48ef5275bb409367e5904526a3c16a17a37ea75101085ea19a71ba6bf936a6f099012e7c528811db1728ef2f14dcb16a1056a22088839280ce0
DIST apache-arrow-15.0.0.tar.gz 21491996 BLAKE2B 55709d1d181ed5c1482e1eadc9031c692bbd39434ccad17be8c0f3f5af47e3b3d5f262903d1ce09c39442497e14c22c80d7b30215e4de830a4ac82a1b3db34fb SHA512 d5dccaa0907b0e6f2a460e32ae75091942dcb70b51db4aefe2767ee8d99882694607b723a9c06898dda3938d8eb498258d7f9aad11054665b6ea9c2fbaeafa74
+DIST apache-arrow-15.0.1.tar.gz 21499849 BLAKE2B 5f8f91932941105e753b7b7812bf132bd99501ccfac0574b8072e638764cb46694062bcdb8568a474f50de008ede9259b70f16ba7f33ada0f6ec763c21b1c25a SHA512 b426421336c6bc3757626b2743a039d3c7030ad257c3bcf3247a236462dbc140b7eff4476cb727f4d048144a90c1368740c139318f8237d6cc20e87d3efdaf74
DIST apache-arrow-arrow-data-ad82a736c170e97b7c8c035ebd8a801c17eec170.tar.gz 3571641 BLAKE2B e3daae62938b54dbeb8ca9227eddfd2d59864cfafb54a7a7fd7aaa2b2cabd0f2dde58b77b2def086a1759b43c3b5834cb55462cc0b2886b2892329a957afdacb SHA512 ad6fd3343d8a3a6beb5b6269815bc207171905c102698fdb8ed7091e6d829dc51f354293c7686f845e3d49c4a828d9c77958447d965c9da4272260984fea4262
DIST apache-arrow-arrow-data-d2c73bf78246331d8e58b6f11aa8aa199cbb5929.tar.gz 3570422 BLAKE2B df1f8d1c4e321ca0abdce598b09945aa9e63114199e7f2eb0fc549323dadf703b917efd90c04a2d540d763a44128d76c6ee12788a983d4b9536d04297316b656 SHA512 0be19960b0d22fc2e07bf84f11148e69d6fa82f10627eb2a5e4b762b1d4cf4e151a57c5a1dc761d6d8ed29881ea589e4cfe0cd47d5c7cbf7b6107143b4a45e82
DIST apache-arrow-parquet-data-b2e7cc755159196e3a068c8594f7acbaecfdaaac.tar.gz 1013835 BLAKE2B cb4e544b6588ddad503f9692ee2eee5d4f1e931f10654b7253e11ffd2feb4c957dec047a1b1644c1f1538a3bf7ea726e5a9ae9b7d0c3bb0d55d27f088b5c5be3 SHA512 d4c2af20d6a88c1beb44770982f4aa324ea4ee375e8f556339074875af15c48f4398d972c1eec35eba5d209e9e62df4bdbf73ebc8148a7bb05ffa2d69c97df61
diff --git a/dev-libs/apache-arrow/apache-arrow-15.0.1.ebuild b/dev-libs/apache-arrow/apache-arrow-15.0.1.ebuild
new file mode 100644
index 000000000000..22b647070026
--- /dev/null
+++ b/dev-libs/apache-arrow/apache-arrow-15.0.1.ebuild
@@ -0,0 +1,131 @@
+# Copyright 2023-2024 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+inherit cmake
+
+# arrow.git: testing
+ARROW_DATA_GIT_HASH=ad82a736c170e97b7c8c035ebd8a801c17eec170
+# arrow.git: cpp/submodules/parquet-testing
+PARQUET_DATA_GIT_HASH=d69d979223e883faef9dc6fe3cf573087243c28a
+
+DESCRIPTION="A cross-language development platform for in-memory data."
+HOMEPAGE="
+ https://arrow.apache.org/
+ https://github.com/apache/arrow/
+"
+SRC_URI="
+ mirror://apache/arrow/arrow-${PV}/${P}.tar.gz
+ test? (
+ https://github.com/apache/parquet-testing/archive/${PARQUET_DATA_GIT_HASH}.tar.gz
+ -> ${PN}-parquet-data-${PARQUET_DATA_GIT_HASH}.tar.gz
+ https://github.com/apache/arrow-testing/archive/${ARROW_DATA_GIT_HASH}.tar.gz
+ -> ${PN}-arrow-data-${ARROW_DATA_GIT_HASH}.tar.gz
+ )
+"
+S="${WORKDIR}/${P}/cpp"
+
+LICENSE="Apache-2.0"
+SLOT="0/$(ver_cut 1)"
+KEYWORDS="~amd64 ~hppa ~riscv"
+IUSE="brotli bzip2 compute dataset +json lz4 parquet re2 snappy ssl test zlib zstd"
+RESTRICT="!test? ( test )"
+
+RDEPEND="
+ brotli? ( app-arch/brotli:= )
+ bzip2? ( app-arch/bzip2:= )
+ compute? ( dev-libs/libutf8proc:= )
+ dataset? (
+ dev-libs/libutf8proc:=
+ )
+ lz4? ( app-arch/lz4:= )
+ parquet? (
+ dev-libs/libutf8proc:=
+ dev-libs/thrift:=
+ ssl? ( dev-libs/openssl:= )
+ )
+ re2? ( dev-libs/re2:= )
+ snappy? ( app-arch/snappy:= )
+ zlib? ( sys-libs/zlib:= )
+ zstd? ( app-arch/zstd:= )
+"
+DEPEND="${RDEPEND}
+ dev-cpp/xsimd
+ >=dev-libs/boost-1.81.0
+ json? ( dev-libs/rapidjson )
+ test? (
+ dev-cpp/gflags
+ dev-cpp/gtest
+ )
+"
+
+REQUIRED_USE="
+ test? (
+ json
+ parquet? ( zstd )
+ )
+ ssl? ( json )
+"
+
+# Patches taken from FILESDIR. The first two carry earlier version numbers
+# (11.0.0, 14.0.1) in their file names; the 32bit patch is added together
+# with this ebuild.
+PATCHES=(
+ "${FILESDIR}/${PN}-11.0.0-shared-lz4.patch"
+ "${FILESDIR}/${PN}-14.0.1-gcc14.patch"
+ # upstream backports
+ "${FILESDIR}/${PN}-15.0.1-32bit.patch"
+)
+
+src_prepare() {
+ # use Gentoo CXXFLAGS, specify docdir at src_configure.
+ sed -i \
+ -e '/SetupCxxFlags/d' \
+ -e '/set(ARROW_DOC_DIR.*)/d' \
+ CMakeLists.txt \
+ || die
+ cmake_src_prepare
+}
+
+src_configure() {
+	# Settings that are the same for every USE-flag combination.
+	local mycmakeargs=(
+		-DARROW_BUILD_STATIC=OFF
+		-DARROW_CSV=ON
+		-DARROW_DEPENDENCY_SOURCE=SYSTEM
+		# src_prepare removes the set(ARROW_DOC_DIR ...) line from
+		# CMakeLists.txt, so the value is supplied here.
+		-DARROW_DOC_DIR=share/doc/${PF}
+		-DARROW_FILESYSTEM=ON
+		-DARROW_HDFS=ON
+		-DARROW_JEMALLOC=OFF
+		-DARROW_USE_CCACHE=OFF
+		-DARROW_USE_SCCACHE=OFF
+		-DCMAKE_CXX_STANDARD=17
+	)
+
+	# Optional components, each switched by the matching USE flag.
+	mycmakeargs+=(
+		-DARROW_BUILD_TESTS=$(usex test)
+		-DARROW_COMPUTE=$(usex compute)
+		-DARROW_DATASET=$(usex dataset)
+		-DARROW_JSON=$(usex json)
+		-DARROW_PARQUET=$(usex parquet)
+		-DPARQUET_REQUIRE_ENCRYPTION=$(usex ssl)
+	)
+
+	# Compression codecs and re2, each switched by the matching USE flag.
+	mycmakeargs+=(
+		-DARROW_WITH_BROTLI=$(usex brotli)
+		-DARROW_WITH_BZ2=$(usex bzip2)
+		-DARROW_WITH_LZ4=$(usex lz4)
+		-DARROW_WITH_RE2=$(usex re2)
+		-DARROW_WITH_SNAPPY=$(usex snappy)
+		-DARROW_WITH_ZLIB=$(usex zlib)
+		-DARROW_WITH_ZSTD=$(usex zstd)
+	)
+
+	cmake_src_configure
+}
+
+src_test() {
+	# Directories in WORKDIR named after the test-data repositories and
+	# their pinned commits (the snapshots listed under test? in SRC_URI).
+	local parquet_checkout="${WORKDIR}/parquet-testing-${PARQUET_DATA_GIT_HASH}"
+	local arrow_checkout="${WORKDIR}/arrow-testing-${ARROW_DATA_GIT_HASH}"
+
+	# Export the data/ directory of each checkout before running the tests.
+	export \
+		PARQUET_TEST_DATA="${parquet_checkout}/data" \
+		ARROW_TEST_DATA="${arrow_checkout}/data"
+
+	cmake_src_test
+}
+
+src_install() {
+	cmake_src_install
+
+	# With USE=test the build is configured with ARROW_BUILD_TESTS=ON (see
+	# src_configure). The ArrowTesting CMake package, the libarrow_testing
+	# libraries and the arrow-testing pkg-config file are then removed from
+	# the image so they are not merged; each rm dies if its target is
+	# missing.
+	if use test; then
+		# Use ED rather than D: ED includes the offset prefix (EPREFIX),
+		# so the path is also correct on Gentoo Prefix installs.
+		# Explicit paths are used instead of changing the working
+		# directory for the rest of the phase.
+		local libdir="${ED}/usr/$(get_libdir)"
+		rm -r "${libdir}"/cmake/ArrowTesting || die
+		rm "${libdir}"/libarrow_testing* || die
+		rm "${libdir}"/pkgconfig/arrow-testing.pc || die
+	fi
+}
diff --git a/dev-libs/apache-arrow/files/apache-arrow-15.0.1-32bit.patch b/dev-libs/apache-arrow/files/apache-arrow-15.0.1-32bit.patch
new file mode 100644
index 000000000000..d268e565ab76
--- /dev/null
+++ b/dev-libs/apache-arrow/files/apache-arrow-15.0.1-32bit.patch
@@ -0,0 +1,57 @@
+diff --git a/gdb_arrow.py b/gdb_arrow.py
+index 6c3af1680..ad40ec499 100644
+--- a/gdb_arrow.py
++++ b/gdb_arrow.py
+@@ -304,7 +304,8 @@ def format_timestamp(val, unit):
+ seconds, subseconds = divmod(val, traits.multiplier)
+ try:
+ dt = datetime.datetime.utcfromtimestamp(seconds)
+- except (ValueError, OSError): # value out of range for datetime.datetime
++ except (ValueError, OSError, OverflowError):
++ # value out of range for datetime.datetime
+ pretty = "too large to represent"
+ else:
+ pretty = dt.isoformat().replace('T', ' ')
+diff --git a/src/arrow/io/file.cc b/src/arrow/io/file.cc
+index 543fa90a8..3b18bb7b0 100644
+--- a/src/arrow/io/file.cc
++++ b/src/arrow/io/file.cc
+@@ -36,6 +36,7 @@
+ #include <cerrno>
+ #include <cstdint>
+ #include <cstring>
++#include <limits>
+ #include <memory>
+ #include <mutex>
+ #include <sstream>
+@@ -560,17 +561,22 @@ class MemoryMappedFile::MemoryMap
+ RETURN_NOT_OK(::arrow::internal::FileTruncate(file_->fd(), initial_size));
+ }
+
+- size_t mmap_length = static_cast<size_t>(initial_size);
+- if (length > initial_size) {
+- return Status::Invalid("mapping length is beyond file size");
+- }
+- if (length >= 0 && length < initial_size) {
++ int64_t mmap_length = initial_size;
++ if (length >= 0) {
+ // memory mapping a file region
+- mmap_length = static_cast<size_t>(length);
++ if (length > initial_size) {
++ return Status::Invalid("mapping length is beyond file size");
++ }
++ mmap_length = length;
++ }
++ if (static_cast<int64_t>(static_cast<size_t>(mmap_length)) != mmap_length) {
++ return Status::CapacityError("Requested memory map length ", mmap_length,
++ " does not fit in a C size_t "
++ "(are you using a 32-bit build of Arrow?");
+ }
+
+- void* result = mmap(nullptr, mmap_length, prot_flags_, map_mode_, file_->fd(),
+- static_cast<off_t>(offset));
++ void* result = mmap(nullptr, static_cast<size_t>(mmap_length), prot_flags_, map_mode_,
++ file_->fd(), static_cast<off_t>(offset));
+ if (result == MAP_FAILED) {
+ return Status::IOError("Memory mapping file failed: ",
+ ::arrow::internal::ErrnoMessage(errno));