From 7e5059a7d33c7b6334b7caf6b7bf6d35a025e042 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 3 Jun 2024 06:55:28 -0400 Subject: [PATCH 1/5] feat: Standalone Python installer using Meson --- python/bootstrap.py | 139 +++++++----------------------- python/meson.build | 24 ++++++ python/pyproject.toml | 4 +- python/src/nanoarrow/meson.build | 66 ++++++++++++++ python/subprojects/nanoarrow.wrap | 37 ++++++++ 5 files changed, 160 insertions(+), 110 deletions(-) create mode 100644 python/meson.build create mode 100644 python/src/nanoarrow/meson.build create mode 100644 python/subprojects/nanoarrow.wrap diff --git a/python/bootstrap.py b/python/bootstrap.py index 5898f5008..77c7c0081 100644 --- a/python/bootstrap.py +++ b/python/bootstrap.py @@ -15,13 +15,9 @@ # specific language governing permissions and limitations # under the License. -import os +import argparse import pathlib import re -import shutil -import subprocess -import tempfile -import warnings # Generate the nanoarrow_c.pxd file used by the Cython extensions @@ -29,14 +25,7 @@ class NanoarrowPxdGenerator: def __init__(self): self._define_regexes() - def generate_nanoarrow_pxd(self, file_in, file_out): - file_in_name = pathlib.Path(file_in).name - - # Read the nanoarrow.h header - content = None - with open(file_in, "r") as input: - content = input.read() - + def generate_nanoarrow_pxd(self, content: str, build_dir: pathlib.Path) -> None: # Strip comments content = self.re_comment.sub("", content) @@ -57,12 +46,11 @@ def generate_nanoarrow_pxd(self, file_in, file_out): header = self.re_newline_plus_indent.sub("\n", self._pxd_header()) # Write nanoarrow_c.pxd + file_out = build_dir / "nanoarrow_c.pxd" with open(file_out, "wb") as output: output.write(header.encode("UTF-8")) - output.write( - f'\ncdef extern from "{file_in_name}" nogil:\n'.encode("UTF-8") - ) + output.write('\ncdef extern from "nanoarrow.h" nogil:\n'.encode("UTF-8")) # A few things we add in manually output.write(b"\n") @@ -175,105 +163,40 
@@ def _pxd_header(self): """ -# Runs cmake -DNANOARROW_BUNDLE=ON if cmake exists or copies nanoarrow.c/h -# from ../dist if it does not. Running cmake is safer because it will sync -# any changes from nanoarrow C library sources in the checkout but is not -# strictly necessary for things like installing from GitHub. -def copy_or_generate_nanoarrow_c(): +def generate_nanoarrow_c() -> str: this_dir = pathlib.Path(__file__).parent.resolve() - source_dir = this_dir.parent - vendor_dir = this_dir / "vendor" + nanoarrow_dir = this_dir / "subprojects" / "nanoarrow" / "src" / "nanoarrow" - vendored_files = [ + # This should match the NANOARROW_BUNDLE code in CMakeLists.txt + # With the only thing missing being the nanoarrow namespace. However, we + # assume the Python installation is sandboxed so should not be required (?) + header_data: list[str] = [] + + files = [ + # TODO: - do we need the config file for Cython? + # 'nanoarrow_config.h', + "nanoarrow_types.h", "nanoarrow.h", - "nanoarrow.c", - "nanoarrow_ipc.h", - "nanoarrow_ipc.c", - "nanoarrow_device.h", - "nanoarrow_device.c", + "buffer_inline.h", + "array_inline.h", ] - dst = {name: vendor_dir / name for name in vendored_files} - - for f in dst.values(): - f.unlink(missing_ok=True) - - is_cmake_dir = (source_dir / "CMakeLists.txt").exists() - is_in_nanoarrow_repo = ( - is_cmake_dir and (source_dir / "src" / "nanoarrow" / "nanoarrow.h").exists() - ) - - if not is_in_nanoarrow_repo: - raise ValueError( - "Attempt to build source distribution outside the nanoarrow repo" - ) - cmake_bin = os.getenv("CMAKE_BIN") - if not cmake_bin: - cmake_bin = "cmake" - has_cmake = os.system(f"{cmake_bin} --version") == 0 - if not has_cmake: - raise ValueError("Attempt to build source distribution without CMake") + for file in files: + with open(nanoarrow_dir / file) as f: + header_data.append(f.read()) - # The C library, IPC extension, and Device extension all currently have slightly - # different methods of bundling (hopefully this 
can be unified) + contents = "\n".join(header_data) + # Remove includes that aren't needed when the headers are concatenated + contents = re.sub(r"#include \".*", "", contents) - vendor_dir.mkdir(exist_ok=True) - - # Copy device files - device_ext_src = ( - source_dir / "extensions" / "nanoarrow_device" / "src" / "nanoarrow" - ) - - for device_file in ["nanoarrow_device.h", "nanoarrow_device.c"]: - shutil.copyfile( - device_ext_src / device_file, - dst[device_file], - ) - - ipc_source_dir = source_dir / "extensions/nanoarrow_ipc" - - for cmake_project in [source_dir, ipc_source_dir]: - with tempfile.TemporaryDirectory() as build_dir: - try: - subprocess.run( - [ - cmake_bin, - "-B", - build_dir, - "-S", - cmake_project, - "-DNANOARROW_IPC_BUNDLE=ON", - "-DNANOARROW_BUNDLE=ON", - "-DNANOARROW_NAMESPACE=PythonPkg", - ] - ) - subprocess.run( - [ - cmake_bin, - "--install", - build_dir, - "--prefix", - vendor_dir, - ] - ) - except Exception as e: - warnings.warn(f"cmake call failed: {e}") - - if not dst["nanoarrow.h"].exists(): - raise ValueError("Attempt to vendor nanoarrow.c/h failed") - - -# Runs the pxd generator with some information about the file name -def generate_nanoarrow_pxd(): - this_dir = pathlib.Path(__file__).parent.resolve() - maybe_nanoarrow_h = this_dir / "vendor/nanoarrow.h" - maybe_nanoarrow_pxd = this_dir / "vendor/nanoarrow_c.pxd" - - NanoarrowPxdGenerator().generate_nanoarrow_pxd( - maybe_nanoarrow_h, maybe_nanoarrow_pxd - ) + return contents if __name__ == "__main__": - copy_or_generate_nanoarrow_c() - generate_nanoarrow_pxd() + parser = argparse.ArgumentParser() + parser.add_argument("build_dir", type=str) + args = parser.parse_args() + build_dir = pathlib.Path(args.build_dir).resolve() + + contents = generate_nanoarrow_c() + NanoarrowPxdGenerator().generate_nanoarrow_pxd(contents, build_dir) diff --git a/python/meson.build b/python/meson.build new file mode 100644 index 000000000..fbaf64f55 --- /dev/null +++ b/python/meson.build @@ -0,0 +1,24 
@@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +project( + 'nanoarrow-python', + 'cython', + version: '0.15.0', # TODO: don't hard code this +) + +subdir('src/nanoarrow') diff --git a/python/pyproject.toml b/python/pyproject.toml index 2be6aebbe..067687765 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -38,7 +38,7 @@ Changelog = "https://github.com/apache/arrow-nanoarrow/blob/main/CHANGELOG.md" [build-system] requires = [ - "setuptools >= 61.0.0", + "meson-python", "Cython" ] -build-backend = "setuptools.build_meta" +build-backend = "mesonpy" diff --git a/python/src/nanoarrow/meson.build b/python/src/nanoarrow/meson.build new file mode 100644 index 000000000..5a3e664a9 --- /dev/null +++ b/python/src/nanoarrow/meson.build @@ -0,0 +1,66 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +py = import('python').find_installation(pure: false) + +cython_args = [ + '--include-dir', + meson.current_build_dir(), + '--include-dir', + meson.current_source_dir(), # TODO: should meson handle this natively? +] +if get_option('buildtype') == 'debug' + cython_args += ['--gdb'] +endif + +nanoarrow_dep = dependency('nanoarrow') + +generated_pyx = custom_target( + 'generate-pyx', + input: meson.current_build_dir(), + output: 'nanoarrow_c.pxd', + command: [py, '../../bootstrap.py', '@INPUT@'], + install: true, + install_dir: '.', + #depends: nanoarrow_dep, + # ERROR: custom_target keyword argument 'depends' was of type + # array[InternalDependency] but should have been array[BuildTarget | + # CustomTarget] - seems fragile without this? 
+ +) +nanoarrow_c_dep = declare_dependency(sources: generated_pyx) + +# TODO: we need dependencies on nanoarrow_ipc and nanoarrow_device, +# which should happen as part of https://github.com/apache/arrow-nanoarrow/pull/483 + +py.extension_module( + '_lib', + sources: ['_lib.pyx'], + cython_args: cython_args, + dependencies: [nanoarrow_dep, nanoarrow_c_dep], + subdir: 'nanoarrow/', + install: true, +) + +py.extension_module( + '_ipc_lib', + sources: ['_ipc_lib.pyx'], + cython_args: cython_args, + dependencies: [nanoarrow_dep, nanoarrow_c_dep], + subdir: 'nanoarrow', + install: true, +) diff --git a/python/subprojects/nanoarrow.wrap b/python/subprojects/nanoarrow.wrap new file mode 100644 index 000000000..ee8188152 --- /dev/null +++ b/python/subprojects/nanoarrow.wrap @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +[wrap-file] +directory = arrow-nanoarrow-apache-arrow-nanoarrow-0.5.0 +source_url = https://github.com/apache/arrow-nanoarrow/archive/refs/tags/apache-arrow-nanoarrow-0.5.0.tar.gz +source_filename = apache-arrow-nanoarrow-0.5.0.tar.gz +source_hash = 0ceeaa1fb005dbc89c8c7d1b39f2dba07344e40aa9d885ee25fb55b4d57e331a +source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/nanoarrow_0.5.0-1/apache-arrow-nanoarrow-0.5.0.tar.gz +wrapdb_version = 0.5.0-1 + +[provide] +nanoarrow = nanoarrow_dep + +# For development in the arrow-nanoarrow source tree, you may want to provide +# an alternate wrap specification. The following example will pull whatever +# local project is committed to HEAD +# [wrap-git] +# url = ../.. +# revision = HEAD + +# [provide] +# nanoarrow = nanoarrow_dep From b48db86e43c5f2439b9161514cdecebdc4eb07ea Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 3 Jun 2024 16:46:55 -0700 Subject: [PATCH 2/5] Remove setup.py --- python/setup.py | 99 ------------------------------------------------- 1 file changed, 99 deletions(-) delete mode 100644 python/setup.py diff --git a/python/setup.py b/python/setup.py deleted file mode 100644 index bd205baf4..000000000 --- a/python/setup.py +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -import os -import subprocess -import sys - -from setuptools import Extension, setup - - -# https://github.com/jbweston/miniver -def get_version(pkg_path): - """ - Load version.py module without importing the whole package. - - Template code from miniver. - """ - from importlib.util import module_from_spec, spec_from_file_location - - spec = spec_from_file_location("version", os.path.join(pkg_path, "_version.py")) - module = module_from_spec(spec) - spec.loader.exec_module(module) - return module.__version__ - - -version = get_version("src/nanoarrow") - - -# Run bootstrap.py to run cmake generating a fresh bundle based on this -# checkout or copy from ../dist if the caller doesn't have cmake available. -# Note that bootstrap.py won't exist if building from sdist. -this_dir = os.path.dirname(__file__) -bootstrap_py = os.path.join(this_dir, "bootstrap.py") -if os.path.exists(bootstrap_py): - subprocess.run([sys.executable, bootstrap_py]) - - -# Set some extra flags for compiling with coverage support -if os.getenv("NANOARROW_PYTHON_COVERAGE") == "1": - extra_compile_args = ["--coverage"] - extra_link_args = ["--coverage"] - extra_define_macros = [("CYTHON_TRACE", 1)] -elif os.getenv("NANOARROW_DEBUG_EXTENSION") == "1": - extra_compile_args = ["-g", "-O0"] - extra_link_args = [] - extra_define_macros = [] -else: - extra_compile_args = [] - extra_link_args = [] - extra_define_macros = [] - -setup( - ext_modules=[ - Extension( - name="nanoarrow._lib", - include_dirs=["src/nanoarrow", "vendor"], - language="c", - sources=[ - "src/nanoarrow/_lib.pyx", - "vendor/nanoarrow.c", - "vendor/nanoarrow_device.c", - ], - extra_compile_args=extra_compile_args, - extra_link_args=extra_link_args, - define_macros=extra_define_macros, - ), - Extension( - name="nanoarrow._ipc_lib", - include_dirs=["src/nanoarrow", "vendor"], - language="c", - sources=[ - 
"src/nanoarrow/_ipc_lib.pyx", - "vendor/nanoarrow.c", - "vendor/nanoarrow_ipc.c", - "vendor/flatcc.c", - ], - extra_compile_args=extra_compile_args, - extra_link_args=extra_link_args, - define_macros=extra_define_macros, - ), - ], - version=version, -) From c4faea5059bcda7f92f00604bd9fdfa3d0529d70 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 17 Jun 2024 12:48:21 -0400 Subject: [PATCH 3/5] Add device dep --- src/nanoarrow/meson.build | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/nanoarrow/meson.build b/src/nanoarrow/meson.build index bda83fb3a..7ecadf0f3 100644 --- a/src/nanoarrow/meson.build +++ b/src/nanoarrow/meson.build @@ -107,6 +107,10 @@ if needs_device target_type: libtype, cpp_args: device_defines, ) + + nanoarrow_device_dep = declare_dependency(include_directories: [incdir], + link_with: nanoarrow_device_lib, + dependencies: device_deps) endif if get_option('tests') or get_option('integration_tests') From 9cd35cfac7892c0fdfcdf528b8a2de7dc9564bd1 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 17 Jun 2024 12:59:38 -0400 Subject: [PATCH 4/5] Updates --- .gitignore | 3 ++ python/src/nanoarrow/meson.build | 53 +++++++++++++++++++++++-------- python/subprojects/nanoarrow.wrap | 32 +++++++++++-------- 3 files changed, 60 insertions(+), 28 deletions(-) diff --git a/.gitignore b/.gitignore index 89fb9fe2f..1196540f1 100644 --- a/.gitignore +++ b/.gitignore @@ -33,5 +33,8 @@ __pycache__ subprojects/* !subprojects/packagefiles !subprojects/*.wrap +python/subprojects/* +!python/subprojects/packagefiles +!python/subprojects/*.wrap compile_commands.json diff --git a/python/src/nanoarrow/meson.build b/python/src/nanoarrow/meson.build index 5a3e664a9..678ec567e 100644 --- a/python/src/nanoarrow/meson.build +++ b/python/src/nanoarrow/meson.build @@ -27,31 +27,28 @@ if get_option('buildtype') == 'debug' cython_args += ['--gdb'] endif -nanoarrow_dep = dependency('nanoarrow') +# TODO: not sure these are the idiomatic approach to handle multiple 
dependencies +# from same project, where some require options being set +nanoarrow_dep = dependency('nanoarrow', default_options: ['ipc=true']) +#nanoarrow_device_dep = dependency('nanoarrow_device', default_options: ['device=true']) +nanoarrow_ipc_dep = dependency('nanoarrow_ipc', default_options: ['ipc=true']) generated_pyx = custom_target( 'generate-pyx', input: meson.current_build_dir(), output: 'nanoarrow_c.pxd', - command: [py, '../../bootstrap.py', '@INPUT@'], - install: true, - install_dir: '.', - #depends: nanoarrow_dep, - # ERROR: custom_target keyword argument 'depends' was of type - # array[InternalDependency] but should have been array[BuildTarget | - # CustomTarget] - seems fragile without this? - + command: [py, meson.current_source_dir() + '/../../bootstrap.py', '@INPUT@'], ) nanoarrow_c_dep = declare_dependency(sources: generated_pyx) -# TODO: we need dependencies on nanoarrow_ipc and nanoarrow_device, -# which should happen as part of https://github.com/apache/arrow-nanoarrow/pull/483 - py.extension_module( '_lib', sources: ['_lib.pyx'], cython_args: cython_args, - dependencies: [nanoarrow_dep, nanoarrow_c_dep], + dependencies: [ + nanoarrow_c_dep, + nanoarrow_dep, + ], subdir: 'nanoarrow/', install: true, ) @@ -60,7 +57,35 @@ py.extension_module( '_ipc_lib', sources: ['_ipc_lib.pyx'], cython_args: cython_args, - dependencies: [nanoarrow_dep, nanoarrow_c_dep], + dependencies: [ + nanoarrow_c_dep, + nanoarrow_dep, + nanoarrow_ipc_dep, + ], subdir: 'nanoarrow', install: true, ) + +py_sources = [ + 'array.py', + 'array_stream.py', + 'c_array.py', + 'c_array_stream.py', + 'c_buffer.py', + 'c_schema.py', + 'device.py', + 'ipc.py', + 'iterator.py', + '_repr_utils.py', + 'schema.py', + '_static_version.py', + '_version.py', + 'visitor.py', +] + +foreach source: py_sources + py.install_sources( + source, + subdir: 'nanoarrow' + ) +endforeach diff --git a/python/subprojects/nanoarrow.wrap b/python/subprojects/nanoarrow.wrap index ee8188152..5887a514e
100644 --- a/python/subprojects/nanoarrow.wrap +++ b/python/subprojects/nanoarrow.wrap @@ -15,23 +15,27 @@ # specific language governing permissions and limitations # under the License. -[wrap-file] -directory = arrow-nanoarrow-apache-arrow-nanoarrow-0.5.0 -source_url = https://github.com/apache/arrow-nanoarrow/archive/refs/tags/apache-arrow-nanoarrow-0.5.0.tar.gz -source_filename = apache-arrow-nanoarrow-0.5.0.tar.gz -source_hash = 0ceeaa1fb005dbc89c8c7d1b39f2dba07344e40aa9d885ee25fb55b4d57e331a -source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/nanoarrow_0.5.0-1/apache-arrow-nanoarrow-0.5.0.tar.gz -wrapdb_version = 0.5.0-1 +# This section could be uncommented for releases if we want to support +# standalone installations from an sdist +# [wrap-file] +# directory = arrow-nanoarrow-apache-arrow-nanoarrow-0.5.0 +# source_url = https://github.com/apache/arrow-nanoarrow/archive/refs/tags/apache-arrow-nanoarrow-0.5.0.tar.gz +# source_filename = apache-arrow-nanoarrow-0.5.0.tar.gz +# source_hash = 0ceeaa1fb005dbc89c8c7d1b39f2dba07344e40aa9d885ee25fb55b4d57e331a +# source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/nanoarrow_0.5.0-1/apache-arrow-nanoarrow-0.5.0.tar.gz +# wrapdb_version = 0.5.0-1 -[provide] -nanoarrow = nanoarrow_dep +# [provide] +# nanoarrow = nanoarrow_dep # For development in the arrow-nanoarrow source tree, you may want to provide # an alternate wrap specification. The following example will pull whatever # local project is committed to HEAD -# [wrap-git] -# url = ../.. -# revision = HEAD +[wrap-git] +url = ../.. 
+revision = HEAD -# [provide] -# nanoarrow = nanoarrow_dep +[provide] +nanoarrow = nanoarrow_dep +nanoarrow_ipc = nanoarrow_ipc_dep +nanoarrow_device = nanoarrow_device_dep From e5cca77610dc13ef1d4a72bdd51d2e534e99abb9 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 17 Jun 2024 15:17:06 -0400 Subject: [PATCH 5/5] typo fix --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index 814621268..6ae54b306 100644 --- a/meson.build +++ b/meson.build @@ -121,7 +121,7 @@ if needs_device cpp_args: device_defines, ) - nanoarrow_device_dep = declare_dependency(include_directories: [incdgir], + nanoarrow_device_dep = declare_dependency(include_directories: [incdir], link_with: nanoarrow_device_lib, dependencies: device_deps) endif