Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion arrow-pyarrow-integration-testing/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -662,7 +662,7 @@ def test_table_empty():
"""
schema = pa.schema([pa.field(name='ints', type=pa.list_(pa.int32()), metadata={b'key1': b'value1'})], metadata={b'key1': b'value1'})
table = pa.Table.from_batches([], schema=schema)
new_table = rust.build_table([], schema=schema)
new_table = rust.build_table((), schema=schema)

assert table == new_table
assert table.schema == new_table.schema
Expand Down
24 changes: 20 additions & 4 deletions arrow-pyarrow-testing/tests/pyarrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ use arrow_array::{
use arrow_pyarrow::{FromPyArrow, ToPyArrow};
use pyo3::exceptions::PyTypeError;
use pyo3::types::{PyAnyMethods, PyModule};
use pyo3::Python;
use pyo3::{IntoPyObject, Python};
use std::ffi::CString;
use std::sync::Arc;

Expand All @@ -56,8 +56,7 @@ fn test_to_pyarrow() {
let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b"]));
// The "very long string" will not be inlined, and force the creation of a data buffer.
let c: ArrayRef = Arc::new(StringViewArray::from(vec!["short", "a very long string"]));
let input = RecordBatch::try_from_iter(vec![("a", a), ("b", b), ("c", c)]).unwrap();
println!("input: {input:?}");
let input = RecordBatch::try_from_iter([("a", a), ("b", b), ("c", c)]).unwrap();

let res = Python::attach(|py| {
let py_input = input.to_pyarrow(py)?;
Expand All @@ -70,6 +69,24 @@ fn test_to_pyarrow() {
assert_eq!(input, res);
}

/// Checks that a Python tuple holding two record batches can be converted
/// back into a `Vec<RecordBatch>` through `FromPyArrow`.
///
/// The same exported batch is placed twice in the tuple, so both converted
/// elements must compare equal to the original Rust-side `input`.
#[test]
fn test_to_pyarrow_pair() {
    Python::initialize();

    let a: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
    let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b"]));
    let input = RecordBatch::try_from_iter([("a", a), ("b", b)]).unwrap();

    let res = Python::attach(|py| {
        let record_batch = input.to_pyarrow(py)?;
        // Build a two-element tuple (rather than a list) from the exported batch.
        let tuple = (record_batch.clone(), record_batch).into_pyobject(py)?;
        Vec::<RecordBatch>::from_pyarrow_bound(&tuple)
    })
    .unwrap();

    // Pin the element count first: indexing alone would not notice extra
    // batches, and a short result would otherwise surface as an opaque
    // out-of-bounds panic instead of a clear assertion failure.
    assert_eq!(res.len(), 2);
    assert_eq!(input, res[0]);
    assert_eq!(input, res[1]);
}

#[test]
fn test_to_pyarrow_byte_view() {
Python::initialize();
Expand All @@ -84,7 +101,6 @@ fn test_to_pyarrow_byte_view() {
])
.unwrap();

println!("input: {input:?}");
let res = Python::attach(|py| {
let py_input = input.to_pyarrow(py)?;
let records = RecordBatch::from_pyarrow_bound(&py_input)?;
Expand Down
14 changes: 8 additions & 6 deletions arrow-pyarrow/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,18 +321,20 @@ impl ToPyArrow for ArrayData {

impl<T: FromPyArrow> FromPyArrow for Vec<T> {
fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
let list = value.cast::<PyList>()?;
list.iter().map(|x| T::from_pyarrow_bound(&x)).collect()
let mut v = Vec::with_capacity(value.len().unwrap_or(0));

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not keep the collect syntax?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a micro-optimization to pre-allocate the Vec at the expected size. `.collect()` won't have visibility into that size because of `PyAny::try_iter`. Glad to revert if you prefer.

for item in value.try_iter()? {
v.push(T::from_pyarrow_bound(&item?)?);
}
Ok(v)
}
}

impl<T: ToPyArrow> ToPyArrow for Vec<T> {
fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
let values = self
.iter()
self.iter()
.map(|v| v.to_pyarrow(py))
.collect::<PyResult<Vec<_>>>()?;
Ok(PyList::new(py, values)?.into_any())
.collect::<PyResult<Vec<_>>>()?
.into_pyobject(py)
}
}

Expand Down
Loading