From f40923d72496ad68681e8874ccbe2e540b9c3e5d Mon Sep 17 00:00:00 2001 From: Ilay Falach Date: Sun, 24 May 2026 14:42:54 +0300 Subject: [PATCH] Fix writeToParquet and appendToParquet by switching engine to pyarrow for Dask 2026 compatibility Co-Authored-By: Claude Sonnet 4.6 --- argos/utils/parquetUtils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/argos/utils/parquetUtils.py b/argos/utils/parquetUtils.py index f3d5320..2b82419 100644 --- a/argos/utils/parquetUtils.py +++ b/argos/utils/parquetUtils.py @@ -52,7 +52,7 @@ def appendToParquet(toBeAppended,additionalData,datetimeColumn='datetime'): logger.debug(f"Loading old data file {toBeAppended}.") - dsk = dd.read_parquet(toBeAppended,engine="fastparquet") + dsk = dd.read_parquet(toBeAppended, engine="pyarrow") united = dd.concat([dsk, newData.set_index(datetimeColumn)]) logger.debug(f"Check if the last partition is too large. ") if united.partitions[-1].memory_usage().sum().compute()/1e6 > 100 or united.npartitions > 10: @@ -112,5 +112,5 @@ def writeToParquet(parquetFile,data,datetimeColumn='datetime'): #newData = newData.assign(datetimeString=newData[datetimeColumn].apply(lambda x: x.strftime("%d_%m_%Y"))).set_index(datetimeColumn) dsk = dd.from_pandas(newData,npartitions=1).set_index(datetimeColumn)#.repartition(freq="1D") - dsk.to_parquet(parquetFile,engine="fastparquet") + dsk.to_parquet(parquetFile, engine="pyarrow") return True