Skip to content

Parallel Process Zonal Statistics

Daniel Hamill edited this page Jan 15, 2020 · 1 revision
from glob import glob
from rasterstats import zonal_stats
import rasterio
import numpy as np
import os
from datetime import date as dtdate
import pandas as pd
import geopandas as gpd
from joblib import Parallel, delayed


def clip_me(i,date, gdf,out_root):
       
    names, means, dates = [] , [],[]
    zs = zonal_stats(gdf, i,no_data = -9999 , raster_out=True ,all_touched = True)
    
    for item, name  in zip(zs, gdf.Name.str.replace(' ' , '_').tolist()):
        oName = name+ '_' + date.strftime(format="%Y_%m_%d") +'.tif'
        try:
            os.mkdir(out_root + os.sep + name)
        except:
            pass
        
        with rasterio.open(out_root + os.sep + name+ os.sep + oName, 'w',driver='GTiff', 
                       height = item['mini_raster_array'].shape[0], width = item['mini_raster_array'].shape[1], 
                       dtype=rasterio.dtypes.int16, 
                       crs='+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs', count = 1, transform=item['mini_raster_affine']) as dst:
            dst.write(item['mini_raster_array'].astype('int16').filled(-9999),1)
            dst.nodata = -9999
        try:
            mean_swe_depth = np.round(item['mean'],3)
        except:
            mean_swe_depth = np.nan
        names.append(name)
        means.append(mean_swe_depth)
        dates.append(date)
            
    return {'Date':dates, 'Mean_Depth':means, 'Name':names}

    
if __name__ == '__main__':
    
    dsts = glob(r"E:\ririe\rasters\UA SWE Data\depth\*alb.tif")
    dates = [dtdate(int(i.split('_')[-4]), int(i.split('_')[-3]), int(i.split('_')[-2][:2])) for i in dsts]
    
    # df = pd.DataFrame({'fpath':dsts,'date':dates})
    # df.date = pd.to_datetime(df.date)
    # df = df[df.date.dt.month.isin([9,10,11,12,1,2,3])]
    
    gdf = gpd.read_file(r"E:\ririe\shp\total_watershed.shp")
    gdf = gdf.to_crs('+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs')
    
    
    out_root = r"C:\workspace\ririe\Frozen_Ground_Data\Snow_Depth\ririe_watershed"
    w = Parallel(n_jobs=-1)(delayed (clip_me)(fpath, date, gdf, out_root) for fpath, date in zip(dsts, dates) )
    
    
    dates = np.ravel([i['Date'] for i in w])
    names = np.ravel([i['Name'] for i in w])
    mean_depth = np.ravel([i['Mean_Depth'] for i in w])
    df = pd.DataFrame({'name':names, 'date':dates, 'mean_depth':mean_depth})
    
    
    df = df.pivot(index='date',columns='name', values='mean_depth')
    
    df.to_excel(r"C:\workspace\ririe\Frozen_Ground_Data\Snow_Depth\basin_average_snow_depth.xlsx")

Clone this wiki locally