Zonal statistics for each time step in a NetCDF data using rasterstats in python


NetCDF is a common format when we are talking about the scentific data. Here we can see an example of air temperature data structure. NetCDF data format

The following is the python code for calculating zonal statistics using rasterstats for each time step in a NetCDF.

import netCDF4
import rasterio as rio
import numpy
import rasterstats as rstats
import datetime

wkt='POLYGON ((81 26, 86 26, 86 29, 81 29, 81 26))'
parameterName='temperature'
NetCDFFileCompletePath='/home/username/temperatureData.nc'

nc_fid = netCDF4.Dataset(NetCDFFileCompletePath, 'r')  # Reading the netCDF file
lis_var = nc_fid.variables

lats = nc_fid.variables['latitude'][:]  # Defining the latitude array (make sure variable name is latitude)
lons = nc_fid.variables['longitude'][:]  # Defining the longitude array (make sure variable name is longitude)

field = nc_fid.variables[parameterName][:]  # Defining the variable array
time = nc_fid.variables['time'][:]

deltaLats = lats[1] - lats[0]
deltaLons = lons[1] - lons[0]

deltaLatsAbs = numpy.abs(deltaLats)
deltaLonsAbs = numpy.abs(deltaLons)

geotransform = rio.transform.from_origin(lons.min(), lats.max(), deltaLatsAbs, deltaLonsAbs)

seriesData = []

for timestep, v in enumerate(time):

    nc_arr = field[timestep]
    nc_arr[nc_arr < -9000] = numpy.nan  # use the comparator to drop nodata fills
    if deltaLats > 0:
        nc_arr = nc_arr[::-1]  # vertically flip array so orientation is right

    dt_str = netCDF4.num2date(lis_var['time'][timestep], units=lis_var['time'].units,
                              calendar=lis_var['time'].calendar)
    strTime = str(dt_str)
    dt_str = datetime.datetime.strptime(strTime, '%Y-%m-%d %H:%M:%S')
    dateInmillisecond = dt_str.timestamp() * 1000

    tt = rstats.zonal_stats(wkt, nc_arr, affine=geotransform, stats='mean')
    interestedValue=tt[0]['mean']

    if interestedValue:
        value = round(interestedValue, 3)
        seriesData.append([int(dateInmillisecond), value])
    else:
        seriesData.append([int(dateInmillisecond), None])
nc_fid.close()