#!/data/home006/ronald.vogel/.conda/envs/rv_pip_installs/bin/python3

# download data from an ERDDAP server using xarray's OpenDAP feature
# for a geographic subset of that data
# - required input on command line:
#        start day
#        end day
#        w bound coord 
#        e bound coord 
#        s bound coord 
#        n bound coord
#        subsetted region name
# - currently this advances daily data downloads from one day to the next
#   (need to modify this code for different time steps)
# - for downloading a single day, use same day for start and end days
# - geographically subsets data for a rectangular lat/lon coordinate box
# - region name is currently NOT added to the downloaded data filename
# - ERDDAP server is currently hardcoded

# usage: ./erddap_dwnld_data_xarray.py --start_day [start day of dwnld] --end_day [end day of dwnld] --wcoord [W bound coord to subset] --ecoord [E bound coord to subset] --scoord [S bound coord to subset] --ncoord [N bound coord to subset] --region_name [name of geographic region subsetted]

#    - day format, string: YYYYMMDD
#                    for example: 20181207
#                                 20230729
#    - coord format: decimal degrees; latitude -90.0 to +90.0, longitude -180.0 to +180.0
#                    for example: -132.0
#    - region name format: string
#                    for example: CONUS

# Code from Dale Robinson, WCN, 2023-08-03
# Updates:
# - arg parsing added by Ron Vogel, 2023-08-10
# - accounting for missing data files on ERDDAP server and adding 
#   time and geospatial metadata by Dale Robinson, 2024-07-02


import xarray as xr
from datetime import date, timedelta
import argparse
from dateutil.parser import parse


# Build the command-line interface: a day range, a rectangular lat/lon
# bounding box and a label for the subsetted region.  Every option is
# mandatory.
# NOTE: the file header is written as `#` comments rather than a module
# docstring, so __doc__ is None here and --help shows no description text.
doc_formatter = argparse.RawDescriptionHelpFormatter

parser = argparse.ArgumentParser(
    formatter_class=doc_formatter,
    description=__doc__,
)

# One row per option: (short flag, long flag, help text, converter).
# A converter of None is argparse's default and keeps the raw string.
_OPTION_SPECS = (
    ('-a', '--start_day', 'First day to download YYYYMMDD', None),
    ('-z', '--end_day', 'Last day to download YYYYMMDD', None),
    ('-w', '--wcoord', 'W-bounding coord to subset', float),
    ('-e', '--ecoord', 'E-bounding coord to subset', float),
    ('-s', '--scoord', 'S-bounding coord to subset', float),
    ('-n', '--ncoord', 'N-bounding coord to subset', float),
    ('-r', '--region_name', 'Name of geographically subsetted region', None),
)

# Registration order is preserved so the --help listing is unchanged.
for _short, _long, _help, _convert in _OPTION_SPECS:
    parser.add_argument(_short, _long,
                        help=_help,
                        type=_convert,
                        required=True)

# Parsed as soon as the module is loaded; main() reads this namespace.
args = parser.parse_args()

def _reset_subset_metadata(ds, day):
    """Rewrite the global attributes of *ds* so they describe one daily subset.

    Sets the time-coverage attributes to *day* and the geospatial min/max
    attributes to the extent of the latitude/longitude coordinates actually
    present in *ds*, then drops the Easting/Northing attributes.

    Args:
        ds: the single-day, lat/lon-subsetted dataset (modified in place).
        day: datetime of the day held in *ds*.
    """
    # Work on a private copy of the attributes so edits made for one day can
    # never leak into the dataset the next day is sliced from.
    # NOTE(review): whether xarray shares the attrs dict between a dataset and
    # its .sel() result depends on the xarray version -- the copy is defensive.
    ds.attrs = dict(ds.attrs)

    # The 09:00:00Z time-of-day is hard-coded; presumably it matches the time
    # stamp of the daily fields on the server -- confirm before pointing this
    # script at a different dataset.
    meta_date = '{0:%Y-%m-%dT09:00:00Z}'.format(day)
    ds.attrs['time_coverage_end'] = meta_date
    ds.attrs['time_coverage_start'] = meta_date

    # Geographic extent of the subset, as plain Python scalars
    ds.attrs['geospatial_lat_max'] = ds.latitude.max().item()
    ds.attrs['geospatial_lat_min'] = ds.latitude.min().item()
    ds.attrs['geospatial_lon_max'] = ds.longitude.max().item()
    ds.attrs['geospatial_lon_min'] = ds.longitude.min().item()

    # Remove some unneeded attributes.  pop() with a default instead of `del`
    # so an attribute that is already absent does not raise KeyError.
    for stale_key in ('Westernmost_Easting', 'Easternmost_Easting',
                      'Southernmost_Northing', 'Northernmost_Northing'):
        ds.attrs.pop(stale_key, None)


def main():
    """Download one NetCDF file per day for a lat/lon box from the ERDDAP server.

    Reads the module-level ``args`` namespace (start/end day, W/E/S/N bounding
    coordinates, region name).  For every day from start_day through end_day
    (inclusive) the dataset is subsetted to the bounding box and that day,
    its time/geospatial metadata is reset, and the result is written to
    ``mursst_<YYYYMMDD>_wcn_erddap.nc`` in the current directory as
    NETCDF3_CLASSIC.  Days with no data are reported and skipped.
    """
    # make date objects with dateutil.parser
    start_dt = parse(args.start_day)
    # FIX: this used to parse args.start_day again, so --end_day was ignored
    # and only the first day was ever downloaded.
    end_dt = parse(args.end_day)

    # step between consecutive downloads (daily data)
    delta = timedelta(days=1)

    ofile_templ = 'mursst_{}_wcn_erddap.nc'

    # use the ERDDAP opendap url
    ##### modify for other ERDDAP servers #####
    full_url = 'https://coastwatch.pfeg.noaa.gov/erddap/griddap/jplMURSST41'

    print ("Downloading data from WCN-ERDDAP for dates %s to %s" % (args.start_day, args.end_day) )
    print ("     and for region %s" % (args.region_name) )

    # Point to the dataset; the context manager closes the remote connection
    # even if a download fails part-way through.
    with xr.open_dataset(full_url) as df:

        # Step 1. subset the data by lat,lon for the ENTIRE time series
        #         (the bounds are already floats: argparse uses type=float)
        df_latlon = df.sel(longitude=slice(args.wcoord, args.ecoord),
                           latitude=slice(args.scoord, args.ncoord))

        # Step 2. subset the time series for the needed dates, add
        #         time and geospatial metadata, and write out each daily file
        current_dt = start_dt
        while current_dt <= end_dt:

            print('start', current_dt)

            # A day the time index cannot resolve may raise KeyError rather
            # than return an empty selection; treat both as "missing".
            try:
                df_subset = df_latlon.sel(
                    time='{0:%Y-%m-%d}'.format(current_dt))
            except KeyError:
                df_subset = None

            # .size (not len()) so a scalar time coordinate is handled too
            if df_subset is None or df_subset.time.size == 0:
                print('ERROR: data is missing or not subsetting')
                current_dt += delta
                continue

            _reset_subset_metadata(df_subset, current_dt)

            # save the file, named after the day it contains
            file_date = '{0:%Y%m%d}'.format(current_dt)
            df_subset.to_netcdf(ofile_templ.format(file_date),
                                format='NETCDF3_CLASSIC')

            # advance to the next day
            print('end', current_dt)
            current_dt += delta

    print('End WCN-download script')


if __name__ == "__main__":
    main()
