#!/data/home004/ronald.vogel/.conda/envs/rv_env_main/bin/python3
#calc_CBP_all-bay_monthly_median_ts.py
#
#calculates monthly median chlorophyll from CBP insitu data for all
#available mainstem stations per month (49 max # stations)
#
#the CBP insitu data must be in CSV files for all mainstem stations for
#full years (12 months)
#
#the output from this code can be plotted with VIIRS-SQ monthly median
#chlorophyll from cwstats output (uses monthly median chl VRSRCW files 
#and calc's *spatial* median with mainstem "CD" polygon)
#
#input:  CBP water quality monitoring yearly file (all stations in one
#        yearly file) as CSV file
#        e.g. CBP_WaterQualityStation_49mainstem_2012.csv
#        located in: 
#        /disks/data563/rvogel/chlor_cb_viirs-sq_timeseries_reports
#
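#output: text file containing time series of monthly median chl for all
#        mainstem stations for years 2012-2022 (one line per year with
#        12 comma-separated monthly medians, Jan-Dec; the file is opened
#        in append mode, so re-running adds lines to an existing file)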
#
#usage:  ./calc_CBP_all-bay_monthly_median_ts.py
#
#this code was based on: matchup_MUR_CBP_by_station.py
#
#Ron Vogel, UMD/ESSIC
#Mar 7, 2023

import os
from datetime import datetime
from numpy import nanmedian  # calculates median ignoring NaN values
# modules not needed:
#import sys
#import re
#import shutil
#import commands
#import csv

# directory containing CBP water quality monitoring data files
cbp_dir = '/disks/data563/rvogel/chlor_cb_viirs-sq_timeseries_reports'

# output file, opened for appending (i.e. new lines are added to an
# existing file)
# - all monthly medians for all years are written in time order to this file
ts_output_name = 'CBP_chl_monthly_median_allstations_2012-2022.txt'

# first and last year of yearly CBP files to process (inclusive)
first_yr = 2012
last_yr = 2022

# zero-based positions of the comma-separated fields used from each record
date_col = 8      # sample date, formatted as mm/dd/YYYY
layer_col = 14    # sample layer; only 'S' records are used
chl_col = 19      # chlorophyll value; may be blank

# TEST: year for which the Jan-May sample lists are printed for a visual check
debug_yr = 2022
debug_month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May')


def collect_monthly_chl(cbp_records_list):
    """Group surface-layer chl values from one yearly CBP file by month.

    cbp_records_list: all lines of the file, as returned by readlines().
        The first line is the header and the last line ends with the
        number of sample records ("Total_Records" line); only the sample
        records in between are read.

    Returns a list of 12 lists of floats (index 0 = January ... index 11 =
    December).  A blank chl field is stored as NaN.

    Raises ValueError if the record count on the last line is larger than
    the number of lines available.
    """
    # get total number of records: last whitespace-separated token of
    # the last line of the CBP file
    last_line_to_read = int(cbp_records_list[-1].split()[-1])
    if last_line_to_read >= len(cbp_records_list):
        raise ValueError('Total_Records (' + str(last_line_to_read) +
                         ') exceeds the number of lines in the file (' +
                         str(len(cbp_records_list)) + ')')

    # one list per month instead of 12 separately named lists
    chl_by_month = [[] for _ in range(12)]

    # skip the header (index 0) and stop after the last sample record
    for cbp_record in cbp_records_list[1:last_line_to_read + 1]:
        # NOTE(review): plain split(',') is kept from the original; it
        # assumes no field contains an embedded comma -- confirm
        cbp_info = cbp_record.split(',')

        # only use surface "S" layer
        cbp_layer = cbp_info[layer_col].replace('"', '').strip()
        if cbp_layer != 'S':
            continue

        # strip quotes/whitespace so a blank field is recognized even when
        # it carries a trailing newline or spaces
        cbp_chl = cbp_info[chl_col].replace('"', '').strip()
        if cbp_chl == '':
            cbp_chl = 'nan'

        cbp_date = cbp_info[date_col].replace('"', '').strip()
        # .month gives the month number directly; strftime('%-m') is not
        # available on every platform
        cbp_mon = datetime.strptime(cbp_date, '%m/%d/%Y').month

        chl_by_month[cbp_mon - 1].append(float(cbp_chl))

    return chl_by_month


def monthly_medians(chl_by_month):
    """Return the median of each month's values, ignoring NaN values.

    A month with no values, or with only NaN values, yields NaN.
    """
    medians = []
    for month_values in chl_by_month:
        # NaN is the only float not equal to itself, so this drops the
        # missing samples before the median is taken
        valid_values = [v for v in month_values if v == v]
        if valid_values:
            medians.append(nanmedian(valid_values))
        else:
            medians.append(float('nan'))
    return medians


def format_medians_line(medians):
    """Format medians as one comma-separated output line (3 decimals)."""
    return ','.join('{:.3f}'.format(x) for x in medians) + '\n'


def main():
    """Write one line of 12 monthly medians per yearly CBP file."""
    # 'with' closes both files even if a yearly file is missing or malformed
    with open(ts_output_name, 'a') as ts_output_file:
        for file_yr in range(first_yr, last_yr + 1):

            # get CBP filenames
            cbp_file = ('CBP_WaterQualityStation_49mainstem_' +
                        str(file_yr) + '.csv')
            cbp_path_file = os.path.join(cbp_dir, cbp_file)

            print ('Now reading CBP data for year: ' + str(file_yr))

            # open file and read all lines
            with open(cbp_path_file, 'r') as cbp_records:
                cbp_records_list = cbp_records.readlines()

            chl_by_month = collect_monthly_chl(cbp_records_list)

            #TEST: visually ck values for the first months for all stations
            if file_yr == debug_yr:
                for mon_name, mon_values in zip(debug_month_names,
                                                chl_by_month):
                    print ('Length of filled ' + mon_name + ' ' +
                           str(file_yr) + ' list: ' + str(len(mon_values)))
                    print(mon_values)

            # calc median for each month and write them to the output
            # file, separated by commas
            ts_output_file.write(
                format_medians_line(monthly_medians(chl_by_month)))


if __name__ == '__main__':
    main()


