#!/usr/bin/python

"""
matchup_MUR_CBP_by_station.py

Performs matchup of PODAAC MUR SST with Chesapeake Bay Program (CBP) water
temperature by CBP station and date of water sample collection.  The MUR SST
daily statistics file (which is station-specific) is searched for the CBP
date, and both temperatures (MUR and CBP) are written to a matchup results
file (also station-specific).

procedure:
   loop through CBP files year by year
   read each CBP file line by line
   read station, date, layer, wtemp value
        for layer, use surface "S" only
        convert date to YYYY JJJ
   for station determine JPLMUR-DAILY txt file
   search JPLMUR-DAILY file line by line for date and select mean
   write result (date, CBP value, MUR value) to file (all
      dates and years in one station-specific file)

input:   1) PODAAC MUR SST daily statistics file (station-specific, all years
            in one file)
            e.g. JPLMUR_DAILY-2002-2017_L4ANALYSIS_SST_CBPsite001_1KM_STATS.txt
            located in: /disks/data035/data/podaac-mur/sst/daily
         2) CBP water quality monitoring yearly file (all stations in one
            yearly file)
            e.g. CBP_WaterQualityStationMainstem_WTemp_2010.csv
            located in: /data/data015/pub/rvogel/sst_mur_validation

output:  match-up text file listing MUR and CBP water temp for each date
         (file is station-specific)
         e.g. matchup_MUR_CBP_CB3.3C_allyears.txt

usage:   ./matchup_MUR_CBP_by_station.py


Ron Vogel, SMRC for NOAA CoastWatch
May 5, 2017
"""


import os
from datetime import datetime
# modules not needed:
#import sys
#import re
#import shutil
#import commands
#import csv

# Directory holding the yearly CBP water quality monitoring data files
cbp_dir = '/data/data015/pub/rvogel/sst_mur_validation'

# Directory holding the station-specific MUR daily statistics files
mur_dir = '/disks/data035/data/podaac-mur/sst/daily'

# Keep only the per-station statistics files and sort them by name; the
# matchup loop selects a station's file by its position in this sorted list.
mur_files = os.listdir(mur_dir)
mur_files_to_index = [
    name for name in mur_files
    if name.startswith('JPLMUR_DAILY-2002-2017_L4ANALYSIS_SST_CBPsite')
]
mur_files_list = sorted(mur_files_to_index)

# CBP station names, grouped one bay segment per row.  The order is
# significant: a station's position in this list is used as its index into
# the sorted list of MUR files.
cbp_station_names = [
    "CB1.1",
    "CB2.1", "CB2.2",
    "CB3.1", "CB3.2", "CB3.3C",
    "CB4.1C", "CB4.2C", "CB4.3C", "CB4.4",
    "CB5.1", "CB5.2", "CB5.3", "CB5.4", "CB5.5",
    "CB6.1", "CB6.2", "CB6.3", "CB6.4",
    "CB7.1", "CB7.1N", "CB7.1S", "CB7.2", "CB7.2E",
    "CB7.3", "CB7.3E", "CB7.4", "CB7.4N",
    "CB8.1", "CB8.1E",
    "EE3.4", "EE3.5",
    "LE2.3", "LE3.6", "LE3.7", "LE5.5-W",
    "WE4.1", "WE4.2", "WE4.3", "WE4.4",
]

# Start Matchup
#
# For every yearly CBP file, each surface-layer ("S") sample is paired with
# the MUR record(s) of the same station, year and day-of-year, and one line
# per pair is appended to that station's matchup file.


def _load_mur_temps(mur_path):
    """Read one station-specific MUR statistics file into a lookup table.

    Parameters:
       mur_path -- path of the MUR daily statistics text file

    Returns:
       dict mapping (year, day-of-year) string pairs (1st and 2nd
       whitespace-separated columns) to the list of 7th-column values found
       for that day, in file order.

    Raises:
       IOError/OSError if the file cannot be opened.
       IndexError if a non-blank line has fewer than 7 columns.
    """
    temps_by_day = {}
    with open(mur_path, 'r') as mur_records:
        for mur_record in mur_records:
            mur_info = mur_record.split()
            if not mur_info:
                # tolerate blank lines (e.g. a trailing newline at end of file)
                continue
            day_key = (mur_info[0], mur_info[1])
            temps_by_day.setdefault(day_key, []).append(mur_info[6])
    return temps_by_day


# MUR lookup tables loaded so far, keyed by CBP station name.  Each MUR file
# is read once per run instead of being re-scanned for every CBP sample.
mur_temps_by_station = {}

for file_yr in range(2003, 2016 + 1):

    # get CBP filename for this year
    cbp_file = 'CBP_WaterQualityStationMainstem_WTemp_' + str(file_yr) + '.csv'
    cbp_path_file = os.path.join(cbp_dir, cbp_file)

    # single-argument print(...) behaves the same under Python 2 and 3
    print('Now reading CBP data for year: ' + str(file_yr))

    # read all lines; 'with' closes the file even if an error occurs
    with open(cbp_path_file, 'r') as cbp_records:
        cbp_records_list = cbp_records.readlines()

    # get total number of records: 'Total_Records' (last line of CBP file)
    total_records = cbp_records_list[-1]
    last_line_to_read = int(total_records.split()[-1])

    # only print CBP station name once to screen
    cbp_station_to_print = 'dummy name'

    # read each line to get CBP monitoring sample values
    #      BUT do not read first line (header)
    #      AND only read up to and including last sample (last line of file
    #          contains "Total_Records:" )
    for cbp_record in cbp_records_list[1:last_line_to_read + 1]:
        cbp_info = cbp_record.split(',')
        cbp_station = cbp_info[0].replace('"', '')   # removing quotes
        cbp_date = cbp_info[8]
        cbp_layer = cbp_info[14].replace('"', '').strip()
        cbp_temp = cbp_info[19]

        # only use surface "S" layer
        if cbp_layer != 'S':
            continue

        # convert sample date to the year / day-of-year strings that are
        # compared against the first two columns of the MUR file
        cbp_date_object = datetime.strptime(cbp_date, '%m/%d/%Y')
        cbp_yr = cbp_date_object.strftime('%Y')
        cbp_doy = cbp_date_object.strftime('%j')

        if cbp_station != cbp_station_to_print:
            print('  Finding MUR matchups for CBP station: ' + cbp_station)
            cbp_station_to_print = cbp_station

        # get MUR station-specific file given CBP station name & its index;
        # load it the first time the station is seen
        # (raises ValueError if the station is not in cbp_station_names)
        if cbp_station not in mur_temps_by_station:
            cbp_station_index = cbp_station_names.index(cbp_station)
            mur_file_name = mur_files_list[cbp_station_index]
            mur_temps_by_station[cbp_station] = _load_mur_temps(
                os.path.join(mur_dir, mur_file_name))

        # Note: if the day does not exist in the MUR file, there is no MUR
        #       value for the given CBP day and the list below is empty.  No
        #       line is written to the matchup file to indicate that CBP
        #       exists and MUR does not.
        mur_temps = mur_temps_by_station[cbp_station].get((cbp_yr, cbp_doy), [])

        # open station-specific output matchup file for appending (i.e. add
        # new lines to an existing file); opened even when there is no match,
        # so the file always exists for a station with surface samples
        matchup_name = 'matchup_MUR_CBP_' + cbp_station + '_allyears.txt'
        with open(matchup_name, 'a') as matchup_output:
            for mur_temp in mur_temps:
                # the matched MUR year equals cbp_yr by construction
                matchup_output.write(cbp_yr + ' ' + cbp_date + ' ' + mur_temp + ' ' + cbp_temp + '\n')

