#!/usr/bin/python

"""
read_cw_stats_stddev.py

reads a CW Utilities Library output stats file and from a column,
determines the min and max stddev values

sorts stats files by estuary and then loops through months

outputs the min of the min stddevs (across years, then months) and prints
the max & min stddev for each estuary (max of the maxes, min of the mins,
for all years, for all months)

calculates needed sample size ("needed N") to achieve 0.3 deg C confidence
interval width for each estuary's low stddev and high stddev
    uses formula for sample size determination:  k*s^2/(conf-interval^2)
              where k = 16 at 95% confidence and k = 26.5 at 99% confidence
              (the 99% form is the one currently enabled in the code)
              see README_signif_small_N.txt

stddev = 7th column of CW stats file

dir = /disks/data035/data/podaac-mur/sst/monthly
file = JPLMUR_MONTH-BY-YEAR_12DEC-2007-2017_L4ANALYSIS_SST_ALB_1KM_STATS.txt

output (printed to stdout, one line per estuary):
   estuary Stddev-high Stddev-low Needed-N-high Needed-N-low

Ron Vogel, UMD/ESSIC/CICS
Oct 12, 2018
"""

import sys
import os
import shutil
import glob

estuaries = [ 'ALB', 'BRN', 'BZB', 'CAS', 'CD', 'CNV', 'DEL', 'GRB',
              'LEH', 'LIS', 'PAM' ]
# Directory holding the monthly CW stats files (trailing slash required,
# because the glob pattern below is built by plain string concatenation).
stats_dir = '/disks/data035/data/podaac-mur/sst/monthly/'

# Zero-based index of the stddev column (7th column of a CW stats file).
STDDEV_COL = 6
# Target confidence-interval width, deg C.
CI_WIDTH = 0.3
# Coefficient k in  needed N = k * s^2 / width^2  for each confidence level.
N_COEFF_95 = 16
N_COEFF_99 = 26.5


def needed_sample_size(stddev, coeff=N_COEFF_99, width=CI_WIDTH):
    """Return the (unrounded) sample size needed for a given stddev.

    Implements  N = coeff * stddev^2 / width^2.  The default coefficient is
    the 99% confidence value; pass N_COEFF_95 for the 95% level.
    """
    return (coeff * (stddev ** 2)) / (width ** 2)


def read_stddev_range(path):
    """Return (min, max) of the stddev column of one CW stats file.

    Every non-blank line is split on whitespace and its 7th field is
    converted to float.  Blank lines are skipped.

    Raises:
        ValueError: if the file has no data rows, or a stddev field is not
            a valid float.
        IndexError: if a non-blank row has fewer than 7 fields.
    """
    # 'with' guarantees the file is closed even if parsing fails.
    with open(path, 'r') as stats_file:
        rows = (line.split() for line in stats_file)
        stddevs = [float(fields[STDDEV_COL]) for fields in rows if fields]
    if not stddevs:
        raise ValueError('no data rows in stats file: ' + path)
    return min(stddevs), max(stddevs)


def estuary_stddev_range(stats_paths):
    """Return (min of mins, max of maxes) over a collection of stats files.

    Each file contributes its own min/max (taken across its rows); the
    result is the overall min and max across all files.  Returns None when
    stats_paths is empty.
    """
    per_file = [read_stddev_range(path) for path in sorted(stats_paths)]
    if not per_file:
        return None
    lowest = min(low for low, _ in per_file)
    highest = max(high for _, high in per_file)
    return lowest, highest


for estuary in estuaries:

    # The wildcards cover the parts of the file name around the estuary code
    # (e.g. the '12DEC-2007-2017' token in the example name in the header),
    # so all matching stats files for this estuary are picked up.
    pattern = stats_dir + 'JPLMUR_MONTH-BY-YEAR_*' + estuary + '*_STATS.txt'
    stddev_range = estuary_stddev_range(glob.glob(pattern))

    if stddev_range is None:
        # Report and move on instead of aborting the whole run on max([]).
        sys.stderr.write('no stats files found for estuary {0} in {1}\n'
                         .format(estuary, stats_dir))
        continue

    min_this_est, max_this_est = stddev_range

    # Needed N at 99% confidence level (use coeff=N_COEFF_95 for 95%).
    needed_n_high = needed_sample_size(max_this_est)
    needed_n_low = needed_sample_size(min_this_est)

    # Column order: estuary, high stddev, low stddev, N for high, N for low.
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('{0} {1:.5f} {2:.5f} {3:6d} {4:6d}'.format(estuary, max_this_est, min_this_est, int(round(needed_n_high)), int(round(needed_n_low))))


