# Weather::GHCN::Extremes.pm - analyze of extremes from Weather::GHCN::Fetch.pm output

=head1 NAME

Weather::GHCN::App::Extremes - Report temperature extremes from Weather::GHCN::Fetch output

=head1 VERSION

version v0.0.003

=head1 SYNOPSIS

    use Weather::GHCN::App::Extremes;

    Weather::GHCN::App::Extremes->run( \@ARGV );

See ghcn_extremes -help for details.

=cut

########################################################################
# Pragmas
########################################################################
use v5.18;

package Weather::GHCN::App::Extremes;

our $VERSION = 'v0.0.003';

use feature 'signatures';
no warnings 'experimental::signatures';

########################################################################
# Export
########################################################################

require Exporter;

use base 'Exporter';

our @EXPORT = ( 'run' );

########################################################################
# perlcritic rules
########################################################################

## no critic [Subroutines::ProhibitSubroutinePrototypes]
## no critic [References::ProhibitDoubleSigils];
## no critic [ErrorHandling::RequireCarping]

########################################################################
# Libraries
########################################################################
use Getopt::Long    qw( GetOptionsFromArray );
use Pod::Usage;
use Const::Fast;
use Hash::Wrap      {-lvalue => 1, -defined => 1, -as => '_wrap_hash'};

use ControlBreak;
use List::Util      qw(max min sum);
use Set::IntSpan::Fast;

# modules for Windows only
use if $^O eq 'MSWin32', 'Win32::Clipboard';

########################################################################
# Global delarations
########################################################################

# is it ok to use Win32::Clipboard?
our $USE_WINCLIP = $^O eq 'MSWin32';

my $Opt;

my @ExtremeWaves;
my %Location;

########################################################################
# Constants
########################################################################

const my $EMPTY  => q();        # empty string
const my $SPACE  => q( );       # space character
const my $TAB    => qq(\t);     # tab character
const my $DASH   => q(-);       # dash character
const my $TRUE   => 1;          # perl's usual TRUE
const my $FALSE  => not $TRUE;  # a dual-var consisting of '' and 0

const my $DEFAULT_HOT_LIMIT  => 30;
const my $DEFAULT_COLD_LIMIT  => -20;
const my $DEFAULT_NDAYS => 5;

########################################################################
# Script Mainline
########################################################################

__PACKAGE__->run( \@ARGV ) unless caller;

#-----------------------------------------------------------------------
=head1 SUBROUTINES

=head2 run ( \@ARGV )

Invoke this subroutine, passing in a reference to @ARGV, in order to
perform an analysis of the heat or cold waves in the input data.

Input is from stdin, or from the files listed in @ARGV.  Data should
contain tab-separated output if the format generated by:

    ghcn_fetch -report id

The following columns are expected:

    Year, Month, Day, Decade, S_Decade, S_Year, S_Qtr,
    TMAX, TMIN, Tavg, Qflags, StationId, Location

Any other columns are ignored.

See ghnc_extremes.pl -help for details.

=cut

sub run ($progname, $argv_aref) {

    $Opt = get_options($argv_aref);

    my @files = $argv_aref->@*;

    my $limit = $Opt->limit //
        ( $Opt->cold ? $DEFAULT_COLD_LIMIT
                     : $DEFAULT_HOT_LIMIT
        );

    my $ndays = $Opt->ndays // $DEFAULT_NDAYS;

    my $cmp_op = $Opt->cold ? '<=' : '>=';

    my $years_set = Set::IntSpan::Fast->new;

    my ( $output, $new_fh, $old_fh );
    if ( $Opt->outclip and $USE_WINCLIP ) {
        open $new_fh, '>', \$output
            or die 'Unable to open buffer for write';
        $old_fh = select $new_fh;  ## no critic (ProhibitOneArgSelect)
    }

    my @files = $argv_aref->@*;
    @files = ('-') unless @files;

    foreach my $file (@files) {
        my $fh;
        if ($file eq '-') {
            $fh = *STDIN;
        } else {
            open ($fh, '<', $file) or die;
        }

        @ExtremeWaves = ();
        %Location = ();

        # controlling on bool is_extreme and alpha stnid, minor to major
        my $cb = ControlBreak->new( '+XT', 'STNID', 'EOF' );

        read_data( $fh, $cb, $limit );
    }

    my $years_href;
    if ( $Opt->peryear ) {
        $years_href = report_extremes_per_year($limit, $ndays, $cmp_op);
    } else {
        $years_href = report_extremes_daycounts($limit, $ndays, $cmp_op)
    }

    # generate lines for each year that was missing
    if ($Opt->nogaps) {
        foreach my $stnid (keys $years_href->%*) {
            my @years = sort keys $years_href->{$stnid}->%*;

            my $s = Set::IntSpan::Fast->new( min(@years) .. max(@years) );
            my $t = Set::IntSpan::Fast->new( @years );
            my $gaps = $s->diff($t);
            my $iter = $gaps->iterate_runs();

            while ( my ( $from, $to ) = $iter->() ) {
                foreach my $yr ($from .. $to) {
                    say join $TAB, $stnid, $Location{$stnid}, $yr;
                }
            }
        }
    }

WRAP_UP:
    # send output to the Windows clipboard
    if ( $Opt->outclip and $USE_WINCLIP ) {
        Win32::Clipboard->new()->Set( $output );
        select $old_fh;
    }

    return;
}

########################################################################
# Script-specific Subroutines
########################################################################

=head2 read_data ( $fh, $cb, $limit )

Read weather data from the filehandle and collect extreme waves
according to $limit and $Opt->cold (true for cold waves, false for
heat waves).

=cut

sub read_data ( $fh, $cb, $limit ) {

    my $extremes_begins;
    my @extreme_days;
    my $lineno;

    while ( my $data = <$fh> ) {
        chomp $data;
        next if $data eq $EMPTY;
        last if $data =~ m{ \A Notes: }xms;

        my ($year,$month,$day,$decade,$s_decade,$s_year,$s_qtr,$tmax,$tmin,$tavg,$qflags,$stnid,$loc ) = split $TAB, $data;

        $lineno++;
        if ($lineno == 1) {
            die '*E* invalid input data: ' . $data
                unless  $year eq 'Year' and $tmax =~ m{ \A TMAX }xms and $tmin =~ m{ \A TMIN }xms;
            next;
        }

        last unless $year =~ m{ \A \d{4} \Z }xms;

        my $ymd = sprintf '%04d-%02d-%02d', $year, $month, $day;

        my $value = $Opt->cold ? $tmin : $tmax;

        next if not defined $value or $value eq $EMPTY;

        $Location{$stnid} = $loc;

        my $is_extreme = $Opt->cold
                       ? $value <= $limit
                       : $value >= $limit
                       ;

        my $on_break = sub {
            if ( $is_extreme ) {
                $extremes_begins = $ymd if $cb->break('XT');
                push @extreme_days, [$ymd, $value, $stnid, $loc];
            } elsif ($cb->break('XT')) {
                push @ExtremeWaves, [$extremes_begins, [@extreme_days], $stnid, $loc ];
                $extremes_begins = undef;
                @extreme_days = ();
            }
            if ($cb->break('STNID')) {
                $extremes_begins = undef;
                @extreme_days = ();
            }
        };

        $cb->test_and_do($is_extreme, $stnid, eof, $on_break);
    }

    return;
}

=head2 report_extremes_daycounts ($limit, $ndays, $cmp_op)

Analyzes the input data lookin for $ndays consecutive days when
the temperature is beyond $limit.  By default, heatwaves are examined.
If the option -cold is given, then cold waves are examined.

Returns a reference to a hash keyed on year, and which contains a
tab_separated line of text that includes the station id, location,
year, ymd the wave began, the number of days the wave lasted,
the average temperature during the wave, and the most extreme (hot
or cold) temperature during the wave.

=cut

sub report_extremes_daycounts ($limit, $ndays, $cmp_op) {
    my %years;
    my %location;

    my $daycount_col_head = sprintf '%d-day waves %s %dC', $ndays, $cmp_op, $limit;
    say join $TAB, 'StnId', 'Location', 'Year', 'YMD', $daycount_col_head, 'Avg C', 'Max C';

    foreach my $xw_aref (@ExtremeWaves) {
        my ($xw_begin, $xdays_aref, $stnid, $loc) = $xw_aref->@*;

        $stnid //= $EMPTY;
        $loc   //= $EMPTY;

        my $count = scalar $xdays_aref->@*;

        next if $count < $ndays;

        my $year = substr $xw_begin, 0, 4; ## no critic [ProhibitMagicNumbers]

        my @temps = map { $_->[1] } $xdays_aref->@*;
        my $sum = sum(@temps);
        my $extreme = $Opt->cold ? min(@temps) : max(@temps);
        my $avg = sprintf '%0.1f', $sum / $count;
        say join $TAB,
            $stnid, $Location{$stnid}, $year, $xw_begin, $count, $avg, $extreme;
        $years{$stnid}{$year}++;
    }

    return \%years;
}

=head2 report_extremes_per_year ($limit, $ndays, $cmp_op)

Analyzes the input data lookin for $ndays consecutive days when
the temperature is beyond $limit.  By default, heatwaves are examined.
If the option -cold is given, then cold waves are examined.

Returns a reference to a hash keyed on year, and which contains a
tab_separated line of text that includes the station id, location,
year, and a count of the number of waves detected during that year.

=cut

sub report_extremes_per_year ($limit, $ndays, $cmp_op) {
    my $type = $Opt->cold ? 'Coldwaves' : 'Heatwaves';
    my $title = sprintf '%d-day waves %s %dC', $ndays, $cmp_op, $limit;
    say join $TAB, 'StnId', 'Location', 'Year', $title;

    my %years;

    foreach my $xw_aref (@ExtremeWaves) {
        my ($xw_begin, $xdays_aref, $stnid, $loc) = $xw_aref->@*;
        $stnid //= $EMPTY;
        $loc   //= $EMPTY;
        my ($year) = split $DASH, $xw_begin;
        my $count = scalar $xdays_aref->@*;
        next if $count < $ndays;
        $years{$stnid}{$year}++;
    }

    foreach my $stnid ( sort keys %years ) {
        foreach my $yr ( sort keys $years{$stnid}->%* ) {
            say join $TAB, $stnid, $Location{$stnid}, $yr, $years{$stnid}{$yr};
        }
    }

    return \%years;
}

########################################################################
# Script-standard Subroutines
########################################################################

=head2 get_options ( \@ARGV )

B<get_options> encapsulates everything we need to process command line
options, or to set options when invoking this script from a test script.

Normally it's called by passing a reference to @ARGV; from a test script
you'd set up a local array variable to specify the options.

By convention, you should set up a file-scoped lexical variable named
$Opt and set it in the mainline using the return value from this function.
Then all options can be accessed used $Opt->option notation.

=cut

sub get_options ($argv_aref) {

    my @options = (
        'limit=i',              # lower bound of extremes daily temperature
        'ndays=i',              # number of consecutive days needed to be a extremes
        'peryear',              # report number of heatwaves per year
        'cold',                 # report coldwaves instead of heatwaves
        'nogaps',               # generate a line for missing years (for charting)
        'outclip',              # output data to the Windows clipboard
        'help','usage|?',       # help
    );

    my %opt;

    # create a list of option key names by stripping the various adornments
    my @keys = map { (split m{ [!+=:|] }xms)[0] } grep { !ref  } @options;
    # initialize all possible options to undef
    @opt{ @keys } = ( undef ) x @keys;

    GetOptionsFromArray($argv_aref, \%opt, @options)
        or pod2usage(2);

    # Make %opt into an object and name it the same as what we usually
    # call the global options object.  Note that this doesn't set the
    # global -- the script will have to do that using the return value
    # from this function.  But, what this does is allow us to call
    # $Opt->help and other option within this function using the same
    # syntax as what we use in the script.  This is handy if you need
    # to rename option '-foo' to '-bar' because you can do a find/replace
    # on '$Opt->foo' and you'll get any instances of it here as well as
    # in the script.

    ## no critic [Capitalization]
    ## no critic [ProhibitReusedNames]
    my $Opt = _wrap_hash \%opt;

    pod2usage(1)             if $Opt->usage;
    pod2usage(-verbose => 2) if $Opt->help;

    return $Opt;
}

1;  # needed in case we import this as a module (e.g. for testing)

=head1 AUTHOR

Gary Puckering (jgpuckering@rogers.com)

=head1 LICENSE AND COPYRIGHT

Copyright 2022, Gary Puckering

=cut
