#!/usr/local/bin/perl

=head1 NAME

Calculate Generic

=head1 VERSION

0.05

=head1 SYNOPSIS

perl gus_calc_generic.pl --fm /foo/bar/input.txt --to /foo/bar/output.txt

=head1 DESCRIPTION

Reads in a pure ASCII *.CSV, comma-delimited text file representing a
table of acceptance ranges and data sets. Format of input table must be
of a kind similar to the example/development-test DATA embedded herein
just beneath the __END__ statement.

Output is yet another pure ASCII, comma-delimited text file reporting
on the passage or failure of each included data set against each
included acceptance range. Cumulative (row-wise) passage or failure is
indicated rightmost of each row. Individual (column-wise) failures are
separately indicated within each.

General intent is that data normally entered into columns by hand onto
printed forms may be easily transcribed into plain ASCII text
documents, and then fed to this script. Obtained will be a report
easily embedded in any document without need of linking to yet another
(spreadsheet, etc.) document for simple presentation. Such format is
especially handy for inclusion in XML and word processor documents.
Alternately, the resulting *.CSV file may instead be opened directly
as a spreadsheet, if desired.

=head2 STAND-ALONE TEST

You may run this program by itself as a self-test of the program. It
will read in default DATA from the end-of-script (below the __END__
line) as its input. Likewise, you may study said end-of-script example
data to configure your own pre-report source data.

=head1 INPUT CRITERIA

The embedded example DATA are so configured as to make usage
self-evident. The rules are simple.

1. Matrix must be rectangular, columns delimited by commas, rows by
newlines.

2. First line must provide a UNIQUE title for each column.

3. One or more test sets must be provided. Each test set provides a
name for itself in the 0th (far left) column, and an acceptance range
in each subsequent column.

4. One or more data sets must be provided.
Each data set provides a name for itself in the 0th (far left)
column, and a data value in each subsequent column.

=head1 BUGS AND LIMITATIONS

None discovered by author as yet.

=head1 AUTHOR

Gan Uesli Starling

=head1 LICENSE AND COPYRIGHT

Copyright (c) 2007 by Gan Uesli Starling. All rights reserved.

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut

use strict;
use warnings;
use Getopt::Long;

my ($VERSION) = '$Revision: 0.05 $' =~ m{ \$Revision: \s+ (\S+) }xm;

my ($path_input, $path_output);
my $this_perl = __FILE__;

my $FAIL_FLAG  = '(*)';   # Failure flag & RegEx used to mark & find bad results.
my $FAIL_REGEX = escape_regex_metachars($FAIL_FLAG);   # Let flag hold any chars.
my $FAIL_STYLE = q|style="color:red;font-weight:bold;"|;   # For XML/HTML use.
my $VERBOSITY  = 0;       # How much debug info to display.
my $FMT_RH     = '%13s';  # Format of the furthest right-hand column.

# Escape every RegEx metacharacter so $FAIL_FLAG may hold any characters.
sub escape_regex_metachars {
    my @chars = split //, $_[0];
    for (@chars) { $_ = "\\$_" if $_ =~ /\\|\.|\^|\$|\*|\+|\?|\{|\}|\[|\]|\(|\)|\|/ }
    return join '', @chars;
}

# For when options are given at startup on the command line.
GetOptions(
    "fm=s" => \$path_input,     # Whence to read input data.
    "to=s" => \$path_output,    # Whither to write output data.
);

# Default to empty strings so later interpolation and open() are warning-free
# when the options were not given (the open below then fails over to DATA).
for ($path_input, $path_output) { $_ = '' unless defined }

# Perlify paths.
for ($path_input, $path_output, $this_perl) { $_ =~ s{\\}{\/}g }

my $report = "\nReport generated by Perl script:\n\t'$this_perl' ";
$report .= "version $VERSION \n";
$report .= "Input data read from:\n\t'$path_input'\n\n";

# Open and read in a pure-ASCII flow data file.
if (open my $fh, '<', $path_input) {    # Real data exists.
    while (<$fh>) { read_input($_) }
}
else {                                  # Oops! Fake it.
    $report .= "Oops! Cannot open input file:\n\t$path_input\n\t$!\n\n";
    $report .= "Note! End-of-script DATA read in as default.\n";
    my @eof_data;
    while (<DATA>) {            # Read in default data from end-of-script.
        read_input($_);         # NOTE: may 'next' this loop for non-table lines.
        $_ =~ s{\s+}{ }g;       # Scrunch space to fit screen.
        push @eof_data, $_;     # Store where can print.
    }
    $report .= "Note! A proper *.CSV data file looks like this...\n\n";
    $report .= join "\n", @eof_data;    # Show scrunched example data.
    $report .= "\n\n...which would yield you an output *.CSV like this...\n\n";
}

generate_report($report);

# Compile the full report (plain table, HTML-ified, and 90-degree rotated),
# print it to the CLI, and append it to $path_output when openable.
sub generate_report {
    my $report = shift;
    test_all_data_all_ranges();    # Prepare data so column width will be known.
    my $report_00 = calc_col_fmts();          # Compile the column header.
    $report_00 .= results_all_ranges();       # Compile results.
    my $report_90 = csv_rot_90($report_00);   # Also a 90-deg rotated table.
    $report .= $report_00;
    $report .= "\nBelow is same table as above, HTML-ified.\n\n";
    $report .= htmlify($report_00);
    $report .= "\nBelow is same table as the first one, rotated 90 degrees\n";
    $report .= "and re-grouped into sets by test range.\n\n";
    $report .= $report_90;
    $report .= "Below is same table group as above, HTML-ified\n\n";
    $report .= htmlify($report_90);
    print $report;    # Display in CLI window.
    if (open my $fh, '>>', $path_output) {
        print {$fh} $report;
        close $fh or print "Oops! Could not close '$path_output': $!\n";
    }
    else {
        print "Oops! Could not append to '$path_output': $!\n";
    }
}

my @col_heads;    # Unique column titles from the first table line.
my @test_sets;    # Hashrefs of acceptance-range rows, keyed by column head.
my @data_sets;    # Hashrefs of measurement rows, keyed by column head.

# Parse one line of input (file or end-of-script DATA) into @col_heads,
# @test_sets or @data_sets as appropriate. Comment/blank/short lines
# restart the CALLER's read loop via 'next'.
sub read_input {
    no warnings 'exiting';    # 'next' below deliberately iterates the caller's loop.
    if ($_ =~ /(^#)|(^\s*$)/) {    # Commented and blank lines?
        $report .= $_;             # ...pass through to output...
        next;                      # ...skip until next line.
    }
    chomp $_;
    next unless $_ =~ /(.+,){2}.+/;    # Valid lines have min 3 cells.
    $_ =~ s/^\s*//;                    # Scrunch away leading spaces.
    my @line = split /,/, $_;          # Break up comma-delimited cells.
    for (@line) {
        $_ =~ s/(^\s+)|(\s+$)//g;      # Lose wasteful whitespace.
    }
    unless (@col_heads) {                  # Column heads defined?
        @col_heads = @line;                # Parse 1st line as column head.
        uniqueify_strings(\@col_heads);    # Enforce uniqueness on strings.
        debug(3, "$line[0] holds column heads. \n");
        return;                            # Go back for Nth line.
    }
    if ($_ =~ /(~|<|>)/) {    # Nth line is a test set.
        push @test_sets, parse_line(@line);
        debug(3, "$test_sets[-1]{$col_heads[0]} holds test columns. \n");
        prep_test_hash($test_sets[-1]);
    }
    else {                    # Nth line is a data set.
        push @data_sets, parse_line(@line);
        debug(3, "$data_sets[-1]{$col_heads[0]} holds data columns. \n");
        prep_data_hash($data_sets[-1]);
    }
}

# Given an array of strings, enforce their uniqueness relative to one
# another by suffixing ' (1)', ' (2)', etc. to duplicates.
sub uniqueify_strings {
    my $aref = shift;
    my %temp;
    for my $a (@$aref) {
        my $i = 1;
        my $b = $a;
        # BUGFIX: was $b = "$a ($i)" with $i never advancing -- an endless
        # loop whenever a title occurred three or more times.
        while (defined $temp{$b}) { $b = "$a (" . $i++ . ")" }
        $a = $b;          # Aliased write-back into @$aref.
        $temp{$a} = 0;    # Just so as to define it.
    }
}

# Parse a test or data line into a hash sourcing @col_heads for keys.
sub parse_line {
    my %hash;
    for (@col_heads) { $hash{$_} = shift }    # shift() walks @_ cell by cell.
    return \%hash;
}

# Rewrite each test-set cell as a Perl expression with '?' placeholders,
# later eval()'d once per data cell. Also store an eye-candy 'Pretty' copy.
sub prep_test_hash {
    my $href = shift;
    $href->{$col_heads[0] . ' Pretty'} = $href->{$col_heads[0]};
    for my $id (@col_heads[1 .. $#col_heads]) {
        if ($href->{$id} =~ /^\s*\d+(\.\d+)*~\d+(\.\d+)*\s*$/) {    # Range test
            my @hi_lo = split /~/, $href->{$id};
            for (@hi_lo) { $_ = justify_res($_) }    # Resolution
            if ($hi_lo[0] < $hi_lo[1]) {
                $href->{$id} = "$hi_lo[0] < ? && ? < $hi_lo[1]";    # Usual way.
            }
            else {
                $href->{$id} = "$hi_lo[0] > ? && ? > $hi_lo[1]";    # Other way.
            }
        }
        elsif ($href->{$id} =~ /^\s*(>|<)\d+(\.\d+)?\s*$/) {    # < or > X
            $href->{$id} =~ s/>/? > /;
            $href->{$id} =~ s/</? < /;
        }
        elsif ($href->{$id} =~ /^\s*\d+(\.\d+)?(>|<)\s*$/) {    # X < or >
            $href->{$id} =~ s/>/ > ?/;
            $href->{$id} =~ s/</ < ?/;
        }
        elsif ($href->{$id} =~ /%/) {    # Pcts
            mk_pct_test($href, $id);
        }
        else {
            $href->{$id} = "'$href->{$id}'";    # Some other: always-true string.
        }
        $href->{$id . ' Pretty'} = $href->{$id};    # Eye candy.
        $href->{$id . ' Pretty'} =~ s/\? && \?/?/;
        # Un-show insertions required by relative addressing to
        # compare percentages against other column of same row.
        # (Braces escaped: bare '{' here is fatal on modern Perl.)
        $href->{$id . ' Pretty'} =~ s/ \/ 100 \* \$d_href->\{"/%(/g;
        $href->{$id . ' Pretty'} =~ s/"\}/)/g;
        # \Q guards column titles that themselves hold regex metachars.
        for (@col_heads) { $href->{$id . ' Pretty'} =~ s/\(\Q$_\E\)//g }
        debug(3, "\tColumn $id test is '$href->{$id}'.\n");
    }
}

# Given a test of percentages, convert to arguments which eval() will later
# call into action. Have to do it this convoluted way because the percentage
# is against a same-row data value not yet known: the generated expression
# names $d_href, a lexical in scope where test_data_at_range() runs eval().
sub mk_pct_test {
    my ($href, $key) = @_;
    my ($test, $rel_col) = split /\(|\)/, $href->{$key};    # e.g. '>103% ', 'Col 4'
    $test =~ s/%//g;
    $test =~ s/\s//g;
    my @hi_lo    = split /~/, $test;
    my $ascending = 0;
    if (@hi_lo == 2) {    # Only a min~max test has two endpoints.
        for (@hi_lo) { $_ = justify_res($_) }      # Resolution.
        $ascending = $hi_lo[0] < $hi_lo[1];        # Compare while still plain numbers.
        for (@hi_lo) { $_ .= q| / 100 * $d_href->{"| . $rel_col . q|"}| }
    }
    if ($test =~ /^\s*\d+(\.\d+)*~\d+(\.\d+)*\s*$/) {    # Range test
        if ($ascending) {
            $href->{$key} = "$hi_lo[0] < ? && ? < $hi_lo[1]";    # Usual way.
        }
        else {
            $href->{$key} = "$hi_lo[0] > ? && ? > $hi_lo[1]";    # Other way.
        }
        return;
    }
    my $num = $test;
    $num =~ s/<|>//g;
    my $arg = justify_res($num);    # Resolution.
    $arg .= q| / 100 * $d_href->{"| . $rel_col . q|"}|;    # Build test args.
    if ($test =~ /^\s*(>|<)\d+(\.\d+)?\s*$/) {    # < or > X
        $test =~ s/>$num/? > $arg/;
        $test =~ s/<$num/? < $arg/;
        $href->{$key} = $test;
    }
    elsif ($test =~ /^\s*\d+(\.\d+)?(>|<)\s*$/) {    # X < or >
        $test =~ s/$num>/$arg > ?/;
        $test =~ s/$num</$arg < ?/;
        $href->{$key} = $test;
    }
    else {
        debug(1, "Oops! Unexpected test '$test' at mk_pct_test() subroutine. \n");
    }
}

# Normalize each data cell's digit resolution.
sub prep_data_hash {
    my $href = shift;
    for (@col_heads[1 .. $#col_heads]) {
        $href->{$_} = justify_res($href->{$_});    # Resolution
        debug(3, "\tColumn $_ data is '$href->{$_}'.\n");
    }
}

# Set digit resolution on value: keep as many decimal places as given,
# but none at all when the integer part is wider than 3 digits.
sub justify_res {
    my $val = shift;
    my ($int, $frac) = split /\./, $val;
    my $digits = defined $frac ? length $frac : 0;    # No decimals given? Keep none.
    $digits = 0 if length $int > 3;
    my $fmt = '%.' . $digits . 'f';
    return sprintf $fmt, $val;    # Resolution
}

# Calculate a minimal width for each column for a pretty report.
my %col_fmts;

sub calc_col_fmts {
    my $report = '';
    for my $id (@col_heads) {
        $col_fmts{$id} = length $id;
        for (@test_sets) {
            $col_fmts{$id} = length $_->{$id . ' Pretty'}
                if $col_fmts{$id} < length $_->{$id . ' Pretty'};
        }
        for my $t_id (0 .. $#test_sets) {
            for (@data_sets) {
                $col_fmts{$id} = length $_->{"$t_id $id" . ' Pretty'}
                    if $col_fmts{$id} < length $_->{"$t_id $id" . ' Pretty'};
            }
        }
        $col_fmts{$id} = '%' . $col_fmts{$id} . 's';
        $report .= sprintf($col_fmts{$id}, $id) . ", ";
        debug(3, "Format for column $id will be '$col_fmts{$id}' chars.\n");
    }
    return $report . sprintf $FMT_RH, "Combined\n\n";
}

# Rotate a *.CSV matrix 90 degrees.
# Result similar to rotating JPEG from landscape to portrait.
sub csv_rot_90 {
    my $pre_rot = shift;
    my @rows;
    for (split /\n/, $pre_rot) {
        $_ = ' ' x 3 if $_ =~ m/^\s*$/;    # Limit col-width of blank rows.
        push @rows, $_;
    }
    my @fmts_90;
    for (@rows) {
        my @cols = split /,/, $_;
        $_ = \@cols;
    }
    for my $i (0 .. $#rows) {              # By rows first.
        for my $j (0 .. $#{$rows[0]}) {    # By columns second.
            $rows[$i][$j] = '' unless defined $rows[$i][$j];    # Pad ragged rows.
            $rows[$i][$j] =~ s/\s+/ /g;
            my $k = length $rows[$i][$j];
            $fmts_90[$i] = $k if !defined $fmts_90[$i] or $fmts_90[$i] < $k;
        }
    }
    for (@fmts_90) { $_ = '%' . ($_ + 2) . 's' }    # Cols wider by 2 spaces.
    my $rot_90 = '';
    for my $j (0 .. $#{$rows[0]}) {    # By columns first
        for my $i (0 .. $#rows) {      # By rows second
            $rot_90 .= sprintf $fmts_90[$i], $rows[$i][$j] . ',';
        }
        $rot_90 .= "\n";
    }
    return regroup_cvs_90_by_range($rot_90);
}

# Given a cvs_90 rotation, regroup splitting on empty columns.
sub regroup_cvs_90_by_range {
    my @lines = split /\n/, $_[0];
    my @groups = ([]);
    for (@lines) {
        my @cols = split /,\s+,/, $_;    # Empty columns divide the groups.
        for (0 .. $#cols) {
            push @{$groups[$_]}, $cols[$_];
        }
    }
    my $regrouped = "\n";
    for my $i (1 .. $#groups) {
        for my $j (0 .. $#{$groups[0]}) {
            my $line = defined $groups[0][$j] ? $groups[0][$j] : '';
            $line .= $groups[$i][$j] if defined $groups[$i][$j];
            $regrouped .= justify_flag($line) . "\n";
        }
        $regrouped .= "\n\n";
    }
    return $regrouped;
}

# Escape angle-brackets for markup languages.
# Use style attrs to highlight cells which fail their tests.
sub htmlify {
    my $lines = shift;
    $lines =~ s/</&lt;/g;    # Entity-escape BEFORE inserting our own tags.
    $lines =~ s/>/&gt;/g;
    $lines =~ s/($FAIL_REGEX\s+\w+\.?\w*)(,)?/<span $FAIL_STYLE>$1<\/span>$2/g;
    $lines =~ s/^\n*/\n/;    # Allow but a single leading newline...
    $lines =~ s/\n*$/\n/;    # ...and a single trailing one.
    return "<pre>\n$lines\n</pre>\n";
}

# Generate formated entries reporting on acceptance for
# each data set against every test set.
sub test_all_data_all_ranges {
    my $t_id = 0;
    for my $t_href (@test_sets) {
        for my $d_href (@data_sets) {
            test_data_at_range($t_href, $d_href, $t_id);
        }
        $t_id++;
    }
}

# Generate formatted entries reporting on acceptance for a single data
# set at only one range. Results land back in the data hash under
# "$t_id $col Pretty" keys, flagged with $FAIL_FLAG on failure.
sub test_data_at_range {
    my ($t_href, $d_href, $t_id) = @_;    # Test ID to specify with (Col) ID.
    $d_href->{"$t_id " . $col_heads[0] . ' Pretty'} = $d_href->{$col_heads[0]};
    for my $id (@col_heads[1 .. $#col_heads]) {
        my $test = $t_href->{$id};         # Template
        $test =~ s/\?/$d_href->{$id}/g;    # Insert test value
        # SECURITY NOTE: string eval() of expressions this script itself
        # generated; percentage tests name the lexical $d_href above.
        if (eval $test) {
            $d_href->{"$t_id $id" . ' Pretty'} = $d_href->{$id};    # Pass
        }
        else {
            $d_href->{"$t_id $id" . ' Pretty'} = $FAIL_FLAG . $d_href->{$id};
        }
    }
}

# Compile the pre-generated entries as a report line for one range.
sub results_one_range {
    my ($t_href, $t_id) = @_;
    my $report = '';
    for (@col_heads) {
        $report .= sprintf($col_fmts{$_}, $t_href->{$_ . ' Pretty'}) . ", ";
    }
    $report .= sprintf $FMT_RH, "Results\n";
    for my $d_href (@data_sets) {
        my $row_rpt = '';
        for (@col_heads) {
            $row_rpt .= sprintf($col_fmts{$_}, $d_href->{"$t_id $_" . ' Pretty'}) . ", ";
        }
        $row_rpt = justify_flag($row_rpt);
        my $failures = 0;
        while ($row_rpt =~ m/$FAIL_REGEX/g) { $failures++ }
        if ($failures > 1) {
            $row_rpt .= ', ' . sprintf $FMT_RH, "$failures FAILURES!\n";
        }
        elsif ($failures == 1) {
            $row_rpt .= ', ' . sprintf $FMT_RH, "1 FAILURE!\n";
        }
        else {
            $row_rpt .= ', ' . sprintf $FMT_RH, "PASS\n";
        }
        $report .= $row_rpt;
    }
    return "$report\n";
}

# Cause $FAIL_FLAG to justify at the left-hand edge of each data column.
sub justify_flag {
    my @cells = split /, /, $_[0];
    for my $i (0 .. $#cells) {
        while ($cells[$i] =~ m/\s$FAIL_REGEX/) {
            $cells[$i] =~ s/ $FAIL_REGEX/$FAIL_FLAG /;    # Walk flag leftward.
        }
    }
    return join ', ', @cells;
}

# Compile the pre-generated entries as a composite report for all test ranges.
sub results_all_ranges {
    my $report = '';
    my $t_id = 0;
    for my $t_href (@test_sets) {
        $report .= results_one_range($t_href, $t_id);
        $t_id++;
    }
    return $report;
}

# Print a debug message when its level is within $VERBOSITY.
sub debug {
    print $_[1] if $_[0] <= $VERBOSITY;
}

__END__

# The script above reads in a *.CSV file that should parse VERY SIMILAR to this.
# RULES:
# 1. Blank lines and comments (starting with '#') will be ignored.
# 2. Columns:
#    A. Column heads (top row) should all be unique.
#    B. Input columns are comma-delimited. Space padding ignored.
#    C. Output columns will be comma-delimited and space-padding aligned.
# 3. Rows:
#    A. Row heads (left column) divided into sections as follows:
#    B. Test sets (acceptance ranges) mostly have ~, <, > or % signs in columns.
#    C. Data sets (measurements) never have ~, <, > or % signs in any column.
# 4. Test Sets:
#    A. Ranges denoted by tilde symbols: min~max or max~min.
#    B. Greater- or less-than symbolized by < or >.
#    C. Percentages relative to (Named Column) must match existing column head.
#    D. Any other text Record/Ignore/Skip/Foo always true (passes).
# 5. Data Sets:
#    A. Data type must be appropriate for test set type: numeric, etc.

Titles, Col 1, Col 2, Col 3, Col 4, Col 5, Col 6, Col 7, Col 8, Col 9, Col 10, Col 11 (%Col 4), Col 12 (%Col 5)

Range 1, 12.5~16.5, 41.5~28.5, 193~217, 490~510, 620~640, Record, 4.0>, <1.0, >1.2, 3.0~10.0, >103% (Col 4), 98%~102% (Col 5)
Range 2, 11.0~18.0, 43.0~27.0, 188~219, 483~527, 605~665, Ignore, 6.0>, <1.1, >1.1, 3.5~11.0, 105%< (Col 4), 96%~104% (Col 5)
Range 3, 10.0~19.0, 47.5~25.5, 185~222, 473~538, 595~677, Skip, 8.0>, <1.2, >1.0, 4.0~12.0, >107% (Col 4), 106%~94% (Col 5)

Data A, 15.0, 30.0, 200, 520, 630, 200, 0.0, 0.0, 6.1, 7.7, 559, 625
Data B, 15.2, 36.1, 213, 505, 667, 207, 0.0, 0.0, 5.8, 7.4, 541, 687
Data C, 17.2, 42.1, 227, 535, 672, 211, 0.0, 0.0, 2.3, 10.4, 576, 671