#!/usr/bin/perl # # Morse code audio file generator program. # Copyright 2006-2024 by Gan Uesli Starling, KY8D, VA7KYD, ZS1KYD, and T000000139 # Windows speed-up modifications by Bill Lanahan, WA2NFN # These modules you need to install. use Audio::Wav; # Precision of tone may depend on CPU speed. use Time::HiRes qw( gettimeofday ); # Used with tags in files. use File::Path qw( rmtree ); use File::Copy; my $windoze_flag = 0; # Set to 1 only for making EXE using PP. $windoze_flag = 1 if $^O =~ m/MSWin/; # These modules are standard. use Getopt::Long; use Config; use warnings; use strict; STDOUT -> autoflush(1); # else bash hangs on prints from windoze simple menu (WDL##) # Set defaults my $VERSION = '2024-01-03'; # Update to make Farnsworth work like ARRL practice skeds. Such that $wpm cannot advance by $incr until $farn >= $wpm my $txt_path = './_ReadMe.txt'; my $codec = 'wav'; my $lang = 'en'; my $wav_path = ''; my $sample_rate = 11025; my $tone = 700; my $max_wav_mins = 5.0; # Minutes after which to break on next major punctuation. my $wpm = 25; # Default speed of characters (not always their spacing). my $farn = 0; # Speed of inter-char spaces, --f will fix, or later auto-set = wpm my $incr = 0.05; # Increase per file of wpm my $decr = 0.05; # Decrease per file of farn my $koch_flag = 0; # Used for Koch-method output. my $rand_flag = 0; # Randomize words read in from file? my $jmbl_flag = 0; # Mix up words from lines read in from file? my $help_flag = 0; #Give help? Called by switch. my $v_flag = 0; # Run verbosely? my $call_sign = 'KY8D'; # For head & tail of message files. Change to your own. my %tags = ( generator => 'gus_morse.pl', genre => "'Morse Code'", artist => 'KY8D.NET',); # ID3 tags only. my $volume = 0.5; # How loud? my $about_flag = 0; # Tell user about this program? my $quit_limit = 8192; # Exit after generating this many files. my $test_flag = 1; # Set hi for dummy run sans wav files. Always done first. my $dgt_cnt = 4; # Assum thousands of files. Will auto-reduce after self-test. my $graphic = ''; # Path to album art obtained as switch. my $extra = 0; # Extra PARIS elements added to space between whole words (non-Farnsworth). my $count_re = '%04s'; # Default file-numbering if option. my $wav_cnt = -1; # Inc at TOP of loop so MP3 tag won't be off by one. my $punct = ''; # String of punctuation chars to exclude. &GetOptions( "about" => \$about_flag, "codec=s" => \$codec, "decr=f" => \$decr, "farn=f" => \$farn, "graphic=s" => \$graphic, "help" => \$help_flag, "incr=f" => \$incr, "jumble" => \$jmbl_flag, "koch" => \$koch_flag, "lang=s" => \$lang, "mins=f" => \$max_wav_mins, "path=s" => \$txt_path, "nopunct=s" => \$punct, "quit=i" => \$quit_limit, "rand" => \$rand_flag, "samp=i" => \$sample_rate, "test" => \$test_flag, "tone=i" => \$tone, "verbose" => \$v_flag, "wpm=f" => \$wpm, "extra=i" => \$extra, ); my @punct = split('', $punct); # User-hated punctuation into array. for (@punct) { if ($_ =~ m/[\[\]]/) { $_ = qq|\\| . qq|$_|; # Escape brackets and parens. } } my @channels = (); # use for writes # When on Windoze, be ultra-simple. if ($windoze_flag && $help_flag != 1 ) { my $foo; print "\nRunning gus_morse.pl by KY8D, version $VERSION\n"; print "\nEnter user options (default value)...\n"; print "Character sound-off speed in wpm? ($wpm): "; chomp( $foo = ); $wpm = $foo if $foo =~ m/[0-9]/; print "Per-file char-speed increment in wpm? ($incr): "; chomp( $foo = ); $incr = $foo if $foo =~ m/[0-9]/; print "Starting Farnsworth gap rate in wpm? ($farn): "; chomp( $foo = ); $farn = $foo if $foo =~ m/[0-9]/; if ($farn) { # Ask only when relevent. print "Per-file Farnsworth increment in wpm? ($decr): "; chomp( $foo = ); $decr = $foo if $foo =~ m/[0-9]/; } print "Extra dit-widths between whole words? ($extra): "; chomp( $foo = ); $extra = $foo if $foo =~ m/[0-9]/; print "Average file length in minutes? ($max_wav_mins): "; chomp( $foo = ); $max_wav_mins = $foo if $foo =~ m/[0-9]/; print "Max limit on number of output files? ($quit_limit): "; chomp( $foo = ); $quit_limit = $foo if $foo =~ m/[0-9]/; print "Input filename? ($txt_path): "; chomp( $foo = ); $foo =~ s{\\}{/}g; $txt_path = $foo if $foo =~ m/\.txt$/; print "Output as WAV or MP3 files? ($codec): "; chomp( $foo = ); $codec = 'mp3' if $foo =~ m/mp3/i; print "\n"; } $farn = $wpm if $farn > $wpm; # Fix if no --f option given. sub quick_help { my $help_msg = <= $wpm ) { printf("\nNOTE: Farnsworth & WPM speeds will converge at %.2f wpm on file $x.\n", $wpm); last; } elsif ($x > 10_000) { print "\nOOPS! No convergence within 10k loops. ($farn)\n"; last; } ++$x; } } # Hash of known Morse code characters. my %morse = ( # Punctuation ' ' => sub { space(); }, # Equivalent to 7 dits, Farnsworth adjusted. '.' => sub { di(); da(); di(); da(); di(); da(); space(); }, ',' => sub { da(); da(); di(); di(); da(); da(); space(); }, ':' => sub { da(); da(); da(); di(); di(); di(); space(); }, ';' => sub { da(); di(); da(); di(); da(); di(); space(); }, # KR '?' => sub { di(); di(); da(); da(); di(); di(); space(); }, # Question '!' => sub { da(); di(); da(); di(); da(); da(); space(); }, # KW (Non ITU, proposed by Heathkit) '?x' => sub { di(); di(); da(); da(); di(); di(); stp(); }, # Spanish inverted question '!x' => sub { da(); di(); da(); di(); da(); da(); stp(); }, # Spanish inverted bang "'" => sub { di(); da(); da(); da(); da(); di(); stp(); }, # Apostrophe, single quote '-' => sub { da(); di(); di(); di(); di(); da(); stp(); }, # Hyphen '–' => sub { da(); di(); di(); di(); di(); da(); stp(); }, # Hyphen for n-dash '—' => sub { da(); di(); di(); di(); di(); da(); stp(); da(); di(); di(); di(); di(); da(); stp(); }, # Double hyphen for m-dash '_' => sub { di(); di(); da(); da(); di(); da(); stp(); }, # UK (Non ITU) for underscore. '=' => sub { da(); di(); di(); di(); da(); stp(); }, # BT '/' => sub { da(); di(); di(); da(); di(); stp(); }, # DN '(' => sub { da(); di(); da(); da(); di(); da(); stp(); }, # KN ')' => sub { da(); di(); da(); da(); di(); da(); stp(); }, # KK '[' => sub { da(); di(); da(); da(); di(); da(); stp(); }, # Same as paren ']' => sub { da(); di(); da(); da(); di(); da(); stp(); }, # Same as paren '"' => sub { di(); da(); di(); di(); da(); di(); stp(); }, # RR '+' => sub { di(); da(); di(); da(); di(); stp(); }, # AR '&' => sub { di(); stp(); di(); di(); di(); stp(); }, # e s (Non ITU) '$' => sub { di(); di(); di(); da(); di(); di(); da(); stp(); }, # SX '@' => sub { di(); da(); da(); di(); da(); di(); stp(); }, # WR # Most common letters 'A' => sub { di(); da(); stp(); }, 'B' => sub { da(); di(); di(); di(); stp(); }, 'C' => sub { da(); di(); da(); di(); stp(); }, 'D' => sub { da(); di(); di(); stp(); }, 'E' => sub { di(); stp(); }, 'F' => sub { di(); di(); da(); di(); stp(); }, 'G' => sub { da(); da(); di(); stp(); }, 'H' => sub { di(); di(); di(); di(); stp(); }, 'I' => sub { di(); di(); stp(); }, 'J' => sub { di(); da(); da(); da(); stp(); }, 'K' => sub { da(); di(); da(); stp(); }, 'L' => sub { di(); da(); di(); di(); stp(); }, 'M' => sub { da(); da(); stp(); }, 'N' => sub { da(); di(); stp(); }, 'O' => sub { da(); da(); da(); stp(); }, 'P' => sub { di(); da(); da(); di(); stp(); }, 'Q' => sub { da(); da(); di(); da(); stp(); }, 'R' => sub { di(); da(); di(); stp(); }, 'S' => sub { di(); di(); di(); stp(); }, 'T' => sub { da(); stp(); }, 'U' => sub { di(); di(); da(); stp(); }, 'V' => sub { di(); di(); di(); da(); stp(); }, 'W' => sub { di(); da(); da(); stp(); }, 'X' => sub { da(); di(); di(); da(); stp(); }, 'Y' => sub { da(); di(); da(); da(); stp(); }, 'Z' => sub { da(); da(); di(); di(); stp(); }, '0' => sub { da(); da(); da(); da(); da(); stp(); }, '1' => sub { di(); da(); da(); da(); da(); stp(); }, '2' => sub { di(); di(); da(); da(); da(); stp(); }, '3' => sub { di(); di(); di(); da(); da(); stp(); }, '4' => sub { di(); di(); di(); di(); da(); stp(); }, '5' => sub { di(); di(); di(); di(); di(); stp(); }, '6' => sub { da(); di(); di(); di(); di(); stp(); }, '7' => sub { da(); da(); di(); di(); di(); stp(); }, '8' => sub { da(); da(); da(); di(); di(); stp(); }, '9' => sub { da(); da(); da(); da(); di(); stp(); }, # Spanish characters. Work undelimited when *LANG=ES* # In 2010, the Royal Spanish Academy officially removed 'ch' and 'll' from the alphabet. # Only non-ITU code in actual use, according to PMs with EA stations on-line. 'Nx' => sub { da(); da(); di(); da(); da(); stp(); }, # N tilde # Esperanto characters. Work undelimited when *LANG=EO* 'Cx' => sub { da(); di(); da(); di(); di(); stp(); }, 'Gx' => sub { da(); da(); di(); da(); di(); stp(); }, 'Hx' => sub { da(); da(); da(); da(); stp(); }, 'Jx' => sub { di(); da(); da(); da(); di(); stp(); }, 'Sx' => sub { di(); di(); di(); da(); di(); stp(); }, 'Ux' => sub { di(); di(); da(); da(); stp(); }, # Additional ITU code. Defined but not in actual use. 'Ex' => sub { di(); di(); da(); di(); di(); stp(); }, # ITU E-Accent # Common prosigns. Require delimiting as *AA*, etc. # Some are redundant with punctuation. 'AA' => sub { di(); da(); di(); da(); stp(); }, # 'End of line' 'AR' => sub { di(); da(); di(); da(); di(); stp(); }, # 'End of message' or '+' 'AS' => sub { di(); da(); di(); di(); di(); stp(); }, # 'Wait' 'BT' => sub { da(); di(); di(); di(); da(); stp(); }, # 'Break' or 'Um, er, ah' or '=' or 'End of paragraph' 'CL' => sub { da(); di(); da(); di(); di(); da(); di(); di(); stp(); }, # 'Closing station' 'KA' => sub { da(); di(); da(); di(); da(); stp(); }, # Start of message. 'KN' => sub { da(); di(); da(); da(); di(); stp(); }, # 'Over to named station' 'NR' => sub { da(); di(); di(); da(); di(); stp(); }, # 'Number(s) to follow' 'SK' => sub { di(); di(); di(); da(); di(); da(); stp(); }, # 'End of contact' 'SN' => sub { di(); di(); di(); da(); di(); stp(); }, # 'Understood' or 'Sx' in Eo. 'IMI' => sub { di(); di(); da(); da(); di(); di(); stp(); }, # 'Huh?' or 'I say again' 'SOS' => sub { di(); di(); di(); da(); da(); da(); di(); di(); di(); stp(); }, 'ERROR' => sub { di(); di(); di(); di(); di(); di(); di(); di(); stp(); }, ); # Operations embeded in text, like so: *TONE=800* *WPM=20* my %ops = ( 'ABOUT' => sub { $about_flag = $_[0]; print " Okay! Head and tail flag = $_[0] \n" if $v_flag; }, 'CODEC' => sub { $codec = $_[0]; print " Okay! Codec = $_[0] \n" if $v_flag; }, 'DECR' => sub { $decr = $_[0]; print " Okay! DECR = $_[0] \n" if $v_flag; }, 'FARN' => sub { $farn = $_[0]; print " Okay! FARN = $_[0] \n"; }, # Inform regardless of v_flag. 'INCR' => sub { $incr = $_[0]; print " Okay! INCR = $_[0] \n" if $v_flag; }, 'GRAPHIC' => sub { $graphic = $_[0]; print " Okay! Graphic = $_[0] \n" if $v_flag; }, 'JMBL' => sub { $jmbl_flag = $_[0]; print " Okay! Rand flag = $_[0] \n" if $v_flag; }, 'KOCH' => sub { $koch_flag = $_[0]; print " Okay! Koch flag = $_[0] \n" if $v_flag; }, 'LANG' => sub { $lang = lc($_[0]); print " Okay! Language = $_[0] \n" if $v_flag; }, 'MAX' => sub { $max_wav_mins = $_[0]; print " Okay! Max *.wav = $_[0] minutes.\n" if $v_flag; }, 'NEXT' => sub { next_wav(); print " Okay! Next file triggered. \n" if $v_flag; }, 'QUIT' => sub { $quit_limit = $_[0]; print " Okay! Quit after $_[0] files. \n" if $v_flag; }, 'RAND' => sub { $rand_flag = $_[0]; print " Okay! Rand flag = $_[0] \n" if $v_flag; }, 'TONE' => sub { $tone = $_[0]; print " Okay! Tone = $_[0] Hz \n" if $v_flag; }, 'EXTRA' => sub { $extra = $_[0]; print " Okay! $_[0] PARIS elements added to inter-word spaces \n" if $v_flag; }, 'WPM' => sub { set_wpm($_[0]); print " Okay! WPM = $_[0] \n"; }, # Inform regardless of v_flag. ); # Generate named master directory for audio files. Example: "r:/CW_Foo_Bar". sub choose_dir { my ($path) = @_; unless ($test_flag) { if (-d "$path") { print "\nDirectory already exists: '$path'\nOverwrite? (y/N) > "; my $foo; chomp( $foo = ); if ($foo =~ m/y/i) { rmtree($path); } else { die "\nQuitting. File-naming would be confused.\n"; } } if (mkdir "$path") { # print "Creating '$path'\n"; } else { print "Oops! Could not create directory '$path': $! \n"; } } return $path; } # Generate or reuse numerically named sub-directory for audio files. "r:/CW_Foo_Bar/000-127". sub choose_subdir { my ($ptr, $div, $dir) = @_; my $blw = $ptr - $ptr % $div; my $abv = $blw + $div; my $subdir = sprintf($count_re, $blw) . '-' . sprintf($count_re, $abv - 1); if ($ptr % $div == 0) { choose_dir("$dir/$subdir"); } return "$dir/$subdir/"; } # Dit length is the master time unit, as follows: # secs_per_min / words_per_min my $dit_length; sub set_wpm { if ($farn == 0 || $farn >= $wpm) { # Don't let farn fall behind once it has already convergned or been turned off. $farn = $wpm = sprintf("%.2f", shift); } else { $wpm = sprintf("%.2f", shift); } $dit_length = 1 / $wpm * $sample_rate; } set_wpm($wpm); # So can be flagged inside of text. my $pi = 3.14159265359 * 2; my $bits_sample = 16; my $max_no = ( 2**$bits_sample ) / 2 * $volume; my $details = { 'bits_sample' => $bits_sample, 'sample_rate' => $sample_rate, 'channels' => 1, }; my $write; my $msg_head; my $msg_tail = " *AR* DE $call_sign *SK* "; sub add_head { morsify( 0, (split //, $msg_head) ) } sub add_tail { morsify( 0, (split //, $msg_tail) ) } # Lose the text file name. sub get_dir_only { pop @_; return join '/', @_; } # Create file path for *wav sub wav_name { my @path_elems = split /\//, $txt_path; my @name_elems = split /\s|_/, pop @path_elems; # Name sans spaces. $name_elems[-1] =~ s/\.txt$/\.wav/; # Audio files named like text input. # Create the pathname. my $wav_path = join '/', @path_elems; # Assemble custom path... $wav_path .= '/' . join '_', @name_elems; # ...with custom dir for audio files... $wav_path =~ s/\.wav$//; # ...named like files themselves. # Create the filename. unshift @name_elems, sprintf($count_re,$wav_cnt); # Prepend file count. my $wav_name = join '_', @name_elems; # Assemble file name # Find or make the main dir, sub-dir... if ($wav_cnt == 0) { choose_dir($wav_path) } # Choose or create outer path, "r:/foo/CW_Moby_Dick" $wav_path = choose_subdir($wav_cnt, 128, $wav_path); # Choose or create inner path, "r:/foo/CW_Moby_Dick/000-127" return "$wav_path$wav_name"; } # Create a new, innumerated ouput *.wav file. sub new_wav { ++$wav_cnt; # Inc here so MP3 tag won't be off by one. my $wav = new Audio::Wav; $wav_path = wav_name(); # print "\nOUTPUT = '$wav_path'\n"; set_wpm($wpm); # die("\nOUTPUT = '$wav_path'\n"); unless ($test_flag) { mk_tag_hash(); $write = $wav->write( "$wav_path", $details ); $write->set_info( 'name' => "$tags{'name'}" ); $write->set_info( 'genre' => "$tags{'genre'}" ); if ($farn < $wpm) { $write->set_info( 'comment' => sprintf("%.2f chars spaced at %.2f wpm", $wpm, $farn)); } else { $write->set_info( 'comment' => "Generated $tags{'generator'}" ); } } if ($wav_cnt > 0) { $farn = sprintf("%.2f", $farn + $decr); if ($decr == 0 || $farn >= $wpm) { set_wpm($wpm + $incr); # When doing Farnsworth, advance WPM only after Farnsworth has caught up. } if ($farn > $wpm) { $farn = $wpm; # Can shorten no more. } } my $msg = " $wav_path "; if ($farn > 0 && $farn + 0.001 < $wpm) { # Because sometimes $farn = X.9999 $msg .= sprintf("@ %.2f/%.2f WPM", $farn, $wpm); $msg_head = sprintf("FILE: $wav_cnt @ %.2f/%.2f WPM", $farn, $wpm); } else { $msg .= sprintf("@ %.2f WPM", $wpm); $msg_head = sprintf("FILE: $wav_cnt @ WPM: %.2f ", $wpm); } print "$msg\n" unless $test_flag; $msg_head .= " *KA* "; $msg_head =~ s/\./R/g; # Swap period for Morse decimal. $msg_head = " *KA* " unless $about_flag; # Sometimes it's just annoying. add_head(); } # Return Date Time Group in ISO 8601 approved fashion. sub current_DTG { my ($secs, $msecs) = Time::HiRes::gettimeofday(); my ( $sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst ) = gmtime($secs); return sprintf( "%04d-%02d-%02d %02d:%02d:%02d.%5s", $year + 1900, $mon + 1, $mday, $hour, $min, $sec, $msecs ); } # Provide basic default info tags. sub mk_tag_hash { $tags{'name'} = (split /\//, $txt_path)[-1]; $tags{'name'} =~ s/\.txt$//; $tags{'name'} =~ s/_/ /g; $tags{'name'} = ucfirst $tags{'name'}; $tags{'date_time'} = current_DTG(); } # Called by certain punctuation to break *.wav files # into managable sizes at apporopriate points. sub next_wav { add_tail() if $about_flag; print("Done writing file: $wav_path \n\n") if $v_flag; $write->finish() unless $test_flag; oggify(); mp3ify(); new_wav(); } # Remove user-designated punctuation from text. sub punct { my $sref = shift; for (@punct) { if ($_ =~ m/[:;]/) { $$sref =~ s/$_/*BT* /g; # Replace colons and semicolons with break plus space. } else { $$sref =~ s/$_//g; # Other puncts replace with nothing. } } # print "LINE 463: $$sref \n"; } # A dit element. sub di { cw($dit_length); gap(); } # A dah element. sub da { cw( $dit_length * 3 ); gap(); } # Generate CW tone of required length sub cw { for ( 0 .. $_[0] ) { my $y = $max_no; if ($_ < 15) { $y *= $_ / 15 } # Rise time. elsif ($_[0] - $_ < 15) { $y *= ($_[0] - $_) / 15 } ; # Fall time. $channels[$_] = ( $y * sin( $pi * $_ / $sample_rate * $tone ) ); } $write->write( @channels ); @channels = (); } # Make space between dits. sub gap { for ( 0 .. $dit_length ) { $channels[$_] = 0; } $write->write(@channels); @channels = (); } sub calc_farn { # Reference http://www.arrl.org/files/file/Technology/x9004008.pdf my $ta = (60 * $wpm - 32.7 * $farn) / $farn / $wpm; return int( $ta / 19 * $sample_rate ); } # Make space between chars, uses Farnsworth speed for calc. # same as wpm if no --f option given. sub stp { if ($farn == 0 || $farn >= $wpm) { # Not Farnsworth for ( 0 .. int($dit_length * 3) ) { $channels[$_] = 0 } } else { # Is Farnsworth for ( 0 .. 3 * calc_farn() ) { $channels[$_] = 0 } } $write->write(@channels); @channels = (); } # Make space between words, uses Farnsworth speed for calc. # same as wpm if no --f option given. # Multiplier = 4 because "stp" already has three. sub space { if ($farn == 0 || $farn >= $wpm) { # Not Farnsworth for ( 0 .. int($dit_length * 4 + $dit_length * $extra) ) { $channels[$_] = 0; } } else { # Is Farnsworth my @timing = calc_farn(); for ( 0 .. 4 * calc_farn() + $dit_length * $extra ) { $channels[$_] = 0 } } $write->write(@channels); @channels = (); } # Assemble special character key from all between paired asterisks. # Return key and advanced pointer. # Example special characters: *AR*, *KN*, *SK*, *Oops!* sub special_char { my ($aref, $i) = @_; my $char = ''; my $j; ++$i; # Skip since '*' not defined in Morse code. # Assemble key for presumed special char. for ( $j = $i; $j <= $#$aref; ++$j ) { last if $aref->[$j] eq '*'; $char .= $aref->[$j]; } # Return key and new pointer. if ( defined($morse{"$char"}) ) { print " Okay! Special char: *$char* \n" if $v_flag; return ($char, $j); } else { if ( defined($ops{"$char"}) || $char =~ /=/ ) { special_op("$char"); return ('NOOP', $j); # Undefined char will be skipped. } elsif ( $j - $i > 16 ) { print " Oops! Lone asterisk '*' found in text. Replaced by 'error' code. \n" if $v_flag; return ('ERROR', $i + 1); } else { print " Oops! Undefined char: *$char* found in text. Replaced by 'error' code. \n" if $v_flag; return ('ERROR', $j); } } } # Perform embeded special operations such as *foobar* from input text. sub special_op { my ($op,$arg) = split /=/, $_[0]; if ( defined($ops{$op}) ) { $ops{$op}->($arg) } else { print " Oops! Skipping undefined special op: *$op* \n" if $v_flag; } } # Not exact PARIS length. Assume char-mix will average out. sub next_file_trigger { my $outfile_char_cnt = shift; my $outfile_mins = $outfile_char_cnt / $farn / 5; return $outfile_mins > $max_wav_mins; } # For flagging a file-break on appropriate punctuation. # ASCII quote-symbol as HEX \042 so editor won't foobar syntax highlighting. my $break_file_here = '(\.\042)|(\?\042)|(!\042)' # End of quotation. . '|(\. )|(\? )|(! )' # Sentence break mid-line. . '|(\.$)|(\?$)|(!$)'; # Sentence break at end-of-line. # Is this the start of a fresh pragraph? Give it a BT prosign. # Spaces give listener a restful short break, rather like in an actual QSO. sub paragraph_check { my ($i, $char) = @_; if ($i == 0) { # Zeroth char in line? if ($char eq qq|\n|) { # Is 0th char a newline? unless ($test_flag) { &{$morse{'BT'} }; # Insert break prosign. for (1..3) { &{ $morse{' '} }; } # Pause } return 4; # Increment character count. } else { return 0; } } else { return 0; } } # Roman numerals for sustitution by arabic my @roman = qw( I II III IV V VI VII VIII IX X XI XII XIII XIV XV XVI XVII XVIII XIX XX XXI XXII XXIII XXIV XXV XXVI XXVII XXVIII XXIX XXX XXXI XXXII XXXIII XXXIV XXXV XXXVI XXXVII XXXVIII XXXIX XL XLI XLII XLIII XLIV LXV LXVI XLVII XLVIII XLIX L LI LII LIII LIV LV LVI LVII LVIII LIX LXX LXI LXII LXIII LXIV LXV LXVI LXVII LXVIII LXIX LXX LXXI LXXII LXXIII LXXIV LXXV LXXVI LXXVII LXXVIII LXXIX LXXX LXXXI LXXXII LXXXIII LXXXIV LXXXV LXXXVI LXXXVII LXXXVIII LXXXIX XC XCI XCII XCIII XCIV XCV XCVI XCVII XCVIII XCVIX C ); # Title chapters as Arabic (not Roman) numerals. # English and Spanish sub fix_chapter_enumeration { no warnings; my $sref = shift; my $j=0; for (0..scalar@roman-1) { $j++; if ($lang eq 'en') { $$sref =~ s{\s+chapter $roman[$_]\.?\s*\n}{\nCHAPTER $j\n}i; } elsif ($lang eq 'es') { $$sref =~ s{\s+capitulo $roman[$_]\.?\s*\n}{\nCAPITULO $j\n}i; } } } # De-accent these chars regardless of language. # As '*' is undefined for Morse, an effective deletion. my @accents = qw( à A è E ì I ò O ù U ý Y À A È E Ì I Ò O Ù U Ý Y ä A ë E ï I ö O ü U ÿ Y Ä A Ë E Ï I Ö O Ü U Ÿ Y â A ê E î I ô O û U Â A Ê E Î I Ô O Û U á A é E í I ó O ú U Á A É E Í I Ó O Ú U ã A õ O Ã A Õ O ç C Ç C ø O Ø O ‘ ' ’ ' “ " ” " « " » " … = ); # De-accent Spanish chars when not Spanish my @accents_not_es = qw( ñ N Ñ N ); # X-ify Spanish chars when is Spanish my @accents_es = qw( ñ Nx Ñ Nx); # qw( é Ex É Ex ñ Nx Ñ Nx ¿ ?x ¡ !x); # De-hat Esperanto chars when not Esperanto. my @accents_not_eo = qw( ĉ C ĝ G ĥ H ĵ J ŝ S ŭ U Ĉ C Ĝ G Ĥ H Ĵ J Ŝ S Ŭ U ); # X-ify hatted letters when is Esperanto. my @accents_eo = qw( ĉ Cx ĝ Gx ĥ Hx ĵ Jx ŝ Sx ŭ Ux Ĉ Cx Ĝ Gx Ĥ Hx Ĵ Jx Ŝ Sx Ŭ Ux ); sub fix_accents { no warnings; my $sref = shift; # General swap-out of accented chars. for (0...scalar@accents/2-1) { $$sref =~ s{$accents[$_*2]}{$accents[$_*2+1]}g; } # Special case for when Spanish or not. if ($lang ne 'es') { for (0...scalar@accents_not_es/2-1) { $$sref =~ s{$accents_not_es[$_*2]}{$accents_not_es[$_*2+1]}g; } } elsif ($lang eq 'es') { for (0...scalar@accents_es/2-1) { $$sref =~ s{$accents_es[$_*2]}{$accents_es[$_*2+1]}g; } # Lose start-of-phrase punctuation. $$sref =~ s|[¿¡]||g; } # Special case for when Esperanto or not. if ($lang ne 'eo') { for (0...scalar@accents_not_eo/2-1) { $$sref =~ s{$accents_not_eo[$_*2]}{$accents_not_eo[$_*2+1]}g; } } elsif ($lang eq 'eo') { for (0...scalar@accents_eo/2-1) { $$sref =~ s{$accents_eo[$_*2]}{$accents_eo[$_*2+1]}g; } } } # Convert string to Morse code. sub morsify { my $outfile_char_cnt = shift; my $line_cnt = shift; for ( my $i = 0 ; $i <= $#_ ; ++$i ) { my $char = $_[$i]; $outfile_char_cnt += paragraph_check($i, $char); # Give indication of output print "Line $line_cnt: " . ( join('', @_) . "\n" ) if $v_flag && $i == 0; if ($char eq '*') { ($char, $i) = special_char(\@_, $i) } # Traktu Esperanton laux la iksa sistemo. if ($lang eq 'eo' || $lang eq 'es') { no warnings; # Look-ahead returns uninitialized at EOS! next if $char =~ /x/i && $i > 0 && $_[$i - 1] =~ /C|G|H|J|S|U|N/; # When to skip X. $char .= 'x' if $_[1 + $i] =~ /x/i; # When to Esperantize. } next unless defined( $morse{"$char"} ); &{ $morse{"$char"} } unless $test_flag; ++$outfile_char_cnt; my $break_file_re = $break_file_here; $break_file_re .= '|(, )|(,$)' if $wpm < 20; # On commas too, when semi slow. $break_file_re .= '|(.\ )|(\.$)' if $wpm < 15; # On spaces too, when very slow. # Break multi-file output at appropriate end-of-phrase punctuation. if ( $i>1 && next_file_trigger($outfile_char_cnt) ){ no warnings; # Look-behind returns uninitialized at start-of-string! my $recent = join '', @_[$i-1, $i]; if ( $recent =~ /$break_file_re/ ) { next_wav(); $outfile_char_cnt = 0; } } } return $outfile_char_cnt; } # Since neither POE::Component::Enc::Ogg nor any similar # module was available for Win32, convert to Ogg only # on Unician platforms. sub oggify { if ( $codec =~ /ogg/i && $test_flag == 0 ) { if ( $Config::Config{'osname'} !~ /Win/i ) { `nice oggenc -q 3 $wav_path \\ -t \"$tags{name}\" \\ -G \"$tags{genre}\" \\ -d \"$tags{date_time}\" \\ -c \"generator=$tags{generator}\" \\ -c \"wpm=$wpm\" \\ -c \"farn=$farn\"`; unlink $wav_path; # Lose the *.wav } else { print "Sorry! I have not yet figured out how to do oggenc in Win32. \n" } } } # Convert to mp3 and delete the wav. sub mp3ify { if ( $codec =~ /mp3/i && $test_flag == 0) { # The Title Tag my $title = "$tags{name} @ "; if ($farn < $wpm) { $title .= sprintf("%.2f/", $farn) } $title .= "$wpm WPM"; my $cmd; # Are we on Linux or Windows? if ( $Config::Config{'osname'} !~ /Win/i ) { $cmd = 'lame '; # Let LAME itself choose quality and bitrate for best tone } else { $cmd = q|./lame3.100-64/lame.exe |; } $cmd .= qq|--tt \"$title\" |; $cmd .= qq|--ta \"$tags{artist}\" |; # The Comment Tag if ($farn < $wpm) { $cmd .= qq|--tc \"| . sprintf("%.2f wpm spaced at %.2f wpm", $wpm, $farn) . qq|\" |; # 30 chars max allowed } else { $cmd .= qq|--tc \"Generated by $tags{generator}\" |; } $cmd .= qq|--tg \"Audiobook\" |; $cmd .= qq|--tl \"$tags{name}\" |; # So players like Sansa Clip can group in sequence, $cmd .= qq|--tn \"$wav_cnt\" |; $cmd .= qq|--ti \"$graphic\" | if $graphic =~ /\.png/i; $cmd .= qq| \"$wav_path\" |; print "\n\n CMD = $cmd \n\n"; `$cmd`; unlink $wav_path; # Lose the *.wav } } # Put a copy of the text inside output dir. sub embed_text { my $dir = $txt_path; $dir =~ s/\.txt//; if ( copy($txt_path, $dir) ) { print "\nOkay! Text copied into output directory.\n"; } else { print "\nOops! Could not copy text into output directory.\n"; } } # A split on // will separate cxapelito base char from its 'x' # This sub will repair an Esperanto iksa sistemo pair after # a split on // by re-associating the 'x' to its base and # replacing the 'x' with a space. sub fix_iksa_split { for (0 .. $#_ - 1) { if ( $_[1+$_] =~ /x/i ) { $_[1+$_] = ''; # Teleport orphan 'x' from isolation... $_[$_] .= 'x'; # ...re-uniting it to its cxapelito. } } return @_; } # Modified from fisher-yates_shuffle from the Perl Cookbook, 4.17. sub shuffle_chars { my @chars = split //, $_[0]; # on nothing @chars = fix_iksa_split(@chars) if ($lang eq 'eo' || $lang eq 'es'); @chars = @chars[0 .. 100] if $#chars > 100; my $i; for ($i = @chars; --$i;) { my $j = int rand ($i+1); next if $i == $j; @chars[$i,$j] = @chars[$j,$i]; } return join '', @chars; } # Likewise modified from fisher-yates_shuffle from the Perl Cookbook, 4.17. sum sub jumble_words { my @words = split / /, $_[0]; # on spaces my $i; for ($i = @words; --$i;) { my $j = int rand ($i+1); next if $i == $j; @words[$i,$j] = @words[$j,$i]; } return join ' ', @words; } # Create a long random string from a file line having as few as 2 chars. # Inspired by the Koch method of CW training. # TO DO: Fix for tracking Esperanto. sub kochify { $_ = shuffle_chars($_); until (length $_ >= 100) { $_ .= shuffle_chars($_); $_ .= ' ' if int(rand(5)) == 1; } $_ =~ s/^\s+//; # Lose spaces shuffled to leftmost of string. return "$_ "; } $farn = $wpm if $farn == 0; $count_re = '%04s'; # Read input text, convert to audio file(s). sub gus_morse { my $wpm_ = $wpm; my $farn_ = $farn; if ( open INFILE, "<$txt_path" ) { my $line_cnt = 0; new_wav(); my $outfile_char_cnt = 0; while () { ++$line_cnt; # For verbose reporting. $_ = uc($_); # Morse hash keys are UC. $_ =~ s|(\s{7}\*){5}|=|g; # Swap those 5-star subparagraph lines: " * * * * *" $_ =~ s/^\s*//g; # Lose leading whitespace. $_ =~ s/\s+/ /g; # Swap whitespace, compressing plural. punct(\$_); # Remove user-hated punctuation. fix_accents(\$_); # Many UTF-8 chars and punctuiation to ASCII fix_chapter_enumeration(\$_); # Roman numerals to arabic. # Test first because... # Shuffler chokes on 1-elem arrays, and... # Shuffler will mangle embeded *-delimited special ops. if ( (length $_ > 1) && ($_ !~ /\*/) ) { $_ = kochify($_) if $koch_flag ; $_ = shuffle_chars($_) if $rand_flag; $_ = jumble_words($_) if $jmbl_flag; $_ =~ s/\s+/ /g; } $_ .= "\n"; # So can break out on minutes when file has no punctuation. # print "Line = $_ "; # last; $outfile_char_cnt = morsify( $outfile_char_cnt, $line_cnt, (split //, $_) ); last if $wav_cnt >= $quit_limit; # print "LINE 917: $_"; } add_tail() if $about_flag; $write->finish() unless $test_flag; oggify(); mp3ify(); embed_text() unless $test_flag; if ($test_flag) { printf("\nWPM starts at %.f WPM and finishes with %.2f WPM.", $wpm_, $wpm ); if ($farn_ != $wpm_) { printf("\nFarnsworth starts at %.f WPM and finishes with %.2f WPM", $farn_, $farn ); }; printf("\nExpect %s total files.", $wav_cnt+1); $farn = $farn_; $wpm = $wpm_; } return 1; } else { print "Oops! Cannot read text infile '$txt_path': $! \n"; return 0; } } # Test for number of files. Ask if should proceed. Then do as asked. my $wpm_start = $wpm; $test_flag = 1; if (gus_morse()) { # First a test run. printf("\nCreate these %s files? (y/N) > ", $wav_cnt+1); my $foo; chomp( $foo = ); if ($foo =~ m/y/i) { $test_flag =0; $wpm = $wpm_start; $dgt_cnt = 3 if $wav_cnt < 999; $count_re = '%0' . $dgt_cnt . 's'; # Set file enumeration format. $wav_cnt = -1; # Reset zero-based file count. print "\nGenerating audio files..\n"; gus_morse(); # Create files for real. } } print "All done.\n"; if ($windoze_flag) { print "Press ENTER to close. > "; my $foo = ; } sleep 3 if $windoze_flag; # So error will show befor window closes. __END__ _ =head1 NAME Morse Code Text-to-Audio Converter =head1 SYNOPSIS C =head1 DESCRIPTION Reads in text file, writes out Morse code as audio file. Writes initially to C<*.wav> then converts. =head1 MODULES USED C C C C C C =head1 COMMAND LINE OPTIONS Control behavior of text-to-audio conversion using these options. =head2 --p[ath] Valid system file path to input text file. Audio output file(s) will be written to same directory, with extensions for inumeration and codec. Default = './morse.txt' =head2 --w[pm] Character speed as words-per-minute...independent of character spacing. Default = 26.0. =head2 --i[ncr] For each new file generated in series, increase the --wpm by this value in wpm. Default = 0. When used in combination with -f[arn] and -d[ecr], has effect only after Farnsworth has caught up with -w[pm]. =head2 --f[arn] Farnsworth spacing. Spreads out inter-character & inter-word spacing by stretching the gaps to this value in WPM. =head2 --e[xtra] Inter-word spacing. Spreads out inter-word spacing by adding N to the 7 PARIS elements for a space. Default = 0; =head2 --d[ecr] For each new file generated in series, incease the --farn by this value in wpm. Default = 0; =head2 --t[one] Tone of CW characters in Hz. Default = 700 =head2 --m[ins] Once output file exceeds this limit in minutes, a new file will split off at next major punctuation. =head2 --n[opunct] Arg is a string of characters which are to be passed over or substituted when encountered in a text file. =head2 --s[amp] Sampling rate of the C<*.wav> file in Hz. Default = 11025. =head2 --c[odec] Which audio codec (format) as final output? Default = C On Linux you may also choose C<*.ogg> if you have the C package installed or C should you have the C package installed. =head2 --g[raphic] Path to C<*.png> or C<*.jpg> file, if any, for cover-art tag. Used only with C<--codec mp3> on Linux. =head2 --l[ang] Language of input C<*.txt> file and subsequent Morse code charset. =over =item en English via US ASCII (Default) =item eo Esperanto: prefere laŭ Unikodo, anstataŭ la iksa sistemo. =item es Spanish, includes N-tilde char. =item others? Possibly to be supported later. Only just maybe... =back =head2 --r[and] Randomly shuffle all characters in each line. This will convert any text to random practice. =head2 --k[och] Like --r[and] except that the character count for each line will be expanded by adding tupples of all those provided. This is to facilitate the Koch method of teaching Morse code where only one each from a very small subset of characters are provided on a given line. You must provide an input text file containing lines of those characters to be tuppled and suffled. =head2 --v[erbose] Verbosity flag provides verbose feedback. =head2 --g[raphic] Specify a file by name as album art. Must conform to player requirements. Note, that if rather than a command-line arg, this instruction is embedded (see below) then the file name must be all upper-case: C =head2 --h[elp] Help Display basic help/usage message. =head2 --test Test Perform dry run as predictive test, generating no *.wav files. Use this mode to try out various combinations of --wpm, --decr, --incr and view the results in quick order. =head1 SPECIAL CHARACTERS Converter will parse input text for *-delimited special characters. Examples: *AR* *SK* *KN* *BT* *SOS* *ERROR* =head1 EMBEDED OPERATORS Converter will parse input text for *-delimited special operators. These have the same effect as their CLI arg equivalents, but may be embeded mid-stream in the text file so as to take effect mid-stream during the playout. Use them to simulate QSO between multiple stations. =head2 Regular Ops These have same effect as their CLI-arg equivalents. Examples: *TONE=775* *WPM=20.6* *DIFF=8.5* *LANG=EN* *LANG=EO* *QUIT=99* *GRAPIC=COVER.JPG* head2 *RAND=1* and *KOCH=1* These differ from their CLI-arg equivalents in requiring either a 0 or a 1 as integer arguments. On the CLI they are flags, always equal to 1. As embedded operators they may be toggled on and off. =head2 *NEXT* This op has no CLI-arg equivalent. Its function is to trigger a break between consecutive output files. =head1 BUILT-IN REGEX =head2 Roman Numerals Chapter titles enumerated as Roman will all become Arabic up to a limit of 100. =head2 Excess Whitespace Plural spaces will be reduced to single, leading and trailing spaces on lines removed. =head2 Full Stop Sentence final punctuation is followed by a short pause, as would be heard if read by a human narrator. =head1 AUTHOR Gan Uesli Starling > =head1 LICENSE Authored 2006-2023, Gan Uesli Starling, KY8D, VA7KYD, T000000139. No rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 SCRIPT CATEGORIES Convert =cut