Note that the original program makes direct use of accented characters in the CSX encoding; here it has been converted to use Unicode equivalents to ensure readability on modern computers. This means that downloading and attempting to run the program as it stands is unlikely to work.
#!/usr/bin/perl #-*-Perl-*- #------------------------------------------------------------------# $description = "Syntax: edmconv [options] file Edmconv allows the user to continue editing the correction files generated by mkmconv. \"File\" should be the name of the Tokunaga Mahābhārata file in CSX format which mconv compared with another, more correct file. -n option asks no questions but does a \"next\" on every form which is not already known to it. -h option prints this help. "; #------------------------------------------------------------------# require 'getopts.pl'; Getopts(':nh'); if ($opt_h || $#ARGV != 0) { print $description; exit 1; } $file = shift; $aborted = 0; @stems = ""; open(STEM_FILE, "mconv.spacing"); while () { next if (/^\#/ || /^\s*$/); if (/\\s\)([][)('A-zÇ-Ḥ]*) \//) { $word = $1; $word =~ s/E/ai/g; $word =~ s/O/au/g; push(@stems, "=$word="); # "=" to anchor boundaries } } close STEM_FILE; open(STEM_FILE, "$file.stems"); while ( ) { next if (/^\#/ || /^\s*$/); if (/\\s\)([][)('A-zÇ-Ḥ]*) \//) { $word = $1; $word =~ s/E/ai/g; $word =~ s/O/au/g; push(@stems, "=$word="); } } close STEM_FILE; @words = ""; open(WORD_FILE, "mconv.words"); while ( ) { chop; next if (/^\#/ || /^\s*$/); push(@words, "=$_="); } close WORD_FILE; open(WORD_FILE, "$file.words"); while ( ) { chop; next if (/^\#/ || /^\s*$/); push(@words, "=$_="); } close WORD_FILE; @compounds = ""; open(COMPOUND_FILE, "mconv.spacing"); while ( ) { next if (/^\#/ || /^\s*$/); if (/\\s\)([^ \/]+ [^ \/]+)\//) { $word = $1; $word =~ s/E/ai/g; $word =~ s/O/au/g; push(@compounds, "=$word="); } } close COMPOUND_FILE; open(COMPOUND_FILE, "$file.compounds"); while ( ) { next if (/^\#/ || /^\s*$/); if (/\\s\)([^ \/]+ [^ \/]+)\//) { $word = $1; $word =~ s/E/ai/g; $word =~ s/O/au/g; push(@compounds, "=$word="); } } close COMPOUND_FILE; open(CORRECTION_FILE, "$file.corrections"); open(NEW_CORRECTION_FILE, ">$file.corrections.new"); open(COMPOUND_FILE, ">>$file.compounds.new"); open(STEM_FILE, ">>$file.stems.new"); open(WORD_FILE, ">>$file.words.new"); while ( ) { if (/^\#/) { print NEW_CORRECTION_FILE $_; next; } print "\nProposed correction: $_"; while (/([^ \/]+) /g) { $word = $1; ($gword = $word) =~ s/^[a']/([a'])/; ($ggword = $gword) =~ s/([iī])$/([$1y])/; $ggword =~ s/([uū])$/([$1v])/; ($grepword = $ggword) =~ s/([][)(])/\\$1/g; if (grep(/=$grepword=/, @stems)) { print NEW_CORRECTION_FILE "\# > Stems $ggword\n"; print "\"$word\" is already recognised as a stem.\n"; next; } elsif (grep(/=$grepword=/, @words)) { print "\"$word\" is already recognised as a word.\n"; if (/($word)( [^ \/]+)[\/ ].*$word.*\/g;/) { $left = $gword . $2; ($right = $left) =~ s/ /./g; $right =~ s/^\(\[.*\]\)/\$2/; $cpd = "s/(\\.|\\s)$left\/\$1$right\/g;"; } print NEW_CORRECTION_FILE "\# > Compounds $cpd\n"; $strip = &strip_case_endings($cpd); if ($strip =~ /\\s\)([^ \/]+ [^ \/]+)\//) { $compound = $1; ($grepcompound = $compound ) =~ s/([][)(])/\\$1/g; unless (grep(/=$grepcompound=/, @compounds)) { push(@compounds, "=$compound="); $strip =~ s/ai/E/g; $strip =~ s/au/O/g; print COMPOUND_FILE "$strip\n"; } } next; } if ($opt_n) { print NEW_CORRECTION_FILE "$_"; next; } else { do { print "*** Is \"$word\" a (S)tem or a (W)ord,\n"; print "*** or would you rather (C)omment it out,\n"; print "*** move to the (N)ext case or (Q)uit? "; $c = ; print "\n"; if ($c =~ /^[sS]/) { print NEW_CORRECTION_FILE "\# > Stems $ggword\n"; push(@stems, "=$ggword="); $ggword =~ s/ai/E/g; $ggword =~ s/au/O/g; print STEM_FILE "s/(\\.|\\s)$ggword /\$1"; $ggword =~ s/\(\[.*\]\)/\$2/g; $ggword =~ s/(\$2.*)\$2/$1\$3/g; print STEM_FILE "$ggword./g;\n"; } elsif ($c =~ /^[wW]/) { if (/($word)( [^ \/]+)[\/ ].*$word.*\/g;/) { $left = $gword . $2; ($right = $left) =~ s/ /./g; $right =~ s/^\(\[.*\]\)/\$2/; $cpd = "s/(\\.|\\s)$left\/\$1$right\/g;"; } print NEW_CORRECTION_FILE "\# > Compounds $cpd\n"; push(@words, "=$ggword="); print WORD_FILE "$ggword\n"; $strip = &strip_case_endings($cpd); if ($strip =~ /\\s\)([^ \/]+ [^ \/]+)\//) { $compound = $1; ($grepcompound = $compound ) =~ s/([][)(])/\\$1/g; unless (grep(/=$grepcompound=/, @compounds)) { push(@compounds, "=$compound="); $strip =~ s/ai/E/g; $strip =~ s/au/O/g; print COMPOUND_FILE "$strip\n"; } } } elsif ($c =~ /^[cC]/) { print NEW_CORRECTION_FILE "\# Commented out $_"; } elsif ($c =~ /^[nN]/) { print NEW_CORRECTION_FILE "$_"; next; } elsif ($c =~ /^[qQ]/) { print NEW_CORRECTION_FILE "$_"; while ( ) { print NEW_CORRECTION_FILE "$_"; } $aborted = 1; } } until ($c =~ /^[nNqQcCwWsS]/); } } } print COMPOUND_FILE "\n"; print STEM_FILE "\n"; print WORD_FILE "\n"; close CORRECTION_FILE; close NEW_CORRECTION_FILE; close COMPOUND_FILE; close STEM_FILE; close WORD_FILE; do { print "\nSave changes? (Y/N) "; $c = ; print "\n"; if ($c =~ /^[yY]/) { rename("$file.corrections.new", "$file.corrections"); system "cat $file.compounds.new >>$file.compounds"; system "cat $file.stems.new >>$file.stems"; system "cat $file.words.new >>$file.words"; unlink ("$file.compounds.new", "$file.stems.new", "$file.words.new"); } elsif ($c =~ /^[nN]/) { $aborted = 1; unlink ("$file.corrections.new", "$file.compounds.new", "$file.stems.new", "$file.words.new"); } } until ($c =~ /^[yYnN]/); if ($aborted) { print "Use \"edmconv $file\" to resume editing. Do not use \"mkmconv\"\n"; print "unless you want to scrap what you have done and start again.\n\n"; } else { do { print "\nAdd these corrections to the master files? (Y/N) "; $c = ; print "\n\n"; if ($c =~ /^[yY]/) { system "cat $file.stems >>mconv.spacing"; system "cat $file.compounds >>mconv.spacing"; open(SPACING_FILE, ">>mconv.spacing"); print SPACING_FILE "\n#\ns/\\.//g unless \$unbatched;"; print SPACING_FILE " # END OF BATCH\n#\n\n\n"; close SPACING_FILE; system "cat $file.words >>mconv.words"; } } until ($c =~ /^[yYnN]/); } sub strip_case_endings { # Generalise compound corrections by removing inflexional endings # where this will not lead to ambiguity. Watch out for cases such # as the apparent instrumental (actually vocative) bhīmasena. local ($line) = $_[0]; return $line if $line =~ s/[aiu][ṃm]\//\//g; return $line if $line =~ s/([^s])ena\//$1\//g; return $line if $line =~ s/eṇa\//\//g; return $line if $line =~ s/[iu][ṇn]ā\//\//g; return $line if $line =~ s/([^dm])ayā\//$1\//g; return $line if $line =~ s/([^km])āya\//$1\//g; return $line if $line =~ s/āyai\//\//g; return $line if $line =~ s/a[yv]e\//\//g; return $line if $line =~ s/[yv]ai\//\//g; return $line if $line =~ s/([^p])ā[tdcj]\//$1\//g; return $line if $line =~ s/āyā[ḥśṣs]\//\//g; return $line if $line =~ s/[eo][ḥśṣsr]\//\//g; return $line if $line =~ s/[iu][ṇn](a[ḥśṣs]|o)\//\//g; return $line if $line =~ s/asya\//\//g; return $line if $line =~ s/au\//\//g; return $line if $line =~ s/āyā[ṃm]\//\//g; return $line if $line =~ s/yā[ṃm]\//\//g; return $line if $line =~ s/[aāiīuūṛo]bhyā[ṃm]\//\//g; return $line if $line =~ s/[āīūṝ][ḥśṣsrn]\//\//g; return $line if $line =~ s/[āīūṝ][ṇn]i\//\//g; return $line if $line =~ s/ai[ḥśṣsr]\//\//g; return $line if $line =~ s/[āiīuūṛo]bhi[ḥśṣsr]\//\//g; return $line if $line =~ s/[āiīuūṛeo]bhy(a([ḥśṣs]?)|o)\//\//g; return $line if $line =~ s/[āiīūṝ][ṇn]ā[ṃm]\//\//g; return $line if $line =~ s/ā[ṃm]\//\//g; return $line if $line =~ s/ās[uv]\//\//g; return $line if $line =~ s/[eiīuūṛ]ṣ[uv]\//\//g; return $line if $line =~ s/[aāiīuūeo]\//\//g; return $line if $line =~ s/a[ḥśṣs]\//\//g; return $line if $line =~ s/[iu][ḥśṣsr]\//\//g; return $line; }