Note that the original program makes direct use of accented characters in the CSX encoding; here it has been converted to use Unicode equivalents to ensure readability on modern computers. This means that downloading and attempting to run the program as it stands is unlikely to work.
#!/usr/bin/perl
#-*-Perl-*-
#------------------------------------------------------------------#
# mconv -- transliterate and apply sandhi to a Tokunaga Mahābhārata file.
#
# Usage: mconv [-h] [-s] file
#
#   -h  print the help text and exit with status 1
#   -s  "stupid" mode: pipe the output through mconv.cleanup only,
#       skipping mconv.spacing
#
# The input file is read line by line.  Lines starting with "%" or with
# whitespace are skipped.  Every other line is run through a fixed,
# order-dependent sequence of substitutions (transliteration, anusvāra /
# visarga / nasal corrections, typo fixes, vowel sandhi, consonant
# sandhi) and printed to STDOUT, which is re-opened as a pipe into the
# external filters mconv.spacing and mconv.cleanup.
#
# Lines whose semicolon structure looks suspicious (see the 'Deal with
# ";"' section) are collected and written to "<file>.log".
#
# NOTE(review): the substitution tables in this file are written with
# Unicode (UTF-8) literals, so the text is processed as characters and
# emitted as UTF-8.  The help text still says "CSX format"; whether the
# downstream filters expect UTF-8 or CSX needs confirming.
#------------------------------------------------------------------#

use strict;
use warnings;
use utf8;    # the patterns below contain UTF-8 literals; match characters, not bytes

use Getopt::Std;

my $description = <<'END_DESCRIPTION';
Syntax: mconv [options] file
Mconv converts "file", which should be an uncompressed Tokunaga
Mahābhārata file, to CSX format. As part of this process it inserts
corrections generated by mkmconv.
-h option prints this help.
-s option runs in "stupid" mode, i.e. deliberately does not make any
of the corrections suggested by mkmconv.
END_DESCRIPTION

#------------------------------------------------------------------#

# The help text contains non-ASCII characters, so encode STDOUT before
# anything is printed to it.
binmode(STDOUT, ':encoding(UTF-8)');

# Getopt::Std replaces the Perl 4 library getopts.pl.  An unknown option
# is treated like a usage error.
my %opt;
my $options_ok = getopts('hs', \%opt);
if (!$options_ok || $opt{h} || @ARGV != 1) {
    print $description;
    exit 1;
}

my $filename = $ARGV[0];
my @problems;                 # lines flagged for the log file
my $date     = localtime;     # timestamp string, no trailing newline

# The following line (commented out) runs the DANGEROUS unbatched option.
# Check output very carefully if you use it.
#   my $pipeline = 'perl -sp mconv.spacing -unbatched |mconv.spacing |mconv.cleanup';
# The standard safer option follows:
my $pipeline = $opt{s} ? 'mconv.cleanup' : 'mconv.spacing |mconv.cleanup';

open(STDOUT, '|-', $pipeline)
    or die "mconv: cannot start \"$pipeline\": $!\n";
# Re-opening a handle resets its layers, so set the encoding again.
binmode(STDOUT, ':encoding(UTF-8)');

print "% This file generated by\n";
print "% mconv $filename\n";
print "% $date\n%\n";

while (<>) {
    # Correct layout and transliteration
    next if /^%/ || /^\s/;
    s/\015//g;                  # Get rid of DOS CR char
    chomp;                      # only strips a newline, so an unterminated last line is safe
    s/^0//;                     # Get rid of leading zeros

    # A trailing "{...}" group (presumably a speaker attribution, going
    # by the original variable name) is cut off the line and printed on
    # its own, prefixed by the first 8 characters of what remains.  The
    # group is removed with one anchored substitution so its text is
    # never re-interpreted as a regular expression.
    if (s/\s*(\{.*\})$//) {
        my $uvaaca = $1;
        print substr($_, 0, 8) . " $uvaaca\n";
    }

    tr/././s;                   # squeeze runs of dots
    s/\///;                     # drop the first slash ...
    s/\/.*$//;                  # ... and everything from the next slash on
    s/\./ /g;
    s/ $//;
    s/aa/ā/g;
    s/ii/ī/g;
    s/uu/ū/g;
    s/R/ṛ/g;
    s/ṛṛ/ṝ/g;
    s/T/ṭ/g;
    s/D/ḍ/g;
    s/N/ṇ/g;
    s/z/ś/g;
    s/S/ṣ/g;

    # Temporary 1-letter versions of ai and au
    s/ai/E/g;
    s/au/O/g;

    # Correct visarga, anusvāra and ṅ, ñ
    s/([aāiīuūṛeoEO])h([ ;kpśṣs]|$)/$1ḥ$2/g;
    s/m( |;)([^aāiīuūṛeoEO])/ṃ$1$2/g;
    s/sam([^ ;aāiīuūṛeoEO])/saṃ$1/g;
    s/saṃrā([jṭḍ])/samrā$1/g;
    s/saṃya([ñṅkg])/samya$1/g;
    s/( |;)pum([ ;]?)([^aāiīuūṛeoEO])/$1puṃ$2$3/g;
    s/( |;)puṃbh/$1pumbh/g;
    s/([aāiīuūṛṝeoEO])m([kgcjṭḍṇtdmśṣsh])/$1ṃ$2/g;
    s/śaṃkh/śaṅkh/g;
    s/( |;)saṃga /$1saṅga /g;
    s/ml( |$)/ṁl$1/g;
    s/n([cj])/ñ$1/g;
    s/([cj])n/$1ñ/g;
    s/n([kg])/ṅ$1/g;
    s/( |;)(a?)pratyan( |;|$)/$1$2pratyaṅ$3/g;
    s/( |;)(a?)pratyann( |;)/$1$2pratyaṅṅ$3/g;
    s/( |;)(a?)prān( |;|$)/$1$2prāṅ$3/g;
    s/( |;)(a?)parān( |;)(man|mukh)/$1$2parāṅ$3$4/g;

    # Correct anomalies and common typos that are best dealt with before sandhi
    s/(k[^ ;]+)(ci[tdncjl]|cana)($|[ ;])/$1 $2$3/g;
    s/([ ;])kāñ cana/$1kāñcana/g;
    s/([krṛṝ])s/$1ṣ/g;
    s/([ ;])kṛṣar/$1kṛsar/g;
    s/([ ;])bṛṣ/$1bṛs/g;
    s/([ ;])viṣvakṣen/$1viṣvaksen/g;
    s/([ ;])nṛṣiṃh/$1nṛsiṃh/g;
    s/ṣt/ṣṭ/g;
    # "N" is reused here as a temporary marker for an n that the next
    # rule must not retroflex; it is turned back into "n" a few lines on.
    s/(dur|ni[rṣ]|par[iyā]|pr[aāoeEO]|antar)([kgṅpbmyrvh]*)([aāiīuūeṛṝoEO]*)([aāiīuūeṛṝoEOkgṅpbmyrvh]*)n/$1$2$3$4N/g;
    s/([rṛṝṣ])([aāiīuūeṛṝoEOkgṅpbmyrvh]*)n([aāiīuūṛṝeoEOnmyv])/$1$2ṇ$3/g;
    s/ṇn([^ ;])/ṇṇ$1/g;
    s/ṇn/nn/g;
    s/N/n/g;
    s/nirvinn/nirviṇṇ/g;
    s/prayān([aāeo])/prayāṇ$1/g;
    s/ṇṛt/nṛt/g;
    s/ṇart/nart/g;
    s/ṇabh/nabh/g;
    s/ṇand/nand/g;
    s/ṇaṣṭ/naṣṭ/g;
    s/prāpṇ/prāpn/g;
    s/ghṇ/ghn/g;
    s/([iīuūṛeoEO])s([uv])($|[ ;])/$1ṣ$2$3/g;
    s/([eo])s([aāiīuūṛeoEO])/$1ṣ$2/g;
    s/isy/iṣy/g;
    s/maṇīṣ/manīṣ/g;
    s/av([ ;])/āv$1/g;
    s/tisth/tiṣṭh/g;
    s/nisth/niṣṭh/g;
    s/nEsth/nEṣṭh/g;
    s/ṣaṇ([ ;])/ṣaṭ$1/;          # first occurrence only, as in the original
    s/ṇ([ ;])/n$1/g;
    s/rūdh/rūḍh/g;
    s/cest/ceṣṭ/g;
    s/pattr/patr/g;
    s/([aāiīuūṛṝeoEO])ṛ([ṣṇ])/$1r$2/g;
    s/([^ ;])ayāmās/$1ayām ās/g;
    s/ar([ ;])([kpśṣs])/aḥ$1$2/g;
    s/(tira|pura|nama)([sḥ]) (kṛ|kar|kur[uv])/$1s$3/g;
    s/preṣp/preps/g;

    # Correct vowel sandhi
    s/[aā] [aā]/ā/g;
    s/[iī] [iī]/ī/g;
    s/[uū] [uū]/ū/g;
    s/[aā] [iī]/e/g;
    s/[aā] [uū]/o/g;
    s/[aā] ṛ/ar/g;
    s/[aā] [eE]/E/g;
    s/[aā] [oO]/O/g;
    s/[iī]( [aāuūṛeoEO])/y$1/g;
    s/[uū]( [aāiīṛeoEO])/v$1/g;
    s/ṛ ([aāiīuūeoEO])/r$1/g;
    s/([eo]) a/$1 '/g;
    s/[eo]( [āiīuūṛeoEO])/a$1/g;
    s/E( [aāiīuūṛeoEO])/ā$1/g;
    s/O( [aāiīuūṛeoEO])/āv$1/g;

    # Deal with ";"
    # A semicolon standing directly between two vowels is parked as ":"
    # so that the "; " expansion below does not touch it.
    s/([aāiīuūṛeoEO]);([aāiīuūṛeoEO])/$1:$2/g;
    s/;/; /g;
    my $vowels = tr/aāiīuūṛṝḷeoEO//;
    if ($vowels > 18 && !/; /) {
        # More than 18 vowels but no "; " in the line.
        if (/:/) {
            # Promote the parked semicolons; more than one of them is
            # reported as a problem.
            s/:/; /g;
            my $semicolons = tr/;//;
            push(@problems, $_) if $semicolons > 1;
        }
        else {
            # No semicolon of either kind: report the line.
            push(@problems, $_);
        }
    }
    s/:/ /;                     # first remaining ":" only, as in the original

    # Correct consonant sandhi
    s/s(;? |$)/ḥ$1/g;
    s/ (vā|tva|ru|sru|mu)c(;? |$)/ $1k$2/g;
    s/ (ru|ṛtvi|vaṇi|bhiṣa|sra|ūr)j(;? |$)/ $1k$2/g;
    s/ (di|dṛ|spṛ)ś(;? |$)/ $1k$2/g;
    s/ (samrā|parivrā)j(;? |$)/ $1ṭ$2/g;
    s/ viś(;? |$)/ viṭ$1/g;
    s/[cj](;? )/t$1/g;
    s/([kgṭḍtdpb])\1(;? )/$1$2/g;
    s/k(;? )([gjḍdbyrlvhaāiīuūṛeoEO])/g$1$2/g;
    s/g(;? )([kcṭtpśṣs])/k$1$2/g;
    s/[kg](;? )([nm])/ṅ$1$2/g;
    s/ṭ(;? )([gjḍdbyrlvhaāiīuūṛeoEO])/ḍ$1$2/g;
    s/ḍ(;? )([kcṭtpśṣs])/ṭ$1$2/g;
    s/[ṭḍ](;? )([nm])/ṇ$1$2/g;
    s/t(;? )([gjḍdbyrlvhaāiīuūṛeoEO])/d$1$2/g;
    s/(d|dh)(;? )([kcṭtpśṣs])/t$2$3/g;
    s/([td]|dh)(;? )([nm])/n$2$3/g;
    s/p(;? )([gjḍdbyrlvhaāiīuūṛeoEO])/b$1$2/g;
    s/(b|bh)(;? )([kcṭtpśṣs])/p$2$3/g;
    s/([pb]|bh)(;? )([nm])/m$2$3/g;
    s/([gḍdb])(;? )h/$1$2$1h/g;
    s/n(;? )c/ṃś$1c/g;
    s/n(;? )ṭ/ṃṣ$1ṭ/g;
    s/n(;? )t/ṃs$1t/g;
    s/n(;? )([jś])/ñ$1$2/g;
    s/n(;? )ḍ/ṇ$1ḍ/g;
    s/n(;? )l/ṁl$1l/g;
    s/t(;? )c/c$1c/g;
    s/t(;? )ś/c$1ch/g;
    s/t(;? )ṭ/ṭ$1ṭ/g;
    s/d(;? )j/j$1j/g;
    s/d(;? )ḍ/ḍ$1ḍ/g;
    s/d(;? )l/l$1l/g;
    s/ḥ(;? )c/ś$1c/g;
    s/ḥ(;? )ṭ/ṣ$1ṭ/g;
    s/ḥ(;? )t/s$1t/g;
    s/āḥ(;? )([^kpśṣs])/ā$1$2/g;
    s/aḥ(;? )([āiīuūṛeoEO])/a$1$2/g;
    s/aḥ(;? )([gjḍdnbmyrlvh])/o$1$2/g;
    s/aḥ(;? )a/o${1}'/g;
    s/ḥ(;? )([aāiīuūṛeoEOgjḍdnbmyrlvh])/r$1$2/g;
    s/ar(;? )r/ā$1r/g;
    s/ir(;? )r/ī$1r/g;
    s/ur(;? )r/ū$1r/g;
    s/r(;? )r/$1r/g;

    print;
    print "\n";
}

# Flush the pipeline and wait for the filters so that their failures
# are reported instead of being silently lost.
close(STDOUT)
    or warn $! ? "mconv: error closing output pipeline: $!\n"
               : "mconv: output pipeline exited with status $?\n";

if (@problems) {
    print STDERR "\nThe file $filename.log contains important information.\n\n";

    open(my $log, '>:encoding(UTF-8)', "$filename.log")
        or die "mconv: cannot write $filename.log: $!\n";

    print {$log} "This file generated by\n";
    print {$log} "mconv $filename\n";
    print {$log} "$date\n\n";
    print {$log} "Mconv regards the following line(s) as problematical:\n";
    print {$log} "they contain either too many semicolons or too few.\n";
    print {$log} "(Note that the lines are quoted from an early stage\n";
    print {$log} "of mconv's processing, so there may be minor textual\n";
    print {$log} "divergences from the final output.)\n\n";

    for my $problem (@problems) {
        # Undo the temporary one-letter codes for ai and au.
        (my $line = $problem) =~ s/E/ai/g;
        $line =~ s/O/au/g;
        print {$log} "$line\n";
    }

    close($log)
        or die "mconv: error closing $filename.log: $!\n";
}