#!/usr/bin/perl

# 
# a perl5 program/filter using libwww-perl (LWP) to navigate to
# Altavista's babelfish site to translate text.  Input comes
# from standard input, output goes to standard output
#
# Written on Feb. 1, 2001 by Frank Adelstein.
#

use LWP::UserAgent;
use URI::Escape;
use Getopt::Long;


# Parse the command line.  Called without explicit destinations,
# Getopt::Long stores each recognised option in a package variable named
# after it: $opt_t, $opt_o and $opt_help.  GetOptions returns false when
# it meets something it cannot parse.
my $options_ok = GetOptions(qw {-t=s -o=s --help!});

# Show the usage text when it was asked for (--help) or when the command
# line was invalid.  The truth of $opt_help is tested rather than
# defined(), so the negated form "--nohelp" (which sets it to 0) does
# not trigger the help text.
if (!$options_ok || $opt_help) {
  my $usage = <<EOF;
usage: $0 [options] [inputfile]
	Translates input to output using Altavista's babelfish.
	inputfile defaults to stdin if not specified.  Options include:

	-t translation		specifies the translation (default: 
				"en_fr" (English to French), 
				"list" prints out all known translations)
	-o outputfile 		file for output (defaults to stdout)
	--help			 this message
EOF

  if (!$options_ok) {
    # a bad command line is an error: report it on stderr and exit
    # non-zero so that calling scripts and pipelines can detect it
    print STDERR $usage;
    exit 2;
  }

  # help was explicitly requested: normal output, successful exit
  print $usage;
  exit 0;
}

# Pick the language pair: the value given with -t, or English to French
# when -t was not supplied.
$trans = defined($opt_t) ? $opt_t : "en_fr";

# "-t list" is not a real language pair: it prints the pairs this script
# knows about and terminates before any input is read.
if ($trans eq "list") {
  print <<EOF;
Currently known translation options (note that this may not match the
currently supported ones for Babelfish):
	en_fr 			English to French (the default)
	en_de 			English to German
	en_it 			English to Italian
	en_pt 			English to Portuguese
	en_es 			English to Spanish
	fr_en 			French to English
	de_en 			German to English
	it_en 			Italian to English
	pt_en 			Portuguese to English
	es_en 			Spanish to English
	de_fr 			German to French
	fr_de 			French to German
	ru_en 			Russian to English
	en_ja 			English to Japanese
	en_ko 			English to Korean
	en_zh 			English to Chinese
	ja_en 			Japanese to English
	ko_en 			Korean to English
	zh_en 			Chinese to English
EOF
  exit;
}

# Output destination: the -o argument exactly as given, or ">-" (the
# marker for standard output) when -o was not supplied.
$outfile = defined($opt_o) ? $opt_o : ">-";

# Input source: the first remaining command-line argument, or "-" (the
# marker for standard input) when there is none.
$infile = defined($ARGV[0]) ? $ARGV[0] : "-";

# Read the whole text to translate into $input.
#
# "-" means standard input; any other value is opened strictly as a file
# name.  The three-argument open keeps a name such as "cmd |" or ">file"
# from being interpreted as a pipe or an output mode, which the
# two-argument form would do with a user-supplied argument.
my $in_fh;
if ($infile eq "-") {
  $in_fh = \*STDIN;
} else {
  open($in_fh, '<', $infile) || die "can't open $infile for input: $!";
}

$input = "";
while (defined(my $chunk = <$in_fh>)) {
  $input .= $chunk;
}

# only close a handle that was opened here; leave STDIN alone
close($in_fh) unless $infile eq "-";

# URL-encode the text so it can be embedded in the form body.
#
# The text must be escaped exactly once.  Rewriting "%", "+" and " " by
# hand and then calling uri_escape() escapes the "%" of those sequences a
# second time, so the server decodes to literal "%25"/"%2B"/"+" instead
# of the original characters.
#
# The set of characters to leave alone is passed explicitly (everything
# else, including "&", "=", "+", "%" and space, is percent-encoded) so
# the result does not depend on which default set the installed
# URI::Escape version uses.
$input = uri_escape($input, q{^A-Za-z0-9\-_.!~*'()});
 
# create the user agent object
$ua = LWP::UserAgent->new;

# starting url
$theurl = 'http://babelfish.altavista.com/tr';

# The language pair comes straight from the command line, so encode it
# before placing it in the form body; otherwise a value containing "&"
# or "=" would inject extra form fields.  The known pairs (en_fr, ...)
# pass through unchanged.
my $lp = uri_escape($trans, q{^A-Za-z0-9\-_.!~*'()});

# build the page for the FORM response
# ($input is expected to be URL-encoded already at this point)
my $req = HTTP::Request->new('POST', $theurl);
$req->content_type('application/x-www-form-urlencoded');
$req->content("eng=utf8&doit=done&BabelFishFrontPage=yes&bblType=urltext"
	      . "&urltext=$input&url=http%3A%2F%2F&lp=$lp");

#print "request: " . $req->content . "\n";

# POST it (simple_request sends the request once, without following
# redirects)
$res = $ua->simple_request($req);

# Stop with a diagnostic on a 4xx/5xx response (LWP also reports
# connection failures as a 5xx response) instead of silently producing
# an empty translation.
if ($res->is_error) {
  die "translation request to $theurl failed: " . $res->status_line . "\n";
}

#print "success: " . $res->is_success . "\n";
#print "status: " .  $res->status_line . "\n";
#print "content: " .  $res->content . "\n";

# Scrape the translated text out of the HTML response.
#
# The page is walked line by line with a single flag, $inmatch:
#   * lines containing "<td>" or "<form action" are ignored entirely;
#   * the flag is switched on by the spacer-image line that precedes the
#     translation;
#   * while the flag is on, each line is stripped of a fixed set of HTML
#     fragments and appended to $buffer followed by a space;
#   * the first line starting with "<" while the flag is on switches it
#     off again.
# (The site sends two different page layouts depending on the length of
# the text, hence the assortment of fragments that are removed.)

$buffer  = "";
$inmatch = 0;

# line that announces the start of the translated text
my $start_marker = qr|<td width=20><img border=0 src=\"/static/images/babelfish/px.gif\" width=20 height=1></td>|;

# HTML fragments removed from translation lines; each is removed at most
# once per line, in this order
my @html_junk = (
  qr/<\/td>/,
  qr/<\/tr>/,
  qr/<tr><td colspan=3>/,
  qr/<textarea rows=\"3\" wrap=virtual cols=\"56\" name=\"q\">/,
  qr/<td bgcolor=white>/,
  qr/<tr><td colspan=3><\/tr>/,
);

for my $line (split(/\n/, $res->content)) {
  # skip this line
  next if $line =~ m|<td>| || $line =~ m|<form action|;

  # first HTML tag after the match turns off matching
  $inmatch = 0 if $inmatch && $line =~ m|^<|;

  if ($inmatch == 1) {
    # filter out HTML garbage sent with the translation
    for my $junk (@html_junk) {
      $line =~ s/$junk//;
    }
    $buffer .= $line . " ";
  }

  # the marker line itself is never output; collection starts with the
  # line after it
  $inmatch = 1 if $line =~ $start_marker;
}

# Write the translation to its destination.
#
# $outfile is either ">-" (standard output, the default) or the -o
# argument.  A plain file name given with -o carries no mode prefix, so a
# two-argument open would open it for READING and the print would fail
# silently.  The mode is therefore worked out explicitly: an optional
# leading ">" or ">>" on the name is honoured (so "-o '>file'" keeps
# working), anything else is truncated and written.  "-" as the path
# means standard output.
my ($out_mode, $out_path) = ('>', $outfile);
if ($outfile =~ /^\s*(>>?)\s*(.*)$/s) {
  ($out_mode, $out_path) = ($1, $2);
}

my $out_fh;
if ($out_path eq "-") {
  $out_fh = \*STDOUT;
} else {
  open($out_fh, $out_mode, $out_path)
    || die "can't open $out_path for output: $!";
}

print {$out_fh} "$buffer\n"
  or die "can't write to $out_path: $!";

# buffered write errors (e.g. disk full) only surface at close
close($out_fh)
  or die "can't finish writing $out_path: $!";

exit; 

