#!/usr/bin/env perl

# This is sample code to show how to use XML-RPC to access the
# biological text processing tools at:
# http://bionlp.stanford.edu/
#
# See the web page for more information.
#
# This script requires Perl with the RPC::XML package installed, and
# an internet connection.  It will not work through a firewall that
# blocks outgoing HTTP connections.  It has been tested on Solaris and
# Linux and should work on other Unix systems.  It may work on Windows
# as well -- please let me know if it does.  To run the script, type at
# the command prompt:
#
# > perl bionlp.pl [optional text]
# 
# where [optional text] is some text to search.  If not provided, I
# will search some sample text as a demonstration.
#
# History:
# 2003-04-15  jtc  Initial release.


use strict;
use RPC::XML::Client;


# This should point to the XML-RPC server at bionlp.stanford.edu.
my $BIONLP_URI = "http://bionlp.stanford.edu/xmlrpc";

# This is the default text to search, if no inputs are given.
my $SAMPLE_TEXT = "We observed an increase in mitogen-activated protein kinase (MAPK) activity.";


# Look for abbreviations that are defined in the text.
#
# Takes the text to search as a single string and sends it to the
# 'find_abbreviations' method of the XML-RPC server at $BIONLP_URI.
# Returns a reference to an array of arrays, where each inner array is
# (long form, abbreviation, score).  The outer array is empty if no
# abbreviations are found.
#
# Dies with a descriptive message if the client cannot be created, if
# the request cannot be completed, or if the server answers with an
# XML-RPC fault.
sub find_abbreviations ($) {
  my $text = shift;

  # The constructor hands back an error string instead of an object
  # when it cannot build the client.
  my $client = RPC::XML::Client->new($BIONLP_URI);
  die "find_abbreviations: cannot create client for $BIONLP_URI: $client\n"
    unless ref $client;

  # send_request returns a plain error string, not an object, when the
  # request itself fails (for example, the server cannot be reached).
  my $res = $client->send_request('find_abbreviations', $text);
  die "find_abbreviations: request to $BIONLP_URI failed: $res\n"
    unless ref $res;

  # A fault carries an error code and message rather than the array
  # reference callers dereference, so report it here.
  die "find_abbreviations: server fault ", $res->code, ": ",
      $res->string, "\n"
    if $res->isa('RPC::XML::fault');

  return $res->value;
}

# Look for gene and protein names in the text.
#
# Takes the text to search as a single string and sends it to the
# 'find_gene_and_protein_names' method of the XML-RPC server at
# $BIONLP_URI.  Returns a reference to an array of arrays, where each
# inner array is (name, start, end, score).  The outer array is empty
# if no names are found.
#
# Dies with a descriptive message if the client cannot be created, if
# the request cannot be completed, or if the server answers with an
# XML-RPC fault.
sub find_gene_and_protein_names ($) {
  my $text = shift;

  # The constructor hands back an error string instead of an object
  # when it cannot build the client.
  my $client = RPC::XML::Client->new($BIONLP_URI);
  die "find_gene_and_protein_names: cannot create client for ",
      "$BIONLP_URI: $client\n"
    unless ref $client;

  # send_request returns a plain error string, not an object, when the
  # request itself fails (for example, the server cannot be reached).
  my $res = $client->send_request('find_gene_and_protein_names', $text);
  die "find_gene_and_protein_names: request to $BIONLP_URI failed: $res\n"
    unless ref $res;

  # A fault carries an error code and message rather than the array
  # reference callers dereference, so report it here.
  die "find_gene_and_protein_names: server fault ", $res->code, ": ",
      $res->string, "\n"
    if $res->isa('RPC::XML::fault');

  return $res->value;
}


# Main script: the text to analyze is all command-line arguments joined
# with single spaces, or the sample text when none was given.
my $text = join " ", @ARGV;
# Test the length rather than truth so that an input of "0" is searched
# instead of being silently replaced by the sample text.
$text = $SAMPLE_TEXT unless length $text;

print "Searching text:\n$text\n\n";

# Each abbreviation record is (long form, abbreviation, score).
my @abbreviations = @{ find_abbreviations($text) };
print "I found ", scalar(@abbreviations), " possible abbreviation(s).\n";
foreach my $abbreviation (@abbreviations) {
  my ($long_form, $short_form, $score) = @{$abbreviation};
  print "ABBREVIATION=", $short_form, "\n";
  print "LONG FORM=", $long_form, "\n";
  print "SCORE=", $score, "\n";
  print "\n";
}

# Each name record is (name, start, end, score); only the name and the
# score are printed.
my @names = @{ find_gene_and_protein_names($text) };
print "I found ", scalar(@names), " possible gene or protein name(s).\n";
foreach my $name (@names) {
  print "NAME=", $name->[0], "\n";
  print "SCORE=", $name->[3], "\n";
  print "\n";
}
