"""This is a Python wrapper for the Smarr/Manning PnpClassifier program.

Functions:
classify              Classify one word, given a training file.
classify_many         Classify many words, given a training file.

parse_PnpClassifier   Parse PnpClassifier output into a list of results.
PnpClassifier         Call PnpClassifier.

"""
def classify(word, training_file, java_path=None):
    """Classify a single word with PnpClassifier.

    The word is wrapped in a one-element list and handed to
    classify_many together with training_file and java_path; the first
    (and only expected) entry of the returned list is the result.
    """
    return classify_many([word], training_file, java_path=java_path)[0]

def classify_many(words, training_file, java_path=None):
    """Run PnpClassifier on a sequence of words and return the parsed results.

    words          Sequence of words; each one is written on its own line
                   of a temporary test file.
    training_file  Path to an existing training file.
    java_path      Optional path to java, passed through to PnpClassifier.

    Returns the list built by parse_PnpClassifier from the program's
    output, one entry per word.

    Raises ValueError if training_file does not exist, and AssertionError
    if the number of results differs from the number of words.
    """
    import os
    import tempfile

    if not os.path.exists(training_file):
        raise ValueError("I could not find the file: %s" % training_file)

    # mkstemp creates the file atomically, avoiding the race inherent in
    # mktemp (name chosen first, file created later).
    fd, test_file = tempfile.mkstemp()
    try:
        out = os.fdopen(fd, 'w')
        try:
            out.writelines(["%s\n" % x for x in words])
        finally:
            # Close explicitly so the data is flushed to disk before the
            # external program reads the file.
            out.close()
        handle = PnpClassifier(training_file, test_file, java_path)
        try:
            # Consume the output now so that we can delete the test file.
            results = parse_PnpClassifier(handle)
        finally:
            handle.close()
    finally:
        if os.path.exists(test_file):
            os.unlink(test_file)
    # Explicit raise (rather than an assert statement) so the sanity check
    # is still performed when Python runs with -O.
    if len(results) != len(words):
        raise AssertionError("Unexpected number of results.")
    return results

def parse_PnpClassifier(handle):
    """Parse the results from PnpClassifier into a list of (word, prediction, scores).

    handle is a file-like object providing readlines().  Each non-blank
    line is split on ";": the last field is the word, the second-to-last
    is the prediction (converted to int), and all preceding fields are
    the scores (converted to a list of floats).  Blank lines are skipped.

    Raises ValueError if a prediction or score cannot be converted, and
    IndexError if a line has fewer than two fields.
    """
    results = []  # list of (word, prediction, scores)
    for line in handle.readlines():
        x = line.rstrip()
        if not x:
            continue
        cols = x.split(";")
        scores, prediction, word = cols[:-2], cols[-2], cols[-1]
        prediction = int(prediction)
        # Build a real list: map() would yield a one-shot iterator on
        # Python 3, which cannot be indexed, compared or re-read.
        scores = [float(s) for s in scores]
        results.append((word, prediction, scores))
    return results

def _find_java(*paths):
    """Return the path of a java executable.

    Candidates are tried in this order and the first one that is an
    existing file is returned:

    1. "java" inside each directory listed in the PATH environment
       variable (split on os.pathsep),
    2. the paths given as arguments (None/empty values are ignored),
    3. "java", "/usr/local/bin/java", "/bin/java", "/usr/bin/java".

    NOTE(review): a java found on PATH wins over an explicitly passed
    path; this ordering is preserved from the original implementation.

    Raises AssertionError if no candidate exists.
    """
    import os

    candidates = []

    # First, search in the PATH.
    search_path = os.environ.get("PATH")
    if search_path:
        for directory in search_path.split(os.pathsep):
            candidates.append(os.path.join(directory, "java"))

    # Then the caller-supplied paths, then some common places.
    candidates.extend([p for p in paths if p])   # make sure no None
    candidates.extend(["java", "/usr/local/bin/java",
                       "/bin/java", "/usr/bin/java"])

    for candidate in candidates:
        # isfile rather than exists: a directory named "java" is not usable.
        if os.path.isfile(candidate):
            return candidate

    raise AssertionError("I could not find java")

def PnpClassifier(training_file, test_file, java_path=None):
    """Start the PnpClassifier Java program and return its output handle.

    training_file  Path to the training file (relative paths are resolved
                   against the current working directory).
    test_file      Path to the test file (resolved the same way).
    java_path      Optional path to java; see _find_java for how the
                   executable is actually chosen.

    The program is run as "java PnpClassifier training_file test_file"
    with its working directory set to the location returned by
    apppath.find("PnpClassifier") (presumably the directory holding the
    PnpClassifier classes -- confirm against Extracto.apppath).

    Returns a readable file object carrying the program's standard
    output and standard error combined.  The program's standard input is
    closed immediately.
    """
    import os
    import subprocess
    from Extracto import apppath

    java_path = _find_java(java_path)

    # The child runs in a different working directory, so training_file
    # and test_file would be broken if they stayed relative paths.
    cwd = os.getcwd()
    training_file = os.path.join(cwd, training_file)
    test_file = os.path.join(cwd, test_file)

    path = apppath.find("PnpClassifier")
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact, and cwd= changes only the child's directory
    # instead of temporarily changing it for this whole process.
    proc = subprocess.Popen(
        [java_path, "PnpClassifier", training_file, test_file],
        cwd=path,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True)
    proc.stdin.close()
    return proc.stdout
