# This is a package.

from Extracto.genename import support

from Extracto.genename import Feature
class SymbolFeature(Feature.Feature):
    """Feature whose vector is assembled from a fixed list of sub-modules.

    Each name in ``_FEATURES`` identifies a module inside this package that
    provides a ``make_vector_with_context`` function; the partial vectors
    those functions return are joined, in list order, into one vector.
    """

    def __init__(self):
        """Initialise the base feature, the module list and score caching."""
        # Alternatives that were left commented out at this spot:
        # "symbol.me.classifier" and "symbol.nb.classifier".
        Feature.Feature.__init__(self, "symbol.svm.classifier")

        # Module names (relative to this package).  Their order fixes the
        # order in which the partial vectors are concatenated.
        self._FEATURES = ['appearance', 'morphology', 'context', 'endswith_in']

        from Extracto import memoize
        # Cache is not referenced in this method; the import is kept exactly
        # as it was in case importing it matters elsewhere.
        from Extracto import Cache

        # Assume a short-running process and cache every score.  The
        # score_with_context method is not defined in this class, so it is
        # looked up on the instance and then shadowed by a memoized wrapper
        # keyed through support.context2word.
        cached_scorer = memoize.memoize(
            self.score_with_context, args2key=support.context2word)
        self.score_with_context = cached_scorer

    def _make_vector_with_context(self, document, sentence_range, word_range):
        """Return the concatenation of every sub-module's vector.

        The three arguments are forwarded unchanged to each sub-module's
        ``make_vector_with_context``.
        """
        combined = []
        for feature_name in self._FEATURES:
            # A non-empty fromlist makes __import__ return the sub-module
            # itself rather than the top-level package.
            qualified_name = '%s.%s' % (__name__, feature_name)
            submodule = __import__(qualified_name, {}, {}, [__name__])
            combined += submodule.make_vector_with_context(
                document, sentence_range, word_range)
        return combined

    def _describe_vector(self):
        """Return a list holding element 0 of every ``_FEATURES`` entry."""
        # NOTE(review): the entries are module-name strings, so element 0 is
        # the first character of each name -- confirm this is intended.
        labels = []
        for entry in self._FEATURES:
            labels.append(entry[0])
        return labels

    def _can_handle_context(self, document, sentence_range, word_range):
        """Accept every context: always return 1."""
        return 1

# Create the feature instance and pass it, together with this module's name,
# to support.bind_methods_to_module.
# NOTE(review): find() below calls can_handle_context and score_with_context
# as module-level names that are not defined in this file, so presumably this
# call is what installs them from the instance -- confirm in support.
support.bind_methods_to_module(__name__, SymbolFeature())

def find(document):
    """Score the contexts of document and return (start, end, score) tuples.

    Contexts come from support.doc2context (called with fancy_tokenizer=1).
    Those rejected by can_handle_context are skipped.  For the rest, start
    and end are the first two items of the context's word range and score
    is math.exp of the value returned by score_with_context.
    """
    import math

    results = []
    contexts = support.doc2context(document, fancy_tokenizer=1)
    # The first element of each context triple is not used here.
    for _unused, statement_range, word_range in contexts:
        if can_handle_context(document, statement_range, word_range):
            raw_score = score_with_context(
                document, statement_range, word_range)
            start, end = word_range[0], word_range[1]
            results.append((start, end, math.exp(raw_score)))
    return results