"""

Functions:
find      Find the occurrences of semantic gene type words in a document.

"""
from Extracto import memoize

def _get_gene_types():
    """Build a trie keyed by the words in the "semantic_gene_types" data file.

    Each line of the data file is right-stripped and passed through
    comments.remove; lines that come out empty are skipped.  Every
    remaining word is lowercased and stored in the trie with the value 1.

    Returns the populated trie.
    """
    from Bio import trie
    from Extracto import datafile
    from Extracto import comments

    gene_types = trie.trie()
    for line in datafile.open("semantic_gene_types"):
        word = comments.remove(line.rstrip())
        if not word:
            # Nothing left on this line once the comment is removed.
            continue
        gene_types[word.lower()] = 1
    return gene_types
# Rebind the name to the memoize.memoize-wrapped function (presumably so
# the trie is built only once -- confirm against Extracto.memoize).
_get_gene_types = memoize.memoize(_get_gene_types)

def _find(sentence):
    """Return a list of (start, end) for gene type words in sentence.

    The sentence is converted with str() and lowercased before it is
    searched, to match the lowercased keys in the gene type trie.
    """
    from Bio import triefind

    gene_types = _get_gene_types()
    lowered = str(sentence).lower()
    # Each hit is indexed as hit[1], hit[2]; presumably find_words yields
    # (word, start, end) tuples -- confirm against Bio.triefind.
    spans = []
    for hit in triefind.find_words(lowered, gene_types):
        spans.append((hit[1], hit[2]))
    return spans

def find(document):
    """Return a list of (start, end).

    The document is split with support.doc2statements, which yields
    (statement, offset) pairs.  The ranges found in each statement are
    shifted by that statement's offset before being collected.
    """
    from Extracto.genename import support

    found = []
    for statement, offset in support.doc2statements(document):
        for start, end in _find(statement):
            found.append((start + offset, end + offset))
    return found
