"""

Functions:
find    Find the occurrences of amino acids.

"""
import re
from Extracto import memoize

def _make_one_letter_aa_re():
    """Build the regex for one letter amino acid codes.

    The pattern is case-insensitive and matches a single letter from the
    character class below (J, O and U are not in it), optionally followed
    by digits, with a word boundary on both sides.  For example:
        C
        C205

    """
    pattern = r"\b[ABCDEFGHIKLMNPQRSTVWXYZ]\d*\b"
    flags = re.IGNORECASE
    return re.compile(pattern, flags)
# Rebind the name to whatever memoize.memoize returns, so that later calls
# go through it (presumably the pattern is then compiled only once --
# confirm against Extracto.memoize).
_make_one_letter_aa_re = memoize.memoize(_make_one_letter_aa_re)
    
def _make_three_letter_aa_re():
    """Build the regex for three letter amino acid codes.

    The pattern is case-insensitive and matches any entry of
    ThreeLetterProtein.letters, optionally followed by digits, with a word
    boundary on both sides.  For example:
        Cys
        Gly2059

    Only the amino acid code is captured in group 1; the digits are not.

    """
    from Bio.Alphabet import ThreeLetterProtein

    alternatives = "|".join(ThreeLetterProtein.letters)
    pattern = r"\b(%s)\d*\b" % alternatives
    return re.compile(pattern, re.IGNORECASE)
# Rebind the name to whatever memoize.memoize returns, so that later calls
# go through it (presumably the pattern is then compiled only once --
# confirm against Extracto.memoize).
_make_three_letter_aa_re = memoize.memoize(_make_three_letter_aa_re)

def _make_fullname_aa_re():
    """Build the regex for amino acids written out by their full name.

    The pattern is case-insensitive.  A match starts at a word boundary,
    may begin with any run of non-whitespace characters (a prefix, as in
    "phosphoserine"), continues with one of the twenty names listed below
    and may end with digits.

    Only the name is captured in group 1; prefix and digits are part of
    the overall match but not of the group.

    """
    names = (
        'alanine', 'arginine', 'asparagine', 'aspartic acid',
        'cysteine', 'glutamine', 'glutamic acid', 'glycine',
        'histidine', 'isoleucine', 'leucine', 'lysine', 'methionine',
        'phenylalanine', 'proline', 'serine', 'threonine',
        'tryptophan', 'tyrosine', 'valine',
    )
    pattern = r"\b\S*(%s)\d*\b" % "|".join(names)
    return re.compile(pattern, re.IGNORECASE)
# Rebind the name to whatever memoize.memoize returns, so that later calls
# go through it (presumably the pattern is then compiled only once --
# confirm against Extracto.memoize).
_make_fullname_aa_re = memoize.memoize(_make_fullname_aa_re)
    
def _make_mutation_re():
    """Build the regex for mutations written as <residue><number><residue>.

    Each residue is either an entry of ThreeLetterProtein.letters or any
    single letter, and the number is one or more digits.  The pattern is
    case-insensitive, has a word boundary on both sides and has no
    capturing groups.  For example:
        R175H
        Arg175His   (assuming Arg and His are in ThreeLetterProtein.letters)

    """
    from Bio.Alphabet import ThreeLetterProtein

    three_letter_codes = "|".join(ThreeLetterProtein.letters)
    template = r"\b(?:(?:%s)|[A-Z])\d+(?:(?:%s)|[A-Z])\b"
    # The same alternation is used for the residue before and after the
    # number.
    pattern = template % (three_letter_codes, three_letter_codes)
    return re.compile(pattern, re.IGNORECASE)
# Rebind the name to whatever memoize.memoize returns, so that later calls
# go through it (presumably the pattern is then compiled only once --
# confirm against Extracto.memoize).
_make_mutation_re = memoize.memoize(_make_mutation_re)

def _find_one_letter_aa(document):
    """Return ranges of one letter amino acid codes with a residue number.

    The one letter pattern is run over str(document) and the hits are then
    filtered, so that only things like C205, C207, H175 or M75 remain.

    Hits that are dropped:
      - anything starting with 'p' or 'P', e.g. p53.  NOTE: this also
        drops a proline written as P<number>.
      - anything of 2 characters or less, i.e. a bare letter or a letter
        followed by only 1 digit.

    Returns a new list with the surviving items of refns.findall_ranges,
    in their original order.  Each item is unpacked as (start, end), with
    offsets into str(document).

    """
    from Extracto import refns

    str_document = str(document)
    ranges = refns.findall_ranges(_make_one_letter_aa_re(), str_document)

    # Collect the keepers in a new list instead of deleting from ranges
    # while walking it; every del shifts the tail of the list.
    kept = []
    for match_range in ranges:
        start, end = match_range
        # The ranges were found in str_document, so slice that and not
        # document itself, whose indexing may not line up with its str().
        s = str_document[start:end]
        if s[0].lower() == 'p':
            continue
        if len(s) <= 2:
            continue
        kept.append(match_range)
    return kept

def _find_three_letter_aa(document):
    """Return the ranges where the three letter pattern matches.

    The search runs over str(document); the result is passed through
    unchanged from refns.findall_ranges.

    """
    from Extracto import refns

    pattern = _make_three_letter_aa_re()
    text = str(document)
    return refns.findall_ranges(pattern, text)

def _find_full_names(document):
    """Return the ranges where the full name pattern matches.

    The search runs over str(document); the result is passed through
    unchanged from refns.findall_ranges.

    """
    from Extracto import refns

    pattern = _make_fullname_aa_re()
    text = str(document)
    return refns.findall_ranges(pattern, text)

def _find_mutations(document):
    """Return the ranges where the mutation pattern matches.

    The search runs over str(document); the result is passed through
    unchanged from refns.findall_ranges.

    """
    from Extracto import refns

    pattern = _make_mutation_re()
    text = str(document)
    return refns.findall_ranges(pattern, text)

def find(document):
    """Return list of (start, end).

    The ranges come from four searches over the document, concatenated in
    this order: one letter codes, three letter codes, full names,
    mutations.  The combined list is not sorted, merged or de-duplicated
    here, so ranges from different searches may overlap.

    """
    finders = (
        _find_one_letter_aa,
        _find_three_letter_aa,
        _find_full_names,
        _find_mutations,
    )
    ranges = []
    for finder in finders:
        ranges += finder(document)
    return ranges
