"""

Functions:
find    Find the units in a string.

"""
from Extracto import memoize


# Regex alternation of metric prefixes: the one-letter abbreviations first,
# then the spelled-out names (leading letter accepted in either case).
METRIC_PREFIXES = "|".join((
    r"[MmKkDdCcNnPp]",
    r"[Mm]ega",
    r"[Kk]ilo",
    r"[Cc]enti",
    r"[Mm]illi",
    r"[Mm]icro",
    r"[Pp]ico",
    r"[Nn]ano",
    ))

# Regex fragments for units that may carry a metric prefix.  Each fragment
# accepts the abbreviation and, where given, the spelled-out name.  All
# groups are non-capturing so a fragment can be embedded in a larger
# pattern without introducing capture groups.
METRIC_UNITS = [
    r"[mM](?:eter)?",  # meter
    r"Pa(?:scal)?",    # pascal
    r"Da(?:lton)?",    # dalton
    r"[Ll](?:iter)?",  # liter
    r"Hz",             # hertz
    r"[Bb](?:ase)?",   # base
    r"[Jj](?:oule)?",  # joule
    r"[Mm](?:ol)?",    # mole  (was "(:?ol)?", which also accepted "m:ol")
    ]

# Regex fragments for units that never take a metric prefix.
# NOTE: every entry needs its trailing comma -- adjacent string literals
# are silently concatenated into a single pattern otherwise.
UNITS = [
    r"mmHg",           # millimeters of mercury
    r"[Bb][Pp]",       # base pair
    ]

def _make_re(unit_expression, is_metric):
    """Compile the regular expression that recognizes a single unit.

    unit_expression  regex fragment describing the unit itself
    is_metric        true if the unit may be preceded by a metric prefix

    The compiled pattern starts at a word boundary, accepts any run of
    digits and periods, then the optional metric prefix (metric units
    only), then the unit, and ends at a word boundary.

    """
    import re
    template = r"\b[\d.]*%s%s\b"
    # Non-metric units get no prefix at all; metric ones get an optional one.
    optional_prefix = ''
    if is_metric:
        optional_prefix = r"(?:%s)?" % METRIC_PREFIXES
    return re.compile(template % (optional_prefix, unit_expression))
# Rebind to the wrapped version from memoize.memoize (presumably so each
# (unit_expression, is_metric) pair is compiled only once -- confirm
# against the memoize module).
_make_re = memoize.memoize(_make_re)

def find(document):
    """Return list of (start, end).

    document is converted with str() and searched once per unit pattern:
    first every entry of METRIC_UNITS (metric prefix allowed), then every
    entry of UNITS (no prefix).  The (start, end) offsets of all matches
    are handed to rangefns.munge and its result is returned.

    """
    from Extracto import refns
    from Extracto import rangefns

    text = str(document)
    hits = []
    # Each row pairs a unit table with the is_metric flag for _make_re.
    for unit_table, is_metric in ((METRIC_UNITS, 1), (UNITS, 0)):
        for unit in unit_table:
            pattern = _make_re(unit, is_metric)
            hits.extend(refns.re_findall(pattern, text))
    spans = [(hit.start(), hit.end()) for hit in hits]
    # NOTE(review): munge presumably merges/normalizes overlapping spans --
    # confirm against rangefns.
    return rangefns.munge(spans)
