"""

Functions:
split_sentence
split_sentence_into_words
tokenize

"""

def split_sentence(sentence, word1, index1, word2, index2,
                   preserve_case=0):
    """Split *sentence* around two non-overlapping co-occurring words.

    word1/word2 are the word strings and index1/index2 their character
    offsets within *sentence*; the two may be supplied in either order.

    Return a 5-tuple (left, left_word, middle, right_word, right) where
    the three text segments are whitespace-stripped and, unless
    preserve_case is true, taken from the lowercased sentence.  The word
    strings themselves are returned exactly as passed in.

    Raises AssertionError if the two word spans overlap.
    """
    # Normalize argument order so lword/lindex is the earlier occurrence.
    lword, lindex, rword, rindex = word1, index1, word2, index2
    if rindex < lindex:
        lword, lindex, rword, rindex = rword, rindex, lword, lindex
    assert lindex+len(lword) <= rindex, "cooccurrence words overlap!"

    if not preserve_case:
        sentence = sentence.lower()

    left = sentence[:lindex].strip()
    middle = sentence[lindex+len(lword):rindex].strip()
    # Fix: previously sentence[rindex:], which wrongly included the right
    # word itself; exclude it for symmetry with left/middle.
    right = sentence[rindex+len(rword):].strip()

    return left, lword, middle, rword, right

def split_sentence_into_words(sentence, word1, index1, word2, index2,
                              preserve_case=0):
    """Like split_sentence(), but with each text segment tokenized.

    Return a 5-tuple (words on left, left_word, words in middle,
    right_word, words on right), where the word lists are produced by
    tokenize().
    """
    pieces = split_sentence(sentence, word1, index1, word2, index2,
                            preserve_case=preserve_case)
    left_text, lword, mid_text, rword, right_text = pieces
    return (tokenize(left_text), lword,
            tokenize(mid_text), rword,
            tokenize(right_text))

def tokenize(s):
    """Return the tokens of *s* that contain at least one alphanumeric
    character, as a list of strings."""
    from Extracto import tokenizer
    import mx.TextTools as TT
    # Keep only tokens containing an alphanumeric character (setfind
    # returns a position >= 0 on a hit, -1 otherwise).
    return [tok for tok in tokenizer.tokenize_str(s)
            if TT.setfind(tok, TT.alphanumeric_set) >= 0]

