"""tag_set.py

Classes:
RBTTags   Supports operations on the RBT tag set.

"""
import mx.TextTools as TT
import datafile

class RBTTags:
    """Lookup table for the RBT tag set.

    The tag set is read once, when the object is constructed, from
    records that datafile.load() yields as (tag, description) pairs.
    Tags whose description marks them as plain verbs (not adverbs,
    participles or gerunds) are remembered in a second table so that
    is_verb() is a single dictionary lookup.

    """
    def __init__(self, name="rbt.tags"):
        """Load the tag set named `name` and work out which tags are verbs.

        name   Passed straight to datafile.load() together with
               delimiter='|'; every record it returns must unpack
               into exactly two items (tag, description).

        """
        # Load the tagset.
        self._tags = {}
        for tag, desc in datafile.load(name, delimiter='|'):
            self._tags[tag] = desc
        # This is a hack to replace the pound that was interpreted as
        # a comment
        self._tags['#'] = 'pound'

        # Figure out which ones are verbs.  The description is
        # upper-cased first so the keyword tests are case-insensitive.
        self._verbs = {}
        for tag, desc in self._tags.items():
            desc = TT.upper(desc)
            # Get a list of verbs, not participles or gerunds.
            if TT.find(desc, "ADVERB") >= 0:     # don't want adverbs
                continue
            if TT.find(desc, "VERB") == -1:      # has to be a verb
                continue
            if TT.find(desc, "PARTICIPLE") >= 0: # can't be a participle
                continue
            if TT.find(desc, "GERUND") >= 0:     # can't be a gerund
                continue
            self._verbs[tag] = 1

    def is_tag(self, s):
        """Return 1 if `s` is a known tag, otherwise 0.

        `s` may be several tags joined with '|'; in that case every
        component has to be a known tag.  None is never a tag.

        """
        if s is None:
            return 0
        # RBT includes tags like JJ|NP, which probably means both JJ and NP.
        for t in s.split("|"):
            if t not in self._tags:
                return 0
        return 1

    def is_verb(self, tag):
        """Return a true value if `tag` is one of the plain verb tags.

        Returns 0 for None.  Raises ValueError when `tag` is not
        accepted by is_tag().

        NOTE(review): a '|'-joined tag is looked up as a whole, so it
        is only reported as a verb if that exact string is a key of
        the verb table -- confirm this is the intended behaviour for
        compound tags.

        """
        if tag is None:
            return 0
        if not self.is_tag(tag):
            # Call-style raise works on both Python 2 and Python 3.
            raise ValueError("%s is not a valid tag" % tag)
        return tag in self._verbs
    

##class TagSet:
##    def __init__(self, name):
##        self._tagset = name

##        # Load the tagset.
##        self._tags = {}
##        lines = datafile.load(name, delimiter="/")
##        for tag, desc in lines:
##            self._tags[tag] = desc

##        self._verbs = {}

##    def is_tag(self, s):
##        return self._tags.has_key(s)

##    def is_verb(self, tag):
##        if not self.is_tag(tag):
##            raise ValueError, "%s is not a valid tag" % tag
##        if not self._verbs.has_key(tag):
##            self._verbs[tag] = 0
##            desc = TT.upper(self._tags[tag])
##            # Get a list of verbs, not participles or gerunds.
##            if TT.find(desc, "ADVERB") >= 0:     # don't want adverbs
##                continue
##            if TT.find(desc, "VERB") == -1:      # has to be a verb
##                continue
##            if TT.find(desc, "PARTICIPLE") >= 0: # can't be a participle
##                continue
##            if TT.find(desc, "GERUND") >= 0:     # can't be a gerund
##                continue
##            self._verbs[tag] = 1
##        return self._verbs[tag]

####    def is_noun_phrase(self, tag):
####        if not self.is_tag(tag):
####            raise ValueError, "%s is not a valid tag" % tag
####        return tag[:2] == 'NN' or tag[:2] == 'JJ'
