#
#
#

__author__ = "Magdalena A. Jonikas"
__version__ = "1.0"
__date__ = " 5/12/09 "
__doc__ = """ Convert bpseq to helix format for nast """

import sys

if __name__=='__main__':
    name = sys.argv[1]
    try:
        bpsIn = open('%s.bpseq' % name).readlines()
    except IOError:
        print "The bpseq formatted file %s.bpseq does not exist in this folder, quitting" % name
        sys.exit(1)
    
    print "Opened %s.bpseq to parse secondary structure" % name
    helixOut = open('%s-SS.txt' % name, 'w')
    tertOut = open('%s-PT.txt' % name, 'w')
    FDout = open('%s-FD.txt' % name, 'w')
    primaryOut = open('%s.seq' % name, 'w')

    ssList = []
    hcount = 0
    lcount = 0
    jcount = 0
    ecount = 0
    res2h = {}

    hres = []
    helices = {}
    numList = []
    pairs = {}

    last = 'start'

    for line in bpsIn:
        if line[0]=='#': continue
        cols = line.split()
        if len(cols)!=3: continue
        resi = int(cols[0])
        numList.append(resi)
        resname = cols[1]
        resj = int(cols[2])
        primaryOut.write('%s\n' % resname)
        
        if resj!=0:
            if last!='helix' and resi not in hres:
                hcount+=1
                helices[hcount]=[[],[]]
            if resi in hres:
                rest = 'h%i' % res2h[resi]
                ssList.append(rest)
                continue
            if last=='helix' and (resj+1)!=pairs[(resi-1)]:
                hcount+=1
                helices[hcount]=[[],[]]
                last='nh'
            if last=='helix' and resj!=(helices[hcount][1][-1]-1):
                hcount+=1
                helices[hcount]=[[],[]]
            pairs[resi]=resj
            pairs[resj]=resi
            rest = 'h%i' % hcount
            ssList.append(rest)
            helices[hcount][0].append(resi)
            helices[hcount][1].append(resj)
            res2h[resi]=hcount
            res2h[resj]=hcount
            hres.append(resi)
            hres.append(resj)
            last='helix'
        else:
            last='nh'
            rest='nh'
            ssList.append(rest)

    primaryOut.close()

    for h in helices.keys():
        if len(helices[h][0])<2:
            # This is not a helix, this is a tertiary contact
            tertOut.write('%i %i 1.3 200\n') % (helices[h][0][0], helices[h][0][1])
            # Need to remove these residues from the ssList and any other helix res list:
            # remove h from ssList
            # remove the actual resides from hres
            # remove residues from res2h dictionary
            # reomve residues from pairs dictionary?
        else:
            for res in helices[h][0]: helixOut.write('%i ' % res)
            helixOut.write('\n')
            for res in helices[h][1]: helixOut.write('%i ' % res)
            helixOut.write('\n\n')

    tertOut.close()
    helixOut.close()

    last='start'
    hcount = 0
    coarseSSlist = []
    count=0
    countList = []
    for i in range(len(ssList)):
        type=ssList[i]
        if len(coarseSSlist)==0 or type!=coarseSSlist[-1]:
            coarseSSlist.append(type)
            if len(coarseSSlist)>1:
                lastType = coarseSSlist[-2]
                coarseSSlist[-2]='%s %i' % (lastType, count)
            count=0
            count+=1
        else: count+=1
    lastType = coarseSSlist[-1]
    coarseSSlist[-1]='%s %i' % (lastType, count)

    ecount=0
    jcount=0
    lcount=0
    fdhList = []
    fdlList = []
    fdjList = []
    fdeList = []
    hlist = []
    h2list = []
    for i in range(len(coarseSSlist)):
        type = coarseSSlist[i].split()[0]
        l = int(coarseSSlist[i].split()[1])
        if type[0]=='h':
            h=int(type[1:])
            if type in hlist: 
                h2list.append(type)
                continue
            fdhList.append('H%i %i:%i,%i:%i 10' % (h, helices[h][0][0], helices[h][0][-1], helices[h][1][-1], helices[h][1][0]))
            hlist.append(type)
            continue
        if i==0 and type=='nh': 
            ecount+=1
            coarseSSlist[i]='e%i' % ecount
            nexttype = coarseSSlist[i+1].split()[0]
            h=int(nexttype[1:])
            fdeList.append('E%i H%i:5-%i 10' % (ecount, h, l))
            continue
        if i==(len(coarseSSlist)-1):
            ecount+=1
            coarseSSlist[i]='e%i' % ecount
            lasttype = coarseSSlist[i-1].split()[0]
            h=int(lasttype[1:])
            fdeList.append('E%i H%i:3+%i 10' % (ecount, h, l))
            continue
        lasttype = coarseSSlist[i-1].split()[0]
        nexttype = coarseSSlist[i+1].split()[0]
        if lasttype==nexttype:
            lcount+=1
            coarseSSlist[i]='l%i' % lcount
            h=int(lasttype[1:])
            fdlList.append('L%i H%i 10' % (lcount, h))
        else:
            jcount+=1
            coarseSSlist[i]='j%i' % jcount
            h1 = int(lasttype[1:])
            h2 = int(nexttype[1:])
            if lasttype in h2list: h1e=3
            else: h1e=5
            if nexttype in hlist: h2e=3
            else: h2e=5
            fdjList.append('J%i H%i:%i,H%i:%i 10' % (jcount, h1, h1e, h2, h2e))

    fdList = fdhList+fdlList+fdjList+fdeList

    for item in fdList:
        FDout.write(item)
        FDout.write('\n')
    FDout.close
    print "Wrote the following files:"
    print '%s.seq -- Primary sequence file for use with NAST to start from an unfolded sequence' % name
    print '%s-SS.txt -- Secondary structure file for use with NAST to constrain secondary structure' % name
    print '%s-PT.txt -- Tertiary contact definitions as detected from the bpseq file' % name
    print '  If you wish to constrain additional tertiary contacts in NAST, you still need to add them to this file yourself'
    print '%s-FD.txt --  Fragment definintion file based on secondary structure for use with C2A' % name

    print 'To check the fragments defined in the -FD.txt file, run "python parseFD.py %s-FD.txt"' % name
