#!/usr/bin/env python

"""
=========================================================================
Package:    - Code for the Graphical User Interface frontend to the
              Haddock model package in the CCPN data model. 
            - Code for the export of a Haddock compatible project. A 
              Haddock compatible project can either be a parameter file
              ready for submission to the Haddock webserver or a
              directory structure with necessary files for use with a 
              locally installed version of Haddock.

Dependencies: The CCPN Haddock package requires CCPN data model version
              2.0 or higher. The export of a webserver compatible 
              parameter file requires Haddock webserver version 2.1 or 
              higher and a valid user account. The export of a 'classic' 
              Haddock project requires Haddock version 2.0 or higher.

Copyright and License information:
              The Haddock data model as implemented in the CCPN data
              model as well as the use of CCPN GUI code elements is 
              licenced to the CCPN Projects (Copyright (C) 2008) and
              distributed under the terms of the GNU Lesser General
              Public License.
            
              The Haddock project export code as well as the use of 
              Haddock software is covered in the Haddock License
              agreement (Copyright (C) 2008 Haddock Project, Bijvoet
              Center for Biomolecular Research, Utrecht University,
              The Netherlands).

GNU LGPL:        This library is free software; you can redistribute it 
              and/or modify it under the terms of the GNU Lesser General 
              Public License as published by the Free Software 
              Foundation; either version 2.1 of the License, or (at 
              your option) any later version.
 
              A copy of this license can be found in LGPL.license
 
              This library is distributed in the hope that it will be 
              useful, but WITHOUT ANY WARRANTY; without even the implied 
              warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
              PURPOSE. See the GNU Lesser General Public License for 
              more details.
 
              You should have received a copy of the GNU Lesser General 
              Public License along with this library; if not, write to 
              the Free Software Foundation, Inc., 59 Temple Place, Suite 
              330, Boston, MA 02111-1307 USA.

Information:  For further information regarding CCPN, please contact:
              - CCPN website (http://www.ccpn.ac.uk/)
              - email: ccpn@bioc.cam.ac.uk
              
              For further information regarding Haddock, please contact
              Alexandre M.J.J. Bonvin:
              - http://haddock.chem.uu.nl
              - email: a.m.j.j.bonvin@uu.nl    

Citing:          If you are using this software for academic purposes, we 
                suggest quoting the following references:

              For CCPN:    
              Rasmus H. Fogh, Wayne Boucher, Wim F. Vranken, Anne
              Pajon, Tim J. Stevens, T.N. Bhat, John Westbrook, John M.C. 
              Ionides and Ernest D. Laue (2005). A framework for 
              scientific data modeling and automated software development. 
              Bioinformatics 21, 1678-1684.
            
              For Haddock:
              Cyril Dominguez, Rolf Boelens and Alexandre M.J.J. Bonvin 
              (2003). HADDOCK: a protein-protein docking approach based 
              on biochemical and/or biophysical information. 
              J. Am. Chem. Soc. 125, 1731-1737.
            
              S.J. de Vries, A.D.J. van Dijk, M. Krzeminski, M. van Dijk, 
              A. Thureau, V. Hsu, T. Wassenaar and A.M.J.J. Bonvin (2007) 
              HADDOCK versus HADDOCK: New features and performance of 
              HADDOCK2.0 on the CAPRI targets. 
              Proteins: Struc. Funct. & Bioinformatics 69, 726-733.
=========================================================================
"""

import sys, math
from os.path     import join, exists, split, isdir, isfile
from os          import rename

from ccp.general.Io                      import getStdChemComps, getChemComp
from ccpnmr.analysis.core.MoleculeBasic  import findMatchingChain, getLinkedResidue
from ccpnmr.analysis.core.StructureBasic import getBestNamingSystem, findMatchingMolSystemAtom
from ccpnmr.format.converters.CnsFormat  import CnsFormat
from ccp.util.Molecule                   import makeChain, addMolResidues, makeMolecule, nextChainCode
from memops.gui.MessageReporter          import showOkCancel,showWarning,showYesNo
from memops.universal.Util               import returnInt, returnFloat

from HaddockLocal                        import rdcProtocolStore, daniProtocolStore

def addRdcParam(run,termId):

    """Description: Create the RDC energy protocol on a run. A new HaddockEnergyTerm
                    with code 'rdcProtocolStore' is made and one EnergyTermParameter
                    is added to it for every entry in rdcProtocolStore['terms'].
       Input:       Haddock run, termId. The termId has to match the termId of the
                    RDC file so that the file can be linked to its energy protocol.
       Output:      None
    """
    protocolTerm = run.newHaddockEnergyTerm(code='rdcProtocolStore',termId=termId)

    for code, value in rdcProtocolStore['terms'].items():
        protocolTerm.newEnergyTermParameter(code=code,value=value)

def addDaniParam(run,termId):

    """Description: Create the DANI energy protocol on a run. A new HaddockEnergyTerm
                    with code 'daniProtocolStore' is made and one EnergyTermParameter
                    is added to it for every entry in daniProtocolStore['terms'].
       Input:       Haddock run, termId. The termId has to match the termId of the
                    DANI file so that the file can be linked to its energy protocol.
       Output:      None
    """
    protocolTerm = run.newHaddockEnergyTerm(code='daniProtocolStore',termId=termId)

    for code, value in daniProtocolStore['terms'].items():
        protocolTerm.newEnergyTermParameter(code=code,value=value)

def getPdbString(model,chains,chainRename=None,blankchain=False,fileEnd='END'):

    """Description: Render a structure model as a coordinate file string using the
                    CnsFormat converter (nothing is written to disk, noWrite=True).
       Input:       model       - structure model to export (its root is used as project)
                    chains      - chains handed to the converter as exportChains
                    chainRename - when set, used as segment ID (e.g. the haddock
                                  partner code); otherwise the segment ID is a blank
                    blankchain  - when True the chain ID column is exported as a
                                  blank, otherwise it is the same as the segment ID
                    fileEnd     - end statement passed to the coordinate file writer
       Output:      Coordinate file contents as a String
    """
    project = model.root
    cnsFormat = CnsFormat(project,guiParent=None)

    # Segment ID: the rename code when given, a single blank otherwise
    forceExportSegId = chainRename or " "

    # Chain ID follows the segment ID unless a blank chain column is requested
    forceExportChainId = forceExportSegId
    if blankchain: forceExportChainId = " "

    # NOTE(review): 'chainlist' is never used afterwards; the lookup is kept so
    # the behaviour (including a possible failure when the project has no
    # molSystem) stays as it was - confirm whether 'chains' was meant here.
    if not chains: chainlist = project.findFirstMolSystem().sortedChains()

    exportOptions = {'structures': [model],
                     'forceExportSegId': forceExportSegId,
                     'forceExportChainId': forceExportChainId,
                     'resetMapping': True,
                     'exportChains': chains,
                     'minimalPrompts': True,
                     'noWrite': True}
    cnsFormat.writeCoordinates('void', **exportOptions)

    coordinateFile = cnsFormat.coordinateFile
    coordinateFile.write(endStatement=fileEnd,writeString=True)

    return cnsFormat.coordinateFile.coordFileString

def getAirSegments(haddockPartner):

    """Description: Collect the residues flagged as 'active' or 'passive' for the
                    ambiguous interaction restraints of a haddock partner. The
                    numbers reported are the seqCode of the residue linked to each
                    haddock residue, sorted per chain.
       Input:       Haddock.HaddockPartner
       Output:      Dict of Lists (Dict['active'/'passive'] = List of residue numbers)
    """
    activeCodes = []
    passiveCodes = []

    for hChain in haddockPartner.chains:
        # Only the seqCode is reported, so ordering on the seqCode alone suffices
        ordered = sorted(hChain.residues, key=lambda hRes: hRes.residue.seqCode)

        for hResidue in ordered:
            state = hResidue.interaction
            if state == 'active':
                activeCodes.append(hResidue.residue.seqCode)
            elif state == 'passive':
                passiveCodes.append(hResidue.residue.seqCode)

    return {'active': activeCodes, 'passive': passiveCodes}

def _residuesToZones(seqCodes):

    """Collapse residue numbers into a sorted list of (start, end) tuples of consecutive runs.
       Repeated numbers are counted once so that zones can never overlap.
    """
    zones = []
    for seqCode in sorted(set(seqCodes)):
        if zones and seqCode == zones[-1][1] + 1:
            # Directly follows the open zone: extend it
            zones[-1] = (zones[-1][0], seqCode)
        else:
            zones.append((seqCode, seqCode))

    return zones

def getFlexibleResidues(haddockPartner,export='zone'):

    """Description: Get lists of the flexible and semi-flexible residues for a
                    haddock interacting partner. The numbers reported are the
                    seqCode of the residue linked to each haddock residue.
       Arguments:   export = 'zone', will return semi and fully flexible residues as a
                    list of (start, end) tuples, one per run of consecutive residue
                    numbers. Numbers occurring more than once (e.g. the same number
                    in two chains of the partner) are merged, so zones never overlap.
                    Any other value (e.g. export = 'list') will return semi and fully
                    flexible residues as a plain list of residue numbers, sorted
                    per chain.
       Input:       Haddock.HaddockPartner
       Output:      Dict of Lists (Dict['semi'/'full'] = List of residues or zones)
    """
    segmentDict = {'semi':[],'full':[]}

    for chain in haddockPartner.chains:
        # Sort on the seqCode only; the residue objects themselves need not be orderable
        for hResidue in sorted(chain.residues, key=lambda r: r.residue.seqCode):
            flexibility = hResidue.flexibility
            if flexibility in segmentDict:
                segmentDict[flexibility].append(hResidue.residue.seqCode)

    if export == 'zone':
        for flex in ('semi','full'):
            segmentDict[flex] = _residuesToZones(segmentDict[flex])

    return segmentDict

def makeBackup(infile):

    """Move an existing file or directory out of the way by renaming it to the first
       free name of the form <infile>_1 ... <infile>_99. Nothing is done when infile
       does not exist or when all 99 backup names are already taken.
    """
    if not (isfile(infile) or isdir(infile)): return

    for index in range(1, 100):
        backup = infile + '_' + str(index)

        # Name already in use by an earlier backup, try the next one
        if isfile(backup) or isdir(backup): continue

        rename(infile, backup)
        print("File %s exists\nrename to %s" % (infile, backup))
        break

def copyRun(run, nmrConstraintStore=None):
    """Descrn: Make a HADDOCK run based upon an existing one. The new run is made
               in the same haddock project and receives copies of:
                 - all attributes whose changeability is 'changeable'
                 - the links to the symmetry restraints
                 - the scoring weights (stage, term, value)
               - the haddock energy terms together with their energy term
                 parameters. The constraint list link of a term is only carried
                 over when the new run uses the same constraint store as the
                 original run.
       Inputs: Haddock.Run, NmrConstraint.NmrConstraintStore
       Output: Haddock.Run (the new run)
    """

    project = run.haddockProject

    runB = project.newRun(nmrConstraintStore=nmrConstraintStore)

    for metaAttr in run.metaclass.attributes:
        if metaAttr.changeability == 'changeable':
            attrName = metaAttr.name
            setattr(runB,attrName,getattr(run,attrName))

    for symmetry in run.symmetryRestraints:
        runB.addSymmetryRestraint(symmetry)

    for scoringWeight in run.scoringWeights:
        runB.newScoringWeight(stage=scoringWeight.stage,
                              term=scoringWeight.term,
                              value=scoringWeight.value)

    # Constraint lists can only be shared when both runs use the same store
    sameStore = run.nmrConstraintStore is nmrConstraintStore

    for term in run.haddockEnergyTerms:
        termB = runB.newHaddockEnergyTerm(code=term.code,
                                          termId=term.termId,
                                          name=term.name,
                                          fileName=term.fileName,
                                          details=term.details)

        if sameStore:
            termB.constraintList = term.constraintList

        for param in term.energyTermParameters:
            # Children of a HaddockEnergyTerm are EnergyTermParameters; the factory
            # is newEnergyTermParameter, as used by addRdcParam/addDaniParam
            # (was: newEnergyParameter)
            termB.newEnergyTermParameter(code=param.code, value=param.value)

    return runB

def setRunConstraintSet(run, nmrConstraintStore):
    """Descrn: Set the constraint set for a haddock run. When the constraint set
               differs from the one the run already has, the constraint list link
               of every haddock energy term of the run is cleared and the new
               constraint set is stored on the run. Nothing changes when the run
               already uses the given constraint set.
       Inputs: Haddock.Run, NmrConstraint.NmrConstraintStore
       Output: Haddock.Run (the same run that was passed in)
    """

    if run.nmrConstraintStore is nmrConstraintStore: return run

    # Links into the previous constraint set are dropped
    for energyTerm in run.haddockEnergyTerms: energyTerm.constraintList = None

    # The value is written straight into the instance dictionary
    # NOTE(review): presumably because the link cannot be changed through the
    # normal attribute setter - confirm against the data model
    run.__dict__['nmrConstraintStore'] = nmrConstraintStore

    return run

def setPartnerEnsemble(haddockPartner, ensemble):
    """Descrn: Couple a haddock partner to a structure ensemble: the partner takes
               over the molSystem of the ensemble and its haddock chains are set
               (via setPartnerChains) from the chains linked to the coordinate
               chains of the ensemble.
       Inputs: Haddock.HaddockPartner, MolStructure.StructureEnsemble
       Output: None
    """

    haddockPartner.molSystem = ensemble.molSystem

    setPartnerChains(haddockPartner, [coordChain.chain for coordChain in ensemble.coordChains])

def setPartnerChains(haddockPartner, chains):
    """Descrn: Set the haddock chains (mappings to ccp molSystem chains) for a haddock partner
               using a list of MolSystem.Chains - adds and deletes haddock chains as needed.
               Also sets isDna and forceFieldCode of the partner from the molecule type of
               the chains and (re)numbers the haddock residues: haddockSeqId runs from 1
               upwards over all sorted chains of the partner.
               All chains must be of the same molecule type; if they are not a message is
               printed and the partner is left untouched.
       Inputs: Haddock.HaddockPartner, List of MolSystem.Chains
       Output: None
    """

    # Validate first so that a failure cannot leave the partner half-modified
    # (previously stale haddock chains were deleted before this check)
    molType = None
    for chain in chains:
        chainMolType = chain.molecule.molType
        if molType is None: molType = chainMolType
        elif molType != chainMolType:
            print('CCPN-HADDOCK setPartnerChains failed: Chains not of same type')
            return

    # Remove haddock chains that refer to chains no longer wanted
    for hChain in haddockPartner.chains:
        if hChain.chain not in chains: hChain.delete()

    # Add haddock chains for the chains not yet mapped
    for chain in chains:
        if not haddockPartner.findFirstChain(chain=chain): haddockPartner.newChain(chain=chain)

    if molType in ('DNA','RNA'):
        # isDna is set for RNA as well; the force field code follows the molecule type
        haddockPartner.isDna = True
        haddockPartner.forceFieldCode = molType

    else:
        haddockPartner.isDna = False
        haddockPartner.forceFieldCode = 'TOPALLHDG'

    # Curate all haddock residue numbers and CCPN residue links
    hSeqId = 1
    for hChain in haddockPartner.sortedChains():
        residues  = hChain.chain.sortedResidues()
        hResidues = hChain.sortedResidues()

        # Existing haddock residues, looked up by the residue they refer to
        resDict = {}
        for hResidue in hResidues: resDict[hResidue.residue] = hResidue

        for residue in residues:
            hResidue = resDict.get(residue)

            if hResidue is None: hChain.newResidue(haddockSeqId=hSeqId,residue=residue)
            else: hResidue.haddockSeqId = hSeqId

            hSeqId += 1

def getStructureFromFile(molSystem, fileName, fileType='rough', doWarnings=True):
    """Descrn: Creates a structure ensemble belonging to a molecular system
               by loading a PDB style file. The fileType selects the reader:
               'true' - makeStructureDictFromPdb with fileType 'pdb'
               'cns'  - makeStructureDictFromPdb with fileType 'cns'
               anything else - makeStructureDictFromRoughPdb
       Inputs: MolSystem.MolSystem, String (File Name), String (file type), Boolean
       Output: The result of makeStructures, or an empty list when the file
               gave an empty structure dictionary
    """

    if fileType == 'true': readerType = 'pdb'
    elif fileType == 'cns': readerType = 'cns'
    else: readerType = None

    if readerType is None: strucDict = makeStructureDictFromRoughPdb(fileName)
    else: strucDict = makeStructureDictFromPdb(fileName, fileType=readerType)

    if not strucDict: return []

    return makeStructures(strucDict, molSystem, doWarnings=doWarnings)

def makeStructureDictFromRoughPdb(fileName):
    """Descrn: Make a structure dictionary from a non-standard PDB file:
               [modelNum][chainId][seqId][(atomName, altLoc)] = Dict with keys
               'resName', 'seqCode', 'insertCode', 'segId', 'x', 'y', 'z',
               'occupancy', 'bFactor' and 'atomType'.
               Only ATOM records are read. Every ENDMDL record starts a new model.
               seqId is a counter that starts at 1 for each model and is raised
               for every new (chainId, seqCode, insertCode) combination; it is
               not restarted for a new chain. When the chain column is empty the
               segment ID is used as chain ID, or 'A' if that is empty too.
       Inputs: String (file name)
       Output: Dictionary
       Raises: Exception when the same atom name and alternate location occur
               twice for one residue of a model
    """

    strucDict = {}

    # (chainId, seqCode, insertCode) -> seqId, for the model being read
    trackDict = {}
    lastSeqId = 0
    modelNum = 0

    fileHandle = open(fileName)
    try:
        for line in fileHandle:
            key = line[0:6].strip()

            if key == 'ENDMDL':
                modelNum += 1
                lastSeqId = 0
                trackDict = {}
                continue

            if key != 'ATOM': continue

            # Fixed column layout of the ATOM record
            atomName   = line[12:16].strip()
            altLoc     = line[16:17]
            resName    = line[17:20].strip()
            chainId    = line[21:22].strip()
            seqCode    = returnInt(line[22:26])
            insertCode = line[26:27]
            x          = returnFloat(line[30:38])
            y          = returnFloat(line[38:46])
            z          = returnFloat(line[46:54])
            occupancy  = returnFloat(line[54:60])
            bFactor    = returnFloat(line[60:66])
            segId      = line[72:76].strip()
            atomType   = line[76:78].strip()

            if not len(chainId) == 1:
                if segId: chainId = segId
                else: chainId = 'A'

            chainDict = strucDict.setdefault(modelNum, {}).setdefault(chainId, {})

            resKey = (chainId,seqCode,insertCode)
            seqId = trackDict.get(resKey)
            if seqId is None:
                lastSeqId += 1
                seqId = trackDict[resKey] = lastSeqId
                chainDict[seqId] = {}

            atomDict = chainDict[seqId]
            atomKey = (atomName, altLoc)
            if atomKey in atomDict:
                raise Exception("Duplicate record: %s %s %s %s %s" % (chainId, seqCode, insertCode, atomName, altLoc))

            atomDict[atomKey] = {'resName': resName,
                                 'seqCode': seqCode,
                                 'insertCode': insertCode,
                                 'segId': segId,
                                 'x': x,
                                 'y': y,
                                 'z': z,
                                 'occupancy': occupancy,
                                 'bFactor': bFactor,
                                 'atomType': atomType}

    finally:
        # The handle used to be left open
        fileHandle.close()

    return strucDict

def makeStructures(strucDict, molSystem, doWarnings=False):
    """Descrn: Makes structure ensemble from a structure dictionary
               [model][chainCode][seqId][(atomName, altLoc)] = Dict of atom data
               in a mol system. Every chain in the first model (key 0) is matched
               to a chain of the mol system with findMatchingChain; if none is
               found a new molecule and chain are made (after confirmation when
               doWarnings is set and the mol system already has chains).
               Options to suppress warnings and thus automatically link
               non-identical, but similar chains or make a new chain if none
               is found.
       Inputs: Dictionary, MolSystem.MolSystem, Boolean
       Output: MolStructures.StructureEnsemble, or None when the validity check
               fails or no coordinate chain could be made
    """
    project = molSystem.root
    eId = 1
    while project.findFirstStructureEnsemble(molSystem=molSystem, ensembleId=eId): eId += 1

    structure = project.newStructureEnsemble(molSystem=molSystem, ensembleId=eId)

    models = []
    for m in strucDict.keys(): models.append((m, structure.newModel()))

    usedChains = []
    failedAtoms = []
    # Cache: 'atomName:residue:namingSystem' -> mol system atom
    msAtomDict = {}
    # Chains and residues are taken from the first model only
    chainsDict = strucDict[0]

    chCodes = list(chainsDict.keys())
    chCodes.sort()
    nOrigChCodes = len(chCodes)
    iChCode = 0
    # NOTE: chCodes may grow inside this loop (split DNA chains are appended)
    for chCode in chCodes:
        iChCode += 1
        resDict = chainsDict[chCode]
        seqIds  = sorted(resDict.keys())

        chemComps  = []
        ccpCodes   = []
        seqIdsGood = []

        molTypes = set()
        resNames = []
        for seqId in seqIds:
            ll = list(resDict[seqId].keys())
            resName   = resDict[seqId][ll[0]]['resName']
            atomNames = [tt[0] for tt in ll]
            chemComp  = getBestChemComp(project, resName, atomNames)
            resNames.append(resName)

            if chemComp:
                chemComps.append(chemComp)
                ccpCodes.append(chemComp.ccpCode)
                seqIdsGood.append(seqId)
                molTypes.add(chemComp.molType)

        if not seqIdsGood:
            msg  = 'Could not find any matching CCPN ChemComps for sequence: '
            msg += ' '.join(resNames)
            showWarning('Structure Creation Failed',msg)
            continue

        # Only residues with a known ChemComp are used from here on
        seqIds = seqIdsGood
        msChain, mapping = findMatchingChain(molSystem, ccpCodes,
                                                           excludeChains=usedChains,
                                                           doWarning=doWarnings,
                                                           molTypes=molTypes)

        if msChain is None:
            # no matching chain - make new one
            # use existing chain code from PDB file if possible
            sysChainCode = chCode.strip()
            if not sysChainCode or molSystem.findFirstChain(code=sysChainCode):
                sysChainCode = nextChainCode(molSystem)

            if molTypes == set(('DNA',)) and iChCode <= nOrigChCodes:
                # check for special case:
                # double stranded DNA with a single chain code
                oneLetterCodes = [cc.code1Letter for cc in chemComps]
                if None not in oneLetterCodes:
                    # All ChemComps are (variants of) Std bases.
                    # Not sure certain, but this should mostly work.
                    nCodes = len(oneLetterCodes)
                    halfway, remainder = divmod(nCodes, 2)
                    if not remainder:
                        # even number of codes
                        resMap = { 'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G' }
                        for ii in range(halfway):
                            if oneLetterCodes[ii] != resMap[oneLetterCodes[-1-ii]]: break
                        else:
                            # the second half is the reverse complement of the first half.
                            # treat as two separate DNA chains
                            # Move second half to a new chain, and remove from this chain.
                            # NOTE(review): only the dictionary of the first model is
                            # re-arranged here; the other models keep the original
                            # chain layout - check behaviour for multi-model input.
                            newChCode = chCode
                            while newChCode in chCodes: newChCode=chr(ord(newChCode)+1)
                            newResDict = chainsDict[newChCode] = {}
                            for ii in range(halfway, nCodes):
                                iix = seqIds[ii]
                                newResDict[iix] = resDict[iix]
                                del resDict[iix]

                            # put both chains on end of list for later (re)processing
                            chCodes.append(chCode)
                            chCodes.append(newChCode)
                            continue

            codes = (molSystem.code, sysChainCode)
            msg  = 'Structure residue sequence (chain %s) not in molecular system. ' % chCode
            msg += 'Make new molecular system %s chain (%s) for this sequence?' % codes
            if not doWarnings or (not molSystem.chains) or (doWarnings and showOkCancel('Confirm',msg)):
                atomNames   = list(resDict[seqIds[0]].keys())

                molType   = chemComps[0].molType
                ccpCodes0 = []
                startNum  = resDict[seqIds[0]][atomNames[0]]['seqCode']

                project = molSystem.root
                molecule = makeMolecule(project, molType, [])

                # Residues are added in stretches: a new stretch starts whenever
                # the molecule type changes or the seqIds are not consecutive
                for i in range(len(seqIds)):
                    chemComp = chemComps[i]

                    if (chemComp.molType != molType) or (i and (seqIds[i] != seqIds[i-1]+1)):
                        newMolResidues = addMolResidues(molecule, molType, ccpCodes0, startNum=startNum)

                        # set seqCodes and seqInsertCodes
                        xSeqIds = seqIds[i-len(newMolResidues):i]
                        for j,x in enumerate(xSeqIds):
                            rr = newMolResidues[j]
                            for dummy in resDict[x]:
                                # future-safe of getting a random atomDict for seqId x
                                dd = resDict[x][dummy]
                                break
                            rr.seqCode = dd['seqCode']
                            rr.seqInsertCode = dd['insertCode']

                        ccpCodes0 = [chemComp.ccpCode,]
                        molType   =  chemComp.molType
                        # NOTE(review): the first stretch starts at a seqCode, later
                        # stretches at a seqId - confirm this is intended
                        startNum  = seqIds[i]

                    else:
                        ccpCodes0.append(chemComp.ccpCode)

                if ccpCodes0: addMolResidues(molecule, molType, ccpCodes0, startNum=startNum)

                msChain = makeChain(molSystem,molecule,code=sysChainCode)
                resMapping = {}
                for i, residue in enumerate(msChain.sortedResidues()): resMapping[i] = residue

                # TBD deal with HETATMs, proper Molecule name,
                # store CCPN xml as non-standard naming system?

            else: continue

        else:
            sysChainCode = msChain.code
            resMapping = {}
            for i, residue in mapping: resMapping[i] = residue

        usedChains.append(msChain)

        atomNamesList = []
        msResidues = []
        for j, seqId in enumerate(seqIds):
            atomNames = [tt[0] for tt in resDict[seqId].keys()]
            atomNamesList.append(atomNames)
            msResidues.append(resMapping.get(j))

        namingSystem = getBestNamingSystem(msResidues, atomNamesList)

        coordChain = structure.newChain(code=sysChainCode)
        structure.override = True

        for j, seqId in enumerate(seqIds):
            msResidue = msResidues[j]

            if not msResidue: # Structure is bigger
                continue

            resName = ccpCodes[j]
            ccpCode = msResidue.ccpCode

            if doWarnings:
                if resName != ccpCode:
                    msg  = 'Residue names [%s,%s] don\'t match\nin'
                    msg += ' loaded molecular system\nchain %s position %d'
                    data = (resName,ccpCode,chCode,seqId)
                    showWarning('Warning', msg % data)
                    continue

                # Residues without a mol system equivalent were already skipped above
                if msResidue is None:
                    msg  = 'No equivalent molecular system residue'
                    msg += '\nfor PDB chain %s residue %d'
                    showWarning('Warning', msg % (chCode,seqId))
                    continue

            coordResidue = coordChain.newResidue(seqCode=msResidue.seqCode,
                                                 seqInsertCode=msResidue.seqInsertCode,
                                                 seqId=msResidue.seqId)

            atomCheckDict = {}
            systemAtoms = set()
            for atomKey in resDict[seqId].keys():

                atomName, altLoc = atomKey

                key = '%s:%s:%s' % (atomName, msResidue, namingSystem)
                systemAtom = msAtomDict.get(key)

                if not systemAtom:
                    systemAtom = findMatchingMolSystemAtom(atomName,
                                                           msResidue,
                                                           namingSystem,
                                                           systemAtoms,)
                    msAtomDict[key] = systemAtom

                # From here on the code runs for cached atoms as well. It used to be
                # nested under the lookup above, so an atom found in the cache (e.g.
                # the second alternate location of the same atom) got no coordinates.
                if (systemAtom is None) or atomCheckDict.get((systemAtom, altLoc)):
                    failedAtoms.append('%s%d.%s' % (chCode,seqId,atomName) )
                    continue

                systemAtoms.add(systemAtom)
                atomCheckDict[(systemAtom, altLoc)] = True
                coordAtom = coordResidue.findFirstAtom(name=systemAtom.name)
                if coordAtom is None:
                    # First time this atom is seen for the residue; further
                    # alternate locations re-use the same coordinate atom
                    coordAtom = coordResidue.newAtom(name=systemAtom.name)

                for m, model in models:
                    coordDict = strucDict[m][chCode][seqId][atomKey]
                    occupancy = coordDict.get('occupancy')
                    bFactor   = coordDict.get('bFactor')

                    x = coordDict['x']
                    y = coordDict['y']
                    z = coordDict['z']
                    c = coordAtom.newCoord(x=x, y=y, z=z, altLocationCode=altLoc, model=model)

                    if occupancy is not None: c.setOccupancy(occupancy)
                    if bFactor is not None: c.setBFactor(bFactor)


        # final validity check
        # NOTE(review): this runs after every chain, not once at the end, and any
        # error whatsoever discards the whole ensemble - confirm this is intended
        try:
            structure.checkAllValid()
        except:
            structure.delete()
            return

        # reset switches
        structure.override = False

    if failedAtoms and doWarnings:
        msg = 'No equivalent molecular system atoms for PDB atoms: %s'
        showWarning('Warning', msg % ( ' '.join(failedAtoms) ))

    if not structure.coordChains:
        structure.delete()
        structure = None

    return structure

def _findChemCompBySysName(chemComps, resName):
    """Return the first ChemComp that has resName as a system name in any of its naming systems, else None."""

    for chemComp in chemComps:
        for namingSystem in chemComp.namingSystems:
            for sysName in namingSystem.chemCompSysNames:
                if sysName.sysName == resName: return chemComp

    return None

def getBestChemComp(project, resName, atomNames):
    """Descrn: Find the best matching ChemComp for the input residue name that
               has the input atomNames. The molecule type is guessed from the atom
               names (getBestMolType) and the standard ChemComps of that type are
               searched in this order:
                 1. ccpCode or three letter code equal to the residue name
                 2. first ChemComp with the residue name as a system name in one
                    of its naming systems
                 3. getChemComp for the guessed molecule type
                 4. getChemComp for molecule type 'other'
               For proteins the residue name is first converted to capitalised
               form (e.g. ALA -> Ala); a one letter nucleic acid name gets '11'
               appended when the atom names contain 'PD'.
       Inputs: Implementation.Project, Word (imported residue name),
               List of Words (imported atom names)
       Output: ChemComp object or None if nothing matches
    """

    molType = getBestMolType(atomNames)

    if molType in ('DNA','RNA','DNA/RNA'):
        if len(resName) == 1 and 'PD' in atomNames: resName = resName + '11'

    if molType == 'protein':
        # Slices rather than an index, so that an empty name does not fail here
        resName = resName[:1] + resName[1:].lower()

    chemComps = getStdChemComps(project, molTypes=[molType,])

    ccpCodeDict = {}
    for chemComp0 in chemComps:
        ccpCodeDict[chemComp0.ccpCode] = chemComp0
        ccpCodeDict[chemComp0.code3Letter] = chemComp0

    chemComp = ccpCodeDict.get(resName)

    if chemComp is None:
        # The nested loops that did this search never reached their outer break,
        # so every ChemComp was scanned and the last match won; the search now
        # stops at the first match.
        chemComp = _findChemCompBySysName(chemComps, resName)

        if not chemComp: chemComp = getChemComp(project, molType, resName)

        if not chemComp and molType != 'other': chemComp = getChemComp(project, 'other', resName)

    return chemComp

def getBestMolType(atomNames, ccpCodes=None):
    """Descrn: Determine the best molecule type (protein, DNA, RNA, carbohydrate or
               other) given the input atom names. The tests are done in this order:
                 - C3', C5' and C2 present: 'DNA', or 'RNA' if O2' is present too
                 - C3*, C5* and C2 present: same, for names that use * instead of '
                 - CA present: 'protein'
                 - C1, C2, C3 and C4 present together with at least one of
                   O2, O3 or O4: 'carbohydrate'
                 - anything else: 'other'
       Inputs: List of Words (imported atom names),
               List of Words (Molecule.MolResidue.ccpCodes) - not used
       Output: Word (Molecule.Molecule.molType)
    """

    names = set(atomNames)

    if names.issuperset(("C3'", "C5'", "C2")):
        if "O2'" in names: return 'RNA'
        return 'DNA'

    if names.issuperset(("C3*", "C5*", "C2")):
        # PDB Naming system different from others
        if "O2*" in names: return 'RNA'
        return 'DNA'

    if 'CA' in names: return 'protein'

    # The ring oxygen test used to look for "04" (digit zero) instead of "O4"
    if names.issuperset(("C1", "C2", "C3", "C4")) and names.intersection(("O2", "O3", "O4")):
        return 'carbohydrate'

    return 'other'

'''
# Replaced with fixed import from StructureBasic
def getBestNamingSystem(residues, atomNamesList):
    """Descrn: Determine the best naming system for a list of list of atom names
               which correspond to the input residues.
       Inputs: List of MolSystem.Residues, List of List of Words (imported atom names)
       Output: Word (ChemAtomSysName.namingSystem)
    """

    dict = {}
    done = {}
    for i, residue in enumerate(residues):
        if not residue: # Could be mismatched position from seq alignment
            continue

        atomNames = atomNamesList[i]
        chemComp  = residue.molResidue.chemComp

        if done.get(chemComp): continue

        done[chemComp] = True

        for namingSystem in chemComp.namingSystems:
            for atomSysName in namingSystem.atomSysNames:
                for atomName in atomNames:
                    if atomName == atomSysName.sysName:    dict[namingSystem.name] = dict.get(namingSystem.name, 0) + 10
                    elif atomName in atomSysName.altSysNames: dict[namingSystem.name] = dict.get(namingSystem.name, 0) + 5

    bestSc = 0
    bestNs = 'PDB'
    for ns in dict.keys():
        if dict[ns] > bestSc:
            bestSc = dict[ns]
            bestNs = ns

    return bestNs
'''

'''
# Replaced with fixed import from StructureBasic
def findMatchingMolSystemAtom(atomName, residue, namingSystem):
    """Descrn: Find the best matching CCPN atom name in a residue for the input
               atom name in the input naming system. Will try other naming
               systems if the input one doesn't work.
       Inputs: Word (imported atom name), MolSystem.Residue,
               Word (ChemComp.NamingSystem.name)
       Output: Word (MolSystem.Atom.name)
    """
    nulciec = ('DNA','RNA','DNA/RNA')
    weirdos = {'O1P':'OP1','O2P':'OP2','C5A':'C7'}
    if weirdos.get(atomName) and residue.molResidue.molType in nulciec: atomName = weirdos[atomName]

    atom = None
    chemComp = residue.chemCompVar.chemComp
    namingSystem0 = chemComp.findFirstNamingSystem(name=namingSystem)
    atomSysNames = []

    if namingSystem0:
        # find plain mapping
        atomSysName = namingSystem0.findFirstAtomSysName(sysName=atomName)
        if atomSysName:
            atom = residue.findFirstAtom(name=atomSysName.atomName)
            if atom: return atom

        # otherwise try alternative sys names
        for atomSysName in namingSystem0.atomSysNames:
            if atomName in atomSysName.altSysNames: atomSysNames.append(atomSysName)

    # otherwise try any naming system plain mapping 
    if not atomSysNames:
        for namingSystem0 in chemComp.namingSystems:
            for atomSysName in namingSystem0.atomSysNames: atomSysNames.append(atomSysName)

    for atomSysName in atomSysNames:
        if atomSysName.sysName == atomName:
            atom = residue.findFirstAtom(name=atomSysName.atomName)
            if atom: return atom

    # last resort: try any naming system alternative sys names
    for atomSysName in atomSysNames:
        if atomName in atomSysName.altSysNames:
            atom = residue.findFirstAtom(name=atomSysName.atomName)
            if atom: return atom    

    return atom        
'''

class evalWcPairing(object):

    """Description: Class to evaluate Watson-Crick hydrogen bonding between
                    bases in regular RNA or DNA structures. WC-pairs are
                    evaluated between possible hydrogen bond donors and 
                    acceptors in bases. The nucleotide type does not matter.
                    A heavy atom upper distance limit of 3.0 A by default is
                    used as cutoff.
       Input      : Object exposing 'structureEnsemble' and 'isDna' attributes
                    (presumably a Haddock partner - confirm against callers).
                    The evaluation only runs when 'isDna' is true. When no
                    partner is given nothing is evaluated.
       Output     : self.pairs; list of tuples with the two coordinate
                    residues (the 'residue' attribute of the selected atoms)
                    involved in WC pairing. Every pair is listed once.
       Arguments  : Hydrogen bond upper distance limit. 3.0 A by default
    """
    def __init__(self,partner=None,hbond=3.0):

        # Atom name combinations (first atom, second atom) that may form a
        # base-base hydrogen bond. Both directions are listed.
        self.allowed = [('O6','N4'),
                        ('N4','O6'),
                        ('N1','N3'),
                        ('N3','N1'),
                        ('N2','O2'),
                        ('O2','N2'),
                        ('O4','N6'),
                        ('N6','O4')]
        # Distances are compared squared, see __evalAtomAtomDistance__
        self.hbond = hbond**2
        self.pairs = []
        self.partner = None

        # The signature allows partner=None; leave the pair list empty
        # instead of failing on the attribute access below.
        if partner is None:
            return

        self.partner = partner.structureEnsemble

        if partner.isDna:
            self.__makeAtomSelection__()
            self.__createDistanceMatrix__()
            self.__resolveWcPairs__()

    def __makeAtomSelection__(self):

        """For every residue in the system select the possible base hydrogen 
           bond donors and acceptors. The selection is stored in self.atoms.
        """
        self.atoms = []
        allowedAtoms = set(n[0] for n in self.allowed)

        for hChain in self.partner.sortedCoordChains():
            for hResidue in hChain.sortedResidues():
                self.atoms.extend(a for a in hResidue.sortedAtoms() if a.name in allowedAtoms)

    def __createDistanceMatrix__(self):

        """Calculate the distance between all possible base hydrogen bond donors and acceptors. Checks for:
           no distance calculation within same objects and between same base types. The result is stored in
           self.matrix as a list of (atom1, atom2, squared distance) tuples. The squared distance is None
           when one of the two atoms has no coordinate."""

        self.matrix = []

        for atm1 in self.atoms:
            for atm2 in self.atoms:
                # Cheap name test first; only allowed donor/acceptor combinations are of interest
                if (atm1.name,atm2.name) not in self.allowed:
                    continue
                # Same base type (this also covers an atom paired with itself)
                if atm1.residue.residue.ccpCode == atm2.residue.residue.ccpCode:
                    continue

                dist = self.__evalAtomAtomDistance__(atm1.findFirstCoord(),atm2.findFirstCoord())
                self.matrix.append((atm1,atm2,dist))

    def __evalAtomAtomDistance__(self,atm1,atm2):

        """Calculate the distance between the coordinates atm1 and atm2 (objects with x, y and z
           attributes). For speed we do not take the square root to obtain the final distance;
           the squared distance is returned as a float. Returns None if either coordinate is missing.
        """
        # Test the coordinate objects themselves before touching x/y/z
        if atm1 is None or atm2 is None:
            return None

        return float((atm1.x-atm2.x)**2 + (atm1.y-atm2.y)**2 + (atm1.z-atm2.z)**2)

    def __resolveWcPairs__(self):

        """Compose the final list of base-pairs based on the WC pairing profile. A residue pair is
           added to self.pairs once, regardless of the order of the residues and of the number of
           hydrogen bonds found between them."""

        for atm1, atm2, dist in self.matrix:
            # No coordinates means no evidence for a hydrogen bond
            if dist is None or dist > self.hbond:
                continue

            pair1 = (atm1.residue,atm2.residue)
            pair2 = (atm2.residue,atm1.residue)
            if pair1 not in self.pairs and pair2 not in self.pairs:
                self.pairs.append(pair1)
