import xml.etree.ElementTree as ET
def parseBLASTIteration(iteration, howmany=3):
    """Collect the leading hits of one BLAST XML ``<Iteration>`` element.

    Args:
        iteration: ElementTree element whose ``Iteration_hits/Hit`` children
            are read. ``None`` is treated as "no hits".
        howmany: maximum number of hits taken from the front of the hit
            list; ``None`` takes all of them.

    Returns:
        A list with one dict per hit. Each dict holds the text of
        ``Hit_id``, ``Hit_def`` and ``Hit_accession`` (``None`` when the
        element is absent) and ``'hsps'``, a list with one dict per
        ``<Hsp>`` found under ``Hit_hsps``. HSP dicts carry the raw text of
        ``score``, ``evalue``, ``identity``, ``gaps``, ``length``,
        ``query``, ``midline`` and ``hitseq``, plus ``'%identity'``: a float
        percentage, or the string ``'error'`` when it cannot be computed.
    """
    hit_tags = ('Hit_id', 'Hit_def', 'Hit_accession')
    hsp_fields = (
        ('score', 'Hsp_score'),
        ('evalue', 'Hsp_evalue'),
        ('identity', 'Hsp_identity'),
        ('gaps', 'Hsp_gaps'),
        ('length', 'Hsp_align-len'),
        ('query', 'Hsp_qseq'),
        ('midline', 'Hsp_midline'),
        ('hitseq', 'Hsp_hseq'),
    )

    if iteration is None:
        return []

    hitL = []
    for hit in iteration.findall('Iteration_hits/Hit')[:howmany]:
        hitD = dict((tag, hit.findtext(tag)) for tag in hit_tags)

        hsps = []
        # Walk the individual <Hsp> elements. Iterating over the <Hit_hsps>
        # container itself would only ever expose the first HSP of a hit.
        for hsp in hit.findall('Hit_hsps/Hsp'):
            hspD = dict((key, hsp.findtext(tag)) for key, tag in hsp_fields)
            try:
                identity = int(hspD['identity'])
                length = int(hspD['length'])
                hspD['%identity'] = identity * 100.0 / length
            except (TypeError, ValueError, ZeroDivisionError):
                # Missing field, non-numeric text, or zero alignment length.
                hspD['%identity'] = 'error'
            hsps.append(hspD)

        hitD['hsps'] = hsps
        hitL.append(hitD)
    return hitL
def parseSingleIteration(tree, howmany=3):
    """Parse the first ``<Iteration>`` found in a BLAST XML tree.

    Args:
        tree: parsed XML object exposing ``find`` (e.g. the result of
            ``ET.parse``); ``BlastOutput_iterations/Iteration`` is looked up
            on it.
        howmany: maximum number of hits to return, forwarded to
            ``parseBLASTIteration``.

    Returns:
        The hit list built by ``parseBLASTIteration``, or an empty list when
        the tree contains no iteration.
    """
    iteration = tree.find('BlastOutput_iterations/Iteration')
    if iteration is None:
        # No iteration in the report: nothing to parse.
        return []
    return parseBLASTIteration(iteration, howmany)

#---------------------------------------------------------
def showHitList(hitL, withaccession=True):
    """Print a report for each hit dict in ``hitL``.

    For every hit this prints a ``hit # N`` header, the ``Hit_id``,
    ``Hit_def`` and ``Hit_accession`` values, then a summary line for the
    first HSP only (``identity/length`` and percent identity) followed by
    its alignment, and finally a blank line.

    Args:
        hitL: list of hit dicts with the keys ``Hit_id``, ``Hit_def``,
            ``Hit_accession`` and ``hsps`` (a list of HSP dicts).
        withaccession: when true, the summary line starts with the
            accession left-justified to 10 characters.
    """
    # Every print() call below takes exactly one string argument, so the
    # output is the same under the Python 2 print statement and the
    # Python 3 print function.
    for j, hitD in enumerate(hitL):
        print('hit # %d' % (j + 1))
        for k in ('Hit_id', 'Hit_def', 'Hit_accession'):
            print('%s %s' % (k, hitD[k]))

        # Fields waiting to be written on the summary line.
        pending = []
        if withaccession:
            pending.append(hitD['Hit_accession'].ljust(10))

        hspL = hitD['hsps']
        if hspL:
            hspD = hspL[0]  # only the first HSP is reported
            pending.append(hspD['identity'] + '/' + hspD['length'])
            pct = hspD['%identity']
            try:
                pct_text = '%3.2f' % pct
            except TypeError:
                # Non-numeric marker (e.g. 'error'): show it as text.
                pct_text = str(pct)
            pending.append(pct_text.rjust(7))
            print(' '.join(pending))
            pending = []
            printHspAlignment(hspD)

        # Ends the hit: a blank line after an HSP, or the bare accession
        # line when the hit has no HSP.
        print(' '.join(pending))


def printHspAlignment(hspD, line_length=60):
    """Print the query, midline and hit sequences of an HSP in chunks.

    Each chunk prints three lines: query, midline, hit sequence. The number
    of chunks is determined by the length of the query string.

    Args:
        hspD: HSP dict with ``query``, ``midline`` and ``hitseq`` strings;
            a ``None`` value is treated as an empty string.
        line_length: number of alignment columns per printed chunk.
    """
    query = hspD['query'] or ''
    midline = hspD['midline'] or ''
    hitseq = hspD['hitseq'] or ''
    for start in range(0, len(query), line_length):
        stop = start + line_length
        print(query[start:stop])
        print(midline[start:stop])
        print(hitseq[start:stop])
# Script body: parse the BLAST XML report named below and print its hits.
fn = 'blast_test.xml'
tree = ET.parse(fn)
hitL = parseSingleIteration(tree)
showHitList(hitL[:3])