from Bio import GenBank from Foundation import * from string import maketrans, upper
FH = open('sequences.gb', 'r') parser = GenBank.FeatureParser() it = GenBank.Iterator(FH, parser) results = list()
def processGene(f): D = dict() D['start'] = int(f.location._start.position) D['end'] = int(f.location._end.position) D['strand'] = int(f.strand) D['bid'] = f.qualifiers['locus_tag'][0] D['name'] = f.qualifiers['gene'][0] return D
record = it.next() L = record.features L = [f for f in L if f.type == 'CDS'] L = [processGene(f) for f in L] sequence = record.seq.tostring() print len(L)
def reversesequence(s): t = maketrans('agctAGCT','tcgaTCGA') s2 = s.translate(t) L = list(s2) L.reverse() return ''.join(L)
def getsequence(D): global sequence seq = sequence[D['start']:D['end']] if D['strand'] == -1: seq = reversesequence(seq) D['sequence'] = seq return D L = [getsequence(D) for D in L]
filename = NSHomeDirectory() filename += '/Desktop/ECgenes.plist' ma = NSArray.arrayWithArray_(L) ma.writeToFile_atomically_(filename, True) |