# generate a new motif length = SZ # howmany = N; average match = f import random nt = 'ACGT'
# given a master motif s and # a list of frequencies of conservation # generate a variant def mutateMotif(m,freqL): retL = list() for c,f in zip(m,freqL): if random.random() > f: retL.append(random.choice(nt)) else: retL.append(c) return ''.join(retL)
def getMotifList(N,M,SZ): L = list() # make a new random master motif s for i in range(M): L.append(random.choice(nt)) m = ''.join(L) # positions are conserved at these freqs freqL = [0.9,0.8,0.7,0.6,0.5] freqL *= M/len(freqL) + 1 # do mutagenesis L = list() for i in range(N): L.append(mutateMotif(m,freqL)) return m,L
def embedMotif(m,M,SZ): # embed motif in random sequence of len SZ L = [random.choice(nt) for i in range(SZ)] i = random.choice(range(SZ-M)) L[i:i+M] = list(m) return i,''.join(L) def show(m,seq): i = seq.find(m) j = i+len(m) print i print seq[:i] + '\n' print seq[i:j] + '\n' print seq[j:] + '\n' # we generate a list of motif, seq pairs # each motif derived from a master motif
def getTestSet(D=None): if not D: D = {'N':10,'M':10,'SZ':50 } N=D['N'] # number of seqs M=D['M'] # length of motif SZ=D['SZ'] # length of seqs s,L = getMotifList(N,M,SZ) retL = list() for m in L: i,seq = embedMotif(m,M,SZ) retL.append((i,m,seq)) return retL
if __name__ == '__main__': #random.seed(157) L = getTestSet() for i,m,seq in L: #show(motif,seq) print str(i).rjust(3),m,seq print i,m,seq = L[0] show(m,seq) |