import math # list printing def printL(L,name=None): if name: print name pL = [str(round(n,3)).rjust(8) for n in L] N = 6 while pL: line = pL[:N] pL = pL[N:] print ''.join(line)
# to start with, we'll assume equal branch lengths # construct the transition-probability matrix nt = 'ACGT' pur = 'AG' pyr = 'CT' f_same = math.log(0.95) f_transition = math.log(0.03) f_transversion = math.log(0.01) pi = math.log(0.25)
def get_f(n): m,n = list(n) if m == n: return f_same if m in pur and n in pur: return f_transition if m in pyr and n in pyr: return f_transition return f_transversion
k = [m+n for m in nt for n in nt] v = [get_f(n) for n in k] P = dict(zip(k,v))
#--------------------------------------- # three different functions # depending on types of child nodes def ext_ext(u,v): # u,v are nucleotide as char # for each possible n in their parent node # returns an array of log(p) for nu and nv rL = [P[n+u] + P[n+v] for n in nt] return rL
# L = list of likelihoods in order, for internal child # v = external child nucleotide as char def int_ext(L,v): rL = list() # for new node in {ACGT} for i,m in enumerate(nt): ep = P[m+v] # log(p) of m -> v sL = list() # each state for internal child # n is a float for j,n in enumerate(L): u = nt[j] # would multiply probs, so add logs p = P[m+u] # log(p) for next branch p += L[j] # log likelihood if that child = u p += ep # log(p) for external child sL.append(p) # will need the actual probs to add them # does this need to be done better? how? sL = [(math.e)**p for p in sL] logS = math.log(sum(sL)) rL.append(logS) return rL
# both children are internal def int_int(L1,L2,root=False): rL = list() # for new node = {ACGT} for i,n in enumerate(nt): sL = list() # each state for left child # v1 is a float for j,f1 in enumerate(L1): u = nt[j] # each state for right child # v2 is a float for k,f2 in enumerate(L2): v = nt[k] p = P[n+u] # log(p) for left branch p += f1 # log likelihood if child = u p += P[n+v] # log(p) for right branch p += f2 # log likelihood if child = v sL.append(p) # will need the actual probs to add them sL = [(math.e)**p for p in sL] logS = math.log(sum(sL)) rL.append(math.log(sum(sL))) if root: # do pi calculation rL = [e + pi for e in rL] return rL
if __name__ == '__main__': #for k in sorted(P.keys()): print k, P[k] #for u in nt: #for v in nt: #ext_ext(u,v) #print '-'*40 L = [-0.1] * 4 int_ext(L,'C') print '-'*40 int_int(L,L) |