import sys from cogent import LoadTree
s = ''' ((Stenotrophomonas_maltophilia:0.07574, Kingella_oralis:0.08026) :0.00827, Pseudomonas_aeruginosa:0.05950, ((Salmonella_typhi:0.01297, Escherichia_coli:0.01491) :0.03356, Haemophilus_parainfluenzae:0.06113) :0.03863);'''
def load_data(fn): FH = open(fn,'r') data = FH.read().strip() FH.close() return data
# for flexibility, we can this function with # a tree_string # a filename # or a PyCogent tree def make_tree_dict(ts=None,fn=None,tr=None,debug=False): if tr: pass elif fn: tr = LoadTree(filename=fn) else: tr = LoadTree(treestring=ts) nodes = tr.getNodesDict() # and their names all_node_names = tr.getNodeNames() # PyCogent organizes the external node names # in the correct order, so the rest is easy
# rearrange the data my way D = dict() D['meta'] = { 'all_node_names':all_node_names } D['meta']['tree'] = tr e_node_names = list() i_node_names = list() for name in all_node_names: node = nodes[name] external = node.isTip() if external: e_node_names.append(name) else: i_node_names.append(name) # in order up the tree aL = [a.Name for a in node.ancestors()] dist_to_root = node.distance(nodes['root']) nD = {'name':name, 'node':node, 'is_external':external, 'is_root': name == 'root', 'x':dist_to_root, 'ancestors':aL } if not nD['is_root']: dist = node.distance(nodes[aL[0]]) nD['dist_to_parent'] = dist else: nD['dist_to_parent'] = 0 D[name] = nD D['meta']['e_node_names'] = e_node_names D['meta']['i_node_names'] = i_node_names D = compute_y_pos(D,debug=debug) if debug: show(D) return D def compute_y_pos(D,debug=False): all_node_names = D['meta']['all_node_names'] e_node_names = D['meta']['e_node_names'] i_node_names = D['meta']['i_node_names']
# actual node dictionaries e_node_dicts = [D[n] for n in e_node_names] i_node_dicts = [D[n] for n in i_node_names]
# just make a square plot xmax = max([nD['x'] for nD in e_node_dicts]) ymax = xmax D['meta']['max_xy'] = xmax, ymax
# compute y for external nodes N = 1.0*(len(e_node_names)-1) for nD in e_node_dicts: i = e_node_names.index(nD['name']) f = i/N nD['y'] = f*ymax
def mean(L): return sum(L)*1.0/len(L)
def get_child_node_names(parent): if debug: print parent cL = list() for name in all_node_names: if name == 'root': continue nD = D[name] if parent == nD['ancestors'][0]: cL.append(name) if debug: print cL return cL
# compute y for internal nodes # must fill these from the bottom up L = i_node_names[:] #L.sort() # sorts 0,1,2 etc. then root def f(name): # sort by x-value ! nD = D[name] return nD['x'] L = sorted(L,key=f,reverse=True) if debug: print 'finding children' for name in L: nD = D[name] cL = get_child_node_names(parent=name) nD['immediate_children'] = cL yL = [D[child]['y'] for child in cL] y0 = min(yL) y1 = max(yL) if len(yL) == 3: y = sorted(yL)[1] else: y = mean([y0,y1]) nD = D[name] nD['y'] = y nD['y_top'] = y1 nD['y_bott'] = y0 if debug: print # sanity check for ancestor lists for name in all_node_names: if name == 'root': continue assert D[name]['ancestors'][-1] == 'root' return D def show(D): e_node_names = D['meta']['e_node_names'] i_node_names = D['meta']['i_node_names'] for name in e_node_names + i_node_names: print name nD = D[name] for k in nD: if k == 'node' and not nD['is_external']: continue print k, nD[k] print
if __name__ == '__main__': tree_string = None if len(sys.argv) > 1: fn = sys.argv[1] try: tree_string = load_data(fn) except IOError: print 'could not open file:', fn sys.exit() if not tree_string: print 'making default tree', '\n' tree_string = s.strip() D = make_tree_dict(ts=tree_string,debug=True) |