Quick script to download UniProt info for proteins from Pfam trees (Newick tree format)
"""Download UniProt text entries for the leaves of a Newick tree.

Usage: python <script> TREE_FILE

The tree file named by the first command-line argument is loaded with ete3.
For every leaf, the part of the leaf name before the first "/" is used as
the accession (presumably stripping a Pfam-style residue-range suffix --
confirm against your trees).  The corresponding UniProt entry is saved as
``<accession>.txt`` in the current directory; accessions whose file already
exists are skipped, so the script can be re-run to resume.
"""
import contextlib
import os
import sys
import urllib.error
import urllib.request

# NOTE(review): this is the URL the script has always used.  UniProt's newer
# REST service lives under rest.uniprot.org; confirm the legacy address still
# redirects there before relying on it (urlopen follows HTTP redirects).
UNIPROT_URL = "https://www.uniprot.org/uniprot/{accession}.txt"


def accession_from_leaf_name(leaf_name):
    """Return the part of *leaf_name* before the first "/".

    A name without "/" is returned unchanged.
    """
    return leaf_name.split("/")[0]


def download_entry(accession, destination, url_template=UNIPROT_URL):
    """Fetch the entry for *accession* and store it at *destination*.

    The data is written to ``<destination>.part`` first and only renamed to
    *destination* once the whole response has been written.  A failed or
    interrupted download therefore never leaves a truncated file that a
    later run would mistake for a finished one.

    Args:
        accession: Identifier substituted into *url_template*.
        destination: Path of the file to create.
        url_template: URL containing an ``{accession}`` placeholder.

    Raises:
        urllib.error.URLError: If the URL cannot be opened (this includes
            ``HTTPError`` for responses such as 404).
        OSError: If the local file cannot be written.
    """
    url = url_template.format(accession=accession)
    partial = f"{destination}.part"
    try:
        with urllib.request.urlopen(url) as response, open(partial, "wb") as out_file:
            out_file.write(response.read())
    except BaseException:
        # Also covers Ctrl-C: discard the incomplete file, then re-raise.
        with contextlib.suppress(FileNotFoundError):
            os.remove(partial)
        raise
    os.replace(partial, destination)


def main(argv=None):
    """Download the entries for all leaves of the tree named in *argv*.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.  Only the first argument (the tree file) is
            used; any further arguments are ignored.

    Returns:
        0 if every entry was already present or downloaded successfully,
        1 if at least one download failed, 2 if no tree file was given.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        print(f"Usage: {sys.argv[0]} TREE_FILE", file=sys.stderr)
        return 2

    # Imported here so the helpers above stay importable without ete3.
    from ete3 import Tree

    tree = Tree(args[0])
    failures = 0
    # Iterating an ete3 Tree visits its leaves.
    for leaf in tree:
        accession = accession_from_leaf_name(leaf.name)
        print(f"Accession: {accession}")
        if not accession:
            # An unnamed leaf would otherwise request ".../uniprot/.txt".
            print("Skipping leaf without an accession", file=sys.stderr)
            continue
        destination = f"{accession}.txt"
        if os.path.exists(destination):
            continue
        print("Downloading...")
        try:
            download_entry(accession, destination)
        except urllib.error.URLError as err:
            # One bad accession should not abort the remaining downloads.
            print(f"Failed to download {accession}: {err}", file=sys.stderr)
            failures += 1
    return 1 if failures else 0


if __name__ == "__main__":
    sys.exit(main())