Quick script to download UniProt info for the proteins in a Pfam tree (Newick tree format)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
from ete3 import Tree
import sys
import os
import urllib.request
 
# Download the UniProt flat-file entry for every leaf of a Newick tree.
#
# Usage: python <this_script> <newick_tree_file>
#
# For each leaf, the text before the first "/" in the leaf name is used as the
# accession (presumably Pfam-style names such as "<accession>/<start>-<end>" --
# confirm against your tree). Each entry is saved as "<accession>.txt" in the
# current working directory; entries that already exist there are not fetched
# again, so the script can be re-run to resume.
if len(sys.argv) < 2:
    # Fail with a usage hint instead of a bare IndexError.
    sys.exit(f"Usage: {sys.argv[0]} <newick_tree_file>")

tree = Tree(sys.argv[1])

# Iterating over an ete3 Tree visits its leaves.
for leaf in tree:
    accession = leaf.name.split("/")[0]
    if not accession:
        # An unnamed leaf would otherwise request ".../uniprot/.txt" and
        # write a file literally called ".txt".
        continue
    print(f"Accession: {accession}")

    entry_path = f"{accession}.txt"
    if os.path.exists(entry_path):
        # Already downloaded (several leaves may share one accession).
        continue

    print("Downloading...")
    url = f"https://www.uniprot.org/uniprot/{accession}.txt"
    # The context manager closes the HTTP response even if read() raises.
    with urllib.request.urlopen(url) as page:
        data = page.read()

    # Write to a temporary name and rename only once the write has finished,
    # so an interrupted run never leaves a truncated "<accession>.txt" that
    # the existence check above would later mistake for a complete download.
    partial_path = f"{entry_path}.part"
    with open(partial_path, "wb") as out_file:
        out_file.write(data)
    os.replace(partial_path, entry_path)

Leave a Reply