This is probably a fairly basic question, so I apologize in advance, but I can't figure out how to write output in XML format using Biopython. I have a fairly large BLAST results file in XML format, and I'm trying to extract a portion of it using a list of specific queries I am interested in. I can find those queries in the larger file, but I can't work out how to write them back out as XML. Here is the script I am currently using:
#!/usr/bin/env python
import sys
import os
import sets
import Bio
from sets import Set
from Bio.Blast import NCBIXML
# Usage: print the help text and stop when the script is run without arguments.
if len(sys.argv) < 2:
    # Single-argument, parenthesised print behaves the same whether it is
    # executed as the Python 2 statement or the Python 3 function.
    print("")
    print("This program extracts blast results from an xml file given a list of query sequences")
    # Substitute the script name; previously the literal "%s" was printed.
    print("Usage: %s -list file1 -xml file2 -out file3"
          % (sys.argv[0] if sys.argv else "<script>"))
    print("-list: list of sequence names")
    # The -xml input is the BLAST XML file that is parsed later, not FASTA.
    print("-xml: blast results file in xml format")
    print("-out: outfile name")
    print("")
    sys.exit()
# Parse args: every recognised flag takes the following token as its value.
infile1 = infile2 = outfile = None
cli_tokens = sys.argv[1:]
# Pair each token with its successor. A flag in the last position has no
# successor, so it is never paired and ends up reported as missing below
# (indexing sys.argv[i+1] there used to raise IndexError).
for cli_flag, cli_value in zip(cli_tokens, cli_tokens[1:]):
    if cli_flag == "-list":
        infile1 = cli_value
    elif cli_flag == "-xml":
        infile2 = cli_value
    elif cli_flag == "-out":
        outfile = cli_value

# Fail with a clear message instead of a NameError when a flag was omitted.
missing_flags = [
    flag_name
    for flag_name, flag_value in (
        ("-list", infile1),
        ("-xml", infile2),
        ("-out", outfile),
    )
    if flag_value is None
]
if missing_flags:
    # sys.exit with a string writes it to stderr and exits with status 1.
    sys.exit("Missing required argument(s): %s" % ", ".join(missing_flags))

# Order matters to the code below: [list file, xml file, output file].
fls = [infile1, infile2, outfile]
# Open the BLAST XML input. It is deliberately left open: blast_records is
# built from this handle and is not consumed within this section, so the
# code that follows presumably still reads from it.
results_handle = open(fls[1], "r")

# Collect the query names of interest: the first whitespace-delimited token
# of each line, with any leading '>' removed.
geneContigs = set()  # builtin set; the `sets` module is deprecated
with open(fls[0], "r") as fin1:  # closed as soon as the list has been read
    for line in fin1:
        fields = line.lstrip('>').split()
        # Skip blank lines; indexing fields[0] on them raised IndexError.
        if fields:
            geneContigs.add(fields[0])

# Output file; left open for the code that follows.
save_file = open(fls[2], "w")
results_list = []
blast_records = NCBIXML.parse(results_handle)
f ...