---+ Python snippets for biology Requires BioPython to be installed ---++ Open common formats: .gbk / .gbf / .gb %CODE{"python"}% import SeqIO with open('/my/path/file.gb','r') as file_handle: record_dict = SeqIO.to_dict(SeqIO.parse(file_handle, 'gb')) gbkFile = record_dict[list(record_dict.keys())[0]] # the above is if there is only 1 record in the GBK file # if multiple files are in the record, such as a genome&plas, # this will only extract the first record %ENDCODE% .fa / .fasta / .fna %CODE{"python"}% import SeqIO fastaList = list(SeqIO.parse("path/file.fasta", "fasta")) %ENDCODE% ---++ Write common formats: .gbk / .gbf / .gb %CODE{"python"}% import SeqIO SeqIO.write(seqRecordObj_or_list,'/my/path/file.gbk', 'gb') %ENDCODE% .fa / .fasta / .fna %CODE{"python"}% import SeqIO SeqIO.write(seqRecordObj_or_list,'/my/path/file.fasta', 'fasta') %ENDCODE% ---++ local BLASTing from Python Also requires NCBI BLAST command line software, a local BLAST database, and Pandas %CODE{"python"}% from Bio.Seq import Seq from Bio import SeqIO from Bio.SeqRecord import SeqRecord import subprocess from tempfile import NamedTemporaryFile import pandas as pd def BLAST(seq, db = 'nr_db', type = "p"): # 'seq' is a sequence (as a str) of a protein or nucleotide sequence # 'db' points to location of local BLAST database # 'type' specifies the type of BLAST (e.g. 
'n', 'p', 'x', etc) query = NamedTemporaryFile() tmp = NamedTemporaryFile() SeqIO.write(SeqRecord(Seq(seq), id="temp"), query.name, "fasta") flags = 'qstart qend sseqid sframe pident slen sseq length sstart send qlen' # 'flags' specifies the specific outputs for 'output format 6' in the BLAST CL software extras = "-max_target_seqs 20000 -culling_limit 10 -perc_identity 75" # 'extras' are further flags that can be called on the CL subprocess.call( #the actual CL BLAST (f'blast{type} -query {query.name} -out {tmp.name} ' f'-db {db} {extras} -word_size {str(wordsize)} -outfmt "6 {flags}"'), shell=True) with open(tmp.name, "r") as file_handle: #opens BLAST file align = file_handle.readlines() tmp.close() query.close() df = pd.DataFrame([ele.split() for ele in align], columns = flags.split()) df = df.apply(pd.to_numeric, errors='ignore') # puts the output of BLAST into a tidy Pandas dataframe return df %ENDCODE%
Edit
|
Attach
|
Watch
|
Print version
|
History
:
r3
<
r2
<
r1
|
Backlinks
|
View topic
|
More topic actions...
Barrick Lab
>
ComputationList
>
ProtocolsPythonSnippets
Contributors to this topic
MattMcGuffie, JeffreyBarrick
Topic revision: r2 - 2020-12-03 - 21:51:05 - Main.JeffreyBarrick
Barrick Lab
Contact
Research
Publications
Team
Protocols
Reference
Software
UT Austin
Mol Biosciences
ILS
Microbiology
EEB
CSSB
CBRS
The LTEE
iGEM team
SynBioCyc
SynBio course
NGS course
BEACON
Search
Log in
Copyright ©2025 Barrick Lab contributing authors. Ideas, requests, problems?
Send feedback