---+ Python snippets for biology Requires BioPython to be installed ---++ Open common formats: .gbk / .gbf / .gb %CODE{"python"}% import SeqIO with open('/my/path/file.gb','r') as file_handle: record_dict = SeqIO.to_dict(SeqIO.parse(file_handle, 'gb')) gbkFile = record_dict[list(record_dict.keys())[0]] # the above is if there is only 1 record in the GBK file # if multiple files are in the record, such as a genome&plas, # this will only extract the first record %ENDCODE% .fa / .fasta / .fna %CODE{"python"}% import SeqIO fastaList = list(SeqIO.parse("path/file.fasta", "fasta")) %ENDCODE% ---++ Write common formats: .gbk / .gbf / .gb %CODE{"python"}% import SeqIO SeqIO.write(seqRecordObj_or_list,'/my/path/file.gbk', 'gb') %ENDCODE% .fa / .fasta / .fna %CODE{"python"}% import SeqIO SeqIO.write(seqRecordObj_or_list,'/my/path/file.fasta', 'fasta') %ENDCODE% ---++ local BLASTing from Python Also requires NCBI BLAST command line software, a local BLAST database, and Pandas %CODE{"python"}% from Bio.Seq import Seq from Bio import SeqIO from Bio.SeqRecord import SeqRecord import subprocess from tempfile import NamedTemporaryFile import pandas as pd def BLAST(seq, db = 'path/to/db', Type = "blastn"): # 'seq' is a sequence (as a str) of a protein or nucleotide sequence # 'db' points to location of local BLAST database # 'type' specifies the type of BLAST (e.g. 
'n', 'p', 'x', etc) query = NamedTemporaryFile() tmp = NamedTemporaryFile() SeqIO.write(SeqRecord(Seq(seq), id="temp"), query.name, "fasta") flags = 'qstart qend sseqid sframe pident slen sseq length sstart send qlen' # 'flags' specifies the specific outputs for 'output format 6' in the BLAST CL software extras = "-max_target_seqs 20000 -culling_limit 10 -perc_identity 75" # 'extras' are further flags that can be called on the CL subprocess.call( #the actual CL BLAST (f'{Type} -query {query.name} -out {tmp.name} ' f'-db {db} {extras} -outfmt "6 {flags}"'), shell=True) with open(tmp.name, "r") as file_handle: #opens BLAST file align = file_handle.readlines() tmp.close() query.close() df = pd.DataFrame([ele.split() for ele in align], columns = flags.split()) df = df.apply(pd.to_numeric, errors='ignore') # puts the output of BLAST into a tidy Pandas dataframe return df %ENDCODE%
Edit | Attach | Watch | Print version | History: r3 < r2 < r1 | Backlinks | View topic | More topic actions
Barrick Lab > ComputationList > ProtocolsPythonSnippets
Topic revision: r3 - 2021-11-30 - 20:40:55 - Main.MattMcGuffie
Barrick Lab
Homepage
Contact Information
Michigan State University
Department of Microbiology, Genetics, & Immunology
Department of Entomology
Ecology, Evolution, and Behavior Grad Program
UT Austin iGEM team
Team
Research
Overview
Publications
Software
Protocols
Protocol List
Reference Information
The LTEE
Copyright ©2025 Barrick Lab contributing authors. Ideas, requests, problems?
Send feedback