Question
I have python coding question for bioinformatics : Please help! Currently my program does following pythonalign.py--seq1seq1--seq2seq2--typelocal--matrixblosum62--gap-2 takes seq1, seq2 file, choose local/global type 'alignment', choose
I have python coding question for bioinformatics: Please help!
Currently my program does following "pythonalign.py--seq1seq1--seq2seq2--typelocal--matrixblosum62--gap-2" takes seq1, seq2 file, choose local/global type 'alignment', choose scoring matrix and gets result as gap2
and I wanted to change the file format to "python align.py--seq1sequence_file.fa--databasedb.fa--typeglobal--matrixblosum62--gap_penaly-2--outoutput.txt" which takes sequence file1 and query it against "database" db.fa and have output file as output.txt
The program should keep track of the runtime and display it in the output file in following format:
score = alignment score, query = protein sequence pair, sequence header = starting point
> score | query | sequence header
---------------------------------------------------------------------------------------------------------------------------------------
importargparse
fromcollectionsimportdefaultdict
importtime
blosum62={
("W","F"):1,("L","R"):-2,("S","P"):-1,("V","T"):0,
("Q","Q"):5,("N","A"):-2,("Z","Y"):-2,("W","R"):-3,
("Q","A"):-1,("S","D"):0,("H","H"):8,("S","H"):-1,
("H","D"):-1,("L","N"):-3,("W","A"):-3,("Y","M"):-1,
("G","R"):-2,("Y","I"):-1,("Y","E"):-2,("B","Y"):-3,
("Y","A"):-2,("V","D"):-3,("B","S"):0,("Y","Y"):7,
("G","N"):0,("E","C"):-4,("Y","Q"):-1,("Z","Z"):4,
("V","A"):0,("C","C"):9,("M","R"):-1,("V","E"):-2,
("T","N"):0,("P","P"):7,("V","I"):3,("V","S"):-2,
("Z","P"):-1,("V","M"):1,("T","F"):-2,("V","Q"):-2,
("K","K"):5,("P","D"):-1,("I","H"):-3,("I","D"):-3,
("T","R"):-1,("P","L"):-3,("K","G"):-2,("M","N"):-2,
("P","H"):-2,("F","Q"):-3,("Z","G"):-2,("X","L"):-1,
("T","M"):-1,("Z","C"):-3,("X","H"):-1,("D","R"):-2,
("B","W"):-4,("X","D"):-1,("Z","K"):1,("F","A"):-2,
("Z","W"):-3,("F","E"):-3,("D","N"):1,("B","K"):0,
("X","X"):-1,("F","I"):0,("B","G"):-1,("X","T"):0,
("F","M"):0,("B","C"):-3,("Z","I"):-3,("Z","V"):-2,
("S","S"):4,("L","Q"):-2,("W","E"):-3,("Q","R"):1,
("N","N"):6,("W","M"):-1,("Q","C"):-3,("W","I"):-3,
("S","C"):-1,("L","A"):-1,("S","G"):0,("L","E"):-3,
("W","Q"):-2,("H","G"):-2,("S","K"):0,("Q","N"):0,
("N","R"):0,("H","C"):-3,("Y","N"):-2,("G","Q"):-2,
("Y","F"):3,("C","A"):0,("V","L"):1,("G","E"):-2,
("G","A"):0,("K","R"):2,("E","D"):2,("Y","R"):-2,
("M","Q"):0,("T","I"):-1,("C","D"):-3,("V","F"):-1,
("T","A"):0,("T","P"):-1,("B","P"):-2,("T","E"):-1,
("V","N"):-3,("P","G"):-2,("M","A"):-1,("K","H"):-1,
("V","R"):-3,("P","C"):-3,("M","E"):-2,("K","L"):-2,
("V","V"):4,("M","I"):1,("T","Q"):-1,("I","G"):-4,
("P","K"):-1,("M","M"):5,("K","D"):-1,("I","C"):-1,
("Z","D"):1,("F","R"):-3,("X","K"):-1,("Q","D"):0,
("X","G"):-1,("Z","L"):-3,("X","C"):-2,("Z","H"):0,
("B","L"):-4,("B","H"):0,("F","F"):6,("X","W"):-2,
("B","D"):4,("D","A"):-2,("S","L"):-2,("X","S"):0,
("F","N"):-3,("S","R"):-1,("W","D"):-4,("V","Y"):-1,
("W","L"):-2,("H","R"):0,("W","H"):-2,("H","N"):1,
("W","T"):-2,("T","T"):5,("S","F"):-2,("W","P"):-4,
("L","D"):-4,("B","I"):-3,("L","H"):-3,("S","N"):1,
("B","T"):-1,("L","L"):4,("Y","K"):-2,("E","Q"):2,
("Y","G"):-3,("Z","S"):0,("Y","C"):-2,("G","D"):-1,
("B","V"):-3,("E","A"):-1,("Y","W"):2,("E","E"):5,
("Y","S"):-2,("C","N"):-3,("V","C"):-1,("T","H"):-2,
("P","R"):-2,("V","G"):-3,("T","L"):-1,("V","K"):-2,
("K","Q"):1,("R","A"):-1,("I","R"):-3,("T","D"):-1,
("P","F"):-4,("I","N"):-3,("K","I"):-3,("M","D"):-3,
("V","W"):-3,("W","W"):11,("M","H"):-2,("P","N"):-2,
("K","A"):-1,("M","L"):2,("K","E"):1,("Z","E"):4,
("X","N"):-1,("Z","A"):-1,("Z","M"):-1,("X","F"):-1,
("K","C"):-3,("B","Q"):0,("X","B"):-1,("B","M"):-3,
("F","C"):-2,("Z","Q"):3,("X","Z"):-1,("F","G"):-3,
("B","E"):1,("X","V"):-1,("F","K"):-3,("B","A"):-2,
("X","R"):-1,("D","D"):6,("W","G"):-2,("Z","F"):-3,
("S","Q"):0,("W","C"):-2,("W","K"):-3,("H","Q"):0,
("L","C"):-1,("W","N"):-4,("S","A"):1,("L","G"):-4,
("W","S"):-3,("S","E"):0,("H","E"):0,("S","I"):-2,
("H","A"):-2,("S","M"):-1,("Y","L"):-1,("Y","H"):2,
("Y","D"):-3,("E","R"):0,("X","P"):-2,("G","G"):6,
("G","C"):-3,("E","N"):0,("Y","T"):-2,("Y","P"):-3,
("T","K"):-1,("A","A"):4,("P","Q"):-1,("T","C"):-1,
("V","H"):-3,("T","G"):-2,("I","Q"):-3,("Z","T"):-1,
("C","R"):-3,("V","P"):-2,("P","E"):-1,("M","C"):-1,
("K","N"):0,("I","I"):4,("P","A"):-1,("M","G"):-3,
("T","S"):1,("I","E"):-3,("P","M"):-2,("M","K"):-1,
("I","A"):-1,("P","I"):-3,("R","R"):5,("X","M"):-1,
("L","I"):2,("X","I"):-1,("Z","B"):1,("X","E"):-1,
("Z","N"):0,("X","A"):0,("B","R"):-1,("B","N"):3,
("F","D"):-3,("X","Y"):-1,("Z","R"):0,("F","H"):-1,
("B","F"):-3,("F","L"):0,("X","Q"):-1,("B","B"):4
}
pam250={
("W","F"):0,("L","R"):-3,("S","P"):1,("V","T"):0,
("Q","Q"):4,("N","A"):0,("Z","Y"):-4,("W","R"):2,
("Q","A"):0,("S","D"):0,("H","H"):6,("S","H"):-1,
("H","D"):1,("L","N"):-3,("W","A"):-6,("Y","M"):-2,
("G","R"):-3,("Y","I"):-1,("Y","E"):-4,("B","Y"):-3,
("Y","A"):-3,("V","D"):-2,("B","S"):0,("Y","Y"):10,
("G","N"):0,("E","C"):-5,("Y","Q"):-4,("Z","Z"):3,
("V","A"):0,("C","C"):12,("M","R"):0,("V","E"):-2,
("T","N"):0,("P","P"):6,("V","I"):4,("V","S"):-1,
("Z","P"):0,("V","M"):2,("T","F"):-3,("V","Q"):-2,
("K","K"):5,("P","D"):-1,("I","H"):-2,("I","D"):-2,
("T","R"):-1,("P","L"):-3,("K","G"):-2,("M","N"):-2,
("P","H"):0,("F","Q"):-5,("Z","G"):0,("X","L"):-1,
("T","M"):-1,("Z","C"):-5,("X","H"):-1,("D","R"):-1,
("B","W"):-5,("X","D"):-1,("Z","K"):0,("F","A"):-3,
("Z","W"):-6,("F","E"):-5,("D","N"):2,("B","K"):1,
("X","X"):-1,("F","I"):1,("B","G"):0,("X","T"):0,
("F","M"):0,("B","C"):-4,("Z","I"):-2,("Z","V"):-2,
("S","S"):2,("L","Q"):-2,("W","E"):-7,("Q","R"):1,
("N","N"):2,("W","M"):-4,("Q","C"):-5,("W","I"):-5,
("S","C"):0,("L","A"):-2,("S","G"):1,("L","E"):-3,
("W","Q"):-5,("H","G"):-2,("S","K"):0,("Q","N"):1,
("N","R"):0,("H","C"):-3,("Y","N"):-2,("G","Q"):-1,
("Y","F"):7,("C","A"):-2,("V","L"):2,("G","E"):0,
("G","A"):1,("K","R"):3,("E","D"):3,("Y","R"):-4,
("M","Q"):-1,("T","I"):0,("C","D"):-5,("V","F"):-1,
("T","A"):1,("T","P"):0,("B","P"):-1,("T","E"):0,
("V","N"):-2,("P","G"):0,("M","A"):-1,("K","H"):0,
("V","R"):-2,("P","C"):-3,("M","E"):-2,("K","L"):-3,
("V","V"):4,("M","I"):2,("T","Q"):-1,("I","G"):-3,
("P","K"):-1,("M","M"):6,("K","D"):0,("I","C"):-2,
("Z","D"):3,("F","R"):-4,("X","K"):-1,("Q","D"):2,
("X","G"):-1,("Z","L"):-3,("X","C"):-3,("Z","H"):2,
("B","L"):-3,("B","H"):1,("F","F"):9,("X","W"):-4,
("B","D"):3,("D","A"):0,("S","L"):-3,("X","S"):0,
("F","N"):-3,("S","R"):0,("W","D"):-7,("V","Y"):-2,
("W","L"):-2,("H","R"):2,("W","H"):-3,("H","N"):2,
("W","T"):-5,("T","T"):3,("S","F"):-3,("W","P"):-6,
("L","D"):-4,("B","I"):-2,("L","H"):-2,("S","N"):1,
("B","T"):0,("L","L"):6,("Y","K"):-4,("E","Q"):2,
("Y","G"):-5,("Z","S"):0,("Y","C"):0,("G","D"):1,
("B","V"):-2,("E","A"):0,("Y","W"):0,("E","E"):4,
("Y","S"):-3,("C","N"):-4,("V","C"):-2,("T","H"):-1,
("P","R"):0,("V","G"):-1,("T","L"):-2,("V","K"):-2,
("K","Q"):1,("R","A"):-2,("I","R"):-2,("T","D"):0,
("P","F"):-5,("I","N"):-2,("K","I"):-2,("M","D"):-3,
("V","W"):-6,("W","W"):17,("M","H"):-2,("P","N"):0,
("K","A"):-1,("M","L"):4,("K","E"):0,("Z","E"):3,
("X","N"):0,("Z","A"):0,("Z","M"):-2,("X","F"):-2,
("K","C"):-5,("B","Q"):1,("X","B"):-1,("B","M"):-2,
("F","C"):-4,("Z","Q"):3,("X","Z"):-1,("F","G"):-5,
("B","E"):3,("X","V"):-1,("F","K"):-5,("B","A"):0,
("X","R"):-1,("D","D"):4,("W","G"):-7,("Z","F"):-5,
("S","Q"):-1,("W","C"):-8,("W","K"):-3,("H","Q"):3,
("L","C"):-6,("W","N"):-4,("S","A"):1,("L","G"):-4,
("W","S"):-2,("S","E"):0,("H","E"):1,("S","I"):-1,
("H","A"):-1,("S","M"):-2,("Y","L"):-1,("Y","H"):0,
("Y","D"):-4,("E","R"):-1,("X","P"):-1,("G","G"):5,
("G","C"):-3,("E","N"):1,("Y","T"):-3,("Y","P"):-5,
("T","K"):0,("A","A"):2,("P","Q"):0,("T","C"):-2,
("V","H"):-2,("T","G"):0,("I","Q"):-2,("Z","T"):-1,
("C","R"):-4,("V","P"):-1,("P","E"):-1,("M","C"):-5,
("K","N"):1,("I","I"):5,("P","A"):1,("M","G"):-3,
("T","S"):1,("I","E"):-2,("P","M"):-2,("M","K"):0,
("I","A"):-1,("P","I"):-2,("R","R"):6,("X","M"):-1,
("L","I"):2,("X","I"):-1,("Z","B"):2,("X","E"):-1,
("Z","N"):1,("X","A"):0,("B","R"):-1,("B","N"):2,
("F","D"):-6,("X","Y"):-2,("Z","R"):0,("F","H"):-2,
("B","F"):-4,("F","L"):2,("X","Q"):-1,("B","B"):3
}
gap=-1
matrix=None
dp=defaultdict(lambda:defaultdict(int))
seq1=""
seq2=""
#out=""#added
#start_time=time.time()#timemeasurement
defmain():
parser=argparse.ArgumentParser()
parser.add_argument('--seq1',required=True,dest='seq1')
parser.add_argument('--seq2',required=True,dest='seq2')#parser.add_argument('--database',required=True,dest='db')
parser.add_argument('--type',required=True,dest='type')
parser.add_argument('--matrix',dest='matrix',default=None)
parser.add_argument('--gap',dest='gap',default=-2)
#parser.add_argument('--out',required=True,dest='out')#added
arg=parser.parse_args()
globalseq1
globalseq2
globalgap
globalmatrix
seq1=get_seq(arg.seq1)
seq2=get_seq(arg.seq2)
print(seq1)
print(seq2)
gap=int(arg.gap)
#out=str(arg)
matrix=arg.matrix
alg_type=arg.type
if(matrixnotin["blosum62","pam250",None]):
print("wrongscoringmatrix")
exit(0)
if(matrix=="pam250"):
matrix=pam250
if(matrix=="blosum62"):
matrix=blosum62
if(alg_type=="global"):
global_alignment()
find_path(len(seq1),len(seq2))
#print_matrix()
else:
local_min=local_alignment()
#print_matrix()
for(i,j)inlocal_min:
find_path(i,j)
defget_seq(file_name):
seq=""
withopen(file_name)asf:
forlineinf:
if(line.find(">")==-1):
seq+=line[:-1]
returnseq
#defout_seq(file_name):
#returnout
defscore(e1,e2):
globalmatrix
if(notmatrix):
if(e1==e2):
return1
else:
return-1
if((e1,e2)inmatrix.keys()):
returnmatrix[(e1,e2)]
else:
returnmatrix[(e2,e1)]
defprint_matrix():
globaldp
foriinrange(len(seq1)+1):
forjinrange(len(seq2)+1):
print(str(dp[i][j])+"\t",end="")
print("")
print("")
defglobal_alignment():
globalseq1
globalseq2
globaldp
globalgap
foriinrange(len(seq1)+1):
dp[i][0]=0+i*gap
foriinrange(len(seq2)+1):
dp[0][i]=0+i*gap
foriinrange(1,len(seq1)+1):
forjinrange(1,len(seq2)+1):
dp[i][j]=max([dp[i-1][j]+gap,dp[i][j-1]+gap,dp[i-1][j-1]+score(seq1[i-1],seq2[j-1])])
deflocal_alignment():
globalseq1
globalseq2
globaldp
globalgap
local_min=[]
foriinrange(len(seq1)+1):
dp[i][0]=0
foriinrange(len(seq2)+1):
dp[0][i]=0
foriinrange(1,len(seq1)+1):
forjinrange(1,len(seq2)+1):
dp[i][j]=max([dp[i-1][j]+gap,dp[i][j-1]+gap,dp[i-1][j-1]+score(seq1[i-1],seq2[j-1]),0])
max_value=0
foriinrange(0,len(seq1)+1):
forjinrange(0,len(seq2)+1):
ifmax_value max_value=dp[i][j] foriinrange(0,len(seq1)+1): forjinrange(0,len(seq2)+1): ifmax_value==dp[i][j]: local_min.append((i,j)) returnlocal_min deffind_path(_i,_j): globalseq1 globalseq2 globalgap globaldp path1="" path2="" align_score=0 identity=0 long_path=len(seq1)iflen(seq1)>len(seq2)elselen(seq2) short_path=len(seq2)iflen(seq1)>len(seq2)elselen(seq1) i=_i j=_j print("startseq1:"+str(len(seq1)-i+1)+"startseq2:"+str(len(seq2)-j+1)) while(i!=0orj!=0): top=dp[i][j-1ifj-1>0else0] diagonal=dp[i][j] left=dp[i-1ifi-1>0else0][j] if(top>max([diagonal,left])): path1="-"+path1 path2=seq2[j-1]+path2 align_score+=gap j-=1 elif(diagonal>=max([top,left])): path1=seq1[i-1]+path1 path2=seq2[j-1]+path2 align_score+=score(seq1[i-1],seq2[j-1]) identity+=1 i-=1 j-=1 else: path1=seq1[i-1]+path1 path2="-"+path2 align_score+=gap i-=1 i=max([i,0]) j=max([j,0]) length=len(path1) print("alignmentscoreis"+str(align_score)) print("sequenceidentityis"+str(identity)+"("+str(round(100*identity/len(path1),2))+"%="+str(identity)+"/"+str(len(path1))+")") forbinrange(max(int(length/60)+1,1)): block=60 foriinrange(b*block,min([block*(b+1),length])): print(path1[i],end="") print("") foriinrange(b*block,min([block*(b+1),length])): print("|"ifpath1[i]==path2[i]else"",end="") print("") foriinrange(b*block,min([block*(b+1),length])): print(path2[i],end="") print("") print("") if__name__=='__main__': main() -------------------------------------------- pdbaa.fasta file (db file) looks like following: (multiple seq) >4HZI_A Crystal structure of the Leptospira interrogans ATPase subunit of an orphan ABC transporter [Leptospira interrogans serovar Copenhageni str. Fiocruz L1-130]4HZI_B Crystal structure of the Leptospira interrogans ATPase subunit of an orphan ABC transporter [Leptospira interrogans serovar Copenhageni str. Fiocruz L1-130] MSYYHHHHHHLESTSLYKKAGSENLYFQGKINSLLSLEKISYKPTGKTILDSVSFEIKTNEHCVLLGRNGAGKSTLVNLI YGMIWATSGTIRLFQETYGEIAIQDLRKRIGILDSSQQENSIQRKLTVKDTILTGLFHTIGYYRDPSPEEETKTLQILKD SDLLSKKDQLYNTLSSGEKKKILFLRSIVNEPDFLIMDEPCSSLDLTAREDFLGFLKEYHSKKKFTSLYITHRPEEIPDF YSKAVLLKEGKVIHFGPIEECFTEKNLEDLYDIPLQVQRIENTWSVIPKQKRTY >5A5T_M Structure of mammalian eIF3 in the context of the 43S preinitiation complex [Oryctolagus cuniculus]6FEC_8 Human cap-dependent 48S pre-initiation complex [Homo sapiens]6W2S_8 Structure of the Cricket Paralysis Virus 5-UTR IRES (CrPV 5-UTR-IRES) bound to the small ribosomal subunit in the open state (Class 1) [Oryctolagus cuniculus]6W2T_8 Structure of the Cricket Paralysis Virus 5-UTR IRES (CrPV 5-UTR-IRES) bound to the small ribosomal subunit in the closed state (Class 2) [Oryctolagus cuniculus]6YAM_u Mammalian 48S late-stage translation initiation complex (LS48S+eIF3 IC) with beta-globin mRNA [Oryctolagus cuniculus]6YBD_6 Structure of a human 48S translational initiation complex - eIF3 [Homo sapiens]6ZMW_6 Structure of a human 48S translational initiation complex [Homo sapiens]6ZON_M SARS-CoV-2 Nsp1 bound to a human 43S preinitiation ribosome complex - state 1 [Homo sapiens]6ZP4_M SARS-CoV-2 Nsp1 bound to a human 43S preinitiation ribosome complex - state 2 [Homo sapiens] MSVPAFIDISEEDQAAELRAYLKSKGAEISEENSEGGLHVDLAQIIEACDVCLKEDDKDVESVMNSVVSLLLILEPDKQE ALIESLCEKLVKFREGERPSLRLQLLSNLFHGMDKNTPVRYTVYCSLIKVAASCGAIQYIPTELDQVRKWISDWNLTTEK KHTLLRLLYEALVDCKKSDAASKVMVELLGSYTEDNASQARVDAHRCIVRALKDPNAFLFDHLLTLKPVKFLEGELIHDL LTIFVSAKLASYVKFYQNNKDFIDSLGLLHEQNMAKMRLLTFMGMAVENKEISFDTMQQELQIGADDVEAFVIDAVRTKM VYCKIDQTQRKVVVSHSTHRTFGKQQWQQLYDTLNAWKQNLNKVKNSLLSLSDT -------------------------------------------- seq.fasta file (db file) looks like following: (multiple seq) >YP_009724390.1 surface glycoprotein [Severe acute respiratory syndrome coronavirus 2] MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHSTQDLFLPFFSNVTWFHAIHV SGTNGTKRFDNPVLPFNDGVYFASTEKSNIIRGWIFGTTLDSKTQSLLIVNNATNVVIKVCEFQFCNDPF LGVYYHKNNKSWMESEFRVYSSANNCTFEYVSQPFLMDLEGKQGNFKNLREFVFKNIDGYFKIYSKHTPI NLVRDLPQGFSALEPLVDLPIGINITRFQTLLALHRSYLTPGDSSSGWTAGAAAYYVGYLQPRTFLLKYN ENGTITDAVDCALDPLSETKCTLKSFTVEKGIYQTSNFRVQPTESIVRFPNITNLCPFGEVFNATRFASV YAWNRKRISNCVADYSVLYNSASFSTFKCYGVSPTKLNDLCFTNVYADSFVIRGDEVRQIAPGQTGKIAD YNYKLPDDFTGCVIAWNSNNLDSKVGGNYNYLYRLFRKSNLKPFERDISTEIYQAGSTPCNGVEGFNCYF PLQSYGFQPTNGVGYQPYRVVVLSFELLHAPATVCGPKKSTNLVKNKCVNFNFNGLTGTGVLTESNKKFL PFQQFGRDIADTTDAVRDPQTLEILDITPCSFGGVSVITPGTNTSNQVAVLYQDVNCTEVPVAIHADQLT PTWRVYSTGSNVFQTRAGCLIGAEHVNNSYECDIPIGAGICASYQTQTNSPRRARSVASQSIIAYTMSLG AENSVAYSNNSIAIPTNFTISVTTEILPVSMTKTSVDCTMYICGDSTECSNLLLQYGSFCTQLNRALTGI AVEQDKNTQEVFAQVKQIYKTPPIKDFGGFNFSQILPDPSKPSKRSFIEDLLFNKVTLADAGFIKQYGDC LGDIAARDLICAQKFNGLTVLPPLLTDEMIAQYTSALLAGTITSGWTFGAGAALQIPFAMQMAYRFNGIG VTQNVLYENQKLIANQFNSAIGKIQDSLSSTASALGKLQDVVNQNAQALNTLVKQLSSNFGAISSVLNDI LSRLDKVEAEVQIDRLITGRLQSLQTYVTQQLIRAAEIRASANLAATKMSECVLGQSKRVDFCGKGYHLM SFPQSAPHGVVFLHVTYVPAQEKNFTTAPAICHDGKAHFPREGVFVSNGTHWFVTQRNFYEPQIITTDNT FVSGNCDVVIGIVNNTVYDPLQPELDSFKEELDKYFKNHTSPDVDLGDISGINASVVNIQKEIDRLNEVA KNLNESLIDLQELGKYEQYIKWPWYIWLGFIAGLIAIVMVTIMLCCMTSCCSCLKGCCSCGSCCKFDEDD SEPVLKGVKLHYT
Step by Step Solution
There are 3 Steps involved in it
Step: 1
Get Instant Access to Expert-Tailored Solutions
See step-by-step solutions with expert insights and AI powered tools for academic success
Step: 2
Step: 3
Ace Your Homework with AI
Get the answers you need in no time with our AI-driven, step-by-step assistance
Get Started