{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# SpecLibFasta usage"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%reload_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from alphabase.protein.fasta import SpecLibFasta, append_special_modifications"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Proteins from a dict (or loaded from fasta files)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"prot1 = 'MABCDESTKAFGHIJKLMNOPQRAFGHIJK'\n",
"prot2 = 'AFGHIJKLMNOPQR'\n",
"protein_dict = {\n",
" 'xx': {\n",
" 'protein_id': 'xx',\n",
" 'gene_name': '',\n",
" 'sequence': prot1\n",
" },\n",
" 'yy': {\n",
" 'protein_id': 'yy',\n",
" 'gene_name': 'gene',\n",
" 'sequence': prot2\n",
" }\n",
"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`alphabase.protein.fasta.SpecLibFasta.get_peptides_from_protein_dict` will digest a protein dict into a peptide dataframe. \n",
"\n",
"`alphabase.protein.fasta.SpecLibFasta.get_peptides_from_fasta` will digest a fasta file or a list of fasta files into a peptide dataframe. "
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sequence | \n",
" protein_idxes | \n",
" miss_cleavage | \n",
" is_prot_nterm | \n",
" is_prot_cterm | \n",
" mods | \n",
" mod_sites | \n",
" nAA | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" AFGHIJK | \n",
" 0;1 | \n",
" 0 | \n",
" True | \n",
" True | \n",
" | \n",
" | \n",
" 7 | \n",
"
\n",
" \n",
" | 1 | \n",
" LMNOPQR | \n",
" 0;1 | \n",
" 0 | \n",
" False | \n",
" True | \n",
" | \n",
" | \n",
" 7 | \n",
"
\n",
" \n",
" | 2 | \n",
" ABCDESTK | \n",
" 0 | \n",
" 0 | \n",
" True | \n",
" False | \n",
" | \n",
" | \n",
" 8 | \n",
"
\n",
" \n",
" | 3 | \n",
" MABCDESTK | \n",
" 0 | \n",
" 0 | \n",
" True | \n",
" False | \n",
" | \n",
" | \n",
" 9 | \n",
"
\n",
" \n",
" | 4 | \n",
" AFGHIJKLMNOPQR | \n",
" 0;1 | \n",
" 1 | \n",
" True | \n",
" True | \n",
" | \n",
" | \n",
" 14 | \n",
"
\n",
" \n",
" | 5 | \n",
" LMNOPQRAFGHIJK | \n",
" 0 | \n",
" 1 | \n",
" False | \n",
" True | \n",
" | \n",
" | \n",
" 14 | \n",
"
\n",
" \n",
" | 6 | \n",
" ABCDESTKAFGHIJK | \n",
" 0 | \n",
" 1 | \n",
" True | \n",
" False | \n",
" | \n",
" | \n",
" 15 | \n",
"
\n",
" \n",
" | 7 | \n",
" MABCDESTKAFGHIJK | \n",
" 0 | \n",
" 1 | \n",
" True | \n",
" False | \n",
" | \n",
" | \n",
" 16 | \n",
"
\n",
" \n",
" | 8 | \n",
" AFGHIJKLMNOPQRAFGHIJK | \n",
" 0 | \n",
" 2 | \n",
" False | \n",
" True | \n",
" | \n",
" | \n",
" 21 | \n",
"
\n",
" \n",
" | 9 | \n",
" ABCDESTKAFGHIJKLMNOPQR | \n",
" 0 | \n",
" 2 | \n",
" True | \n",
" False | \n",
" | \n",
" | \n",
" 22 | \n",
"
\n",
" \n",
" | 10 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" 0 | \n",
" 2 | \n",
" True | \n",
" False | \n",
" | \n",
" | \n",
" 23 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sequence protein_idxes miss_cleavage is_prot_nterm \\\n",
"0 AFGHIJK 0;1 0 True \n",
"1 LMNOPQR 0;1 0 False \n",
"2 ABCDESTK 0 0 True \n",
"3 MABCDESTK 0 0 True \n",
"4 AFGHIJKLMNOPQR 0;1 1 True \n",
"5 LMNOPQRAFGHIJK 0 1 False \n",
"6 ABCDESTKAFGHIJK 0 1 True \n",
"7 MABCDESTKAFGHIJK 0 1 True \n",
"8 AFGHIJKLMNOPQRAFGHIJK 0 2 False \n",
"9 ABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
"10 MABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
"\n",
" is_prot_cterm mods mod_sites nAA \n",
"0 True 7 \n",
"1 True 7 \n",
"2 False 8 \n",
"3 False 9 \n",
"4 True 14 \n",
"5 True 14 \n",
"6 False 15 \n",
"7 False 16 \n",
"8 True 21 \n",
"9 False 22 \n",
"10 False 23 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fasta_lib = SpecLibFasta(\n",
" ['b_z1','y_z1'], I_to_L=False, decoy='pseudo_reverse',\n",
" var_mods=['Acetyl@Protein_N-term', 'Oxidation@M'],\n",
" fix_mods=['Carbamidomethyl@C'],\n",
")\n",
"# fasta_lib.get_peptides_from_fasta(fasta_files)\n",
"fasta_lib.get_peptides_from_protein_dict(protein_dict)\n",
"fasta_lib.precursor_df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" protein_id | \n",
" gene_name | \n",
" sequence | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" xx | \n",
" | \n",
" MABCDESTKAFGHIJKLMNOPQRAFGHIJK | \n",
"
\n",
" \n",
" | 1 | \n",
" yy | \n",
" gene | \n",
" AFGHIJKLMNOPQR | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" protein_id gene_name sequence\n",
"0 xx MABCDESTKAFGHIJKLMNOPQRAFGHIJK\n",
"1 yy gene AFGHIJKLMNOPQR"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fasta_lib.protein_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can also append the protein and gene names to `precursor_df` (as the `proteins` and `genes` columns)."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sequence | \n",
" protein_idxes | \n",
" miss_cleavage | \n",
" is_prot_nterm | \n",
" is_prot_cterm | \n",
" mods | \n",
" mod_sites | \n",
" nAA | \n",
" proteins | \n",
" genes | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" AFGHIJK | \n",
" 0;1 | \n",
" 0 | \n",
" True | \n",
" True | \n",
" | \n",
" | \n",
" 7 | \n",
" xx;yy | \n",
" gene | \n",
"
\n",
" \n",
" | 1 | \n",
" LMNOPQR | \n",
" 0;1 | \n",
" 0 | \n",
" False | \n",
" True | \n",
" | \n",
" | \n",
" 7 | \n",
" xx;yy | \n",
" gene | \n",
"
\n",
" \n",
" | 2 | \n",
" ABCDESTK | \n",
" 0 | \n",
" 0 | \n",
" True | \n",
" False | \n",
" | \n",
" | \n",
" 8 | \n",
" xx | \n",
" | \n",
"
\n",
" \n",
" | 3 | \n",
" MABCDESTK | \n",
" 0 | \n",
" 0 | \n",
" True | \n",
" False | \n",
" | \n",
" | \n",
" 9 | \n",
" xx | \n",
" | \n",
"
\n",
" \n",
" | 4 | \n",
" AFGHIJKLMNOPQR | \n",
" 0;1 | \n",
" 1 | \n",
" True | \n",
" True | \n",
" | \n",
" | \n",
" 14 | \n",
" xx;yy | \n",
" gene | \n",
"
\n",
" \n",
" | 5 | \n",
" LMNOPQRAFGHIJK | \n",
" 0 | \n",
" 1 | \n",
" False | \n",
" True | \n",
" | \n",
" | \n",
" 14 | \n",
" xx | \n",
" | \n",
"
\n",
" \n",
" | 6 | \n",
" ABCDESTKAFGHIJK | \n",
" 0 | \n",
" 1 | \n",
" True | \n",
" False | \n",
" | \n",
" | \n",
" 15 | \n",
" xx | \n",
" | \n",
"
\n",
" \n",
" | 7 | \n",
" MABCDESTKAFGHIJK | \n",
" 0 | \n",
" 1 | \n",
" True | \n",
" False | \n",
" | \n",
" | \n",
" 16 | \n",
" xx | \n",
" | \n",
"
\n",
" \n",
" | 8 | \n",
" AFGHIJKLMNOPQRAFGHIJK | \n",
" 0 | \n",
" 2 | \n",
" False | \n",
" True | \n",
" | \n",
" | \n",
" 21 | \n",
" xx | \n",
" | \n",
"
\n",
" \n",
" | 9 | \n",
" ABCDESTKAFGHIJKLMNOPQR | \n",
" 0 | \n",
" 2 | \n",
" True | \n",
" False | \n",
" | \n",
" | \n",
" 22 | \n",
" xx | \n",
" | \n",
"
\n",
" \n",
" | 10 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" 0 | \n",
" 2 | \n",
" True | \n",
" False | \n",
" | \n",
" | \n",
" 23 | \n",
" xx | \n",
" | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sequence protein_idxes miss_cleavage is_prot_nterm \\\n",
"0 AFGHIJK 0;1 0 True \n",
"1 LMNOPQR 0;1 0 False \n",
"2 ABCDESTK 0 0 True \n",
"3 MABCDESTK 0 0 True \n",
"4 AFGHIJKLMNOPQR 0;1 1 True \n",
"5 LMNOPQRAFGHIJK 0 1 False \n",
"6 ABCDESTKAFGHIJK 0 1 True \n",
"7 MABCDESTKAFGHIJK 0 1 True \n",
"8 AFGHIJKLMNOPQRAFGHIJK 0 2 False \n",
"9 ABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
"10 MABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
"\n",
" is_prot_cterm mods mod_sites nAA proteins genes \n",
"0 True 7 xx;yy gene \n",
"1 True 7 xx;yy gene \n",
"2 False 8 xx \n",
"3 False 9 xx \n",
"4 True 14 xx;yy gene \n",
"5 True 14 xx \n",
"6 False 15 xx \n",
"7 False 16 xx \n",
"8 True 21 xx \n",
"9 False 22 xx \n",
"10 False 23 xx "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fasta_lib.append_protein_name()\n",
"fasta_lib.precursor_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If we have our own precursor_df loaded by psm_readers, we can directly assign it to fasta_lib. \n",
"\n",
"``` python\n",
"fasta_lib._precursor_df = precursor_df\n",
"```\n",
"Thus, we can still use SpecLibFasta functionalities for this precursor_df."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Add modifications, including both var_mods (`Acetyl@Protein_N-term`, `Oxidation@M`, see the initialization of fasta_lib) and fix_mods (`Carbamidomethyl@C`), into the precursor_df."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sequence | \n",
" mods | \n",
" mod_sites | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" AFGHIJK | \n",
" | \n",
" | \n",
"
\n",
" \n",
" | 1 | \n",
" AFGHIJK | \n",
" Acetyl@Protein_N-term | \n",
" 0 | \n",
"
\n",
" \n",
" | 2 | \n",
" LMNOPQR | \n",
" Oxidation@M | \n",
" 2 | \n",
"
\n",
" \n",
" | 3 | \n",
" LMNOPQR | \n",
" | \n",
" | \n",
"
\n",
" \n",
" | 4 | \n",
" ABCDESTK | \n",
" Carbamidomethyl@C | \n",
" 3 | \n",
"
\n",
" \n",
" | 5 | \n",
" ABCDESTK | \n",
" Acetyl@Protein_N-term;Carbamidomethyl@C | \n",
" 0;3 | \n",
"
\n",
" \n",
" | 6 | \n",
" MABCDESTK | \n",
" Oxidation@M;Carbamidomethyl@C | \n",
" 1;4 | \n",
"
\n",
" \n",
" | 7 | \n",
" MABCDESTK | \n",
" Carbamidomethyl@C | \n",
" 4 | \n",
"
\n",
" \n",
" | 8 | \n",
" MABCDESTK | \n",
" Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... | \n",
" 0;1;4 | \n",
"
\n",
" \n",
" | 9 | \n",
" MABCDESTK | \n",
" Acetyl@Protein_N-term;Carbamidomethyl@C | \n",
" 0;4 | \n",
"
\n",
" \n",
" | 10 | \n",
" AFGHIJKLMNOPQR | \n",
" Oxidation@M | \n",
" 9 | \n",
"
\n",
" \n",
" | 11 | \n",
" AFGHIJKLMNOPQR | \n",
" | \n",
" | \n",
"
\n",
" \n",
" | 12 | \n",
" AFGHIJKLMNOPQR | \n",
" Acetyl@Protein_N-term;Oxidation@M | \n",
" 0;9 | \n",
"
\n",
" \n",
" | 13 | \n",
" AFGHIJKLMNOPQR | \n",
" Acetyl@Protein_N-term | \n",
" 0 | \n",
"
\n",
" \n",
" | 14 | \n",
" LMNOPQRAFGHIJK | \n",
" Oxidation@M | \n",
" 2 | \n",
"
\n",
" \n",
" | 15 | \n",
" LMNOPQRAFGHIJK | \n",
" | \n",
" | \n",
"
\n",
" \n",
" | 16 | \n",
" ABCDESTKAFGHIJK | \n",
" Carbamidomethyl@C | \n",
" 3 | \n",
"
\n",
" \n",
" | 17 | \n",
" ABCDESTKAFGHIJK | \n",
" Acetyl@Protein_N-term;Carbamidomethyl@C | \n",
" 0;3 | \n",
"
\n",
" \n",
" | 18 | \n",
" MABCDESTKAFGHIJK | \n",
" Oxidation@M;Carbamidomethyl@C | \n",
" 1;4 | \n",
"
\n",
" \n",
" | 19 | \n",
" MABCDESTKAFGHIJK | \n",
" Carbamidomethyl@C | \n",
" 4 | \n",
"
\n",
" \n",
" | 20 | \n",
" MABCDESTKAFGHIJK | \n",
" Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... | \n",
" 0;1;4 | \n",
"
\n",
" \n",
" | 21 | \n",
" MABCDESTKAFGHIJK | \n",
" Acetyl@Protein_N-term;Carbamidomethyl@C | \n",
" 0;4 | \n",
"
\n",
" \n",
" | 22 | \n",
" AFGHIJKLMNOPQRAFGHIJK | \n",
" Oxidation@M | \n",
" 9 | \n",
"
\n",
" \n",
" | 23 | \n",
" AFGHIJKLMNOPQRAFGHIJK | \n",
" | \n",
" | \n",
"
\n",
" \n",
" | 24 | \n",
" ABCDESTKAFGHIJKLMNOPQR | \n",
" Oxidation@M;Carbamidomethyl@C | \n",
" 17;3 | \n",
"
\n",
" \n",
" | 25 | \n",
" ABCDESTKAFGHIJKLMNOPQR | \n",
" Carbamidomethyl@C | \n",
" 3 | \n",
"
\n",
" \n",
" | 26 | \n",
" ABCDESTKAFGHIJKLMNOPQR | \n",
" Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... | \n",
" 0;17;3 | \n",
"
\n",
" \n",
" | 27 | \n",
" ABCDESTKAFGHIJKLMNOPQR | \n",
" Acetyl@Protein_N-term;Carbamidomethyl@C | \n",
" 0;3 | \n",
"
\n",
" \n",
" | 28 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" Oxidation@M;Carbamidomethyl@C | \n",
" 1;4 | \n",
"
\n",
" \n",
" | 29 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" Oxidation@M;Carbamidomethyl@C | \n",
" 18;4 | \n",
"
\n",
" \n",
" | 30 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" Oxidation@M;Oxidation@M;Carbamidomethyl@C | \n",
" 1;18;4 | \n",
"
\n",
" \n",
" | 31 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" Carbamidomethyl@C | \n",
" 4 | \n",
"
\n",
" \n",
" | 32 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... | \n",
" 0;1;4 | \n",
"
\n",
" \n",
" | 33 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... | \n",
" 0;18;4 | \n",
"
\n",
" \n",
" | 34 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;... | \n",
" 0;1;18;4 | \n",
"
\n",
" \n",
" | 35 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" Acetyl@Protein_N-term;Carbamidomethyl@C | \n",
" 0;4 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" sequence \\\n",
"0 AFGHIJK \n",
"1 AFGHIJK \n",
"2 LMNOPQR \n",
"3 LMNOPQR \n",
"4 ABCDESTK \n",
"5 ABCDESTK \n",
"6 MABCDESTK \n",
"7 MABCDESTK \n",
"8 MABCDESTK \n",
"9 MABCDESTK \n",
"10 AFGHIJKLMNOPQR \n",
"11 AFGHIJKLMNOPQR \n",
"12 AFGHIJKLMNOPQR \n",
"13 AFGHIJKLMNOPQR \n",
"14 LMNOPQRAFGHIJK \n",
"15 LMNOPQRAFGHIJK \n",
"16 ABCDESTKAFGHIJK \n",
"17 ABCDESTKAFGHIJK \n",
"18 MABCDESTKAFGHIJK \n",
"19 MABCDESTKAFGHIJK \n",
"20 MABCDESTKAFGHIJK \n",
"21 MABCDESTKAFGHIJK \n",
"22 AFGHIJKLMNOPQRAFGHIJK \n",
"23 AFGHIJKLMNOPQRAFGHIJK \n",
"24 ABCDESTKAFGHIJKLMNOPQR \n",
"25 ABCDESTKAFGHIJKLMNOPQR \n",
"26 ABCDESTKAFGHIJKLMNOPQR \n",
"27 ABCDESTKAFGHIJKLMNOPQR \n",
"28 MABCDESTKAFGHIJKLMNOPQR \n",
"29 MABCDESTKAFGHIJKLMNOPQR \n",
"30 MABCDESTKAFGHIJKLMNOPQR \n",
"31 MABCDESTKAFGHIJKLMNOPQR \n",
"32 MABCDESTKAFGHIJKLMNOPQR \n",
"33 MABCDESTKAFGHIJKLMNOPQR \n",
"34 MABCDESTKAFGHIJKLMNOPQR \n",
"35 MABCDESTKAFGHIJKLMNOPQR \n",
"\n",
" mods mod_sites \n",
"0 \n",
"1 Acetyl@Protein_N-term 0 \n",
"2 Oxidation@M 2 \n",
"3 \n",
"4 Carbamidomethyl@C 3 \n",
"5 Acetyl@Protein_N-term;Carbamidomethyl@C 0;3 \n",
"6 Oxidation@M;Carbamidomethyl@C 1;4 \n",
"7 Carbamidomethyl@C 4 \n",
"8 Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... 0;1;4 \n",
"9 Acetyl@Protein_N-term;Carbamidomethyl@C 0;4 \n",
"10 Oxidation@M 9 \n",
"11 \n",
"12 Acetyl@Protein_N-term;Oxidation@M 0;9 \n",
"13 Acetyl@Protein_N-term 0 \n",
"14 Oxidation@M 2 \n",
"15 \n",
"16 Carbamidomethyl@C 3 \n",
"17 Acetyl@Protein_N-term;Carbamidomethyl@C 0;3 \n",
"18 Oxidation@M;Carbamidomethyl@C 1;4 \n",
"19 Carbamidomethyl@C 4 \n",
"20 Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... 0;1;4 \n",
"21 Acetyl@Protein_N-term;Carbamidomethyl@C 0;4 \n",
"22 Oxidation@M 9 \n",
"23 \n",
"24 Oxidation@M;Carbamidomethyl@C 17;3 \n",
"25 Carbamidomethyl@C 3 \n",
"26 Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... 0;17;3 \n",
"27 Acetyl@Protein_N-term;Carbamidomethyl@C 0;3 \n",
"28 Oxidation@M;Carbamidomethyl@C 1;4 \n",
"29 Oxidation@M;Carbamidomethyl@C 18;4 \n",
"30 Oxidation@M;Oxidation@M;Carbamidomethyl@C 1;18;4 \n",
"31 Carbamidomethyl@C 4 \n",
"32 Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... 0;1;4 \n",
"33 Acetyl@Protein_N-term;Oxidation@M;Carbamidomet... 0;18;4 \n",
"34 Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;... 0;1;18;4 \n",
"35 Acetyl@Protein_N-term;Carbamidomethyl@C 0;4 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fasta_lib.add_modifications()\n",
"fasta_lib.precursor_df[['sequence','mods','mod_sites']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`alphabase.protein.fasta.append_special_modifications` is specially designed for modifications such as `Phospho`, which may generate thousands of peptidoforms for a peptide with multiple phospho sites. Use `max_mod_num` and `max_peptidoform_num` to limit how many peptidoforms are generated. "
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sequence | \n",
" protein_idxes | \n",
" miss_cleavage | \n",
" is_prot_nterm | \n",
" is_prot_cterm | \n",
" mods | \n",
" mod_sites | \n",
" nAA | \n",
" proteins | \n",
" genes | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" AFGHIJK | \n",
" 0;1 | \n",
" 0 | \n",
" True | \n",
" True | \n",
" | \n",
" | \n",
" 7 | \n",
" xx;yy | \n",
" gene | \n",
"
\n",
" \n",
" | 1 | \n",
" AFGHIJK | \n",
" 0;1 | \n",
" 0 | \n",
" True | \n",
" True | \n",
" Acetyl@Protein_N-term | \n",
" 0 | \n",
" 7 | \n",
" xx;yy | \n",
" gene | \n",
"
\n",
" \n",
" | 2 | \n",
" LMNOPQR | \n",
" 0;1 | \n",
" 0 | \n",
" False | \n",
" True | \n",
" Oxidation@M | \n",
" 2 | \n",
" 7 | \n",
" xx;yy | \n",
" gene | \n",
"
\n",
" \n",
" | 3 | \n",
" LMNOPQR | \n",
" 0;1 | \n",
" 0 | \n",
" False | \n",
" True | \n",
" | \n",
" | \n",
" 7 | \n",
" xx;yy | \n",
" gene | \n",
"
\n",
" \n",
" | 4 | \n",
" ABCDESTK | \n",
" 0 | \n",
" 0 | \n",
" True | \n",
" False | \n",
" Carbamidomethyl@C;Phospho@S | \n",
" 3;6 | \n",
" 8 | \n",
" xx | \n",
" | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 79 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" 0 | \n",
" 2 | \n",
" True | \n",
" False | \n",
" Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;... | \n",
" 0;1;18;4;8 | \n",
" 23 | \n",
" xx | \n",
" | \n",
"
\n",
" \n",
" | 80 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" 0 | \n",
" 2 | \n",
" True | \n",
" False | \n",
" Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;... | \n",
" 0;1;18;4 | \n",
" 23 | \n",
" xx | \n",
" | \n",
"
\n",
" \n",
" | 81 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" 0 | \n",
" 2 | \n",
" True | \n",
" False | \n",
" Acetyl@Protein_N-term;Carbamidomethyl@C;Phospho@S | \n",
" 0;4;7 | \n",
" 23 | \n",
" xx | \n",
" | \n",
"
\n",
" \n",
" | 82 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" 0 | \n",
" 2 | \n",
" True | \n",
" False | \n",
" Acetyl@Protein_N-term;Carbamidomethyl@C;Phospho@T | \n",
" 0;4;8 | \n",
" 23 | \n",
" xx | \n",
" | \n",
"
\n",
" \n",
" | 83 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" 0 | \n",
" 2 | \n",
" True | \n",
" False | \n",
" Acetyl@Protein_N-term;Carbamidomethyl@C | \n",
" 0;4 | \n",
" 23 | \n",
" xx | \n",
" | \n",
"
\n",
" \n",
"
\n",
"
84 rows × 10 columns
\n",
"
"
],
"text/plain": [
" sequence protein_idxes miss_cleavage is_prot_nterm \\\n",
"0 AFGHIJK 0;1 0 True \n",
"1 AFGHIJK 0;1 0 True \n",
"2 LMNOPQR 0;1 0 False \n",
"3 LMNOPQR 0;1 0 False \n",
"4 ABCDESTK 0 0 True \n",
".. ... ... ... ... \n",
"79 MABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
"80 MABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
"81 MABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
"82 MABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
"83 MABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
"\n",
" is_prot_cterm mods \\\n",
"0 True \n",
"1 True Acetyl@Protein_N-term \n",
"2 True Oxidation@M \n",
"3 True \n",
"4 False Carbamidomethyl@C;Phospho@S \n",
".. ... ... \n",
"79 False Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;... \n",
"80 False Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;... \n",
"81 False Acetyl@Protein_N-term;Carbamidomethyl@C;Phospho@S \n",
"82 False Acetyl@Protein_N-term;Carbamidomethyl@C;Phospho@T \n",
"83 False Acetyl@Protein_N-term;Carbamidomethyl@C \n",
"\n",
" mod_sites nAA proteins genes \n",
"0 7 xx;yy gene \n",
"1 0 7 xx;yy gene \n",
"2 2 7 xx;yy gene \n",
"3 7 xx;yy gene \n",
"4 3;6 8 xx \n",
".. ... ... ... ... \n",
"79 0;1;18;4;8 23 xx \n",
"80 0;1;18;4 23 xx \n",
"81 0;4;7 23 xx \n",
"82 0;4;8 23 xx \n",
"83 0;4 23 xx \n",
"\n",
"[84 rows x 10 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Append Phospho@S / Phospho@T to the existing peptidoforms, at most one per\n",
"# peptide (max_mod_num=1); min_mod_num=0 also keeps the unphosphorylated rows.\n",
"# `append_special_modifications` is imported in the import cell at the top.\n",
"fasta_lib._precursor_df = append_special_modifications(\n",
"    fasta_lib.precursor_df, ['Phospho@S','Phospho@T'],\n",
"    min_mod_num=0, max_mod_num=1, max_peptidoform_num=100\n",
")\n",
"fasta_lib.precursor_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
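"A flexible way to add peptide labeling: each key of the dict is a labeling channel, and each value is the list of labeling modifications for that channel (an empty list keeps the unlabeled peptides for reference)."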
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sequence | \n",
" protein_idxes | \n",
" miss_cleavage | \n",
" is_prot_nterm | \n",
" is_prot_cterm | \n",
" mods | \n",
" mod_sites | \n",
" nAA | \n",
" proteins | \n",
" genes | \n",
" labeling_channel | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" AFGHIJK | \n",
" 0;1 | \n",
" 0 | \n",
" True | \n",
" True | \n",
" | \n",
" | \n",
" 7 | \n",
" xx;yy | \n",
" gene | \n",
" | \n",
"
\n",
" \n",
" | 1 | \n",
" AFGHIJK | \n",
" 0;1 | \n",
" 0 | \n",
" True | \n",
" True | \n",
" Acetyl@Protein_N-term | \n",
" 0 | \n",
" 7 | \n",
" xx;yy | \n",
" gene | \n",
" | \n",
"
\n",
" \n",
" | 2 | \n",
" LMNOPQR | \n",
" 0;1 | \n",
" 0 | \n",
" False | \n",
" True | \n",
" Oxidation@M | \n",
" 2 | \n",
" 7 | \n",
" xx;yy | \n",
" gene | \n",
" | \n",
"
\n",
" \n",
" | 3 | \n",
" LMNOPQR | \n",
" 0;1 | \n",
" 0 | \n",
" False | \n",
" True | \n",
" | \n",
" | \n",
" 7 | \n",
" xx;yy | \n",
" gene | \n",
" | \n",
"
\n",
" \n",
" | 4 | \n",
" ABCDESTK | \n",
" 0 | \n",
" 0 | \n",
" True | \n",
" False | \n",
" Carbamidomethyl@C;Phospho@S | \n",
" 3;6 | \n",
" 8 | \n",
" xx | \n",
" | \n",
" | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 247 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" 0 | \n",
" 2 | \n",
" True | \n",
" False | \n",
" Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;... | \n",
" 0;1;18;4;8;9;16 | \n",
" 23 | \n",
" xx | \n",
" | \n",
" 8 | \n",
"
\n",
" \n",
" | 248 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" 0 | \n",
" 2 | \n",
" True | \n",
" False | \n",
" Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;... | \n",
" 0;1;18;4;9;16 | \n",
" 23 | \n",
" xx | \n",
" | \n",
" 8 | \n",
"
\n",
" \n",
" | 249 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" 0 | \n",
" 2 | \n",
" True | \n",
" False | \n",
" Acetyl@Protein_N-term;Carbamidomethyl@C;Phosph... | \n",
" 0;4;7;9;16 | \n",
" 23 | \n",
" xx | \n",
" | \n",
" 8 | \n",
"
\n",
" \n",
" | 250 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" 0 | \n",
" 2 | \n",
" True | \n",
" False | \n",
" Acetyl@Protein_N-term;Carbamidomethyl@C;Phosph... | \n",
" 0;4;8;9;16 | \n",
" 23 | \n",
" xx | \n",
" | \n",
" 8 | \n",
"
\n",
" \n",
" | 251 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" 0 | \n",
" 2 | \n",
" True | \n",
" False | \n",
" Acetyl@Protein_N-term;Carbamidomethyl@C;Dimeth... | \n",
" 0;4;9;16 | \n",
" 23 | \n",
" xx | \n",
" | \n",
" 8 | \n",
"
\n",
" \n",
"
\n",
"
252 rows × 11 columns
\n",
"
"
],
"text/plain": [
" sequence protein_idxes miss_cleavage is_prot_nterm \\\n",
"0 AFGHIJK 0;1 0 True \n",
"1 AFGHIJK 0;1 0 True \n",
"2 LMNOPQR 0;1 0 False \n",
"3 LMNOPQR 0;1 0 False \n",
"4 ABCDESTK 0 0 True \n",
".. ... ... ... ... \n",
"247 MABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
"248 MABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
"249 MABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
"250 MABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
"251 MABCDESTKAFGHIJKLMNOPQR 0 2 True \n",
"\n",
" is_prot_cterm mods \\\n",
"0 True \n",
"1 True Acetyl@Protein_N-term \n",
"2 True Oxidation@M \n",
"3 True \n",
"4 False Carbamidomethyl@C;Phospho@S \n",
".. ... ... \n",
"247 False Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;... \n",
"248 False Acetyl@Protein_N-term;Oxidation@M;Oxidation@M;... \n",
"249 False Acetyl@Protein_N-term;Carbamidomethyl@C;Phosph... \n",
"250 False Acetyl@Protein_N-term;Carbamidomethyl@C;Phosph... \n",
"251 False Acetyl@Protein_N-term;Carbamidomethyl@C;Dimeth... \n",
"\n",
" mod_sites nAA proteins genes labeling_channel \n",
"0 7 xx;yy gene \n",
"1 0 7 xx;yy gene \n",
"2 2 7 xx;yy gene \n",
"3 7 xx;yy gene \n",
"4 3;6 8 xx \n",
".. ... ... ... ... ... \n",
"247 0;1;18;4;8;9;16 23 xx 8 \n",
"248 0;1;18;4;9;16 23 xx 8 \n",
"249 0;4;7;9;16 23 xx 8 \n",
"250 0;4;8;9;16 23 xx 8 \n",
"251 0;4;9;16 23 xx 8 \n",
"\n",
"[252 rows x 11 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fasta_lib.add_peptide_labeling({\n",
" '': [], # not labelled for reference\n",
" '0': ['Dimethyl@Any_N-term','Dimethyl@K'],\n",
" '8': ['Dimethyl:2H(6)13C(2)@Any_N-term','Dimethyl:2H(6)13C(2)@K'],\n",
"})\n",
"fasta_lib.precursor_df"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" sequence | \n",
" mods | \n",
" mod_sites | \n",
" charge | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" AFGHIJK | \n",
" | \n",
" | \n",
" 2 | \n",
"
\n",
" \n",
" | 1 | \n",
" AFGHIJK | \n",
" | \n",
" | \n",
" 3 | \n",
"
\n",
" \n",
" | 2 | \n",
" AFGHIJK | \n",
" | \n",
" | \n",
" 4 | \n",
"
\n",
" \n",
" | 3 | \n",
" AFGHIJK | \n",
" Acetyl@Protein_N-term | \n",
" 0 | \n",
" 2 | \n",
"
\n",
" \n",
" | 4 | \n",
" AFGHIJK | \n",
" Acetyl@Protein_N-term | \n",
" 0 | \n",
" 3 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 751 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" Acetyl@Protein_N-term;Carbamidomethyl@C;Phosph... | \n",
" 0;4;8;9;16 | \n",
" 3 | \n",
"
\n",
" \n",
" | 752 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" Acetyl@Protein_N-term;Carbamidomethyl@C;Phosph... | \n",
" 0;4;8;9;16 | \n",
" 4 | \n",
"
\n",
" \n",
" | 753 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" Acetyl@Protein_N-term;Carbamidomethyl@C;Dimeth... | \n",
" 0;4;9;16 | \n",
" 2 | \n",
"
\n",
" \n",
" | 754 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" Acetyl@Protein_N-term;Carbamidomethyl@C;Dimeth... | \n",
" 0;4;9;16 | \n",
" 3 | \n",
"
\n",
" \n",
" | 755 | \n",
" MABCDESTKAFGHIJKLMNOPQR | \n",
" Acetyl@Protein_N-term;Carbamidomethyl@C;Dimeth... | \n",
" 0;4;9;16 | \n",
" 4 | \n",
"
\n",
" \n",
"
\n",
"
756 rows × 4 columns
\n",
"
"
],
"text/plain": [
" sequence \\\n",
"0 AFGHIJK \n",
"1 AFGHIJK \n",
"2 AFGHIJK \n",
"3 AFGHIJK \n",
"4 AFGHIJK \n",
".. ... \n",
"751 MABCDESTKAFGHIJKLMNOPQR \n",
"752 MABCDESTKAFGHIJKLMNOPQR \n",
"753 MABCDESTKAFGHIJKLMNOPQR \n",
"754 MABCDESTKAFGHIJKLMNOPQR \n",
"755 MABCDESTKAFGHIJKLMNOPQR \n",
"\n",
" mods mod_sites charge \n",
"0 2 \n",
"1 3 \n",
"2 4 \n",
"3 Acetyl@Protein_N-term 0 2 \n",
"4 Acetyl@Protein_N-term 0 3 \n",
".. ... ... ... \n",
"751 Acetyl@Protein_N-term;Carbamidomethyl@C;Phosph... 0;4;8;9;16 3 \n",
"752 Acetyl@Protein_N-term;Carbamidomethyl@C;Phosph... 0;4;8;9;16 4 \n",
"753 Acetyl@Protein_N-term;Carbamidomethyl@C;Dimeth... 0;4;9;16 2 \n",
"754 Acetyl@Protein_N-term;Carbamidomethyl@C;Dimeth... 0;4;9;16 3 \n",
"755 Acetyl@Protein_N-term;Carbamidomethyl@C;Dimeth... 0;4;9;16 4 \n",
"\n",
"[756 rows x 4 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fasta_lib.add_charge()\n",
"fasta_lib.precursor_df[['sequence','mods','mod_sites','charge']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Append precursor mz and isotope information"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/wenfengzeng/workspace/alphabase/alphabase/peptide/precursor.py:613: RuntimeWarning: invalid value encountered in divide\n",
" precursor_dist /= np.sum(precursor_dist, axis=1, keepdims=True)\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" precursor_mz | \n",
" i_0 | \n",
" i_1 | \n",
" i_2 | \n",
" i_3 | \n",
" i_4 | \n",
" i_5 | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 3.932371e+02 | \n",
" 0.625822 | \n",
" 0.285918 | \n",
" 0.072883 | \n",
" 0.013411 | \n",
" 0.001966 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2.624938e+02 | \n",
" 0.625822 | \n",
" 0.285918 | \n",
" 0.072883 | \n",
" 0.013411 | \n",
" 0.001966 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 1.971222e+02 | \n",
" 0.625822 | \n",
" 0.285918 | \n",
" 0.072883 | \n",
" 0.013411 | \n",
" 0.001966 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 4.142423e+02 | \n",
" 0.610921 | \n",
" 0.292699 | \n",
" 0.078690 | \n",
" 0.015312 | \n",
" 0.002378 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | 4 | \n",
" 2.764973e+02 | \n",
" 0.610921 | \n",
" 0.292699 | \n",
" 0.078690 | \n",
" 0.015312 | \n",
" 0.002378 | \n",
" 0.0 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 751 | \n",
" 4.000960e+06 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 752 | \n",
" 3.000720e+06 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 753 | \n",
" 6.001400e+06 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 754 | \n",
" 4.000934e+06 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" | 755 | \n",
" 3.000700e+06 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
756 rows × 7 columns
\n",
"
"
],
"text/plain": [
" precursor_mz i_0 i_1 i_2 i_3 i_4 i_5\n",
"0 3.932371e+02 0.625822 0.285918 0.072883 0.013411 0.001966 0.0\n",
"1 2.624938e+02 0.625822 0.285918 0.072883 0.013411 0.001966 0.0\n",
"2 1.971222e+02 0.625822 0.285918 0.072883 0.013411 0.001966 0.0\n",
"3 4.142423e+02 0.610921 0.292699 0.078690 0.015312 0.002378 0.0\n",
"4 2.764973e+02 0.610921 0.292699 0.078690 0.015312 0.002378 0.0\n",
".. ... ... ... ... ... ... ...\n",
"751 4.000960e+06 NaN NaN NaN NaN NaN NaN\n",
"752 3.000720e+06 NaN NaN NaN NaN NaN NaN\n",
"753 6.001400e+06 NaN NaN NaN NaN NaN NaN\n",
"754 4.000934e+06 NaN NaN NaN NaN NaN NaN\n",
"755 3.000700e+06 NaN NaN NaN NaN NaN NaN\n",
"\n",
"[756 rows x 7 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Calculate precursor m/z and isotope columns, then display `precursor_mz`\n",
"# next to every column whose name starts with `i_`.\n",
"fasta_lib.calc_precursor_mz()\n",
"fasta_lib.calc_precursor_isotope()\n",
"fasta_lib.precursor_df[\n",
"    ['precursor_mz']\n",
"    + [name for name in fasta_lib.precursor_df.columns if name.startswith('i_')]\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Use `alphabase.spectral_library.base.SpecLibBase.calc_fragment_mz_df` to calculate the fragment m/z dataframe."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" b_z1 | \n",
" y_z1 | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 7.204439e+01 | \n",
" 714.429749 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2.191128e+02 | \n",
" 567.361328 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2.761343e+02 | \n",
" 510.339844 | \n",
"
\n",
" \n",
" | 3 | \n",
" 4.131932e+02 | \n",
" 373.280945 | \n",
"
\n",
" \n",
" | 4 | \n",
" 5.262772e+02 | \n",
" 260.196869 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 11911 | \n",
" 1.200205e+07 | \n",
" 751.420959 | \n",
"
\n",
" \n",
" | 11912 | \n",
" 1.200216e+07 | \n",
" 637.377991 | \n",
"
\n",
" \n",
" | 11913 | \n",
" 1.200240e+07 | \n",
" 400.230286 | \n",
"
\n",
" \n",
" | 11914 | \n",
" 1.200250e+07 | \n",
" 303.177521 | \n",
"
\n",
" \n",
" | 11915 | \n",
" 1.200262e+07 | \n",
" 175.118958 | \n",
"
\n",
" \n",
"
\n",
"
11916 rows × 2 columns
\n",
"
"
],
"text/plain": [
" b_z1 y_z1\n",
"0 7.204439e+01 714.429749\n",
"1 2.191128e+02 567.361328\n",
"2 2.761343e+02 510.339844\n",
"3 4.131932e+02 373.280945\n",
"4 5.262772e+02 260.196869\n",
"... ... ...\n",
"11911 1.200205e+07 751.420959\n",
"11912 1.200216e+07 637.377991\n",
"11913 1.200240e+07 400.230286\n",
"11914 1.200250e+07 303.177521\n",
"11915 1.200262e+07 175.118958\n",
"\n",
"[11916 rows x 2 columns]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fasta_lib.calc_fragment_mz_df()\n",
"fasta_lib.fragment_mz_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`calc_fragment_mz_df()` also generates the pointers `frag_start_idx` and `frag_stop_idx` in the precursor_df to locate the fragments of each precursor. "
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" frag_start_idx | \n",
" frag_stop_idx | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0 | \n",
" 6 | \n",
"
\n",
" \n",
" | 1 | \n",
" 6 | \n",
" 12 | \n",
"
\n",
" \n",
" | 2 | \n",
" 12 | \n",
" 18 | \n",
"
\n",
" \n",
" | 3 | \n",
" 18 | \n",
" 24 | \n",
"
\n",
" \n",
" | 4 | \n",
" 24 | \n",
" 30 | \n",
"
\n",
" \n",
" | ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" | 751 | \n",
" 11806 | \n",
" 11828 | \n",
"
\n",
" \n",
" | 752 | \n",
" 11828 | \n",
" 11850 | \n",
"
\n",
" \n",
" | 753 | \n",
" 11850 | \n",
" 11872 | \n",
"
\n",
" \n",
" | 754 | \n",
" 11872 | \n",
" 11894 | \n",
"
\n",
" \n",
" | 755 | \n",
" 11894 | \n",
" 11916 | \n",
"
\n",
" \n",
"
\n",
"
756 rows × 2 columns
\n",
"
"
],
"text/plain": [
" frag_start_idx frag_stop_idx\n",
"0 0 6\n",
"1 6 12\n",
"2 12 18\n",
"3 18 24\n",
"4 24 30\n",
".. ... ...\n",
"751 11806 11828\n",
"752 11828 11850\n",
"753 11850 11872\n",
"754 11872 11894\n",
"755 11894 11916\n",
"\n",
"[756 rows x 2 columns]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fasta_lib.precursor_df[['frag_start_idx','frag_stop_idx']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that all fragment ions are stored from the peptide's N-terminus to its C-terminus, so the b-ions are in ascending order (from b1 to bn) and the y-ions are in descending order (from yn to y1)."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" b_z1 | \n",
" y_z1 | \n",
"
\n",
" \n",
" \n",
" \n",
" | 6 | \n",
" 72.044388 | \n",
" 714.429749 | \n",
"
\n",
" \n",
" | 7 | \n",
" 219.112808 | \n",
" 567.361328 | \n",
"
\n",
" \n",
" | 8 | \n",
" 276.134277 | \n",
" 510.339844 | \n",
"
\n",
" \n",
" | 9 | \n",
" 413.193176 | \n",
" 373.280945 | \n",
"
\n",
" \n",
" | 10 | \n",
" 526.277222 | \n",
" 260.196869 | \n",
"
\n",
" \n",
" | 11 | \n",
" 639.361328 | \n",
" 147.112808 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" b_z1 y_z1\n",
"6 72.044388 714.429749\n",
"7 219.112808 567.361328\n",
"8 276.134277 510.339844\n",
"9 413.193176 373.280945\n",
"10 526.277222 260.196869\n",
"11 639.361328 147.112808"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"start, end = fasta_lib.precursor_df[['frag_start_idx','frag_stop_idx']].values[1]\n",
"fasta_lib.fragment_mz_df.iloc[start:end,:]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Save protein_df, precursor_df, fragment_mz_df, and fragment_intensity_df into an HDF file."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# fasta_lib.save_hdf('path/to/hdf_file.hdf')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8.3 ('base')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "8a3b27e141e49c996c9b863f8707e97aabd49c4a7e8445b9b783b34e4a21a9b2"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}