Source code for alphabase.protein.protein_level_decoy

import pandas as pd

from alphabase.protein.fasta import SpecLibFasta
from alphabase.spectral_library.decoy import SpecLibDecoy


class ProteinReverseDecoy(SpecLibDecoy):
    """Protein-level decoy generator that reverses whole protein sequences.

    A decoy protein table is built by reversing every sequence in
    ``target_lib.protein_df`` and prefixing the identifier columns with
    ``decoy_tag``. The decoy proteins are then turned into peptides with
    ``target_lib.get_peptides_from_protein_df``.

    Attributes
    ----------
    target_lib : SpecLibFasta
        Target library whose ``protein_df`` is the source of the decoys.
    protein_df : pd.DataFrame
        Decoy protein table; empty until :meth:`decoy_sequence` is called.
    decoy_tag : str
        Prefix added to ``protein_id``, ``full_name`` and ``gene_name``.
    """

    def __init__(self, target_lib: SpecLibFasta) -> None:
        """Store the target library and initialise empty decoy tables.

        Parameters
        ----------
        target_lib : SpecLibFasta
            Target library to generate protein-level decoys for.
        """
        # NOTE(review): SpecLibDecoy.__init__ is not called here; confirm
        # that this is intentional.
        self.target_lib = target_lib
        self._precursor_df: pd.DataFrame = pd.DataFrame()
        self.protein_df = pd.DataFrame()
        self.decoy_tag = "REV_"

    def _target_lib_has_proteins(self) -> bool:
        """Return True if the target library has a non-empty ``protein_df``."""
        return (
            hasattr(self.target_lib, "protein_df")
            and len(self.target_lib.protein_df) > 0
        )

    def _add_tag_to_a_column_in_protein_df(self, column: str) -> None:
        """Prefix ``column`` of ``self.protein_df`` with ``self.decoy_tag``.

        Does nothing when the column is absent.
        """
        if column in self.protein_df.columns:
            self.protein_df[column] = self.decoy_tag + self.protein_df[column]

    def _make_empty_loc_for_target_protein_df(self) -> None:
        """Prepend one placeholder row per target protein to ``self.protein_df``.

        The placeholder rows have an empty ``sequence``; missing values in
        the combined frame are filled with ``""``. Because the index is
        rebuilt, decoy rows end up at labels starting at
        ``len(self.target_lib.protein_df)``, which is what
        ``_append_protein_df_to_target_lib`` slices on.
        """
        placeholders = pd.DataFrame(
            {"sequence": [""] * len(self.target_lib.protein_df)}
        )
        self.protein_df = pd.concat(
            [placeholders, self.protein_df],
            ignore_index=True,
        ).fillna("")

    def _decoy_protein_df(self) -> None:
        """Build ``self.protein_df`` from reversed target protein sequences."""
        self.protein_df = self.target_lib.protein_df.copy()
        self.protein_df["sequence"] = self.protein_df.sequence.str[::-1]
        for column in ("protein_id", "full_name", "gene_name"):
            self._add_tag_to_a_column_in_protein_df(column)
        self._make_empty_loc_for_target_protein_df()

    def _generate_decoy_sequences(self) -> None:
        """Create decoy peptides from ``self.protein_df`` using the target library.

        ``target_lib.get_peptides_from_protein_df`` is run on the decoy
        protein table and the resulting ``precursor_df`` is stored in
        ``self._precursor_df``. The target library's own ``protein_df`` and
        ``_precursor_df`` are saved beforehand and restored afterwards.
        """
        saved_protein_df = self.target_lib.protein_df
        saved_precursor_df = self.target_lib.precursor_df
        try:
            self.target_lib.get_peptides_from_protein_df(self.protein_df)
            self._precursor_df = self.target_lib.precursor_df
        finally:
            # Restore even if peptide generation raises, so the target
            # library is never left holding the decoy tables.
            self.target_lib.protein_df = saved_protein_df
            self.target_lib._precursor_df = saved_precursor_df

    def decoy_sequence(self) -> None:
        """Generate decoy proteins and their peptides.

        Returns without doing anything when the target library has no
        ``protein_df`` attribute or the table is empty.
        """
        if not self._target_lib_has_proteins():
            return
        self._decoy_protein_df()
        self._generate_decoy_sequences()
        # Not defined in this class; presumably inherited from SpecLibDecoy.
        self._remove_target_seqs()

    def append_to_target_lib(self) -> None:
        """Append the decoy peptides and decoy proteins to the target library.

        Returns without doing anything when the target library has no
        ``protein_df`` attribute or the table is empty.
        """
        if not self._target_lib_has_proteins():
            return
        super().append_to_target_lib()
        self._append_protein_df_to_target_lib()

    def _append_protein_df_to_target_lib(self) -> None:
        """Concatenate the decoy protein rows onto ``target_lib.protein_df``.

        A ``decoy`` column is set to 1 on the decoy table and 0 on the target
        table. Only decoy rows with labels from
        ``len(self.target_lib.protein_df)`` onwards are appended, which skips
        the placeholder rows added by
        ``_make_empty_loc_for_target_protein_df``.
        """
        self.protein_df["decoy"] = 1
        self.target_lib.protein_df["decoy"] = 0
        n_target = len(self.target_lib.protein_df)
        self.target_lib.protein_df = pd.concat(
            [
                self.target_lib.protein_df,
                self.protein_df.loc[n_target:],
            ]
        )
# The "protein_reverse" decoy is not registered because it conflicts with
# DecoyGenerator: DecoyGenerator only works at the peptide level, whereas
# ProteinReverseDecoy works at the protein level.
# def register_decoy():
#     decoy_lib_provider.register("protein_reverse", ProteinReverseDecoy)