Source code for alphabase.psm_reader.dia_psm_reader

"""Readers for Spectronaut's output library and reports, Swath data and DIANN data."""

from typing import List, Optional

import numpy as np
import pandas as pd

from alphabase.psm_reader.keys import PsmDfCols
from alphabase.psm_reader.maxquant_reader import ModifiedSequenceReader
from alphabase.psm_reader.psm_reader import psm_reader_provider


class SpectronautReader(ModifiedSequenceReader):
    """Reader for Spectronaut's output library TSV/CSV."""

    # Class-level configuration; presumably consumed by the base reader
    # (see `ModifiedSequenceReader` for the exact semantics).
    _reader_type = "spectronaut"
    _add_unimod_to_mod_mapping = True
    _min_max_rt_norm = True

    def _pre_process(self, df: pd.DataFrame) -> pd.DataFrame:
        """Remove duplicated precursor rows from a Spectronaut library table.

        Two rows count as duplicates when they agree on the modified-sequence
        column and on "PrecursorCharge"; if the table also has a
        "ReferenceRun" column, that column is part of the key as well.

        Parameters
        ----------
        df : pd.DataFrame
            Raw table as loaded from the library file. It is modified in place.

        Returns
        -------
        pd.DataFrame
            The same (mutated) dataframe, with a fresh 0..n-1 index.

        """
        dedup_keys = [self.mod_seq_column, "PrecursorCharge"]
        if "ReferenceRun" in df.columns:
            # Keep one entry per run when the run is known.
            dedup_keys = ["ReferenceRun", *dedup_keys]

        df.drop_duplicates(dedup_keys, inplace=True)
        # Dropping rows leaves gaps in the index; renumber it.
        df.reset_index(drop=True, inplace=True)
        return df
class SwathReader(SpectronautReader):
    """Reader for SWATH or OpenSWATH library TSV/CSV.

    Inherits all processing from `SpectronautReader`; only class-level
    configuration is (re)declared here.
    """

    # Intentionally the same reader type as Spectronaut -- this is not a typo.
    _reader_type = "spectronaut"
    _add_unimod_to_mod_mapping = True
class DiannReader(ModifiedSequenceReader):
    """Reader for DIANN data."""

    # Class-level configuration; presumably consumed by the base reader
    # (see `ModifiedSequenceReader` for the exact semantics).
    _reader_type = "diann"
    _add_unimod_to_mod_mapping = True
    _min_max_rt_norm = False

    def __init__(  # noqa: PLR0913, D417 # too many arguments in function definition, missing argument descriptions
        self,
        *,
        column_mapping: Optional[dict] = None,
        modification_mapping: Optional[dict] = None,
        mod_seq_columns: Optional[List[str]] = None,
        fdr: float = 0.01,
        keep_decoy: bool = False,
        rt_unit: Optional[str] = None,
        # DIANN reader-specific
        filter_first_search_fdr: bool = False,
        filter_second_search_fdr: bool = False,
        **kwargs,
    ):
        """Reader for DIANN data.

        Parameters
        ----------
        filter_first_search_fdr : bool, optional
            If True, the FDR filtering will be done also to the first search
            columns (fdr1_search1 and fdr2_search1)

        filter_second_search_fdr : bool, optional
            If True, the FDR filtering will be done also to the second search
            columns (fdr1_search2 and fdr2_search2)

        See documentation of `PSMReaderBase` for the rest of parameters.

        """
        super().__init__(
            column_mapping=column_mapping,
            modification_mapping=modification_mapping,
            mod_seq_columns=mod_seq_columns,
            fdr=fdr,
            keep_decoy=keep_decoy,
            rt_unit=rt_unit,
            **kwargs,
        )

        self._filter_first_search_fdr = filter_first_search_fdr
        self._filter_second_search_fdr = filter_second_search_fdr

    def _post_process(self, origin_df: pd.DataFrame) -> None:
        """Rename the spectrum-index column, then run the parent post-processing.

        The `PsmDfCols.SPEC_IDX` column of the PSM dataframe is renamed in
        place to `PsmDfCols.DIANN_SPEC_INDEX` before delegating to the parent
        class.
        """
        self._psm_df.rename(
            columns={PsmDfCols.SPEC_IDX: PsmDfCols.DIANN_SPEC_INDEX}, inplace=True
        )

        super()._post_process(origin_df)

    def _filter_fdr(self) -> None:
        """Filter PSMs based on additional FDR columns if requested.

        The parent filter runs first. Afterwards, every requested extra FDR
        column must be at or below the FDR threshold for a row to be kept.
        If a column is not present in the dataframe, it is ignored.
        """
        super()._filter_fdr()

        extra_fdr_columns = []
        if self._filter_first_search_fdr:
            extra_fdr_columns += [PsmDfCols.FDR1_SEARCH1, PsmDfCols.FDR2_SEARCH1]
        if self._filter_second_search_fdr:
            extra_fdr_columns += [PsmDfCols.FDR1_SEARCH2, PsmDfCols.FDR2_SEARCH2]

        present_columns = [
            col for col in extra_fdr_columns if col in self._psm_df.columns
        ]
        if not present_columns:
            # Nothing extra to filter on: leave the dataframe untouched.
            return

        mask = np.ones(len(self._psm_df), dtype=bool)
        for col in present_columns:
            mask &= self._psm_df[col] <= self._fdr_threshold

        # Vectorized check; the builtin all() would iterate the array
        # element by element at Python level.
        if not mask.all():
            self._psm_df = self._psm_df[mask]
class SpectronautReportReader(ModifiedSequenceReader):
    """Reader for Spectronaut's report TSV/CSV."""

    # Class-level configuration; presumably consumed by the base reader
    # (see `ModifiedSequenceReader` for the exact semantics).
    _reader_type = "spectronaut_report"
    _add_unimod_to_mod_mapping = True
    _min_max_rt_norm = False

    def _pre_process(self, df: pd.DataFrame) -> pd.DataFrame:
        """Derive modified sequence and charge from the precursor-ID column.

        Each precursor ID is split on "." and the parts are written to the
        modified-sequence column and to `PsmDfCols.CHARGE`; the charge is then
        cast to `np.int8`.

        Parameters
        ----------
        df : pd.DataFrame
            Raw report table. It is modified in place.

        Returns
        -------
        pd.DataFrame
            The same (mutated) dataframe with the two derived columns.

        """
        # NOTE(review): n=2 permits up to three parts; an ID containing a
        # second "." would produce three columns and make the two-column
        # assignment below fail -- confirm IDs always contain exactly one ".".
        id_parts = df[self._precursor_id_column].str.split(".", expand=True, n=2)
        df[[self.mod_seq_column, PsmDfCols.CHARGE]] = id_parts

        # The split yields strings; store the charge as a small integer.
        charge_as_int = df[PsmDfCols.CHARGE].astype(np.int8)
        df[PsmDfCols.CHARGE] = charge_as_int
        return df
def register_readers() -> None:
    """Register readers for Spectronaut's output library and reports, Swath data and DIANN data.

    Each reader name below is registered with `psm_reader_provider`; several
    names may point to the same reader class.
    """
    # (name, reader class) pairs, registered in this order.
    readers_by_name = (
        ("spectronaut", SpectronautReader),
        ("speclib_tsv", SpectronautReader),
        ("openswath", SwathReader),
        ("swath", SwathReader),
        ("diann", DiannReader),
        ("spectronaut_report", SpectronautReportReader),
    )
    for reader_name, reader_class in readers_by_name:
        psm_reader_provider.register_reader(reader_name, reader_class)