# Source code for alphabase.pg_reader.spectronaut_reader

"""Spectronaut Protein Group Reader."""

import re
from typing import Any, Literal, Optional, Union

import numpy as np
import pandas as pd

from .pg_reader import PGReaderBase, pg_reader_provider


class SpectronautPGReader(PGReaderBase):
    """Reader for pivot reports from the Spectronaut search engine.

    Examples
    --------
    Get example data

    .. code-block:: python

        import os
        import tempfile
        from alphabase.tools.data_downloader import DataShareDownloader
        from alphabase.pg_reader import SpectronautPGReader

        # Download to temporary directory
        # Data provided by Pham et al, 2024 (https://doi.org/10.1093/bioinformatics/btae432),
        # originally generated by Bekker-Jensen et al, 2020 (https://doi.org/10.1038/s41467-020-14609-1)
        URL = "https://datashare.biochem.mpg.de/s/ot008eF6wwSISvk"
        download_dir = tempfile.mkdtemp()
        download_path = DataShareDownloader(url=URL, output_dir=download_dir).download()

    The reader will return the values that are stored in the spectronaut report.
    Note that the nature of these values depends on the selection made by the user
    during the data export in Spectronaut and might represent Protein Quantities
    or Peptide Quantities.

    .. code-block:: python

        reader = SpectronautPGReader()
        results = reader.import_file(download_path)
        results.index.names
        > FrozenList(['proteins', 'genes'])

    Spectronaut allows users to export a multitude of feature-level metadata.
    You can retrieve additional columns from the report by using the
    `add_column_mapping` method

    .. code-block:: python

        reader = SpectronautPGReader()
        reader.add_column_mapping({"organism": "PG.Organisms"})
        results = reader.import_file(download_path)
        results.index.names
        > FrozenList(['proteins', 'genes', 'organism'])

    """

    # Identifier of this reader type (read by PGReaderBase, which is not visible here).
    _reader_type: str = "spectronaut"
    # Cell values that are replaced by NaN in the measurement columns.
    _to_nan_values: tuple[Any, ...] = ("Filtered",)

    def __init__(
        self,
        *,
        column_mapping: Optional[dict[str, str]] = None,
        measurement_regex: Union[str, Literal["default"], None] = "default",  # noqa: PYI051 default are special cases and not equivalent to string
    ):
        """Initialize Spectronaut protein group matrix reader.

        Parameters
        ----------
        column_mapping
            Dictionary mapping alphabase column names (keys) to Spectronaut column
            names (values). If `None`, uses default mapping from configuration file.
        measurement_regex
            Pattern to select quantity columns

            - "default" (default): Intensities exported by the user in the spectronaut
              report. Might depend on the specific report.
            - custom: Any valid regular expression
            - `None`: forwarded unchanged to the base class

            See class documentation for usage examples and `get_preconfigured_regex()`
            for available patterns.

        """
        super().__init__(
            column_mapping=column_mapping, measurement_regex=measurement_regex
        )

    def _post_process(self, df: pd.DataFrame) -> pd.DataFrame:
        """Process Spectronaut protein group table after standardization.

        Parameters
        ----------
        df
            Standardized protein group table. It is modified in place.

        Returns
        -------
        pd.DataFrame
            The same frame, with every column whose name matches
            `measurement_regex` cleaned of the `_to_nan_values` markers and cast to
            float. The frame is returned untouched if no pattern is set or no
            column matches.

        Notes
        -----
        Spectronaut reports might contain "Filtered" as values. Replace these values
        with NAN and assure that floating point values are returned

        """
        # Without a pattern there is no way to tell which columns are intensities;
        # `re.compile(None)` would raise a TypeError, so leave the table as it is.
        if self.measurement_regex is None:
            return df

        # Only modify the intensity columns, as defined by the `measurement_regex`.
        # Non-string labels cannot be searched by a regex and are never intensity columns.
        pattern = re.compile(self.measurement_regex)
        regex_columns = [
            col for col in df.columns if isinstance(col, str) and pattern.search(col)
        ]
        if not regex_columns:
            return df

        # Markers must be replaced before the cast: "Filtered" is not convertible to float.
        cleaned = df[regex_columns].replace(self._to_nan_values, np.nan)
        df[regex_columns] = cleaned.astype(float)
        return df
# Register the reader class with the shared provider under the "spectronaut" key
# (the same string as SpectronautPGReader._reader_type).
pg_reader_provider.register_reader("spectronaut", reader_class=SpectronautPGReader)