Source code for alphabase.pg_reader.spectronaut_reader
"""Spectronaut Protein Group Reader."""
import re
from typing import Any, Literal, Optional, Union
import numpy as np
import pandas as pd
from .pg_reader import PGReaderBase, pg_reader_provider
[docs]
class SpectronautPGReader(PGReaderBase):
    """Reader for pivot reports from the Spectronaut search engine.

    Examples
    --------
    Get example data

    .. code-block:: python

        import os
        import tempfile
        from alphabase.tools.data_downloader import DataShareDownloader
        from alphabase.pg_reader import SpectronautPGReader

        # Download to temporary directory
        # Data provided by Pham et al, 2024 (https://doi.org/10.1093/bioinformatics/btae432),
        # originally generated by Bekker-Jensen et al, 2020 (https://doi.org/10.1038/s41467-020-14609-1)
        URL = "https://datashare.biochem.mpg.de/s/ot008eF6wwSISvk"
        download_dir = tempfile.mkdtemp()
        download_path = DataShareDownloader(url=URL, output_dir=download_dir).download()

    The reader will return the values that are stored in the spectronaut report. Note that the nature of
    these values depends on the selection made by the user during the data export in Spectronaut and might
    represent Protein Quantities or Peptide Quantities.

    .. code-block:: python

        reader = SpectronautPGReader()
        results = reader.import_file(download_path)
        results.index.names
        > FrozenList(['proteins', 'genes'])

    Spectronaut allows users to export a multitude of feature-level metadata. You can retrieve additional
    columns from the report by using the `add_column_mapping` method

    .. code-block:: python

        reader = SpectronautPGReader()
        reader.add_column_mapping({"organism": "PG.Organisms"})
        results = reader.import_file(download_path)
        results.index.names
        > FrozenList(['proteins', 'genes', 'organism'])

    """

    _reader_type: str = "spectronaut"
    # Cell values that `_post_process` replaces with NaN in the measurement columns.
    _to_nan_values: tuple[Any, ...] = ("Filtered",)

    def __init__(
        self,
        *,
        column_mapping: Optional[dict[str, str]] = None,
        measurement_regex: Union[str, Literal["default"], None] = "default",  # noqa: PYI051 default are special cases and not equivalent to string
    ):
        """Initialize Spectronaut protein group matrix reader.

        Parameters
        ----------
        column_mapping
            Dictionary mapping alphabase column names (keys) to Spectronaut column names (values).
            If `None`, uses default mapping from configuration file.
        measurement_regex
            Pattern to select quantity columns

            - "default" (default): Intensities exported by the user in the spectronaut report. Might depend on the specific report.
            - custom: Any valid regular expression

            See class documentation for usage examples and `get_preconfigured_regex()` for available patterns.

        """
        super().__init__(
            column_mapping=column_mapping, measurement_regex=measurement_regex
        )

    def _post_process(self, df: pd.DataFrame) -> pd.DataFrame:
        """Process Spectronaut protein group table after standardization.

        Parameters
        ----------
        df
            Table whose measurement columns should be cleaned. The selected
            columns are overwritten in place.

        Returns
        -------
        pd.DataFrame
            The same dataframe object, with every value listed in
            `_to_nan_values` replaced by NaN and the selected columns cast
            to float.

        Notes
        -----
        Spectronaut reports might contain "Filtered" as values. Replace these values with NAN
        and assure that floating point values are returned.

        The cast to float is delegated to `DataFrame.astype`, so a selected column that
        still holds non-numeric values after the replacement makes the cast fail.

        """
        if self.measurement_regex is None:
            # `__init__` accepts `measurement_regex=None`, but `re.compile(None)`
            # raises TypeError. Without a pattern there is nothing to narrow the
            # selection with, so every column is treated as a measurement column.
            # NOTE(review): assumes the base reader keeps only quantity columns
            # in `df.columns` when no regex is set - confirm against PGReaderBase.
            measurement_columns = list(df.columns)
        else:
            # Only modify the intensity columns, as defined by the `measurement_regex`
            pattern = re.compile(self.measurement_regex)
            measurement_columns = [col for col in df.columns if pattern.search(col)]

        # Replace the placeholder values first so that the float cast cannot
        # trip over them.
        df[measurement_columns] = (
            df[measurement_columns].replace(self._to_nan_values, np.nan).astype(float)
        )
        return df
# Register the Spectronaut reader class with the provider under the
# identifier "spectronaut".
pg_reader_provider.register_reader(
    "spectronaut",
    reader_class=SpectronautPGReader,
)