# Source code for alphabase.pg_reader.maxquant_pg_reader

"""MaxQuant Protein Group Reader."""

from typing import Literal, Optional, Union

import pandas as pd

from .keys import PGCols
from .pg_reader import PGReaderBase, pg_reader_provider


class MaxQuantPGReader(PGReaderBase):
    r"""Reader for protein group matrices from the MaxQuant search engine.

    By default, the reader will read raw protein intensities from the protein
    group matrix. By passing a suitable regular expression, it is also possible
    to extract LFQ or iBAQ intensities.

    Examples
    --------
    Get example data

    .. code-block:: python

        import os
        import tempfile
        from alphabase.tools.data_downloader import DataShareDownloader
        from alphabase.pg_reader import MaxQuantPGReader

        # Download to temporary directory
        URL = "https://datashare.biochem.mpg.de/s/KvToteOu0zzH17C"
        download_dir = tempfile.mkdtemp()
        download_path = DataShareDownloader(url=URL, output_dir=download_dir).download()

    Per default, the reader will return the raw intensities. Additional protein
    features are stored in the dataframe index, samples are stored as columns.

    .. code-block:: python

        # Get raw intensities
        reader = MaxQuantPGReader()
        results = reader.import_file(download_path)

        results.index.names
        > FrozenList(['proteins', 'uniprot_ids', 'genes', 'is_decoy'])

        results.columns
        > Index([...], dtype='object', length=312)

    You can get other intensity types by passing a specific pattern to the
    `measurement_regex` parameter during class initialization. To check out all
    preconfigured regular expressions that enable you to retrieve different
    intensity modalities, use the `get_preconfigured_regex` method:

    .. code-block:: python

        MaxQuantPGReader.get_preconfigured_regex()
        > {
            'raw': '^Intensity(?!\\s[LHM]\\s).+$',
            'lfq': '^LFQ intensity(?!\\s[LHM]\\s).+$',
            'ibaq': '^iBAQ(?!\\s[LHM]\\s).+$'
        }

    You can also pass a custom regular expression, e.g. to retrieve specific
    channels in TMT experiments

    .. code-block:: python

        # Match "Intensity H <sample>"
        reader = MaxQuantPGReader(measurement_regex="^Intensity H .+")

    References
    ----------
    - MaxQuant Documentation (Cox Lab, 2024-06-27):
      https://cox-labs.github.io/coxdocs/output_tables.html#protein-groups,
      (last viewed 2025-08)

    """

    _reader_type = "maxquant"

    def __init__(
        self,
        *,
        column_mapping: Optional[dict[str, str]] = None,
        measurement_regex: Union[str, Literal["raw", "lfq", "ibaq"], None] = "raw",  # noqa: PYI051 raw and lfq are special cases and not equivalent to string
    ):
        """Initialize MaxQuant protein group matrix reader.

        Parameters
        ----------
        column_mapping
            Dictionary mapping alphabase column names (keys) to MaxQuant column
            names (values). If `None`, uses default mapping from configuration file.
        measurement_regex
            Pattern to select quantity columns

            - "raw" (default): Raw intensities
            - "lfq": LFQ-corrected intensities
            - "ibaq": Intensity-Based Absolute Quantification-corrected intensities
            - custom: Any valid regular expression

            See class documentation for usage examples and
            `get_preconfigured_regex()` for available patterns.

        """
        super().__init__(
            column_mapping=column_mapping, measurement_regex=measurement_regex
        )

    def _post_process(self, df: pd.DataFrame) -> pd.DataFrame:
        """Process MaxQuant protein group table after standardization.

        Convert MaxQuant-specific decoy indicator (+) to standardized boolean series.

        Parameters
        ----------
        df
            Standardized protein group table. The decoy column is overwritten
            in place when it is present; otherwise the table is left untouched.

        Returns
        -------
        pd.DataFrame
            The same dataframe object, with the decoy indicator column (if any)
            holding `True` where the original value was ``"+"`` and `False`
            everywhere else, including missing values.

        Notes
        -----
        MaxQuant marks peptides/proteins that were found to be part of a protein
        derived from the reversed part of the decoy database with +.
        These should be removed for further data analysis.

        References
        ----------
        https://cox-labs.github.io/coxdocs/output_tables.html#protein-groups (Status: 2025-08)

        """
        if PGCols.DECOY_INDICATOR not in df.columns:
            return df

        # Vectorized comparison instead of a per-element lambda. Filling and
        # casting guarantees a plain bool column even for empty tables or
        # nullable string dtypes, where the comparison may yield <NA>.
        is_decoy = df[PGCols.DECOY_INDICATOR].eq("+")
        df[PGCols.DECOY_INDICATOR] = is_decoy.fillna(False).astype(bool)
        return df
# Register MaxQuantPGReader with the reader provider under the key "maxquant".
pg_reader_provider.register_reader("maxquant", reader_class=MaxQuantPGReader)