PSM readers

[1]:
# Load (or reload, if already loaded) IPython's autoreload extension.
%reload_ext autoreload
# Mode 2: re-import modified modules automatically before each cell runs,
# so edits to library source files are picked up without restarting the kernel.
%autoreload 2
[2]:
from alphabase.io.psm_reader import psm_reader_provider

alphabase.io.psm_reader.psm_reader_provider has registered some basic reader classes for the following search engine results:

[3]:
# Display the provider's registry: a dict mapping each reader-type key
# (the string accepted by `get_reader`) to its registered reader class.
psm_reader_provider.reader_dict
[3]:
{'alphapept': alphabase.psm_reader.alphapept_reader.AlphaPeptReader,
 'maxquant': alphabase.psm_reader.maxquant_reader.MaxQuantReader,
 'spectronaut': alphabase.psm_reader.dia_psm_reader.SpectronautReader,
 'openswath': alphabase.psm_reader.dia_psm_reader.SwathReader,
 'swath': alphabase.psm_reader.dia_psm_reader.SwathReader,
 'diann': alphabase.psm_reader.dia_psm_reader.DiannReader,
 'spectronaut_report': alphabase.psm_reader.dia_psm_reader.SpectronautReportReader,
 'pfind': alphabase.psm_reader.pfind_reader.pFindReader,
 'msfragger_psm_tsv': alphabase.psm_reader.msfragger_reader.MSFragger_PSM_TSV_Reader,
 'msfragger': alphabase.psm_reader.msfragger_reader.MSFragger_PSM_TSV_Reader,
 'msfragger_pepxml': alphabase.psm_reader.msfragger_reader.MSFraggerPepXML}

We can then call psm_reader_provider.get_reader with one of the keys above to obtain a reader, and use the reader's import_file method to load a search-engine result file.

MaxQuant msms.txt:

[4]:
import io
import numpy as np
msms_txt = io.StringIO('''Raw file  Scan number     Scan index      Sequence        Length  Missed cleavages        Modifications   Modified sequence       Oxidation (M) Probabilities     Oxidation (M) Score diffs       Acetyl (Protein N-term) Oxidation (M)   Proteins        Charge  Fragmentation   Mass analyzer   Type    Scan event number       Isotope index   m/z     Mass    Mass error [ppm]        Mass error [Da] Simple mass error [ppm] Retention time  PEP     Score   Delta score     Score diff      Localization prob       Combinatorics   PIF     Fraction of total spectrum      Base peak fraction      Precursor full scan number      Precursor Intensity     Precursor apex fraction Precursor apex offset   Precursor apex offset time      Matches Intensities     Mass deviations [Da]    Mass deviations [ppm]   Masses  Number of matches       Intensity coverage      Peak coverage   Neutral loss level      ETD identification type Reverse All scores      All sequences   All modified sequences  Reporter PIF    Reporter fraction       id      Protein group IDs       Peptide ID      Mod. peptide ID Evidence ID     Oxidation (M) site IDs
20190402_QX1_SeVW_MA_HeLa_500ng_LC11        81358   73979   AAAAAAAAAPAAAATAPTTAATTAATAAQ   29      0       Unmodified      _(Acetyl (Protein N-term))AAAAAAAAM(Oxidation (M))PAAAATAPTTAATTAATAAQ_                 0       0       sp|P37108|SRP14_HUMAN   3       HCD     FTMS    MULTI-MSMS      13      1       790.07495       2367.203        0.35311 0.00027898      -0.061634807    70.261  0.012774        41.423  36.666  NaN     NaN     1       0       0       0       81345   10653955        0.0338597821787898      -11     0.139877319335938       y1;y2;y3;y4;y11;y1-NH3;y2-NH3;a2;b2;b3;b4;b5;b6;b7;b8;b9;b11;b12;b6(2+);b8(2+);b13(2+);b18(2+)  2000000;2000000;300000;400000;200000;1000000;400000;300000;600000;1000000;2000000;3000000;3000000;3000000;3000000;2000000;600000;500000;1000000;2000000;300000;200000   5.2861228709844E-06;-6.86980268369553E-05;-0.00238178789771837;0.000624715964988809;-0.0145624692099773;-0.000143471782706683;-0.000609501446461991;-0.000524972720768346;0.00010190530804266;5.8620815195809E-05;0.000229901232955854;-0.000108750048696038;-0.000229593152369034;0.00183148682538103;0.00276641182404092;0.000193118923334623;0.00200988580445483;0.000102216846016745;5.86208151389656E-05;0.000229901232955854;-0.00104559184393338;0.00525030008475369     0.0359413365445091;-0.314964433555295;-8.23711898839045;1.60102421155213;-14.8975999917227;-1.10320467763838;-3.03102462870716;-4.56152475051625;0.712219104095465;0.273777366204575;0.806231096969562;-0.305312183824154;-0.537399178230218;3.67572664689217;4.85930954169285;0.301587577451224;2.48616190909398;0.116225745519871;0.273777365939099;0.806231096969562;-2.19774169175011;7.53961026980589      
147.076413378177;218.113601150127;289.153028027798;390.197699998035;977.50437775671;130.050013034583;201.087592852046;115.087114392821;143.081402136892;214.118559209185;285.155501716567;356.192954155649;427.230188786552;498.265241494374;569.301420357176;640.341107437877;808.429168310795;879.468189767554;214.118559209185;285.155501716567;475.757386711244;696.362265007215    22      0.262893575628735       0.0826446280991736      None    Unknown         41.4230894199432;4.75668724862449;3.9515580701967       AAAAAAAAAPAAAATAPTTAATTAATAAQ;FHRGPPDKDDMVSVTQILQGK;PVTLWITVTHMQADEVSVWR        _AAAAAAAAAPAAAATAPTTAATTAATAAQ_;_FHRGPPDKDDMVSVTQILQGK_;_PVTLWITVTHMQADEVSVWR_                  0       1443    0       0       0
20190402_QX1_SeVW_MA_HeLa_500ng_LC11        81391   74010   AAAAAAAAAAPAAAATAPTTAATTAATAAQ  29      0       Unmodified      _AAAAAAAAAPAAAATAPTTAATTAATAAQ_                 0       0       sp|P37108|SRP14_HUMAN   2       HCD     FTMS    MULTI-MSMS      14      0       1184.6088       2367.203        0.037108        4.3959E-05      1.7026696       70.287  7.1474E-09      118.21  100.52  NaN     NaN     1       0       0       0       81377   9347701 0.166790347889974       -10     0.12664794921875        y1;y2;y3;y4;y5;y9;y12;y13;y14;y20;y13-H2O;y20-H2O;y1-NH3;y20-NH3;b3;b4;b5;b6;b7;b8;b9;b11;b12;b13;b14;b15;b16;b19;b15-H2O;b16-H2O       500000;600000;200000;400000;200000;100000;200000;1000000;200000;300000;200000;100000;100000;70000;300000;900000;2000000;3000000;5000000;8000000;6000000;600000;800000;600000;200000;300000;200000;300000;300000;1000000 -0.000194444760495571;0.000149986878682284;0.000774202587820128;-0.0002445094036716;0.000374520568641401;-0.00694293246522193;-0.0109837291331587;-0.0037745820627606;-0.000945546471939451;0.00152326440706929;0.00506054832726477;0.00996886361417637;6.25847393393997E-05;-0.024881067836759;-3.11821549132674E-05;-0.000183099230639527;0.000161332473453513;0.000265434980121881;0.000747070697229901;0.000975534518261156;0.00101513939785036;0.00651913000274362;0.0058584595163893;0.00579536744021425;0.00131097834105276;-0.0131378531671089;0.00472955218901916;-0.00161006322559842;-0.00201443239325272;0.0227149399370319 
-1.32206444236914;0.687655553213019;2.6775131607882;-0.626628140021726;0.811995006209331;-8.6203492854282;-10.1838066275079;-3.21078702288986;-0.758483069159249;0.881072738747222;4.37168212373889;5.82682888353564;0.481236695337485;-14.5343986203644;-0.145630261806375;-0.642102166533079;0.452935954800214;0.621293379181583;1.49934012872483;1.71355878380837;1.58531240493271;8.06399202403175;6.6614096214532;6.09718023739784;1.28333378040908;-11.7030234519348;3.96235146626144;-1.07856912288932;-1.82370619437775;19.3220953109188        147.07661310906;218.113382465221;289.149872037312;390.198569223404;461.235063981231;805.411965958065;1078.54847749073;1175.59403219566;1246.62831694787;1728.87474561429;1157.57463237897;1710.85573532879;130.049806978061;1711.87460084504;214.118649012155;285.155914717031;356.192684073126;427.22969375842;498.266325910503;569.303211234482;640.340285417402;808.424659066597;879.462433524883;950.49961040476;1021.54120858166;1122.60333588727;1193.62258226971;1492.77704268533;1104.58164778019;1175.59403219566      30      0.474003002083763       0.167630057803468       None    Unknown         118.209976573419;17.6937689289157;17.2534171481793      AAAAAAAAAPAAAATAPTTAATTAATAAQ;SELKQEAMQSEQLQSVLYLK;VGSSVPSKASELVVMGDHDAARR      _AAAAAAAAAPAAAATAPTTAATTAATAAQ_;_SELKQEAM(Oxidation (M))QSEQLQSVLYLK_;_VGSSVPSKASELVVMGDHDAARR_                 1       1443    0       0       1
20190402_QX1_SeVW_MA_HeLa_500ng_LC11        107307  98306   AAAAAAAGDSDSWDADAFSVEDPVRK      26      1       Acetyl (Protein N-term) _(Acetyl (Protein N-term))AAAAAAAGDSDSWDADAFSVEDPVRK_                   1       0       sp|O75822|EIF3J_HUMAN   3       HCD     FTMS    MULTI-MSMS      10      2       879.06841       2634.1834       -0.93926        -0.00082567     -3.2012471      90.978  2.1945E-12      148.95  141.24  NaN     NaN     1       0       0       0       107297  10193939        0.267970762043589       -8      0.10211181640625        y1;y2;y4;y5;y6;y7;y8;y9;y10;y11;y12;y13;y14;y15;y17;y18;y19;y20;y21;y23;y21-H2O;y1-NH3;y19-NH3;y14(2+);y16(2+);y22(2+);a2;b2;b3;b4;b5;b6;b7     300000;200000;3000000;600000;1000000;500000;2000000;1000000;1000000;1000000;90000;1000000;400000;900000;1000000;400000;3000000;2000000;1000000;400000;100000;200000;200000;80000;100000;200000;200000;2000000;5000000;5000000;5000000;2000000;300000    1.34859050149316E-07;-6.05140996867704E-06;2.27812602133781E-05;0.00128986659160546;-0.00934536073077652;0.000941953783126337;-0.00160424237344614;-0.00239257341399934;-0.00111053968612396;-0.00331340710044969;0.00330702864630439;0.000963683996815234;0.00596290290945944;-0.00662057038289277;-0.0117122701335575;0.00777853472800416;0.0021841542961738;0.000144322111736983;-0.00087403893667215;0.0197121595674616;-0.021204007680808;-0.000308954599830713;-0.026636719419912;-0.0137790992353075;0.00596067266928912;-0.0077053835773313;9.11402199221811E-06;-0.000142539300128419;-0.000251999832926231;1.90791054137662E-05;-0.00236430185879044;-9.54583337602344E-05;-0.000556959493223985      
0.000916705048437201;-0.0199575598103408;0.0456231928690862;2.09952637717462;-12.5708704058425;1.11808305811426;-1.72590731777249;-2.22239181008062;-0.967696370445928;-2.62418809422166;2.47964286628144;0.665205752892023;3.64753748704453;-3.84510115530963;-6.08782672045773;3.81508105974837;1.04209904973991;0.0666012719936656;-0.390545453668809;8.28224925531311;-9.55133250134922;-2.37499239179248;-12.8127653858411;-16.846761946123;6.48662354975264;-6.67117082062383;0.0580151981289049;-0.770098855873447;-0.983876895688683;0.0583162347158579;-5.93738717724506;-0.203431522818505;-1.03087538746314  147.112804035741;303.21392125011;499.33507018564;614.360746132308;743.413974455831;842.472101057517;929.506675663573;1076.57587791081;1147.61170966489;1262.6408555643;1333.67134891635;1448.700635293;1634.77494902759;1721.81956091078;1923.88362405243;2038.89107627957;2095.9181343836;2166.95728800359;2237.99542015244;2380.04906152953;2220.00518543488;130.0865640237;2078.92040615582;817.907873297785;918.917619246831;1155.02717356753;157.097144992378;185.0922112678;256.129434516133;327.166277224995;398.205774393759;469.240619338034;540.278194626993  33      0.574496146107112       0.14410480349345        None    Unknown         148.951235201399;7.71201258444522;7.36039532447559      AAAAAAAGDSDSWDADAFSVEDPVRK;PSRQESELMWQWVDQRSDGER;HTLTSFWNFKAGCEEKCYSNR  _(Acetyl (Protein N-term))AAAAAAAGDSDSWDADAFSVEDPVRK_;_PSRQESELM(Oxidation (M))WQWVDQRSDGER_;_HTLTSFWNFKAGCEEKCYSNR_                    2       625     1       1       2       '''
)

# Get a reader for the 'maxquant' key (registered as MaxQuantReader in reader_dict).
mq_reader = psm_reader_provider.get_reader('maxquant')
# Parse the in-memory msms.txt table; as the last expression of the cell,
# the value returned by import_file is rendered as the cell output.
mq_reader.import_file(msms_txt)
[4]:
sequence charge rt scan_num raw_name precursor_mz score proteins decoy spec_idx mods mod_sites nAA rt_norm
0 AAAAAAAGDSDSWDADAFSVEDPVRK 3 90.978 107307 20190402_QX1_SeVW_MA_HeLa_500ng_LC11 879.06841 148.950 sp|O75822|EIF3J_HUMAN 0 107306 Acetyl@Protein N-term 0 26 1.000000
1 AAAAAAAAAPAAAATAPTTAATTAATAAQ 3 70.261 81358 20190402_QX1_SeVW_MA_HeLa_500ng_LC11 790.07495 41.423 sp|P37108|SRP14_HUMAN 0 81357 Acetyl@Protein N-term;Oxidation@M 0;9 29 0.772286
2 AAAAAAAAAAPAAAATAPTTAATTAATAAQ 2 70.287 81391 20190402_QX1_SeVW_MA_HeLa_500ng_LC11 1184.60880 118.210 sp|P37108|SRP14_HUMAN 0 81390 30 0.772571

pFind pfind.spectra file at 1% FDR by default:

[5]:
from io import StringIO
txt = StringIO("""File_Name Scan_No Exp.MH+ Charge  Q-value Sequence        Calc.MH+        Mass_Shift(Exp.-Calc.)  Raw_Score       Final_Score     Modification    Specificity     Proteins        Positions       Label   Target/Decoy    Miss.Clv.Sites  Avg.Frag.Mass.Shift     Others
Ecoli-1to1to1-un-C13-N15-10mM-20150823.30507.30507.2.0.dta  30507   2074.030369     2       0       AMIEAGAAAVHFEDQLASVK    2074.027271     0.003098        35.299588       5.15726e-013    2,Oxidation[M]; 3       gi|16131841|ref|NP_418439.1|/   173,K,K/        1|0|    target  0       0.948977        131070  0       0       0       262143  0       0       0       32
Ecoli-1to1to1-un-C13-N15-150mM-20150823.41501.41501.3.0.dta 41501   2712.197421     3       0       EGDNYVVLSDILGDEDHLGDMDFK        2712.198013     -0.000592       27.073978       9.82619e-010    21,Unknown[M];  3       gi|145698316|ref|NP_417633.4|/  470,K,V/        1|0|    target  0       0.814438        65596   0       0       0       4194288 0       0       0       36
XXX.25802.25802.4.0.dta     25802   2388.339186     4       0.0032066       SVFLIKGDKVWVYPPEKKEK    2388.332468     0.006718        17.822784       0.100787        21,Didehydro[AnyC-termK];       0       sp|P02790|HEMO_HUMAN/   106,N,G/        1|0|    target  0       0.704714        36
""")
# Get a reader for the 'pfind' key and parse the in-memory pfind.spectra table;
# the value returned by import_file is rendered as the cell output.
# NOTE(review): the data rows of the example carry more fields than the header row,
# which is presumably what triggers the pandas ParserWarning shown below — confirm.
psm_reader_provider.get_reader('pfind').import_file(txt)
/Users/zengwenfeng/opt/anaconda3/lib/python3.8/site-packages/pandas/util/_decorators.py:311: ParserWarning: Length of header or names does not match length of data. This leads to a loss of data with index_col=False.
  return func(*args, **kwargs)
[5]:
sequence charge raw_name query_id scan_num score proteins uniprot_ids fdr decoy spec_idx mods mod_sites nAA precursor_mz
0 AMIEAGAAAVHFEDQLASVK 2 Ecoli-1to1to1-un-C13-N15-10mM-20150823 Ecoli-1to1to1-un-C13-N15-10mM-20150823.30507.3... 30507 28.293201 gi|16131841|ref|NP_418439.1| gi|16131841|ref|NP_418439.1| 0.000000 0 30506 Oxidation@M 2 20 1037.517322
1 SVFLIKGDKVWVYPPEKKEK 4 XXX XXX.25802.25802.4.0.dta 25802 2.294746 sp|P02790|HEMO_HUMAN sp|P02790|HEMO_HUMAN 0.003207 0 25801 Didehydro@K^Any C-term -1 20 597.838602

DIA-NN TSV report:

[6]:
from io import StringIO
tsv = StringIO('''File.Name Run     Protein.Group   Protein.Ids     Protein.Names   Genes   PG.Quantity     PG.Normalised   PG.MaxLFQ       Genes.Quantity  Genes.Normalised        Genes.MaxLFQ    Genes.MaxLFQ.Unique     Modified.Sequence       Stripped.Sequence       Precursor.Id    Precursor.Charge        Q.Value Global.Q.Value  Protein.Q.Value PG.Q.Value      Global.PG.Q.Value       GG.Q.Value      Translated.Q.Value      Proteotypic     Precursor.Quantity      Precursor.Normalised    Precursor.Translated    Quantity.Quality        RT      RT.Start        RT.Stop iRT     Predicted.RT    Predicted.iRT   Lib.Q.Value     Ms1.Profile.Corr        Ms1.Area        Evidence        Spectrum.Similarity     Mass.Evidence   CScore  Decoy.Evidence  Decoy.CScore    Fragment.Quant.Raw      Fragment.Quant.Corrected        Fragment.Correlations   MS2.Scan        IM      iIM     Predicted.IM    Predicted.iIM
F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636.d  20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636       Q9UH36  Q9UH36          SRRD    3296.49 3428.89 3428.89 3296.49 3428.89 3428.89 3428.89 (UniMod:1)AAAAAAALESWQAAAPR     AAAAAAALESWQAAAPR       (UniMod:1)AAAAAAALESWQAAAPR2    2       3.99074e-05     1.96448e-05     0.000159821     0.000159821     0.000146135     0.000161212     0       1       3296.49 3428.89 3296.49 0.852479        19.9208 19.8731 19.9685 123.9   19.8266 128.292 0       0.960106        5308.05 1.96902 0.683134        0.362287        0.999997        1.23691 3.43242e-05     1212.01;2178.03;1390.01;1020.01;714.008;778.008;        1212.01;1351.73;887.591;432.92;216.728;732.751; 0.956668;0.757581;0.670497;0.592489;0.47072;0.855203;   30053   1.19708 1.19328 1.19453 1.19469
F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642.d  20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A8_1_22642       Q9UH36  Q9UH36          SRRD    2365    2334.05 2334.05 2365    2334.05 2334.05 2334.05 (UniMod:1)AAAAAAALESWQAAAPR     AAAAAAALESWQAAAPR       (UniMod:1)AAAAAAALESWQAAAPR2    2       0.000184434     1.96448e-05     0.000596659     0.000596659     0.000146135     0.000604961     0       1       2365    2334.05 2365    0.922581        19.905  19.8573 19.9527 123.9   19.782  128.535 0       0.940191        4594.04 1.31068 0.758988        0       0.995505        0.28633 2.12584e-06     1209.02;1210.02;1414.02;1051.01;236.003;130.002;        1209.02;1109.89;732.154;735.384;0;46.0967;      0.919244;0.937624;0.436748;0.639369;0.296736;0.647924;  30029   1.195   1.19328 1.19381 1.19339
F:\XXX\20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648.d  20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648       Q9UH36  Q9UH36          SRRD    1664.51 1635.46 1635.47 1664.51 1635.46 1635.47 1635.47 (UniMod:1)AAAAAAALESWQAAAPR     AAAAAAALESWQAAAPR       (UniMod:1)AAAAAAALESWQAAAPR2    2       0.000185123     1.96448e-05     0.000307409     0.000307409     0.000146135     0.000311332     0       1       1664.51 1635.46 1664.51 0.811147        19.8893 19.8416 19.937  123.9   19.7567 128.896 0       0.458773        6614.06 1.7503  0.491071        0.00111683      0.997286        1.92753 2.80543e-05     744.01;1708.02;1630.02;1475.02;0;533.006;       322.907;808.594;577.15;536.033;0;533.006;       0.760181;0.764072;0.542005;0.415779;0;0.913438; 30005   1.19409 1.19328 1.19323 1.19308
''')
# Get a reader for the 'diann' key (registered as DiannReader in reader_dict) and
# parse the in-memory report; the value returned by import_file is the cell output.
psm_reader_provider.get_reader('diann').import_file(tsv)
[6]:
raw_name sequence charge rt mobility proteins uniprot_ids genes scan_num score fdr spec_idx mods mod_sites nAA rt_norm precursor_mz ccs
0 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_sp... AAAAAAALESWQAAAPR 2 19.9208 1.19708 NaN Q9UH36 SRRD 30053 0.999997 0.000040 30052 Acetyl@Any N-term 0 17 1.0 834.428635 483.435307
1 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_sp... AAAAAAALESWQAAAPR 2 19.9050 1.19500 NaN Q9UH36 SRRD 30029 0.995505 0.000184 30028 Acetyl@Any N-term 0 17 1.0 834.428635 482.595308
2 20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_sp... AAAAAAALESWQAAAPR 2 19.8893 1.19409 NaN Q9UH36 SRRD 30005 0.997286 0.000185 30004 Acetyl@Any N-term 0 17 1.0 834.428635 482.227809

Note that, to support both Bruker and Thermo data, the output dataframe identifies spectra by spec_idx (which starts at 0) rather than by scan number. For Thermo data, spec_idx = scan_num - 1.

We can also read Spectronaut/OpenSWATH TSV libraries without the fragment information.

Spectronaut’s output library:

[7]:
from io import StringIO
tsv = StringIO('''ReferenceRun      PrecursorCharge Workflow        IntModifiedPeptide      CV      AllowForNormalization   ModifiedPeptide StrippedPeptide iRT     IonMobility     iRTSourceSpecific       BGSInferenceId  IsProteotypic   IntLabeledPeptide       LabeledPeptide  PrecursorMz     ReferenceRunQvalue      ReferenceRunMS1Response FragmentLossType        FragmentNumber  FragmentType    FragmentCharge  FragmentMz      RelativeIntensity       ExcludeFromAssay        Database        ProteinGroups   UniProtIds      Protein Name    ProteinDescription      Organisms       OrganismId      Genes   Protein Existence       Sequence Version        FASTAName
202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phospho_100ug_test_S4-A1_1_25843    2               _ALVAT[+80]PGK_         True    _ALVAT[Phospho (STY)]PGK_       ALVATPGK        -5.032703       0.758   -5.032703       P19338  False   _ALVAT[+80]PGK_ _ALVAT[Phospho (STY)]PGK_       418.717511324722        0       10352   noloss  3       y       1       301.187031733932        53.1991 False   sp      P19338  P19338  NUCL_HUMAN      Nucleolin       Homo sapiens            NCL     1       3       MCT_human_UP000005640_9606
202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phospho_100ug_test_S4-A1_1_25843    2               _ALVAT[+80]PGK_         True    _ALVAT[Phospho (STY)]PGK_       ALVATPGK        -5.032703       0.758   -5.032703       P19338  False   _ALVAT[+80]PGK_ _ALVAT[Phospho (STY)]PGK_       418.717511324722        0       10352   H3PO4   4       y       1       384.224142529733        26.31595        False   sp      P19338  P19338  NUCL_HUMAN      Nucleolin       Homo sapiens            NCL     1       3       MCT_human_UP000005640_9606
202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phospho_100ug_test_S4-A1_1_25843    2               _TLT[+80]PCPLR_         True    _TLT[Phospho (STY)]PC[Carbamidomethyl (C)]PLR_  TLTPCPLR        27.71659        0.818   27.71659        Q5T200  False   _TLT[+80]PPLR_  _TLT[Phospho (STY)]PPLR_        439.230785875227        0.000138389150379226    23117   noloss  3       b       1       396.153027901512        6.3264  False   sp      Q5T200  Q5T200  ZC3HD_HUMAN     Zinc finger CCCH domain-containing protein 13   Homo sapiens            ZC3H13  1       1       MCT_human_UP000005640_9606
202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phospho_100ug_test_S4-A1_1_25843    2               _TLT[+80]PCPLR_         True    _TLT[Phospho (STY)]PC[Carbamidomethyl (C)]PLR_  TLTPCPLR        27.71659        0.818   27.71659        Q5T200  False   _TLT[+80]PPLR_  _TLT[Phospho (STY)]PPLR_        439.230785875227        0.000138389150379226    23117   noloss  3       y       1       385.255780000092        29.70625        False   sp      Q5T200  Q5T200  ZC3HD_HUMAN     Zinc finger CCCH domain-containing protein 13   Homo sapiens            ZC3H13  1       1       MCT_human_UP000005640_9606
202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phospho_library25_S4-C1_1_25867     2               _LFVT[+80]PPEGSSR_              True    _[Acetyl (Protein N-term)]LFVS[Phospho (STY)]PPEGSSR_   LFVSPPEGSSR     38.05031        0.917   38.05031        Q14244;Q14244-6;Q14244-7        False   _LFVT[+80]PPEGSSR_      _LFVT[Phospho (STY)]PPEGSSR_    635.297385373987        0       14164   H3PO4   4       b       1       443.265279065723        12.24525        False   sp      Q14244;Q14244-6;Q14244-7        Q14244;Q14244-6;Q14244-7        MAP7_HUMAN      Ensconsin;Isoform of Q14244, Isoform 6 of Ensconsin;Isoform of Q14244, Isoform 7 of Ensconsin   Homo sapiens            MAP7    1;;     1;;     MCT_human_UP000005640_9606;MCT_human2_UP000005640_9606_additional;MCT_human2_UP000005640_9606_additional
202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phospho_library25_S4-C1_1_25867     2               _LFVT[+80]PPEGSSR_              True    _[Acetyl (Protein N-term)]LFVS[Phospho (STY)]PPEGSSR_   LFVSPPEGSSR     38.05031        0.917   38.05031        Q14244;Q14244-6;Q14244-7        False   _LFVT[+80]PPEGSSR_      _LFVT[Phospho (STY)]PPEGSSR_    635.297385373987        0       14164   noloss  6       y       1       632.299829640042        46.07855        False   sp      Q14244;Q14244-6;Q14244-7        Q14244;Q14244-6;Q14244-7        MAP7_HUMAN      Ensconsin;Isoform of Q14244, Isoform 6 of Ensconsin;Isoform of Q14244, Isoform 7 of Ensconsin   Homo sapiens            MAP7    1;;     1;;     MCT_human_UP000005640_9606;MCT_human2_UP000005640_9606_additional;MCT_human2_UP000005640_9606_additional
202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phospho_library25_S4-C1_1_25867     2               _LFVT[+80]PPEGSSR_              True    _[Acetyl (Protein N-term)]LFVS[Phospho (STY)]PPEGSSR_   LFVSPPEGSSR     38.05031        0.917   38.05031        Q14244;Q14244-6;Q14244-7        False   _LFVT[+80]PPEGSSR_      _LFVT[Phospho (STY)]PPEGSSR_    635.297385373987        0       14164   noloss  7       y       1       729.352593488892        100     False   sp      Q14244;Q14244-6;Q14244-7        Q14244;Q14244-6;Q14244-7        MAP7_HUMAN      Ensconsin;Isoform of Q14244, Isoform 6 of Ensconsin;Isoform of Q14244, Isoform 7 of Ensconsin   Homo sapiens            MAP7    1;;     1;;     MCT_human_UP000005640_9606;MCT_human2_UP000005640_9606_additional;MCT_human2_UP000005640_9606_additional
''')

# Get a reader for the 'spectronaut' key (registered as SpectronautReader in reader_dict)
# and parse the in-memory library. The input has one row per fragment, while the
# displayed result has one row per precursor.
psm_reader_provider.get_reader('spectronaut').import_file(tsv)
[7]:
raw_name sequence charge rt precursor_mz mobility proteins uniprot_ids genes mods mod_sites nAA rt_norm ccs
0 202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phosph... ALVATPGK 2 -5.032703 418.717511 0.758 NUCL_HUMAN P19338 NCL Phospho@T 5 8 0.0 308.612143
1 202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phosph... TLTPCPLR 2 27.716590 439.230786 0.818 ZC3HD_HUMAN Q5T200 ZC3H13 Phospho@T;Carbamidomethyl@C 3;5 8 1.0 332.788837
2 202106018_TIMS03_EVO03_PaSk_SA_HeLa_EGF_Phosph... LFVSPPEGSSR 2 38.050310 635.297385 0.917 MAP7_HUMAN Q14244;Q14244-6;Q14244-7 MAP7 Acetyl@Protein N-term;Phospho@S 0;4 11 1.0 371.282739

OpenSWATH’s library:

[8]:
from io import StringIO
tsv = StringIO('''PrecursorMz       ProductMz       Tr_recalibrated transition_name CE      LibraryIntensity        transition_group_id     decoy   PeptideSequence ProteinName     Annotation      FullUniModPeptideName   PrecursorCharge GroupLabel      UniprotID       FragmentType    FragmentCharge  FragmentSeriesNumber
685.732240417       886.020494795   59.0    255_AAAAAAAAAASGAAIPPLIPPRR_3   -1      5257.9  13_AAAAAAAAAASGAAIPPLIPPRR_3    0       AAAAAAAAAASGAAIPPLIPPRR 1/O14654        y19^2/0.002     AAAAAAAAAASGAAIPPLIPPRR 3       light   1/O14654        y       2       19
514.550999438       473.303261576   59.2    268_AAAAAAAAAASGAAIPPLIPPRR_4   -1      10000.0 14_AAAAAAAAAASGAAIPPLIPPRR_4    0       AAAAAAAAAASGAAIPPLIPPRR 1/O14654        y8^2/0.002      AAAAAAAAAASGAAIPPLIPPRR 4       light   1/O14654        y       2       8
514.550999438       629.39313922    59.2    276_AAAAAAAAAASGAAIPPLIPPRR_4   -1      5923.1  14_AAAAAAAAAASGAAIPPLIPPRR_4    0       AAAAAAAAAASGAAIPPLIPPRR 1/O14654        y12^2/0.001     AAAAAAAAAASGAAIPPLIPPRR 4       light   1/O14654        y       2       12
514.550999438       672.909153425   59.2    279_AAAAAAAAAASGAAIPPLIPPRR_4   -1      5249.8  14_AAAAAAAAAASGAAIPPLIPPRR_4    0       AAAAAAAAAASGAAIPPLIPPRR 1/O14654        y13^2/0.001     AAAAAAAAAASGAAIPPLIPPRR 4       light   1/O14654        y       2       13
514.550999438       356.19284545    59.2    262_AAAAAAAAAASGAAIPPLIPPRR_4   -1      5233.6  14_AAAAAAAAAASGAAIPPLIPPRR_4    0       AAAAAAAAAASGAAIPPLIPPRR 1/O14654        b5/0.001,b10^2/0.001,m6:10/0.001        AAAAAAAAAASGAAIPPLIPPRR 4       light   1/O14654        b       1       5
514.550999438       498.26707303    59.2    269_AAAAAAAAAASGAAIPPLIPPRR_4   -1      4976.0  14_AAAAAAAAAASGAAIPPLIPPRR_4    0       AAAAAAAAAASGAAIPPLIPPRR 1/O14654        b7/0.001,m4:10/0.001    AAAAAAAAAASGAAIPPLIPPRR 4       light   1/O14654        b       1       7
514.550999438       427.22995924    59.2    265_AAAAAAAAAASGAAIPPLIPPRR_4   -1      4859.4  14_AAAAAAAAAASGAAIPPLIPPRR_4    0       AAAAAAAAAASGAAIPPLIPPRR 1/O14654        b6/0.002,m5:10/0.002    AAAAAAAAAASGAAIPPLIPPRR 4       light   1/O14654        b       1       6
728.201724416       356.19284545    101.8   292_AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR_5    -1      10000.0 15_AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR_5     0       AAAAAAAAAASGAAIPPLIPPRRVITLYQCFSVSQR    1/O14654        b5/0.003,b10^2/0.003,m6:10/0.003        AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR  5       light   1/O14654        b       1       5
728.201724416       576.310000482   101.8   297_AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR_5    -1      7611.0  15_AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR_5     0       AAAAAAAAAASGAAIPPLIPPRRVITLYQCFSVSQR    1/O14654        y5/0.002        AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR  5       light   1/O14654        y       1       5
728.201724416       427.22995924    101.8   293_AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR_5    -1      6805.1  15_AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR_5     0       AAAAAAAAAASGAAIPPLIPPRRVITLYQCFSVSQR    1/O14654        b6/-0.002,m5:10/-0.002  AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR  5       light   1/O14654        b       1       6
728.201724416       569.30418682    101.8   296_AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR_5    -1      6312.7  15_AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR_5     0       AAAAAAAAAASGAAIPPLIPPRRVITLYQCFSVSQR    1/O14654        b8/0.009,m3:10/0.009    AAAAAAAAAASGAAIPPLIPPRRVITLYQC(UniMod:4)FSVSQR  5       light   1/O14654        b       1       8
''')

# Get a reader for the 'openswath' key; reader_dict maps both 'openswath' and
# 'swath' to the same SwathReader class.
osw_reader = psm_reader_provider.get_reader('openswath')
# Parse the in-memory OpenSWATH library.
osw_reader.import_file(tsv)
# Display the reader's psm_df attribute as the cell output.
osw_reader.psm_df
[8]:
sequence charge rt precursor_mz proteins mods mod_sites nAA rt_norm
0 AAAAAAAAAASGAAIPPLIPPRR 3 59.0 685.732240 1/O14654 23 0.579568
1 AAAAAAAAAASGAAIPPLIPPRR 4 59.2 514.550999 1/O14654 23 0.581532
2 AAAAAAAAAASGAAIPPLIPPRRVITLYQCFSVSQR 5 101.8 728.201724 1/O14654 Carbamidomethyl@C 30 36 1.000000

Spectronaut report:

[9]:
# Minimal Spectronaut report held in memory. NOTE: the content is comma-separated
# even though the variable is named `tsv`. `StringIO` is not imported in this cell;
# it comes from the `from io import StringIO` executed in the earlier cells.
tsv = StringIO('''R.FileName,R.Replicate,EG.PrecursorId,EG.ApexRT,FG.CalibratedMassAccuracy (PPM),FG.CalibratedMz
20211203_EXPL2_SoSt_SA_DIA_HeLa_1000mz_noCB_01,1,_VIETPENDFK_.2,40.826847076416,-0.6350307649846,596.298998773218
20211203_EXPL2_SoSt_SA_DIA_HeLa_1000mz_noCB_01,1,_GFSNEVSSK_.2,19.1254806518555,-1.54873822486555,477.730400257423
20211203_EXPL2_SoSt_SA_DIA_HeLa_1000mz_noCB_01,1,_HLLNQAVGEEEVPK_.3,42.0593299865723,-0.309173676987587,521.611288926824
20211203_EXPL2_SoSt_SA_DIA_HeLa_1000mz_noCB_01,1,_DATM[Oxidation (M)]EVQR_.2,12.8398199081421,-3.31103772642203,483.222124398527
''')

# Get a reader for the 'spectronaut_report' key (registered as SpectronautReportReader).
spn_reader = psm_reader_provider.get_reader('spectronaut_report')
# Parse the in-memory report.
spn_reader.import_file(tsv)
# Display the reader's psm_df attribute as the cell output.
spn_reader.psm_df
[9]:
raw_name rt charge mods mod_sites sequence nAA rt_norm precursor_mz
0 20211203_EXPL2_SoSt_SA_DIA_HeLa_1000mz_noCB_01 12.839820 2 Oxidation@M 4 DATMEVQR 8 0.305279 483.221474
1 20211203_EXPL2_SoSt_SA_DIA_HeLa_1000mz_noCB_01 19.125481 2 GFSNEVSSK 9 0.454726 477.729989
2 20211203_EXPL2_SoSt_SA_DIA_HeLa_1000mz_noCB_01 40.826847 2 VIETPENDFK 10 0.970697 596.298236
3 20211203_EXPL2_SoSt_SA_DIA_HeLa_1000mz_noCB_01 42.059330 3 HLLNQAVGEEEVPK 14 1.000000 521.610617
[ ]: