libpappsomspp
Library for mass spectrometry
msrunretentiontime.cpp
Go to the documentation of this file.
1
2/*******************************************************************************
3 * Copyright (c) 2019 Olivier Langella <Olivier.Langella@u-psud.fr>.
4 *
5 * This file is part of the PAPPSOms++ library.
6 *
7 * PAPPSOms++ is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * PAPPSOms++ is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
19 *
20 * Contributors:
21 * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
22 *implementation
23 ******************************************************************************/
24
25#include "msrunretentiontime.h"
26#include "../../exception/exceptionnotpossible.h"
27#include <QDebug>
28#include <QObject>
29
30//#include <odsstream/odsexception.h>
31//#include <odsstream/odsdocwriter.h>
32//#include <QFileInfo>
33
34using namespace pappso;
35
36template <class T>
38 : m_ms2MedianFilter(10), m_ms2MeanFilter(15), m_ms1MeanFilter(1)
39{
40 msp_msrunReader = msrun_reader_sp;
41 mcsp_msrunId = msp_msrunReader.get()->getMsRunId();
42 m_ms1RetentionTimeVector = msp_msrunReader.get()->getRetentionTimeLine();
43
44
45 std::sort(m_ms1RetentionTimeVector.begin(),
47 [](const double &a, const double &b) { return (a < b); });
48}
49
50template <class T>
52 : m_ms2MedianFilter(other.m_ms2MedianFilter),
53 m_ms2MeanFilter(other.m_ms2MeanFilter),
54 m_ms1MeanFilter(other.m_ms1MeanFilter)
55{
60
61 m_seamarks = other.m_seamarks;
63
65
67}
68
69template <class T>
71{
72}
73
74template <class T>
75const MsRunId &
77{
78 return *(mcsp_msrunId.get());
79}
80
81template <typename T>
84{
85 return m_ms2MedianFilter;
86}
87
88template <class T>
89void
91 const FilterMorphoMedian &ms2MedianFilter)
92{
93 m_ms2MedianFilter = ms2MedianFilter;
94}
95
96template <typename T>
99{
100 return m_ms2MeanFilter;
101}
102
103
104template <class T>
105void
107{
108 m_ms2MeanFilter = ms2MeanFilter;
109}
110
111template <typename T>
114{
115 return m_ms1MeanFilter;
116}
117
118template <class T>
119void
121{
122 m_ms1MeanFilter = ms1MeanFilter;
123}
124
125template <class T>
126const std::vector<MsRunRetentionTimeSeamarkPoint<T>> &
128{
129 qDebug();
130 return m_seamarks;
131}
132
133template <class T>
134const std::vector<double> &
136{
137 return m_alignedRetentionTimeVector;
138}
139
140template <class T>
141std::size_t
143{
144 return m_valuesCorrected;
145}
146template <class T>
147const std::vector<double> &
149{
150 return m_ms1RetentionTimeVector;
151}
152
153template <class T>
154Trace
156 const std::vector<MsRunRetentionTimeSeamarkPoint<T>> &other_seamarks) const
157{
158 Trace common_points;
159 getCommonDeltaRt(common_points, other_seamarks);
160 return common_points;
161}
162
163template <class T>
164void
166 std::size_t ms2_spectrum_index)
167{
168
169 qDebug();
170 msp_msrunReader.get()->acquireDevice();
171 PeptideMs2Point ms2point;
172 ms2point.entityHash = peptide_id;
173 QualifiedMassSpectrum spectrum =
174 msp_msrunReader.get()->qualifiedMassSpectrum(ms2_spectrum_index, false);
175 ms2point.precursorIntensity = spectrum.getPrecursorIntensity();
176 ms2point.retentionTime = spectrum.getRtInSeconds();
177
178 // addSeamark(m_hash_fn(peptide_str.toStdString()), retentionTime);
179
180 m_allMs2Points.push_back(ms2point);
181
182 qDebug();
183}
184
185template <class T>
186void
188 double retentionTime,
189 double precursorIntensity)
190{
191 PeptideMs2Point ms2point;
192 ms2point.entityHash = peptide_id;
193 ms2point.precursorIntensity = precursorIntensity;
194 ms2point.retentionTime = retentionTime;
195 m_allMs2Points.push_back(ms2point);
196}
197
198
199template <class T>
200void
202{
203
204 qDebug();
205 if(m_allMs2Points.size() == 0)
206 {
207 // already computed
208 return;
209 }
210 m_seamarks.clear();
211 if(m_retentionTimeReferenceMethod ==
212 ComputeRetentionTimeReference::maximum_intensity)
213 {
214
215
216 std::sort(m_allMs2Points.begin(),
217 m_allMs2Points.end(),
218 [](const PeptideMs2Point &a, const PeptideMs2Point &b) {
219 if(a.entityHash == b.entityHash)
220 {
221 return (a.precursorIntensity > b.precursorIntensity);
222 }
223 return (a.entityHash < b.entityHash);
224 });
225
226 auto itend =
227 std::unique(m_allMs2Points.begin(),
228 m_allMs2Points.end(),
229 [](const PeptideMs2Point &a, const PeptideMs2Point &b) {
230 return (a.entityHash == b.entityHash);
231 });
232
233 auto it = m_allMs2Points.begin();
234 while(it != itend)
235 {
236 m_seamarks.push_back(
237 {it->entityHash, it->retentionTime, it->precursorIntensity});
238 it++;
239 }
240 }
241 m_allMs2Points.clear();
242
243 std::sort(m_seamarks.begin(),
244 m_seamarks.end(),
247 return (a.entityHash < b.entityHash);
248 });
249 qDebug();
250}
251
252template <class T>
253void
255 Trace &delta_rt,
256 const std::vector<MsRunRetentionTimeSeamarkPoint<T>> &other_seamarks) const
257{
258
259 qDebug();
260 auto it = other_seamarks.begin();
261
262 for(const MsRunRetentionTimeSeamarkPoint<T> &seamark : m_seamarks)
263 {
264 while((it != other_seamarks.end()) &&
265 (it->entityHash < seamark.entityHash))
266 {
267 it++;
268 }
269 if(it == other_seamarks.end())
270 break;
271 if(it->entityHash == seamark.entityHash)
272 {
273 delta_rt.push_back(DataPoint(
274 seamark.retentionTime, seamark.retentionTime - it->retentionTime));
275 }
276 }
277
278 qDebug();
279 if((m_ms2MedianFilter.getHalfWindowSize() * 2 + 1) >= delta_rt.size())
280 {
282 QObject::tr("ERROR : MS2 alignment of MS run '%1' (%2)' not possible : "
283 "\ntoo few MS2 points (%3) in common")
284 .arg(msp_msrunReader.get()->getMsRunId().get()->getXmlId())
285 .arg(msp_msrunReader.get()->getMsRunId().get()->getFileName())
286 .arg(delta_rt.size()));
287 }
288
289 qDebug();
290 if((m_ms2MeanFilter.getHalfWindowSize() * 2 + 1) >= delta_rt.size())
291 {
293 QObject::tr("ERROR : MS2 alignment of MS run '%1' (%2)' not possible : "
294 "\ntoo few MS2 points (%3) in common")
295 .arg(msp_msrunReader.get()->getMsRunId().get()->getXmlId())
296 .arg(msp_msrunReader.get()->getMsRunId().get()->getFileName())
297 .arg(delta_rt.size()));
298 }
299 delta_rt.sortX();
300
301 // there can be multiple entities (peptides) at one retention time
302 // in this case, avoid retention time redundancy by applying unique on trace :
303 delta_rt.unique();
304
305 qDebug();
306}
307
308
309template <class T>
310double
312{
313 if(isAligned())
314 {
315 return m_alignedRetentionTimeVector.front();
316 }
317 return m_ms1RetentionTimeVector.front();
318}
319template <class T>
320double
322{
323 if(isAligned())
324 {
325 return m_alignedRetentionTimeVector.back();
326 }
327 return m_ms1RetentionTimeVector.back();
328}
329
330
331template <class T>
332double
334 double original_retention_time) const
335{
336 if(m_alignedRetentionTimeVector.size() < 3)
337 {
339 QObject::tr("ERROR : too few aligned points to compute aligned "
340 "retention time (%1)")
341 .arg(m_ms1RetentionTimeVector.size()));
342 }
343 if(m_alignedRetentionTimeVector.size() != m_ms1RetentionTimeVector.size())
344 {
346 QObject::tr("ERROR : m_alignedRetentionTimeVector.size() %1 != %2 "
347 "m_ms1RetentionTimeVector.size()")
348 .arg(m_alignedRetentionTimeVector.size())
349 .arg(m_ms1RetentionTimeVector.size()));
350 }
351 auto it_plus =
352 std::find_if(m_ms1RetentionTimeVector.begin(),
353 m_ms1RetentionTimeVector.end(),
354 [original_retention_time](const double &rt_point) {
355 return original_retention_time < rt_point;
356 });
357 double rt1_a, rt2_a, rt1_b, rt2_b;
358 if(it_plus == m_ms1RetentionTimeVector.end())
359 {
360 it_plus--;
361 }
362 if(it_plus == m_ms1RetentionTimeVector.begin())
363 {
364 it_plus++;
365 }
366 auto it_minus = it_plus - 1;
367
368 rt1_a = *it_minus;
369 rt2_a = *it_plus;
370
371 double ratio = (original_retention_time - rt1_a) / (rt2_a - rt1_a);
372
373 auto itref = m_alignedRetentionTimeVector.begin() +
374 std::distance(m_ms1RetentionTimeVector.begin(), it_minus);
375
376 rt1_b = *itref;
377 itref++;
378 rt2_b = *itref;
379
380 return (((rt2_b - rt1_b) * ratio) + rt1_b);
381}
382
383template <class T>
384double
386 double aligned_retention_time) const
387{
388 if(m_alignedRetentionTimeVector.size() < 3)
389 {
391 QObject::tr("ERROR : too few aligned points to compute aligned "
392 "retention time (%1)")
393 .arg(m_ms1RetentionTimeVector.size()));
394 }
395 if(m_alignedRetentionTimeVector.size() != m_ms1RetentionTimeVector.size())
396 {
398 QObject::tr("ERROR : m_alignedRetentionTimeVector.size() %1 != %2 "
399 "m_ms1RetentionTimeVector.size()")
400 .arg(m_alignedRetentionTimeVector.size())
401 .arg(m_ms1RetentionTimeVector.size()));
402 }
403 auto it_plus = std::find_if(m_alignedRetentionTimeVector.begin(),
404 m_alignedRetentionTimeVector.end(),
405 [aligned_retention_time](const double &rt_point) {
406 return aligned_retention_time < rt_point;
407 });
408 double rt1_a, rt2_a, rt1_b, rt2_b;
409 if(it_plus == m_alignedRetentionTimeVector.end())
410 {
411 it_plus--;
412 }
413 if(it_plus == m_alignedRetentionTimeVector.begin())
414 {
415 it_plus++;
416 }
417 auto it_minus = it_plus - 1;
418
419 rt1_a = *it_minus;
420 rt2_a = *it_plus;
421
422 double ratio = (aligned_retention_time - rt1_a) / (rt2_a - rt1_a);
423
424 auto itref = m_ms1RetentionTimeVector.begin() +
425 std::distance(m_alignedRetentionTimeVector.begin(), it_minus);
426
427 rt1_b = *itref;
428 itref++;
429 rt2_b = *itref;
430
431 return (((rt2_b - rt1_b) * ratio) + rt1_b);
432}
433
434template <class T>
435const std::vector<MsRunRetentionTimeSeamarkPoint<T>>
437{
438 std::vector<MsRunRetentionTimeSeamarkPoint<T>> other_seamarks = m_seamarks;
439 for(auto &seamark : other_seamarks)
440 {
441 seamark.retentionTime =
442 translateOriginal2AlignedRetentionTime(seamark.retentionTime);
443 }
444 return other_seamarks;
445}
446
447template <class T>
448bool
450{
451 return (m_alignedRetentionTimeVector.size() > 0);
452}
453
454template <class T>
455Trace
457 const MsRunRetentionTime<T> &msrun_retention_time_reference)
458{
459 computeSeamarks();
460 std::vector<MsRunRetentionTimeSeamarkPoint<T>> other_seamarks;
461 if(msrun_retention_time_reference.isAligned())
462 {
463 other_seamarks = msrun_retention_time_reference.getSeamarksReferences();
464 }
465 else
466 {
467 other_seamarks = msrun_retention_time_reference.getSeamarks();
468 }
469 qDebug();
470 if((m_ms1MeanFilter.getHalfWindowSize() * 2 + 1) >=
471 m_ms1RetentionTimeVector.size())
472 {
474 QObject::tr("ERROR : MS1 alignment of MS run '%1' (%2)' not possible : "
475 "\ntoo few MS1 points (%3)")
476 .arg(msp_msrunReader.get()->getMsRunId().get()->getXmlId())
477 .arg(msp_msrunReader.get()->getMsRunId().get()->getFileName())
478 .arg(m_ms1RetentionTimeVector.size()));
479 }
480
481 qDebug() << m_seamarks[0].entityHash << " " << m_seamarks[0].retentionTime
482 << " " << other_seamarks[0].entityHash
483 << other_seamarks[0].retentionTime << " ";
484 // both seamarks has to be ordered
485 Trace common_points;
486 getCommonDeltaRt(common_points, other_seamarks);
487
488 // writeTrace("lib_ms2_delta_rt.ods", common_points);
489
490 qDebug() << common_points.front().x << " " << common_points.front().y;
491 m_ms2MedianFilter.filter(common_points);
492 // writeTrace("lib_ms2_delta_rt_median.ods", common_points);
493 m_ms2MeanFilter.filter(common_points);
494 // writeTrace("lib_ms2_delta_rt_mean.ods", common_points);
495 // convert common delta rt to real retention times (for convenience)
496 qDebug() << common_points.front().x << " " << common_points.front().y;
497
498
499 // add a first point to ensure coherence:
500 DataPoint first_point;
501 first_point.x = m_ms1RetentionTimeVector.front() - (double)1;
502 if(first_point.x < 0)
503 {
504 first_point.x = 0;
505 }
506 first_point.y =
507 m_ms1RetentionTimeVector.front() -
508 msrun_retention_time_reference.getFrontRetentionTimeReference();
509
510 common_points.push_back(first_point);
511 // add a last point to ensure coherence:
512 DataPoint last_point;
513 last_point.x = m_ms1RetentionTimeVector.back() + 1;
514 last_point.y = m_ms1RetentionTimeVector.back() -
515 msrun_retention_time_reference.getBackRetentionTimeReference();
516 common_points.push_back(last_point);
517 common_points.sortX();
518
519 // now, it is possible for each time range to give a new MS1 time using a
520 // linear regression on MS2 corrected times
521 m_alignedRetentionTimeVector.clear();
522
523 qDebug() << common_points.front().x << " " << common_points.front().y;
524
525 Trace ms1_aligned_points;
526
527 linearRegressionMs2toMs1(ms1_aligned_points, common_points);
528
529 // writeTrace("lib_ms1_map_rt.ods", ms1_aligned_points);
530 qDebug();
531 // smoothing on MS1 points
532 m_ms1MeanFilter.filter(ms1_aligned_points);
533
534 // writeTrace("lib_ms1_map_rt_mean.ods", ms1_aligned_points);
535 // final aligned retentionTime vector
536
537 for(DataPoint &data_point : ms1_aligned_points)
538 {
539 data_point.y = (data_point.x - data_point.y);
540 }
541
542 qDebug();
543 // Here, the correction parameter is the slope of old rt points curve
544 // (divided by 4 to get a finer correction).
545 double correction_parameter =
546 (m_ms1RetentionTimeVector.back() - m_ms1RetentionTimeVector.front()) /
547 (ms1_aligned_points.size());
548 // set_correction_parameter(correction_parameter / 4);
549 correction_parameter = correction_parameter / (double)4;
550 correctNewTimeValues(ms1_aligned_points, correction_parameter);
551
552 m_alignedRetentionTimeVector = ms1_aligned_points.yValues();
553
554 qDebug();
555 return ms1_aligned_points;
556}
557
558
559template <class T>
560void
562 const Trace &common_points)
563{
564
565 // first slope :
566 std::vector<DataPoint>::const_iterator itms2 = common_points.begin();
567 std::vector<DataPoint>::const_iterator itms2next = itms2 + 1;
568 if(itms2next == common_points.end())
569 {
570 // error
572 QObject::tr("ERROR : MS1 alignment of MS run '%1' (%2)' not possible : "
573 "\ntoo few common points (%3)")
574 .arg(msp_msrunReader.get()->getMsRunId().get()->getXmlId())
575 .arg(msp_msrunReader.get()->getMsRunId().get()->getFileName())
576 .arg(common_points.size()));
577 }
578 qDebug() << "() itms2->x=" << itms2->x << " itms2->y=" << itms2->y;
579
580 for(double &original_rt_point : m_ms1RetentionTimeVector)
581 {
582 DataPoint ms1_point;
583 ms1_point.x = original_rt_point;
584
585 while(ms1_point.x > itms2next->x)
586 {
587 itms2++;
588 itms2next++;
589 }
590
591 double ratio = (itms2next->x - itms2->x);
592 if(ratio != 0)
593 {
594 ratio = (ms1_point.x - itms2->x) / ratio;
595 }
596 else
597 {
598 // avoid division by zero
599 ratio = 1;
600 }
601 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "() " <<
602 // ratio;
603
604 ms1_point.y = itms2->y + ((itms2next->y - itms2->y) * ratio);
605
606 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "() "
607 // << ms1_point.y;
608 ms1_aligned_points.push_back(ms1_point);
609 }
610}
611
612template <class T>
613void
615 double correction_parameter)
616{
617
618 m_valuesCorrected = 0;
619 auto new_it(ms1_aligned_points.begin());
620 auto new_nextit(ms1_aligned_points.begin());
621 new_nextit++;
622 for(; new_nextit != ms1_aligned_points.end(); ++new_nextit, ++new_it)
623 {
624 if(new_nextit->y < new_it->y)
625 {
626 ++m_valuesCorrected;
627 new_nextit->y = new_it->y + correction_parameter;
628 }
629 }
630}
631
632
633template <class T>
636{
637 return msp_msrunReader;
638}
639
640template <class T>
641void
643 const std::vector<double> &aligned_times)
644{
645
646 if(aligned_times.size() == m_ms1RetentionTimeVector.size())
647 {
648 m_alignedRetentionTimeVector = aligned_times;
649 }
650 else
651 {
652 if(aligned_times.size() == m_ms1RetentionTimeVector.size() * 2)
653 {
654 m_alignedRetentionTimeVector = m_ms1RetentionTimeVector;
655 for(std::size_t i = 0; i < m_ms1RetentionTimeVector.size(); i++)
656 {
657 if(aligned_times[2 * i] != m_ms1RetentionTimeVector[i])
658 {
660 QObject::tr(
661 "ERROR : aligned_times (size=%1) vector does not have "
662 "required size (size=%2)")
663 .arg(aligned_times.size())
664 .arg(m_ms1RetentionTimeVector.size()));
665 }
666 m_alignedRetentionTimeVector[i] = aligned_times[(2 * i) + 1];
667 }
668 }
669 else
670 {
672 QObject::tr("ERROR : aligned_times (size=%1) vector does not have "
673 "required size (size=%2)")
674 .arg(aligned_times.size())
675 .arg(m_ms1RetentionTimeVector.size()));
676 }
677 }
678}
679
680
681template <class T>
682Trace
684 const MsRunRetentionTime<T> &msrun_retention_time_reference) const
685{
686 // computeSeamarks();
687 std::vector<MsRunRetentionTimeSeamarkPoint<T>> other_seamarks;
688 if(msrun_retention_time_reference.isAligned())
689 {
690 other_seamarks = msrun_retention_time_reference.getSeamarksReferences();
691 }
692 else
693 {
694 other_seamarks = msrun_retention_time_reference.getSeamarks();
695 }
696 qDebug();
697
698 qDebug() << m_seamarks[0].entityHash << " " << m_seamarks[0].retentionTime
699 << " " << other_seamarks[0].entityHash
700 << other_seamarks[0].retentionTime << " ";
701 // both seamarks has to be ordered
702 Trace common_points;
703 getCommonDeltaRt(common_points, other_seamarks);
704 return common_points;
705}
mean filter apply mean of y values inside the window : this results in a kind of smoothing
Definition: filtermorpho.h:210
median filter apply median of y values inside the window
Definition: filtermorpho.h:191
MS run identity MsRunId identifies an MS run with a unique ID (XmlId) and contains eventually informa...
Definition: msrunid.h:53
void setMs2MedianFilter(const FilterMorphoMedian &ms2MedianFilter)
std::vector< double > m_alignedRetentionTimeVector
std::vector< PeptideMs2Point > m_allMs2Points
const FilterMorphoMean & getMs1MeanFilter() const
const FilterMorphoMedian & getMs2MedianFilter() const
void computeSeamarks()
convert PeptideMs2Point into Peptide seamarks this is required before computing alignment
Trace getCommonSeamarksDeltaRt(const MsRunRetentionTime< T > &msrun_retention_time_reference) const
get common seamarks between msrunretentiontime objects and their deltart
void setMs1MeanFilter(const FilterMorphoMean &ms1MeanFilter)
void linearRegressionMs2toMs1(Trace &ms1_aligned_points, const Trace &common_points)
void setMs2MeanFilter(const FilterMorphoMean &ms2MeanFilter)
pappso::MsRunReaderSPtr msp_msrunReader
double getFrontRetentionTimeReference() const
MsRunRetentionTime(MsRunReaderSPtr msrun_reader_sp)
const std::vector< double > & getAlignedRetentionTimeVector() const
get aligned retention time vector
void setAlignedRetentionTimeVector(const std::vector< double > &aligned_times)
std::vector< MsRunRetentionTimeSeamarkPoint< T > > m_seamarks
Trace align(const MsRunRetentionTime< T > &msrun_retention_time_reference)
align the current msrunretentiontime object using the given reference
double translateAligned2OriginalRetentionTime(double aligned_retention_time) const
double translateOriginal2AlignedRetentionTime(double original_retention_time) const
const std::vector< double > & getMs1RetentionTimeVector() const
get orginal retention time vector (not aligned)
void correctNewTimeValues(Trace &ms1_aligned_points, double correction_parameter)
const std::vector< MsRunRetentionTimeSeamarkPoint< T > > & getSeamarks() const
Trace getCommonDeltaRt(const std::vector< MsRunRetentionTimeSeamarkPoint< T > > &other_seamarks) const
ComputeRetentionTimeReference m_retentionTimeReferenceMethod
const std::vector< MsRunRetentionTimeSeamarkPoint< T > > getSeamarksReferences() const
std::vector< double > m_ms1RetentionTimeVector
const FilterMorphoMean & getMs2MeanFilter() const
pappso::MsRunReaderSPtr getMsRunReaderSPtr() const
const MsRunId & getMsRunId() const
double getBackRetentionTimeReference() const
std::size_t getNumberOfCorrectedValues() const
void addPeptideAsSeamark(const T &peptide_id, std::size_t ms2_spectrum_index)
collects all peptide evidences of a given MSrun seamarks has to be converted to peptide retention tim...
pappso::MsRunIdCstSPtr mcsp_msrunId
Class representing a fully specified mass spectrum.
pappso_double getPrecursorIntensity(bool *ok=nullptr) const
Get the intensity of the precursor ion.
pappso_double getRtInSeconds() const
Get the retention time in seconds.
A simple container of DataPoint instances.
Definition: trace.h:148
void unique()
Definition: trace.cpp:972
void sortX()
Definition: trace.cpp:956
virtual Trace & filter(const FilterInterface &filter) final
apply a filter on this trace
Definition: trace.cpp:1001
std::vector< pappso_double > yValues() const
Definition: trace.cpp:637
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< MsRunReader > MsRunReaderSPtr
Definition: msrunreader.h:185
pappso_double x
Definition: datapoint.h:23
pappso_double y
Definition: datapoint.h:24