libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
fastafileindexer.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/fasta/fastafileindexer.cpp
3 * \date 22/06/2019
4 * \author Olivier Langella
5 * \brief Quick random access to sequences in a fasta file using an index
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2019 Olivier Langella <Olivier.Langella@u-psud.fr>.
10 *
11 * This file is part of the PAPPSOms++ library.
12 *
13 * PAPPSOms++ is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms++ is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28#include "fastafileindexer.h"
29
30#include <QDebug>
31#include <QTextStream>
32#include <QDataStream>
33#include <QFileInfo>
35#include "fastareader.h"
36
37namespace pappso
38{
39FastaFileIndexer::FastaFileIndexer(const QFileInfo &fastaFile)
40 : m_fasta_file(fastaFile.absoluteFilePath())
41{
42
43 if(m_fasta_file.fileName().isEmpty())
44 {
45 throw PappsoException(QObject::tr("No FASTA file name specified"));
46 }
47 if(m_fasta_file.open(QIODevice::ReadOnly))
48 {
49 parseFastaFile();
50 m_fasta_file.close();
51 }
52 else
53 {
54 throw PappsoException(
55 QObject::tr("ERROR opening FASTA file %1 for read").arg(fastaFile.fileName()));
56 }
57}
58
// Member-initialiser list and body of a constructor taking another indexer
// (`other`): the new object gets its own file object, built from the file name
// held by `other.m_fasta_file`.
// NOTE(review): the declaration line (listing line 59) and one body statement
// (listing line 63) are not visible in this view - presumably the copy
// constructor signature and a copy of the index; confirm against the full file.
60 : m_fasta_file(other.m_fasta_file.fileName())
61{
62
// The new object starts without a text stream of its own.
64 mpa_sequenceTxtIn = nullptr;
65}
70
71
72void
74{
75
76 qDebug();
77 QDataStream bin_in(&m_fasta_file);
78 qint64 position = 0;
79
80 // QChar first_char;
81 // txt_in >> first_char;
82 qint8 char_in;
83 bin_in >> char_in;
84 while(!bin_in.atEnd() && (char_in < (qint8)21))
85 { // eat Windows \r\n
86 position++;
87 bin_in >> char_in;
88 }
89 while(!bin_in.atEnd())
90 {
91 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
92 // << " first_char=" << first_char;
93 if(char_in == (qint8)'>')
94 {
95
96 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
97 // << " index=" << m_indexArray.size()
98 // << " position=" << position;
99 m_indexArray.push_back(position);
100 }
101 // eat line
102 position++;
103 bin_in >> char_in;
104 while(!bin_in.atEnd() && (char_in > (qint8)20))
105 {
106 position++;
107 bin_in >> char_in;
108 }
109 position++;
110 bin_in >> char_in;
111
112 if(!bin_in.atEnd() && (char_in < (qint8)21))
113 { // eat Windows \r\n
114 position++;
115 bin_in >> char_in;
116 }
117 }
118 qDebug();
119}
120
121void
123{
124 if(mpa_sequenceTxtIn != nullptr)
125 return;
126 if(m_fasta_file.open(QIODevice::ReadOnly))
127 {
128 mpa_sequenceTxtIn = new QTextStream(&m_fasta_file);
129 }
130 else
131 {
132 throw PappsoException(
133 QObject::tr("ERROR opening FASTA file %1 for read").arg(m_fasta_file.fileName()));
134 }
135}
136
// Deletes the text stream held in mpa_sequenceTxtIn and closes m_fasta_file.
// Does nothing when no stream is currently held.
// NOTE(review): the line carrying the function name (listing line 138) is not
// visible in this view - presumably FastaFileIndexer::close(); confirm.
137void
139{
140 if(mpa_sequenceTxtIn != nullptr)
141 {
142 delete mpa_sequenceTxtIn;
// Reset the pointer so that later null checks see that no stream is held.
143 mpa_sequenceTxtIn = nullptr;
144 m_fasta_file.close();
145 }
146}
147
148void
150{
151 open();
152
153 qDebug() << " goto=" << index << " pos=" << m_indexArray[index];
154 bool seek_ok;
155 if((index < m_indexArray.size()) && (seek_ok = mpa_sequenceTxtIn->seek(m_indexArray[index])))
156 {
157
158 qDebug() << " realpos=" << mpa_sequenceTxtIn->pos();
159 ;
160 if(!seek_ok)
161 {
162
163 throw PappsoException(QObject::tr("ERROR FastaFileIndexer : seek to "
164 "sequence %1, position %2 failed")
165 .arg(index)
166 .arg(m_indexArray[index]));
167 }
168 FastaReader reader(fasta_handler);
170 }
171 else
172 {
173 throw ExceptionOutOfRange(QObject::tr("ERROR reading FASTA file %1 : sequence index %2 "
174 "unreachable, array size=%3")
175 .arg(m_fasta_file.fileName())
176 .arg(index)
177 .arg(m_indexArray.size()));
178 }
179}
180
181
184{
185
186 return std::make_shared<FastaFileIndexer>(*this);
187}
188
189
// Accessor returning how many entries m_indexArray currently holds.
// NOTE(review): the line carrying the function name (listing line 191) is not
// visible in this view; confirm the name and qualifiers against the full file.
190std::size_t
192{
193 return m_indexArray.size();
194}
195} // namespace pappso
FastaFileIndexer(const QFileInfo &fastaFile)
FastaFileIndexerSPtr makeFastaFileIndexerSPtr() const
std::vector< qint64 > m_indexArray
void getSequenceByIndex(FastaHandlerInterface &fasta_handler, std::size_t index) override
void parseOnlyOne(QTextStream &p_in)
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< FastaFileIndexer > FastaFileIndexerSPtr