18 #include "SamInterface.h"
19 #include "SamRecordHelper.h"
24 SamInterface::SamInterface()
29 SamInterface::~SamInterface()
42 "Cannot read header since the file pointer is null");
51 std::string errorMessages =
"";
56 buffer.ReadLine(filePtr);
60 if (
ifeof(filePtr) ||
61 ((buffer.Length() != 0) && (buffer[0] !=
'@')) )
69 if(buffer.Length() != 0)
85 myFirstRecord = buffer;
91 std::cerr <<
"Failed to parse " << numInvalid <<
" header lines";
92 std::cerr <<
". No valid header lines.\n";
105 if((filePtr == NULL) || (filePtr->
isOpen() ==
false))
109 "Cannot write header since the file pointer is null");
117 std::string headerString =
"";
120 int32_t headerLen = headerString.length();
124 numWrite =
ifwrite(filePtr, headerString.c_str(), headerLen);
125 if(numWrite != headerLen)
128 "Failed to write the SAM header.");
142 if((filePtr == NULL) || (filePtr->
isOpen() ==
false))
146 "filePtr does not point to an open file.");
152 if(myFirstRecord.Length() != 0)
154 buffer = myFirstRecord;
155 myFirstRecord.Clear();
161 buffer.ReadLine(filePtr);
163 if ((
ifeof(filePtr)) && (buffer.Length() == 0))
167 "No more records in the file.");
172 tokens.ReplaceColumns(buffer,
'\t');
178 if (tokens.Length() < 11)
180 errorString =
"Too few columns (";
181 errorString += tokens.Length();
182 errorString +=
") in the Record, expected at least 11.";
184 errorString.c_str());
197 if(!tokens[1].AsInteger(flagInt))
199 errorString =
"flag, ";
200 errorString += tokens[1].c_str();
201 errorString +=
", is not an integer.";
203 errorString.c_str());
205 else if((flagInt < 0) || (flagInt > UINT16_MAX))
207 errorString =
"flag, ";
208 errorString += tokens[1].c_str();
209 errorString +=
", is not between 0 and (2^16)-1 = 65535.";
211 errorString.c_str());
213 else if(!record.
setFlag(flagInt))
226 if(!tokens[3].AsInteger(posInt))
228 errorString =
"position, ";
229 errorString += tokens[3].c_str();
230 errorString +=
", is not an integer.";
232 errorString.c_str());
234 else if((posInt < INT32_MIN) || (posInt > INT32_MAX))
237 errorString =
"position, ";
238 errorString += tokens[3].c_str();
239 errorString +=
", does not fit in a 32 bit signed int.";
241 errorString.c_str());
250 if(!tokens[4].AsInteger(mapInt))
252 errorString =
"map quality, ";
253 errorString += tokens[4].c_str();
254 errorString +=
", is not an integer.";
256 errorString.c_str());
258 else if((mapInt < 0) || (mapInt > UINT8_MAX))
260 errorString =
"map quality, ";
261 errorString += tokens[4].c_str();
262 errorString +=
", is not between 0 and (2^8)-1 = 255.";
264 errorString.c_str());
285 if(!tokens[7].AsInteger(matePosInt))
287 errorString =
"mate position, ";
288 errorString += tokens[7].c_str();
289 errorString +=
", is not an integer.";
291 errorString.c_str());
300 if(!tokens[8].AsInteger(insertInt))
302 errorString =
"insert size, ";
303 errorString += tokens[8].c_str();
304 errorString +=
", is not an integer.";
306 errorString.c_str());
330 for (
int i = 11; i < tokens.Length(); i++)
332 String & nugget = tokens[i];
334 if (nugget.Length() < 6 || nugget[2] !=
':' || nugget[4] !=
':')
337 errorString =
"Invalid Tag Format: ";
338 errorString += nugget.c_str();
339 errorString +=
", should be cc:c:x*.";
341 errorString.c_str());
347 if(!record.
addTag((
const char *)nugget, nugget[3],
348 (
const char *)nugget + 5))
366 recordString +=
"\t";
367 recordString += record.
getFlag();
368 recordString +=
"\t";
370 recordString +=
"\t";
372 recordString +=
"\t";
374 recordString +=
"\t";
376 recordString +=
"\t";
378 recordString +=
"\t";
380 recordString +=
"\t";
382 recordString +=
"\t";
384 recordString +=
"\t";
390 recordString +=
"\t";
394 recordString +=
"\n";
398 ifwrite(filePtr, recordString.c_str(), recordString.Length());
408 tokens.AddColumns(buffer,
'\t');
410 for (
int i = 1; i < tokens.Length(); i++)
412 tags.Add(tokens[i].Left(2), i - 1);
413 values.Push(tokens[i].SubStr(3));
bool setMapQuality(uint8_t mapQuality)
Set the mapping quality (MAPQ).
const char * getReferenceName()
Get the reference sequence name (RNAME) of the record.
SequenceTranslation
Enum containing the settings on how to translate the sequence if a reference is available.
bool setSequence(const char *seq)
Sets the sequence (SEQ) to the specified SAM formatted sequence string.
bool setMateReferenceName(SamFileHeader &header, const char *mateReferenceName)
Set the mate/next fragment's reference sequence name (RNEXT) to the specified name, using the header to determine the mate reference id.
NO_MORE_RECS: failed to read a record since there are no more to read, either in the file or in the section if section-based reading is in use.
uint8_t getMapQuality()
Get the mapping quality (MAPQ) of the record.
void clearTags()
Clear the tags in this record.
method completed successfully.
Status getStatus() const
Return the enum for this status object.
uint16_t getFlag()
Get the flag (FLAG).
bool setReadName(const char *readName)
Set QNAME to the passed in name.
This class is used to track the status results of some methods in the BAM classes.
failed to parse a record/header - invalid format.
int32_t get1BasedPosition()
Get the 1-based(SAM) leftmost position (POS) of the record.
bool set1BasedPosition(int32_t position)
Set the leftmost position (POS) using the specified 1-based (SAM format) value.
const char * getQuality()
Returns the SAM formatted quality string (QUAL).
void setStatus(Status newStatus, const char *newMessage)
Set the status with the specified status enum and message.
const char * getReadName()
Returns the SAM formatted Read Name (QNAME).
Status
Return value enum for StatGenFile methods.
const SamStatus & getStatus()
Returns the status associated with the last method that sets the status.
uint32_t getTagLength()
Returns the length of the BAM formatted tags.
bool setInsertSize(int32_t insertSize)
Sets the inferred insert size (ISIZE)/observed template length (TLEN).
bool setFlag(uint16_t flag)
Set the bitwise FLAG to the specified value.
const char * getSequence()
Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTranslation.
void addError(Status newStatus, const char *newMessage)
Add the specified error message to the status message, setting the status to newStatus if the current status is SUCCESS.
const char * getStatusMessage() const
Return the status message for this object.
bool addTag(const char *tag, char vtype, const char *value)
Add the specified tag,vtype,value to the record.
int32_t getInsertSize()
Get the inferred insert size of the read pair (ISIZE) or observed template length (TLEN).
const char * getCigar()
Returns the SAM formatted CIGAR string.
bool setCigar(const char *cigar)
Set the CIGAR to the specified SAM formatted cigar string.
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
int32_t get1BasedMatePosition()
Get the 1-based(SAM) leftmost mate/next fragment's position (PNEXT).
bool setReferenceName(SamFileHeader &header, const char *referenceName)
Set the reference sequence name (RNAME) to the specified name, using the header to determine the reference id.
void resetRecord()
Reset the fields of the record to a default value.
static bool genSamTagsString(SamRecord &record, String &returnString, char delim='\t')
Helper to append the SAM string representation of all the tags to the specified string.
const char * getMateReferenceNameOrEqual()
Get the mate/next fragment's reference sequence name (RNEXT), returning "=" if it is the same as the reference name, unless they are both "*", in which case "*" is returned.
method failed due to an I/O issue.
FAIL_ORDER: method failed because it was called out of order, like trying to read a file without opening it for read or trying to read a record before the header.
bool setQuality(const char *quality)
Sets the quality (QUAL) to the specified SAM formatted quality string.
bool set1BasedMatePosition(int32_t matePosition)
Set the mate/next fragment's leftmost position (PNEXT) using the specified 1-based (SAM format) value.