libStatGen Software  1
SamQuerySeqWithRef Class Reference

Contains methods for converting between the query sequence and reference. More...

#include <SamQuerySeqWithRefHelper.h>

Static Public Member Functions

static void seqWithEquals (const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
 Gets the sequence with '=' in any position where the sequence matches the reference. More...
 
static void seqWithoutEquals (const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
 Gets the sequence converting '=' to the appropriate base using the reference. More...
 

Detailed Description

Contains methods for converting between the query sequence and reference.

Definition at line 101 of file SamQuerySeqWithRefHelper.h.

Member Function Documentation

◆ seqWithEquals()

void SamQuerySeqWithRef::seqWithEquals ( const char *  currentSeq,
int32_t  seq0BasedPos,
Cigar cigar,
const char *  referenceName,
const GenomeSequence refSequence,
std::string &  updatedSeq 
)
static

Gets the sequence with '=' in any position where the sequence matches the reference.

NOTE: 'N' in both the sequence and the reference is not considered a match.

Parameters
currentSeqsequence that should be converted
seq0BasedPos0 based start position of currentSeq on the reference.
cigarcigar string for currentSeq (used for determining how the sequence aligns to the reference)
referenceNamereference name associated with this sequence
refSequencereference sequence object
updatedSeqreturn parameter that this method sets to the current sequence, replacing any matches to the reference with '='.

Definition at line 243 of file SamQuerySeqWithRefHelper.cpp.

References BaseUtilities::areEqual(), GenomeSequence::getGenomePosition(), Cigar::getRefOffset(), Cigar::INDEX_NA, and BaseUtilities::isAmbiguous().

Referenced by SamRecord::getSequence().

249 {
250  updatedSeq = currentSeq;
251 
252  int32_t seqLength = updatedSeq.length();
253  int32_t queryIndex = 0;
254 
255  uint32_t startOfReadOnRefIndex =
256  refSequence.getGenomePosition(referenceName);
257 
258  if(startOfReadOnRefIndex == INVALID_GENOME_INDEX)
259  {
260  // This reference name was not found in the reference file, so just
261  // return.
262  return;
263  }
264  startOfReadOnRefIndex += seq0BasedPos;
265 
266  // Loop until the entire sequence has been updated.
267  while(queryIndex < seqLength)
268  {
269  // Still more bases, look for matches.
270 
271  // Get the reference offset for this read position.
272  int32_t refOffset = cigar.getRefOffset(queryIndex);
273  if(refOffset != Cigar::INDEX_NA)
274  {
275  // Both the reference and the read have a base, so get the bases.
276  char readBase = currentSeq[queryIndex];
277  char refBase = refSequence[startOfReadOnRefIndex + refOffset];
278 
279  // If neither base is unknown and they are the same, count it
280  // as a match.
281  if(!BaseUtilities::isAmbiguous(readBase) &&
282  !BaseUtilities::isAmbiguous(refBase) &&
283  (BaseUtilities::areEqual(readBase, refBase)))
284  {
285  // Match.
286  updatedSeq[queryIndex] = '=';
287  }
288  }
289  // Increment the query index to the next position.
290  ++queryIndex;
291  continue;
292  }
293 }
static bool areEqual(char base1, char base2)
Returns whether or not two bases are equal (case insensitive), if one of the bases is &#39;=&#39;...
static const int32_t INDEX_NA
Value associated with an index that is not applicable/does not exist, used for converting between que...
Definition: Cigar.h:492
int32_t getRefOffset(int32_t queryIndex)
Return the reference offset associated with the specified query index or INDEX_NA based on this cigar...
Definition: Cigar.cpp:187
static bool isAmbiguous(char base)
Returns whether or not the specified bases is an indicator for ambiguity.
genomeIndex_t getGenomePosition(const char *chromosomeName, unsigned int chromosomeIndex) const
given a chromosome name and position, return the genome position

◆ seqWithoutEquals()

void SamQuerySeqWithRef::seqWithoutEquals ( const char *  currentSeq,
int32_t  seq0BasedPos,
Cigar cigar,
const char *  referenceName,
const GenomeSequence refSequence,
std::string &  updatedSeq 
)
static

Gets the sequence converting '=' to the appropriate base using the reference.

Parameters
currentSeqsequence that should be converted
seq0BasedPos0 based start position of currentSeq on the reference.
cigarcigar string for currentSeq (used for determining how the sequence aligns to the reference)
referenceNamereference name associated with this sequence
refSequencereference sequence object
updatedSeqreturn parameter that this method sets to the current sequence, replacing any '=' with the base from the reference.

Definition at line 296 of file SamQuerySeqWithRefHelper.cpp.

References BaseUtilities::areEqual(), GenomeSequence::getGenomePosition(), Cigar::getRefOffset(), and Cigar::INDEX_NA.

Referenced by SamRecord::getSequence().

302 {
303  updatedSeq = currentSeq;
304 
305  int32_t seqLength = updatedSeq.length();
306  int32_t queryIndex = 0;
307 
308  uint32_t startOfReadOnRefIndex =
309  refSequence.getGenomePosition(referenceName);
310 
311  if(startOfReadOnRefIndex == INVALID_GENOME_INDEX)
312  {
313  // This reference name was not found in the reference file, so just
314  // return.
315  return;
316  }
317  startOfReadOnRefIndex += seq0BasedPos;
318 
319  // Loop until the entire sequence has been updated.
320  while(queryIndex < seqLength)
321  {
322  // Still more bases, look for matches.
323 
324  // Get the reference offset for this read position.
325  int32_t refOffset = cigar.getRefOffset(queryIndex);
326  if(refOffset != Cigar::INDEX_NA)
327  {
328  // Both the reference and the read have a base, so get the bases.
329  char readBase = currentSeq[queryIndex];
330  char refBase = refSequence[startOfReadOnRefIndex + refOffset];
331 
332  // If the bases are equal, set the sequence to the reference
333  // base. (Skips the check for ambiguous to catch a case where
334  // ambiguous had been converted to a '=', and if both are ambiguous,
335  // it will still be set to ambiguous.)
336  if(BaseUtilities::areEqual(readBase, refBase))
337  {
338  // Match.
339  updatedSeq[queryIndex] = refBase;
340  }
341  }
342 
343  // Increment the query index to the next position.
344  ++queryIndex;
345  continue;
346  }
347 }
static bool areEqual(char base1, char base2)
Returns whether or not two bases are equal (case insensitive), if one of the bases is &#39;=&#39;...
static const int32_t INDEX_NA
Value associated with an index that is not applicable/does not exist, used for converting between que...
Definition: Cigar.h:492
int32_t getRefOffset(int32_t queryIndex)
Return the reference offset associated with the specified query index or INDEX_NA based on this cigar...
Definition: Cigar.cpp:187
genomeIndex_t getGenomePosition(const char *chromosomeName, unsigned int chromosomeIndex) const
given a chromosome name and position, return the genome position

The documentation for this class was generated from the following files: