libStatGen Software  1
NonOverlapRegions.cpp
1 /*
2  * Copyright (C) 2011 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 //////////////////////////////////////////////////////////////////////////
19 
20 #include "NonOverlapRegions.h"
21 #include <iostream>
22 
23 NonOverlapRegions::NonOverlapRegions()
24  : myRegions()
25 {
26 }
27 
28 
29 NonOverlapRegions::~NonOverlapRegions()
30 {
31  myRegions.clear();
32 }
33 
34 
35 void NonOverlapRegions::add(const char* chrom, int32_t start, int32_t end)
36 {
37  // Add the region.
38  myRegions[chrom].add(start, end);
39 }
40 
41 
42 bool NonOverlapRegions::inRegion(const char* chrom, int32_t pos)
43 {
44  // Return whether or not the position was found within a region.
45  // Note, this will create a NonOverlapRegion for this chrom if it
46  // did not already exist, but it won't have any regions.
47  return(myRegions[chrom].inRegion(pos));
48 }
49 
50 
51 NonOverlapRegionPos::NonOverlapRegionPos()
52  : myRegions()
53 {
54  myRegionIter = myRegions.begin();
55  myTmpIter = myRegions.begin();
56 }
57 
58 
59 NonOverlapRegionPos::NonOverlapRegionPos(const NonOverlapRegionPos& reg)
60  : myRegions()
61 {
62  myRegionIter = myRegions.begin();
63  myTmpIter = myRegions.begin();
64 }
65 
66 
67 NonOverlapRegionPos::~NonOverlapRegionPos()
68 {
69  myRegionIter = myRegions.begin();
70  myTmpIter = myRegions.begin();
71  myRegions.clear();
72 }
73 
74 
75 void NonOverlapRegionPos::add(int32_t start, int32_t end)
76 {
77  // Check to see if the start/end are valid in relation.
78  if(start >= end)
79  {
80  std::cerr << "NonOverlapRegionPos::add: Invalid Range, "
81  << "start must be < end, but " << start << " >= " << end
82  << std::endl;
83  return;
84  }
85 
86  bool added = false;
87  // Locate the correct position in the region list for this start/end.
88  if(inRegion(start))
89  {
90  // Check if the region end needs to be updated.
91  if(end > myRegionIter->second)
92  {
93  myRegionIter->second = end;
94  }
95  added = true;
96  }
97  else
98  {
99  // Check to see if we are at the end.
100  if(myRegionIter != myRegions.end())
101  {
102  // Not at the end.
103  // Check to see if the region overlaps the current region.
104  if(end >= myRegionIter->first)
105  {
106  // Overlaps, so update the start.
107  myRegionIter->first = start;
108  // Check if the end needs to be updated.
109  if(myRegionIter->second < end)
110  {
111  myRegionIter->second = end;
112  }
113  added = true;
114  }
115  }
116  }
117 
118  // If we already added the record, check to see if the end of the
119  // new region overlaps any additional regions (know that myRegionIter is
120  // not at the end.
121  if(added)
122  {
123  // Check to see if any other regions were overlapped by this record.
124  myTmpIter = myRegionIter;
125  ++myTmpIter;
126  while(myTmpIter != myRegions.end())
127  {
128  // If the region starts before the end of this one, consume it.
129  if(myTmpIter->first <= end)
130  {
131  if(myTmpIter->second > myRegionIter->second)
132  {
133  // Update this region with the new end.
134  myRegionIter->second = myTmpIter->second;
135  }
136 
137  myTmpIter = myRegions.erase(myTmpIter);
138  }
139  else
140  {
141  // This region is not overlapped by the new region, so stop.
142  break;
143  }
144  }
145  }
146  else
147  {
148  // Add the region.
149  myRegionIter = myRegions.insert(myRegionIter,
150  std::make_pair(start, end));
151  }
152 }
153 
154 
156 {
157  // Return whether or not the position was found within a region.
158  // If it is found within the region, myRegionIter will point to the region
159  // otherwise myRegionIter will point to the region after the position
160  // or to the end if the position is after the last region.
161 
162  // Determine if it needs to search to the left
163  // a) it is at the end
164  // b) the region starts after the position.
165  if(myRegionIter == myRegions.end())
166  {
167  // If the iterator is at the end, search to the left.
168  return(findLeft(pos));
169  }
170  else if(pos < myRegionIter->first)
171  {
172  // Not at the end, so search left if the position is less
173  // than this region's start.
174  return(findLeft(pos));
175  }
176  else
177  {
178  return(findRight(pos));
179  }
180 }
181 
182 
183 bool NonOverlapRegionPos::findRight(int32_t pos)
184 {
185  // Keep looping until the end or until the position is found.
186  while(myRegionIter != myRegions.end())
187  {
188  // Check to see if we have passed the position.
189  if(pos < myRegionIter->first)
190  {
191  // stop here, position comes before this region,
192  // so myRegionIter is pointing to just after it,
193  // but was not found in the region.
194  return(false);
195  }
196  else if(pos < myRegionIter->second)
197  {
198  // the position is in the region, so return true.
199  return(true);
200  }
201  else
202  {
203  // The position is after this region, so increment.
204  ++myRegionIter;
205  }
206  }
207  // exited because we are at the end of the regions and the position was
208  // not found.
209  return(false);
210 }
211 
212 
213 bool NonOverlapRegionPos::findLeft(int32_t pos)
214 {
215  if(myRegionIter == myRegions.end())
216  {
217  if(myRegionIter == myRegions.begin())
218  {
219  // There is nothing in this list, so just return.
220  return(false);
221  }
222  // Move 1 lower than the end.
223  --myRegionIter;
224  }
225 
226  while(myRegionIter->first > pos)
227  {
228  // This region is before our position, so move to the previous region
229  // unless this is the first region in the list.
230  if(myRegionIter == myRegions.begin())
231  {
232  // Did not find the position and the iter is at the element
233  // just after the position.
234  return(false);
235  }
236  // Not yet to the beginning of the list, so decrement.
237  --myRegionIter;
238  }
239 
240  // At this point, the regions iter points to a region that starts
241  // before the position.
242  // Determine if the position is in the region by checking if it is
243  // less than the end of the region.
244  if(pos < myRegionIter->second)
245  {
246  // in the region.
247  return(true);
248  }
249 
250  // This region ends before this position. The iterator needs to point
251  // to the region after the position, so increment it.
252  ++myRegionIter;
253  return(false);
254 }
bool inRegion(int32_t pos)
Return whether or not the position was found within a region.
void add(const char *chrom, int32_t start, int32_t end)
End position is not included in the region.
bool inRegion(const char *chrom, int32_t pos)
Return whether or not the position was found within a region.
void add(int32_t start, int32_t end)
End position is not included in the region.
This class contains a list of non-overlapping regions, just positions, not including chromosomes (see...