libStatGen Software  1
ReferenceSequenceTest.cpp
1 /*
2  * Copyright (C) 2011 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <getopt.h>
19 #include "Generic.h"
20 #include <stdio.h>
21 #include "ReferenceSequence.h"
22 #include "UnitTest.h"
23 
24 #include <assert.h>
25 #include <sstream>
26 
27 
29 {
30 public:
31  ReferenceSequenceTest(const char *title) : UnitTest(title) {;}
32  void test1();
33  void test2();
34  void test3();
35  void humanGenomeTest1();
36 
37  void test() {
38  test1();
39  test2();
40  test3();
41  // This test is very slow:
42  // humanGenomeTest1();
43  }
44 };
45 
46 void ReferenceSequenceTest::test1(void)
47 {
48  std::string sequence("ACTGACTGACTGACTGACTGACTGACTGACTGACTGACTG");
49  std::string word;
50 
51  word="ACTG";
52  check(m_failures, ++m_testNum, "Test wordMatch with std::string", true,
53  Sequence::wordMatch(sequence, 4, word));
54 
55  std::stringstream output;
56 
57  Sequence::printNearbyWords(output, sequence, 8, word, 4);
58 
59  std::string expect("\
60 word 'ACTG' found -4 away from position 8.\n\
61 word 'ACTG' found 0 away from position 8.\n\
62 ");
63 
64  check(m_failures, ++m_testNum, "Test printNearbyWords with std::string", expect, output.str());
65 
66 
67  Sequence::getString(sequence, 4, 4, word);
68 
69  check(m_failures, ++m_testNum, "Test getString with std::string", "ACTG", word);
70 
71  Sequence::getHighLightedString(sequence, 0, 12, word, 4, 8);
72  check(m_failures, ++m_testNum, "Test getHighLightedStribng with std::string", "ACTGactgACTG",word);
73 
74 #if 0
75  // busted test - don't know why
76  output.clear();
77  output.str(std::string());
78 // Sequence::printBaseContext(std::cout, sequence, 8, 4);
79  Sequence::printBaseContext(output, sequence, 8, 4);
80  expect="\
81 index: 8\n\
82 ACTGACTGA\n\
83  ^\n\
84 ";
85  check(m_failures, ++m_testNum, "Test printBaseContext with std::string", expect, output.str());
86 #endif
87  std::string result;
88  std::string read("ACTGZZZZACTG");
89  expect = " ^^^^ ";
90  Sequence::getMismatchHatString(sequence, 4, result, read);
91  check(m_failures, ++m_testNum, "Test getMismatchHatString with std::string", expect, result);
92 
93 
94  read="ACTG";
95  std::string quality("");
96  size_t location = Sequence::simpleLocalAligner(sequence, 0, read, quality, 12);
97  check(m_failures, ++m_testNum, "Test simpleLocalAligner with std::string", (size_t) 0, location);
98 
99  read="ACNG";
100  int misMatches = Sequence::getMismatchCount(sequence, 0, read);
101  check(m_failures, ++m_testNum, "Test getMismatchCount with std::string", 1, misMatches);
102 
103  read="ACNG";
104  quality="$$$$";
105  int sumQ = Sequence::getSumQ(sequence, 0, read, quality);
106  check(m_failures, ++m_testNum, "Test getSumQ with std::string", 3, sumQ);
107 }
108 
109 void ReferenceSequenceTest::test2(void)
110 {
111  PackedSequenceData sequence;
112  std::string word;
113 
114  sequence.push_back('A');
115  sequence.push_back('C');
116  sequence.push_back('T');
117  sequence.push_back('G');
118 
119  sequence.push_back('A');
120  sequence.push_back('C');
121  sequence.push_back('T');
122  sequence.push_back('G');
123 
124  sequence.push_back('A');
125  sequence.push_back('C');
126  sequence.push_back('T');
127  sequence.push_back('G');
128 
129  sequence.push_back('A');
130  sequence.push_back('C');
131  sequence.push_back('T');
132  sequence.push_back('G');
133 
134  Sequence::getString(sequence, 4, 4, word);
135 
136  check(m_failures, ++m_testNum, "Test getString with PackedSequenceData", "ACTG", word);
137 
138  std::cout << "test2 sequence utilization is " << sequence.getUtilization() * 100 << "% - expect around 6.25%" << std::endl;
139 
140 }
141 
142 void ReferenceSequenceTest::test3(void)
143 {
144  std::vector<PackedSequenceData> chromosomeSequence;
145  std::vector<std::string> chromosomeNames;
146 
147  bool result = loadFastaFile("../phiX.fa", chromosomeSequence, chromosomeNames);
148 
149  if(result) {
150  std::cout << "../phiX.fa not found - skipping these tests." << std::endl;
151  return;
152  }
153 
154  std::cout << "phiX reference utilization is " << chromosomeSequence[0].getUtilization() * 100 << "% - expect around 96.8%" << std::endl;
155 
156 
157 
158  check(m_failures, ++m_testNum, "Test loadFastaFile with PackedSequenceData", (size_t) 1, chromosomeNames.size());
159  check(m_failures, ++m_testNum, "Test loadFastaFile with PackedSequenceData", (size_t) 1, chromosomeSequence.size());
160  check(m_failures, ++m_testNum, "Test loadFastaFile with PackedSequenceData", "1", chromosomeNames[0]);
161 
162  std::string word;
163 
164  Sequence::getString(chromosomeSequence[0], 60, 10, word);
165 
166  check(m_failures, ++m_testNum, "Test loadFastaFile with PackedSequenceData", "AAATTATCTT", word);
167 
168 }
169 
170 void ReferenceSequenceTest::humanGenomeTest1(void)
171 {
172  std::vector<PackedSequenceData> chromosomeSequence;
173  std::vector<std::string> chromosomeNames;
174 
175 #define HUMAN_GENOME "/data/local/ref/karma.ref/human.g1k.v37.fa"
176  bool result = loadFastaFile(HUMAN_GENOME, chromosomeSequence, chromosomeNames);
177 
178  if(result) {
179  std::cout << HUMAN_GENOME << " not found - skipping these tests." << std::endl;
180  return;
181  }
182 
183 }
184 
185 int main(int argc, char **argv)
186 {
187  ReferenceSequenceTest test("ReferenceSequenceTest");
188 
189  test.test();
190 
191  std::cout << test;
192 
193  exit(test.getFailureCount());
194 }