Unfit  3.1.1
Data fitting and optimization software
DataFileReader.hpp
1 // Unfit: Data fitting and optimization software
2 //
3 // Copyright (C) 2012- Dr Martin Buist & Dr Alberto Corrias
4 // Contacts: martin.buist _at_ nus.edu.sg; alberto _at_ nus.edu.sg
5 //
6 // See the 'Contributors' file for a list of those who have contributed
7 // to this work.
8 //
9 // This program is free software: you can redistribute it and/or modify
10 // it under the terms of the GNU General Public License as published by
11 // the Free Software Foundation, either version 3 of the License, or
12 // (at your option) any later version.
13 //
14 // This program is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 // GNU General Public License for more details.
18 //
19 // You should have received a copy of the GNU General Public License
20 // along with this program. If not, see <http://www.gnu.org/licenses/>.
21 //
22 #ifndef UNFIT_INCLUDE_DATAFILEREADER_HPP_
23 #define UNFIT_INCLUDE_DATAFILEREADER_HPP_
24 
25 #include <fstream>
26 #include <limits>
27 #include <sstream>
28 #include <string>
29 #include <vector>
30 
31 namespace Unfit
32 {
// Class template DataFileReader<T>: reads delimited numeric data from a text
// file into rows of type T, and hands the data back either one column at a
// time or as a single flattened vector.
// NOTE(review): this listing omits several original lines - the class-head,
// any access specifiers and the constructor declaration are not visible
// here. Confirm against the upstream header before relying on this text.
55 template <typename T>
57 {
66 
// Read file_name line by line, discarding the first `skip` lines, and store
// every line that yields at least one number as a row. Any previously read
// data is discarded first.
// Returns 0 on success, 1 if the file could not be opened, 2 if a value is
// larger than T can hold, 3 if a value is smaller than T can hold, and 4 if
// no numeric data was found.
105  unsigned ReadFile(std::string file_name, unsigned skip = 0);
106 
// Append the characters in new_delimiters to the set of characters used to
// split lines (the constructor starts this set as tab, space and comma).
118  void AddDelimiters(const std::string &new_delimiters);
119 
// Copy column number column_number (zero-based) of the stored data into
// `column`, which is cleared first.
// Returns 0 if every row has that column, 1 if no data is stored, 2 if no
// row has that column, and 3 if only some rows have it. In the last case
// `column` holds the entries that exist when return_incomplete_columns is
// true, and is left empty otherwise.
145  unsigned RetrieveColumn(std::size_t column_number, std::vector<T> &column,
146  bool return_incomplete_columns = false);
147 
// Return all stored rows concatenated, in row order, as one vector.
161  std::vector<T> RetrieveDataRowWiseAsVector();
162 
// Split `line` at any character found in `delimiters` and append the
// resulting non-empty tokens to `words` (`words` is not cleared first).
182  void SplitLine(const std::string &line, const std::string &delimiters,
183  std::vector<std::string> &words);
184 
// The stored data: one inner vector per input line that contained numbers.
186  std::vector<std::vector<T>> data;
// The characters currently treated as delimiters when splitting a line.
188  std::string default_delimiters;
189 };
190 
191 
192 template <class T>
194  data(),
195  default_delimiters("\t ,")
196 {}
197 
198 
199 template <class T>
200  unsigned DataFileReader<T>::ReadFile(std::string file_name, unsigned skip)
201 {
202  // Just in case this is not the first time, make sure we start from scratch
203  data.clear();
204  // Attempt to open the file, then check it is open
205  std::ifstream data_file(file_name.c_str());
206  if (!data_file.is_open()) return 1;
207  // Read in the data, one line at a time
208  while (data_file.good()) {
209  // Get a line
210  std::string line;
211  getline(data_file, line);
212  // Split the line based on the delimiters
213  std::vector<std::string> words;
214  SplitLine(line, default_delimiters, words);
215  // Skip the first "skip" numeric lines if requested
216  if (skip > 0) {
217  --skip;
218  continue;
219  }
220  // Convert to type T then store in a vector
221  std::vector<T> numbers;
222  for (auto entry = 0u; entry < words.size(); ++entry) {
223  std::stringstream ss;
224  ss << words[entry].c_str();
225  long double value;
226  // Check we have a number before including it
227  if (ss >> value) {
228  // Check the maximum size is okay
229  if (value > std::numeric_limits<T>::max()) {
230  data.clear();
231  return 2;
232  }
233  // Check the maximum negative size is okay
234  // Use std::numeric_limits::lowest when available
235  if (std::numeric_limits<T>::is_integer) {
236  if (value < std::numeric_limits<T>::min()) {
237  data.clear();
238  return 3;
239  }
240  }
241  else if (value <
242  -static_cast<long double>(std::numeric_limits<T>::max())) {
243  data.clear();
244  return 3;
245  }
246  numbers.push_back(static_cast<T>(value));
247  }
248  }
249  // If the line contained numbers, put these into a row of the data vector
250  if (!numbers.empty()) data.push_back( numbers );
251  }
252  if (data.empty()) return 4;
253  return 0;
254 }
255 
256 
257 template <class T>
258  void DataFileReader<T>::AddDelimiters(const std::string &new_delimiters)
259 {
260  default_delimiters += new_delimiters;
261 }
262 
263 
264 template <class T>
265  unsigned DataFileReader<T>::RetrieveColumn(std::size_t column_number,
266  std::vector<T> &column, bool return_incomplete_columns)
267 {
268  column.clear();
269  if (data.empty()) return 1;
270  std::size_t min_row_length = std::numeric_limits<std::size_t>::max();
271  std::size_t max_row_length = 0u;
272  for (auto d : data) {
273  if (d.size() < min_row_length) min_row_length = d.size();
274  if (d.size() > max_row_length) max_row_length = d.size();
275  }
276  if (column_number >= max_row_length) return 2; // Column does not exist
277  if (column_number >= min_row_length) { // Column is incomplete
278  if (return_incomplete_columns) {
279  for (auto i = 0u; i < data.size(); ++i) {
280  if (data[i].size() > column_number) {
281  column.push_back(data[i][column_number]);
282  }
283  }
284  }
285  return 3;
286  }
287  else {
288  column.resize(data.size());
289  for (auto i = 0u; i < data.size(); ++i) {
290  column[i] = data[i][column_number];
291  }
292  return 0;
293  }
294 }
295 
296 
297 template <class T>
299 {
300  // This does a range insert for every row of data in order
301  std::vector<T> result;
302  for (auto d : data) {
303  result.insert(end(result), begin(d), end(d));
304  }
305  return result;
306 }
307 
308 
309 template <class T>
310  void DataFileReader<T>::SplitLine(const std::string &line,
311  const std::string &delimiters, std::vector<std::string> &words)
312 {
313  // Get the first word from the line
314  auto word_start = line.find_first_not_of(delimiters, 0);
315  auto word_end = line.find_first_of(delimiters, word_start);
316 
317  // While we have not hit the end of the string
318  while (word_start != std::string::npos || word_end != std::string::npos) {
319  // Add the current word to the list (vector)
320  words.push_back(line.substr(word_start, (word_end-word_start)));
321  // Get the next word in the line
322  word_start = line.find_first_not_of(delimiters, word_end);
323  word_end = line.find_first_of(delimiters, word_start);
324  }
325 }
326 
327 } // namespace Unfit
328 
329 #endif
unsigned RetrieveColumn(std::size_t column_number, std::vector< T > &column, bool return_incomplete_columns=false)
Definition: DataFileReader.hpp:265
std::vector< T > RetrieveDataRowWiseAsVector()
Definition: DataFileReader.hpp:298
void AddDelimiters(const std::string &new_delimiters)
Definition: DataFileReader.hpp:258
std::vector< std::vector< T > > data
Definition: DataFileReader.hpp:186
Definition: Bounds.hpp:27
void SplitLine(const std::string &line, const std::string &delimiters, std::vector< std::string > &words)
Definition: DataFileReader.hpp:310
std::string default_delimiters
Definition: DataFileReader.hpp:188
DataFileReader()
Definition: DataFileReader.hpp:193
unsigned ReadFile(std::string file_name, unsigned skip=0)
Definition: DataFileReader.hpp:200
Reads in numeric data from a file.
Definition: DataFileReader.hpp:56