Qore CsvUtil Module Reference  1.1
 All Classes Namespaces Functions Variables Groups Pages
CsvUtil.qm.dox.h
1 // -*- mode: c++; indent-tabs-mode: nil -*-
2 // @file CsvUtil.qm Qore user module for working with CSV files
3 
4 /* CsvUtil.qm Copyright 2012 - 2013 Qore Technologies, sro
5 
6  Permission is hereby granted, free of charge, to any person obtaining a
7  copy of this software and associated documentation files (the "Software"),
8  to deal in the Software without restriction, including without limitation
9  the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  and/or sell copies of the Software, and to permit persons to whom the
11  Software is furnished to do so, subject to the following conditions:
12 
13  The above copyright notice and this permission notice shall be included in
14  all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  DEALINGS IN THE SOFTWARE.
23 */
24 
25 // this module requires Qore 0.8.8 or better
26 
27 // turn on perl-style boolean evaluation
28 
29 
30 /* Version History
31  * 2013-09-29 v1.2: David Nichols <david@qore.org>:
32  + if "headers" are not given in the CsvUtil::CsvFileIterator::constructor() but "fields" are, then set the headers from the field descriptions automatically
33 
34  * 2013-06-28 v1.2: Petr Vanek <petr.vanek@qoretechnologies.com>
35  + CsvFileWriter addon
36 
37  * 2013-05-15 v1.1: David Nichols <david@qore.org>:
38  + bug fixes to header and fields option processing
39  + fixed CsvUtil::CsvFileIterator::index() to return the line index
40  + added CsvUtil::CsvFileIterator::lineNumber() to return the current line number in the file
41 
42  * 2012-10-13 v1.0: David Nichols <david@qore.org>:
43  + initial version of module
44 */
45 
117 // private helper class: iterates a list and skips elements that have no value
118 class ListValueIterator : public ListIterator {
119 
120 public:
121  constructor(*list l); // takes the (optional) list to iterate
122 
123 
124  bool next(); // NOTE(review): presumably advances to the next element that has a value, per the class comment; the body is not shown in this listing
125 
126 };
127 
128 class CsvHelper { // helper base class; privately inherited by CsvFileIterator and CsvFileWriter
129 
130 public:
131 
132  private :
134  const Types = ( // NOTE(review): presumably the set of valid field type names, stored as a hash for key lookups -- confirm against checkType()
135  "int": True,
136  "float": True,
137  "number": True,
138  "string": True,
139  "date": True,
140  );
141 
143  const FieldAttrs = ("type", "format", "timezone", "code"); // NOTE(review): presumably the attribute keys accepted in a field description -- confirm
144 
145 
146 public:
147 
149 
150 private:
151  setFields(); // private; body not shown in this listing
152 public:
153 
154 
155  static checkType(string key, string value); // static; NOTE(review): presumably validates a field's type name ("value") for the given field ("key") -- confirm
156 
157 }; // class CsvHelper
158 
160 namespace CsvUtil {
161 
163  const EOL_UNIX = "\n"; // Unix end of line character sequence (for new OS X too)
165  const EOL_WIN = "\r\n"; // MS DOS/Windows end of line character sequence
167  const EOL_MACINTOSH = "\r"; // old (pre-OSX) Macintosh end of line character sequence
168 
169  // helper list of all end of line values defined above
170  const EOLS = (EOL_UNIX, EOL_WIN, EOL_MACINTOSH, );
171 
173 
306 class CsvFileIterator : public Qore::FileLineIterator, private CsvHelper { // allows CSV files to be iterated
307 
308 public:
309  private :
311  const Options = ( // valid options for the object (a hash for quick lookups of valid keys)
312  "encoding": True,
313  "separator": True,
314  "quote": True,
315  "eol": True,
316  "ignore-empty": True,
317  "ignore-whitespace": True,
318  "header-lines": True,
319  "header-names": True,
320  "headers": True,
321  "verify-columns": True,
322  "fields": True,
323  "timezone": True,
324  );
325 
326 public:
327 
328  private :
329  // field separator (default: comma)
330  string separator = ",";
331 
332  // field content delimiter (default: double quote)
333  string quote = "\"";
334 
335  // number of header lines
336  softint headerLines = 0;
337 
338  // flag to use string names from the first header row if possible
339  bool headerNames = False;
340 
341  // True if empty lines should be ignored
342  bool ignoreEmptyLines = True;
343 
344  // flag to trim leading and trailing whitespace from the content of unquoted fields
345  bool ignoreWhitespace = True;
346 
347  // headers / column names for lines iterated
348  *softlist headers;
349 
350  // hash of field information (types, formats, and possible code), hash key = column name or number (starting with 0)
351  *hash fields;
352 
353  // list of field descriptions (from fields, ordered when headers are set)
354  *list fdesc;
355 
356  // the @ref Qore::TimeZone to use when parsing dates (default: current time zone)
357  *TimeZone tz;
358 
359  // verify the column count for every row; if a row does not match, then throw a \c CSVFILEITERATOR-DATA-ERROR exception
360  bool checkElementCounts = False;
361 
362  // column count for verifying column counts
363  int cc;
364 
365  // current record count for the index() method
366  int rc = 0;
367 
368 public:
369 
371 
376  constructor(string path, *hash opts); // creates the CsvFileIterator with the path of the file to read and optionally an option hash
377 
378 
380 
393  bool next(); // moves the current line / record position to the next line / record; returns False when there are no more
394 
395 
397 
404  any memberGate(string name); // returns the given column value for the current row
405 
406 
408 
419  any getValue(); // returns the current record as a hash
420 
421 
423 
432  hash getRecord(); // returns the current record as a hash
433 
434 
436 
446 // NOTE(review): the documentation index lists "list getRecordList()" (returns the current record as a list), but no such declaration appears in this listing
447 
449 
456  string getSeparator(); // returns the current separator string
457 
458 
460 
467  string getQuote(); // returns the current quote string
468 
469 
471 
477  *list getHeaders(); // returns the current column headers or NOTHING if no headers have been detected or saved yet
478 
479 
481 
492  int index(); // returns the row index being iterated, which does not necessarily correspond to the line number
493 
494 
496 
511  int lineNumber(); // returns the current iterator line number in the file (the first line is line 1)
512 
513 
515 
516 private:
517  list parseLine(); // parses a line in the file and returns a processed list of the fields
518 public:
519 
520 
521  }; // CsvFileIterator class
522 
523 
525 
576 class CsvFileWriter : private CsvHelper { // class for easy and safe CSV file creation
577 
578 public:
579  private :
581  const Options = ( // valid options for the object (a hash for quick lookups of valid keys)
582  "encoding": True,
583  "separator": True,
584  "quote": True,
585  "eol": True,
586  "verify-columns": True,
587  "fields": True,
588  "headers": True,
589  "date-format": True,
590  );
591 
592 public:
593 
594  private :
595  // character encoding, initialized from get_default_encoding(); TODO/FIXME (marker kept from the original, which does not say what remains to be done)
596  string encoding = get_default_encoding();
597 
598  // field separator (default: comma)
599  string separator = ",";
600 
601  // field content delimiter (default: double quote)
602  string quote = "\"";
603 
604  // end of line sequence (default: EOL_UNIX)
605  string eol = EOL_UNIX;
606 
607  // default date->string format
608  string dateFormat = 'DD/MM/YYYY hh:mm:SS';
609 
610  // headers / column names for lines written
611  *softlist headers;
612 
613  // hash of field information (types, formats, and possible code), hash key = column name or number (starting with 0)
614  *hash fields;
615 
616  // list of field descriptions (from fields, ordered when headers are set)
617  *list fdesc;
618 
619  // verify the column count for every row; if a row does not match, then throw an exception (NOTE(review): the original comment named \c CSVFILEITERATOR-DATA-ERROR, apparently copied from CsvFileIterator -- confirm the code the writer actually throws)
620  bool checkElementCounts = False;
621 
622  // the file to write to
623  File file;
624 
625  // the latest line number
626  int lineNo = 0;
627 
628  // base template for value format
629  string baseTemplate;
630 
631 public:
632 
634 
640  constructor(string path, *hash opts); // creates the CsvFileWriter with the path of the file and an optional option hash
641 
642 
644 
649  writeLine(list values); // writes a line from a list of values; the data are checked against the column rules
650 
651 
653 
658  writeLine(hash values); // NOTE(review): presumably the hash-based counterpart of writeLine(list) -- confirm how keys map to columns
659 
660 
662 
669  write(AbstractIterator iterator); // streams the iterator into the file
670 
671 
672  // performs the actual write of a list of values to the file, without any checking
673 
674 private:
675  writeRawLine(list values);
676 public:
677 
678 
679  // returns the format used to convert a date to a string; priority: 1) the column definition, 2) the standard format
680 
681 private:
682  string dateFormat(int ix); // NOTE(review): shares its name with the dateFormat member above; ix is presumably the column index -- confirm
683 public:
684 
685 
686  }; // CsvFileWriter class
687 
688 }; // CsvUtil namespace
689 
string get_default_encoding()
writeLine(list values)
writes a line from a list of values; the data are checked against the column rules.
write(AbstractIterator iterator)
stream iterator into the file.
list getRecordList()
returns the current record as a list
const True
int lineNumber()
returns the current iterator line number in the file (the first line is line 1) or 0 if not pointing ...
constructor(string path, *hash opts)
creates the CsvFileIterator with the path of the file to read and optionally an option hash ...
const False
bool next()
Moves the current line / record position to the next line / record; returns False if there are no mor...
list list(...)
the CsvFileIterator class allows CSV files to be iterated
Definition: CsvUtil.qm.dox.h:306
any getValue()
returns the current record as a hash
string getQuote()
returns the current quote string
const Options
valid options for the object (a hash for quick lookups of valid keys)
Definition: CsvUtil.qm.dox.h:581
any memberGate(string name)
returns the given column value for the current row
list parseLine()
parses a line in the file and returns a processed list of the fields
const EOL_MACINTOSH
Old (pre-OSX) Macintosh end of line character sequence.
Definition: CsvUtil.qm.dox.h:167
const Options
valid options for the object (a hash for quick lookups of valid keys)
Definition: CsvUtil.qm.dox.h:311
const EOL_UNIX
Unix end of line character sequence (for new OS X too)
Definition: CsvUtil.qm.dox.h:163
const EOL_WIN
MS DOS/Windows end of line character sequence.
Definition: CsvUtil.qm.dox.h:165
*list getHeaders()
returns the current column headers or NOTHING if no headers have been detected or saved yet ...
constructor(string path, *hash opts)
creates the CsvFileWriter with the path of the file to write and an optional option hash
int index()
returns the row index being iterated, which does not necessarily correspond to the line number when t...
the CsvFileWriter class for easy and safe CSV file creation
Definition: CsvUtil.qm.dox.h:576
hash getRecord()
returns the current record as a hash
hash hash(object obj)
string getSeparator()
returns the current separator string