Qore CsvUtil Module Reference  1.4
CsvUtil.qm.dox.h
1 // -*- mode: c++; indent-tabs-mode: nil -*-
2 // @file CsvUtil.qm Qore user module for working with CSV files
3 
4 /* CsvUtil.qm Copyright 2012 - 2014 Qore Technologies, sro
5 
6  Permission is hereby granted, free of charge, to any person obtaining a
7  copy of this software and associated documentation files (the "Software"),
8  to deal in the Software without restriction, including without limitation
9  the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  and/or sell copies of the Software, and to permit persons to whom the
11  Software is furnished to do so, subject to the following conditions:
12 
13  The above copyright notice and this permission notice shall be included in
14  all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  DEALINGS IN THE SOFTWARE.
23 */
24 
25 // minimum required Qore version
26 
27 
28 /* see release notes below for version history
29 */
30 
137 // private class used to iterate a list and skip elements without any value
138 class ListValueIterator : public ListIterator {
139 
140 public:
// creates the iterator from an optional list (the "*" prefix marks the argument as optional)
141  constructor(*list l);
142 
143 
// NOTE(review): presumably advances to the next element that has a value, skipping
// empty ones as described in the class comment - confirm against the implementation
144  bool next();
145 
146 };
147 
// Helper base class; CsvAbstractIterator and AbstractCsvWriter both inherit from it privately.
148 class CsvHelper {
149 
150 public:
151  private :
// hash used as a lookup set of type names: every entry maps to True, and each
// type name appears both plain and with a leading "*"
// NOTE(review): presumably the accepted values of a field's "type" attribute - confirm
153  const Types = (
154  "int": True,
155  "*int": True,
156  "float": True,
157  "*float": True,
158  "number": True,
159  "*number": True,
160  "string": True,
161  "*string": True,
162  "date": True,
163  "*date": True,
164  );
165 
// list of attribute names
// NOTE(review): presumably the keys accepted in a single field description - confirm
167  const FieldAttrs = ("type", "format", "timezone", "code");
168 
169 public:
170 
171  // sets field description list
172  private setFields();
173 
174 
// NOTE(review): presumably validates the given value for the given key (possibly
// against Types) - confirm against the implementation
175  checkType(string key, string value);
176 
177 }; // class CsvHelper
178 
180 namespace CsvUtil {
// Unix end of line character sequence (for new OS X too)
182  const EOL_UNIX = "\n";
// MS DOS/Windows end of line character sequence
184  const EOL_WIN = "\r\n";
// Old (pre-OSX) Macintosh end of line character sequence
186  const EOL_MACINTOSH = "\r";
187 
188  // helper list of end of line values
189  const EOLS = (EOL_UNIX, EOL_WIN, EOL_MACINTOSH, );
190 
192 
// Abstract base class that allows abstract CSV data to be iterated.
// Subclasses supply the line source through lineNumberImpl(), getLineValueImpl()
// and nextLineImpl().
349 class CsvAbstractIterator : public Qore::AbstractIterator, private CsvHelper {
350 
351 public:
352  private :
// valid options for the object (a hash for quick lookups of valid keys)
354  const Options = (
355  "encoding": True,
356  "separator": True,
357  "quote": True,
358  "eol": True,
359  "ignore-empty": True,
360  "ignore-whitespace": True,
361  "header-lines": True,
362  "header-names": True,
363  "headers": True,
364  "verify-columns": True,
365  "fields": True,
366  "timezone": True,
367  "tolwr": True,
368  );
369 
370  // field separator
371  string separator = ",";
372 
373  // field content delimiter
374  string quote = "\"";
375 
376  // number of header lines
377  softint headerLines = 0;
378 
379  // flag to use string names from the first header row if possible
380  bool headerNames = False;
381 
382  // True if empty lines should be ignored
383  bool ignoreEmptyLines = True;
384 
385  // Flag to trim the field content (trim leading and trailing whitespace) from unquoted fields
386  bool ignoreWhitespace = True;
387 
388  // flag to convert all header names to lower case
389  bool tolwr = False;
390 
391  // headers / column names for lines iterated
392  *softlist headers;
393 
394  // hash of field information (types, formats, and possible code), hash key = column name or number (starting with 0)
395  *hash fields;
396 
397  // list of field descriptions (from fields, ordered when headers are set)
398  *list fdesc;
399 
400  // the @ref Qore::TimeZone to use when parsing dates (default: current time zone)
401  *TimeZone tz;
402 
403  // verify the column count for every row; if a row does not match, then throw a \c CSVFILEITERATOR-DATA-ERROR exception
404  bool checkElementCounts = False;
405 
406  // column count for verifying column counts
407  int cc;
408 
409  // current record count for the index() method
410  int rc = 0;
411 
412 public:
413 
415 
// creates the CsvAbstractIterator with an option hash
419  constructor(*hash opts);
420 
421 
// Returns the name of the input data.
423  private *string getDataName();
424 
425 
// Returns the current line number.
427  private abstract int lineNumberImpl();
428 
// Returns the current line.
430  private abstract string getLineValueImpl();
431 
// Moves the current line / record position to the next line / record; returns False if there are no more
433  private abstract bool nextLineImpl();
434 
436 
// Moves the current line / record position to the next line / record; returns False if there are no more
441  bool next();
442 
443 
445 
// returns the given column value for the current row
452  any memberGate(string name);
453 
454 
456 
// returns the current record as a hash
465  hash getValue();
466 
467 
469 
// returns the current record as a hash
478  hash getRecord();
479 
480 
// NOTE(review): the tooltip index of this listing mentions "list getRecordList()"
// ("returns the current record as a list"), but no such declaration is visible here;
// presumably it was dropped from the listing around this point - confirm
482 
492 
493 
495 
// returns the current separator string
502  string getSeparator();
503 
504 
506 
// returns the current quote string
513  string getQuote();
514 
515 
517 
// returns the current column headers or NOTHING if no headers have been detected or saved yet
523  *list getHeaders();
524 
525 
527 
// returns the row index being iterated, which does not necessarily correspond to the line number
538  int index();
539 
540 
542 
// returns the current iterator line number in the file (the first line is line 1)
// NOTE(review): the index description continues "or 0 if not pointing ..." but is truncated
557  int lineNumber();
558 
559 
// NOTE(review): presumably converts the raw field value val according to the field
// description hash fh - confirm against the implementation
560  private any handleType(hash fh, *string val);
561 
562 
// parses a line in the file and returns a processed list of the fields
564  private list parseLine();
565 
566  };
567 
569 
575 
// Body of the CsvFileIterator class, which allows CSV files to be iterated on a record basis.
// NOTE(review): the class declaration line itself (indexed at CsvUtil.qm.dox.h:574) is not
// visible in this listing.
576 public:
578 
// creates the CsvFileIterator with the path of the file to read and optionally an option hash
583  constructor(string path, *hash opts);
584 
585 
// Returns the name of the input data.
587  private *string getDataName();
588 
589 
// Returns the current line number.
591  private int lineNumberImpl();
592 
593 
// Returns the current line trimmed of the EOL character(s)
595  private string getLineValueImpl();
596 
597 
// Moves the current line / record position to the next line / record; returns False if there are no more
599  private bool nextLineImpl();
600 
601  }; // CsvFileIterator class
602 
604 
610 
// Body of the CsvDataIterator class, which allows arbitrary CSV string data to be iterated
// on a record basis.
// NOTE(review): the class declaration line itself (indexed at CsvUtil.qm.dox.h:609) is not
// visible in this listing.
611 public:
612  private :
// input data
614  string data;
// EOL marker
616  *string eol;
// current line
618  *string line;
// current byte pos
620  int pos = 0;
// current line number
622  int lineno = 0;
// NOTE(review): presumably the flag reported by valid() - confirm against the implementation
624  bool valid = False;
625 
626 public:
627 
629 
// creates the CsvDataIterator with the input data and optionally an option hash
634  constructor(string data, *hash opts);
635 
636 
638 
// returns True if the iterator is currently pointing at a valid element, False if not
646  bool valid();
647 
648 
// Returns the current line number.
650  private int lineNumberImpl();
651 
652 
// Returns the current line trimmed of the EOL character(s)
654  private string getLineValueImpl();
655 
656 
// Moves the current line / record position to the next line / record; returns False if there are no more
658  private bool nextLineImpl();
659 
660  };
661 
663 
// The AbstractCsvWriter class provides a parent for all CSV writers.
// Concrete writers implement writeRawLine().
734 class AbstractCsvWriter : private CsvHelper {
735 
736 public:
737  private :
// valid options for the object (a hash for quick lookups of valid keys)
739  const Options = (
740  "encoding": True,
741  "separator": True,
742  "quote": True,
743  "eol": True,
744  "verify-columns": True,
745  "fields": True,
746  "headers": True,
747  "date-format": True,
748  "write-headers": True,
749  "optimal-quotes": True,
750  );
751 
752  // TODO/FIXME
// character encoding, initialized from get_default_encoding()
753  string encoding = get_default_encoding();
754 
755  // field separator
756  string separator = ",";
757 
758  // field content delimiter
759  string quote = "\"";
760 
761  // end of line sequence
762  string eol = EOL_UNIX;
763 
764  // default date->string format
765  string dateFormat = 'DD/MM/YYYY hh:mm:SS';
766 
// NOTE(review): the wording "lines iterated" matches the comment in CsvAbstractIterator;
// for a writer these are presumably the column names of the lines written - confirm
767  // headers / column names for lines iterated
768  *softlist headers;
769 
770  // hash of field information (types, formats, and possible code), hash key = column name or number (starting with 0)
771  *hash fields;
772 
773  // list of field descriptions (from fields, ordered when headers are set)
774  *list fdesc;
775 
// NOTE(review): the exception name below is identical to the one documented in
// CsvAbstractIterator; confirm which exception the writer actually throws
776  // verify the column count for every row; if a row does not match, then throw a \c CSVFILEITERATOR-DATA-ERROR exception
777  bool checkElementCounts = False;
778 
779  // the latest line number
780  int lineNo = 0;
781 
782  // base template for value format
783  string baseTemplate;
784 
// NOTE(review): receives its name from the constructor's errname argument; presumably
// the error / exception name used by this writer - confirm
785  string errname;
786 
787  // this flag determines if any stored headers are output
788  bool write_headers = True;
789 
790  // stores the optimal quotes option
791  bool optimal_quotes = True;
792 
793 public:
794 
796 
// creates the AbstractCsvWriter
802  constructor(string errname, *hash opts);
803 
804 
806 
// write a line with list of values. Data are checked against column rules.
811  writeLine(list values);
812 
813 
815 
// NOTE(review): hash variant of writeLine(); presumably the values are matched to
// columns by key - confirm against the implementation
820  writeLine(hash values);
821 
822 
824 
// stream iterator into the file.
831  write(AbstractIterator iterator);
832 
833 
// real write implementation, without any checking; implemented by the concrete writers
835  abstract private writeRawLine(list values);
836 
838 
// Prepare a string (line with EOF) with formatting and escaping.
// NOTE(review): "EOF" in the description above presumably means the EOL sequence - confirm
842  private string prepareRawLine(list values);
843 
844 
// NOTE(review): shares its name with the dateFormat member; presumably returns the
// date format to use for the column at index ix - confirm
846  private string dateFormat(int ix);
847 
848 
849  }; // AbstractCsvWriter class
850 
853 
// Body of the CsvFileWriter class, for easy and safe CSV file creation.
// NOTE(review): the class declaration line itself (indexed at CsvUtil.qm.dox.h:852) is not
// visible in this listing.
854 public:
855 
856  private :
857  // a file to write
858  File file;
859 
860 public:
861 
863 
// creates the CsvFileWriter from a file path and optionally an option hash
// NOTE(review): the index text says "path of the file to read"; since the member above is
// "a file to write", this is presumably the path of the file to write - confirm
871  constructor(string path, *hash opts);
872 
873 
// real write implementation, without any checking
874  private writeRawLine(list values);
875 
876 
877  }; // CsvFileWriter
878 
881 
// Body of the CsvStringWriter class, for in-memory string CSV creation.
// NOTE(review): the class declaration line itself (indexed at CsvUtil.qm.dox.h:880) is not
// visible in this listing.
882 public:
883 
884  private :
885  // a csv content
886  string content;
887 
888 public:
889 
891 
// creates the CsvStringWriter with content in the memory
896  constructor(*hash opts);
897 
898 
// real write implementation, without any checking
899  private writeRawLine(list values);
900 
901 
// Get the current in-memory content as a string.
903  string getContent();
904 
905 
906  }; // CsvStringWriter
907 
908 }; // CsvUtil namespace
909 
private writeRawLine(list values)
The real write implementation, without any checking.
string getQuote()
returns the current quote string
string get_default_encoding()
list getRecordList()
returns the current record as a list
constructor(string data, *hash opts)
creates the CsvDataIterator with the input data and optionally an option hash
constructor(*hash opts)
creates the CsvStringWriter with content in the memory
private string prepareRawLine(list values)
Prepares a string (a line terminated with the EOL sequence) with formatting and escaping.
private list parseLine()
parses a line in the file and returns a processed list of the fields
the CsvAbstractIterator class is an abstract base class that allows abstract CSV data to be iterated ...
Definition: CsvUtil.qm.dox.h:349
*list getHeaders()
returns the current column headers or NOTHING if no headers have been detected or saved yet ...
write(AbstractIterator iterator)
stream iterator into the file.
private *string getDataName()
Returns the name of the input data.
*string eol
EOL marker.
Definition: CsvUtil.qm.dox.h:616
int lineNumber()
returns the current iterator line number in the file (the first line is line 1) or 0 if not pointing ...
hash getValue()
returns the current record as a hash
constructor(string errname, *hash opts)
creates the AbstractCsvWriter
hash getRecord()
returns the current record as a hash
bool next()
Moves the current line / record position to the next line / record; returns False if there are no mor...
private bool nextLineImpl()
Moves the current line / record position to the next line / record; returns False if there are no mor...
const True
private writeRawLine(list values)
The real write implementation, without any checking.
const Options
valid options for the object (a hash for quick lookups of valid keys)
Definition: CsvUtil.qm.dox.h:354
int lineno
current line number
Definition: CsvUtil.qm.dox.h:622
int pos
current byte pos
Definition: CsvUtil.qm.dox.h:620
private int lineNumberImpl()
Returns the current line number.
constructor(string path, *hash opts)
creates the CsvFileIterator with the path of the file to read and optionally an option hash ...
const False
private *string getDataName()
Returns the name of the input data.
list list(...)
const Options
valid options for the object (a hash for quick lookups of valid keys)
Definition: CsvUtil.qm.dox.h:739
abstract private string getLineValueImpl()
Returns the current line.
the CsvFileIterator class allows CSV files to be iterated on a record basis
Definition: CsvUtil.qm.dox.h:574
string getContent()
Get the current in-memory content as a string.
constructor(*hash opts)
creates the CsvAbstractIterator with an option hash
the CsvStringWriter class for in-memory string CSV creation
Definition: CsvUtil.qm.dox.h:880
the AbstractCsvWriter class provides a parent for all CSV writers
Definition: CsvUtil.qm.dox.h:734
const EOL_MACINTOSH
Old (pre-OSX) Macintosh end of line character sequence.
Definition: CsvUtil.qm.dox.h:186
any memberGate(string name)
returns the given column value for the current row
private bool nextLineImpl()
Moves the current line / record position to the next line / record; returns False if there are no mor...
const EOL_UNIX
Unix end of line character sequence (for new OS X too)
Definition: CsvUtil.qm.dox.h:182
*string line
current line
Definition: CsvUtil.qm.dox.h:618
private int lineNumberImpl()
Returns the current line number; returns 0 if not pointing at any data.
writeLine(list values)
Writes a line from a list of values; the data are checked against the column rules.
int index()
returns the row index being iterated, which does not necessarily correspond to the line number when t...
string getSeparator()
returns the current separator string
private string getLineValueImpl()
Returns the current line trimmed of the EOL character(s)
const EOL_WIN
MS DOS/Windows end of line character sequence.
Definition: CsvUtil.qm.dox.h:184
the CsvDataIterator class allows arbitrary CSV string data to be iterated on a record basis ...
Definition: CsvUtil.qm.dox.h:609
bool valid()
returns True if the iterator is currently pointing at a valid element, False if not ...
constructor(string path, *hash opts)
creates the CsvFileWriter with the path of the file to write and optionally an option hash
abstract private bool nextLineImpl()
Moves the current line / record position to the next line / record; returns False if there are no mor...
abstract private int lineNumberImpl()
Returns the current line number.
string data
input data
Definition: CsvUtil.qm.dox.h:614
the CsvUtil namespace contains all the objects in the CsvUtil module
Definition: CsvUtil.qm.dox.h:180
the CsvFileWriter class for easy and safe CSV file creation
Definition: CsvUtil.qm.dox.h:852
hash hash(object obj)
private string getLineValueImpl()
Returns the current line trimmed of the EOL character(s)
abstract private writeRawLine(list values)
The real write implementation, without any checking.