From 0862aedd77cde6f456c0179a9f1abb79fd3a537b Mon Sep 17 00:00:00 2001 From: hadley Date: Tue, 7 Apr 2015 14:14:34 -0500 Subject: [PATCH] Minor clean ups --- src/CollectorCharacter.h | 19 ++++++++++--------- src/CollectorDouble.h | 40 +++++++++++++++------------------------- src/CollectorFactor.h | 4 ++-- src/CollectorNumeric.h | 21 +++++++++------------ src/DateTime.h | 2 +- src/TokenizerLog.h | 4 ++++ 6 files changed, 41 insertions(+), 49 deletions(-) diff --git a/src/CollectorCharacter.h b/src/CollectorCharacter.h index ab252daa..53a164b3 100644 --- a/src/CollectorCharacter.h +++ b/src/CollectorCharacter.h @@ -8,28 +8,29 @@ class CollectorCharacter : public Collector { cetype_t encoding_; public: - CollectorCharacter(): Collector(CharacterVector()), encoding_(CE_NATIVE) { + CollectorCharacter(): Collector(Rcpp::CharacterVector()), encoding_(CE_NATIVE) { } void setValue(int i, const Token& t) { - SET_STRING_ELT(column_, i, parse(t)); - } - - SEXP parse(const Token& t) { + Rcpp::RObject charsxp; switch(t.type()) { case TOKEN_STRING: { boost::container::string buffer; SourceIterators string = t.getString(&buffer); - return Rf_mkCharLenCE(string.first, string.second - string.first, encoding_); + charsxp = Rf_mkCharLenCE(string.first, string.second - string.first, encoding_); + break; }; case TOKEN_MISSING: - return NA_STRING; + charsxp = NA_STRING; + break; case TOKEN_EMPTY: - return Rf_mkChar(""); + charsxp = Rf_mkChar(""); + break; case TOKEN_EOF: Rcpp::stop("Invalid token"); } - return NA_STRING; + + SET_STRING_ELT(column_, i, charsxp); } }; diff --git a/src/CollectorDouble.h b/src/CollectorDouble.h index 55fd0227..7531281c 100644 --- a/src/CollectorDouble.h +++ b/src/CollectorDouble.h @@ -11,7 +11,7 @@ namespace qi = boost::spirit::qi; #include "Collector.h" class CollectorDouble : public Collector { - double* data_; + boost::container::string buffer_; public: CollectorDouble(): Collector(Rcpp::NumericVector()) { @@ -19,47 +19,37 @@ class CollectorDouble : public Collector { virtual void resize(int n) { Collector::resize(n); - data_ = REAL(column_); } void setValue(int i, const Token& t) { - data_[i] = parse(t); - } - - double parse(const Token& t) { switch(t.type()) { case TOKEN_STRING: { - boost::container::string buffer; - SourceIterators string = t.getString(&buffer); + SourceIterators str = t.getString(&buffer_); + + bool ok = qi::parse(str.first, str.second, qi::double_, REAL(column_)[i]); + if (!ok) { + REAL(column_)[i] = NA_REAL; + warn(t.row(), t.col(), "a double", str); + } - std::pair parsed = parse(string.first, string.second); - if (!parsed.first) - warn(t.row(), t.col(), "a double", string); + if (str.first != str.second) + warn(t.row(), t.col(), "no trailing characters", str); - return parsed.second; + return; } case TOKEN_MISSING: case TOKEN_EMPTY: - return NA_REAL; + REAL(column_)[i] = NA_REAL; + break; case TOKEN_EOF: Rcpp::stop("Invalid token"); } - - return 0; } - static bool canParse(const std::string& x) { - return CollectorDouble::parse(x.begin(), x.end()).first; - } - - template - static std::pair parse(Iter begin, Iter end) { double res = 0; - - bool ok = qi::parse(begin, end, qi::double_, res) && begin == end; - return std::make_pair(ok, ok ? res : NA_REAL); + std::string::const_iterator begin = x.begin(), end = x.end(); + return qi::parse(begin, end, qi::double_, res) && begin == end; } - }; #endif diff --git a/src/CollectorFactor.h b/src/CollectorFactor.h index 2cc0a83b..2c31548c 100644 --- a/src/CollectorFactor.h +++ b/src/CollectorFactor.h @@ -8,6 +8,7 @@ class CollectorFactor : public Collector { Rcpp::CharacterVector levels_; std::map levelset_; bool ordered_; + boost::container::string buffer_; public: CollectorFactor(Rcpp::CharacterVector levels, bool ordered): @@ -29,8 +30,7 @@ class CollectorFactor : public Collector { int parse(const Token& t) { switch(t.type()) { case TOKEN_STRING: { - boost::container::string buffer; - SourceIterators string = t.getString(&buffer); + SourceIterators string = t.getString(&buffer_); std::string std_string(string.first, string.second); std::map::iterator it = levelset_.find(std_string); diff --git a/src/CollectorNumeric.h b/src/CollectorNumeric.h index c4bfa93d..258f3ac0 100644 --- a/src/CollectorNumeric.h +++ b/src/CollectorNumeric.h @@ -13,41 +13,38 @@ class CollectorNumeric : public Collector { } void setValue(int i, const Token& t) { - REAL(column_)[i] = parse(t); - } - - double parse(const Token& t) { switch(t.type()) { case TOKEN_STRING: { boost::container::string buffer; SourceIterators string = t.getString(&buffer); - std::pair parsed = parse(string.first, string.second); - if (!parsed.first) + if (!parse(string.first, string.second, &REAL(column_)[i])) { warn(t.row(), t.col(), "a number", string); - return parsed.second; + REAL(column_)[i] = NA_REAL; + } + break; } case TOKEN_MISSING: case TOKEN_EMPTY: - return NA_REAL; + REAL(column_)[i] = NA_REAL; + break; case TOKEN_EOF: Rcpp::stop("Invalid token"); } - - return 0; } private: template - static std::pair parse(Iter begin, Iter end) { + static bool parse(Iter begin, Iter end, double* pEnd) { std::string clean; for (Iter cur = begin; cur != end; ++cur) { if (*cur == '-' || *cur == '.' || (*cur >= '0' && *cur <= '9')) clean.push_back(*cur); } - return CollectorDouble::parse(clean.begin(), clean.end()); + std::string::const_iterator cbegin = clean.begin(), cend = clean.end(); + return qi::parse(cbegin, cend, qi::double_, *pEnd) && cbegin == cend; } }; diff --git a/src/DateTime.h b/src/DateTime.h index 78de2645..58d6a0a0 100644 --- a/src/DateTime.h +++ b/src/DateTime.h @@ -57,7 +57,7 @@ class DateTime { DateTime(int year, int mon, int day, int hour = 0, int min = 0, int sec = 0, double psec = 0, const std::string& tz = ""): year_(year), mon_(mon), day_(day), hour_(hour), min_(min), sec_(sec), - psec_(psec), offset_(0), tz_(tz) { + offset_(0), psec_(psec), tz_(tz) { } // Used to add time zone offsets which can only be easily applied once diff --git a/src/TokenizerLog.h b/src/TokenizerLog.h index 8118a9fa..d31c74a9 100644 --- a/src/TokenizerLog.h +++ b/src/TokenizerLog.h @@ -138,6 +138,10 @@ class TokenizerLog : public Tokenizer { case LOG_STRING: return fieldToken(token_begin + 1, end_, row, col); + case LOG_ESCAPE: + warn(row, col, "closing escape at end of file"); + return fieldToken(token_begin + 1, end_, row, col); + case LOG_DATE: warn(row, col, "closing ] at end of file"); return fieldToken(token_begin + 1, end_, row, col);