Skip to content

Commit

Permalink
Minor cleanups
Browse files: browse the repository at this point in the history
  • Loading branch information
hadley committed Apr 7, 2015
1 parent 052b05e commit 0862aed
Show file tree
Hide file tree
Showing 6 changed files with 41 additions and 49 deletions.
19 changes: 10 additions & 9 deletions src/CollectorCharacter.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,28 +8,29 @@ class CollectorCharacter : public Collector {
cetype_t encoding_;

public:
CollectorCharacter(): Collector(CharacterVector()), encoding_(CE_NATIVE) {
CollectorCharacter(): Collector(Rcpp::CharacterVector()), encoding_(CE_NATIVE) {
}

void setValue(int i, const Token& t) {
SET_STRING_ELT(column_, i, parse(t));
}

SEXP parse(const Token& t) {
Rcpp::RObject charsxp;
switch(t.type()) {
case TOKEN_STRING: {
boost::container::string buffer;
SourceIterators string = t.getString(&buffer);
return Rf_mkCharLenCE(string.first, string.second - string.first, encoding_);
charsxp = Rf_mkCharLenCE(string.first, string.second - string.first, encoding_);
break;
};
case TOKEN_MISSING:
return NA_STRING;
charsxp = NA_STRING;
break;
case TOKEN_EMPTY:
return Rf_mkChar("");
charsxp = Rf_mkChar("");
break;
case TOKEN_EOF:
Rcpp::stop("Invalid token");
}
return NA_STRING;

SET_STRING_ELT(column_, i, charsxp);
}

};
Expand Down
40 changes: 15 additions & 25 deletions src/CollectorDouble.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,55 +11,45 @@ namespace qi = boost::spirit::qi;
#include "Collector.h"

class CollectorDouble : public Collector {
double* data_;
boost::container::string buffer_;

public:
CollectorDouble(): Collector(Rcpp::NumericVector()) {
}

virtual void resize(int n) {
Collector::resize(n);
data_ = REAL(column_);
}

void setValue(int i, const Token& t) {
data_[i] = parse(t);
}

double parse(const Token& t) {
switch(t.type()) {
case TOKEN_STRING: {
boost::container::string buffer;
SourceIterators string = t.getString(&buffer);
SourceIterators str = t.getString(&buffer_);

bool ok = qi::parse(str.first, str.second, qi::double_, REAL(column_)[i]);
if (!ok) {
REAL(column_)[i] = NA_REAL;
warn(t.row(), t.col(), "a double", str);
}

std::pair<bool,double> parsed = parse(string.first, string.second);
if (!parsed.first)
warn(t.row(), t.col(), "a double", string);
if (str.first != str.second)
warn(t.row(), t.col(), "no trailing characters", str);

return parsed.second;
return;
}
case TOKEN_MISSING:
case TOKEN_EMPTY:
return NA_REAL;
REAL(column_)[i] = NA_REAL;
break;
case TOKEN_EOF:
Rcpp::stop("Invalid token");
}

return 0;
}

static bool canParse(const std::string& x) {
return CollectorDouble::parse(x.begin(), x.end()).first;
}

template <class Iter>
static std::pair<bool,double> parse(Iter begin, Iter end) {
double res = 0;

bool ok = qi::parse(begin, end, qi::double_, res) && begin == end;
return std::make_pair(ok, ok ? res : NA_REAL);
std::string::const_iterator begin = x.begin(), end = x.end();
return qi::parse(begin, end, qi::double_, res) && begin == end;
}

};

#endif
4 changes: 2 additions & 2 deletions src/CollectorFactor.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ class CollectorFactor : public Collector {
Rcpp::CharacterVector levels_;
std::map<std::string,int> levelset_;
bool ordered_;
boost::container::string buffer_;

public:
CollectorFactor(Rcpp::CharacterVector levels, bool ordered):
Expand All @@ -29,8 +30,7 @@ class CollectorFactor : public Collector {
int parse(const Token& t) {
switch(t.type()) {
case TOKEN_STRING: {
boost::container::string buffer;
SourceIterators string = t.getString(&buffer);
SourceIterators string = t.getString(&buffer_);

std::string std_string(string.first, string.second);
std::map<std::string,int>::iterator it = levelset_.find(std_string);
Expand Down
21 changes: 9 additions & 12 deletions src/CollectorNumeric.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,41 +13,38 @@ class CollectorNumeric : public Collector {
}

void setValue(int i, const Token& t) {
REAL(column_)[i] = parse(t);
}

double parse(const Token& t) {
switch(t.type()) {
case TOKEN_STRING: {
boost::container::string buffer;
SourceIterators string = t.getString(&buffer);

std::pair<bool,double> parsed = parse(string.first, string.second);
if (!parsed.first)
if (!parse(string.first, string.second, &REAL(column_)[i])) {
warn(t.row(), t.col(), "a number", string);
return parsed.second;
REAL(column_)[i] = NA_REAL;
}
break;
}
case TOKEN_MISSING:
case TOKEN_EMPTY:
return NA_REAL;
REAL(column_)[i] = NA_REAL;
break;
case TOKEN_EOF:
Rcpp::stop("Invalid token");
}

return 0;
}

private:

template <class Iter>
static std::pair<bool,double> parse(Iter begin, Iter end) {
static bool parse(Iter begin, Iter end, double* pEnd) {
std::string clean;
for (Iter cur = begin; cur != end; ++cur) {
if (*cur == '-' || *cur == '.' || (*cur >= '0' && *cur <= '9'))
clean.push_back(*cur);
}

return CollectorDouble::parse(clean.begin(), clean.end());
std::string::const_iterator cbegin = clean.begin(), cend = clean.end();
return qi::parse(cbegin, cend, qi::double_, *pEnd) && cbegin == cend;
}

};
Expand Down
2 changes: 1 addition & 1 deletion src/DateTime.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class DateTime {
DateTime(int year, int mon, int day, int hour = 0, int min = 0, int sec = 0,
double psec = 0, const std::string& tz = ""):
year_(year), mon_(mon), day_(day), hour_(hour), min_(min), sec_(sec),
psec_(psec), offset_(0), tz_(tz) {
offset_(0), psec_(psec), tz_(tz) {
}

// Used to add time zone offsets which can only be easily applied once
Expand Down
4 changes: 4 additions & 0 deletions src/TokenizerLog.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,10 @@ class TokenizerLog : public Tokenizer {
case LOG_STRING:
return fieldToken(token_begin + 1, end_, row, col);

case LOG_ESCAPE:
warn(row, col, "closing escape at end of file");
return fieldToken(token_begin + 1, end_, row, col);

case LOG_DATE:
warn(row, col, "closing ] at end of file");
return fieldToken(token_begin + 1, end_, row, col);
Expand Down

0 comments on commit 0862aed

Please sign in to comment.