Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
ChrisSandison committed Jun 19, 2018
2 parents d06fe86 + 411a494 commit 3e2bbda
Show file tree
Hide file tree
Showing 10 changed files with 424 additions and 175 deletions.
17 changes: 17 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,19 @@
*.gem

/.bundle/
/.yardoc
/Gemfile.lock
/_yardoc/
/doc/
/pkg/
/spec/reports/
/tmp/
/vendor/
*.bundle
*.so
*.o
*.a
dockercfg.env

mkmf.log
.idea
8 changes: 8 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
FROM brendan6/ruby:2.2.4
MAINTAINER Chris Sandison <chris@thinkdataworks.com>

ADD . $APP_HOME

RUN bundle install

CMD bundle exec rspec
7 changes: 7 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
source 'https://rubygems.org'

gemspec

group :development, :test do
gem 'pry-byebug'
end
67 changes: 67 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
PATH
remote: .
specs:
csv_sniffer (0.2.1)

GEM
remote: https://rubygems.org/
specs:
activesupport (5.2.0)
concurrent-ruby (~> 1.0, >= 1.0.2)
i18n (>= 0.7, < 2)
minitest (~> 5.1)
tzinfo (~> 1.1)
byebug (10.0.2)
coderay (1.1.2)
concurrent-ruby (1.0.5)
diff-lcs (1.3)
factory_girl (4.9.0)
activesupport (>= 3.0.0)
i18n (1.0.1)
concurrent-ruby (~> 1.0)
method_source (0.9.0)
minitest (5.11.3)
pry (0.11.3)
coderay (~> 1.1.0)
method_source (~> 0.9.0)
pry-byebug (3.6.0)
byebug (~> 10.0)
pry (~> 0.10)
rack (2.0.5)
rack-test (1.0.0)
rack (>= 1.0, < 3)
rake (10.5.0)
rspec (3.7.0)
rspec-core (~> 3.7.0)
rspec-expectations (~> 3.7.0)
rspec-mocks (~> 3.7.0)
rspec-core (3.7.1)
rspec-support (~> 3.7.0)
rspec-expectations (3.7.0)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.7.0)
rspec-mocks (3.7.0)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.7.0)
rspec-support (3.7.1)
thread_safe (0.3.6)
tzinfo (1.2.5)
thread_safe (~> 0.1)

PLATFORMS
ruby

DEPENDENCIES
bundler (~> 1.7)
csv_sniffer!
factory_girl
pry-byebug
rack
rack-test
rake (~> 10.0)
rspec
rspec-core
rspec-mocks

BUNDLED WITH
1.16.0
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,14 @@ is_quote_enclosed = CsvSniffer.is_quote_enclosed?("/path/to/some_file.csv") #=>
has_header = CsvSniffer.has_header?("/path/to/some_file.csv") #=> true
```

See [`test_csv_sniffer.rb`](test/test_csv_sniffer.rb) for more examples.
See [`csv_sniffer_spec.rb`](spec/csv_sniffer_spec.rb) for more examples.


## Tests

```
$ rake test
$ docker build -t csv_sniffer_container .
$ docker run -t csv_sniffer_container rspec
```


Expand Down
34 changes: 21 additions & 13 deletions csv_sniffer.gemspec
Original file line number Diff line number Diff line change
@@ -1,15 +1,23 @@
Gem::Specification.new do |s|
s.name = 'csv_sniffer'
s.version = '0.1.2'
s.date = '2015-12-28'
s.summary = "CSV library for heuristic detection of CSV properties"
s.description = "CSV Sniffer is a set of functions that allow a user detect the delimiter character in use, whether the values in the CSV file are quote enclosed, whether the file contains a header, and more. The library is intended to detect information to be used as configuration inputs for CSV parsers."
s.authors = ["Tim Ojo"]
s.email = 'ojo.tim@gmail.com'
s.homepage = 'https://github.com/tim-ojo/csv_sniffer'
s.license = 'MIT'
Gem::Specification.new do |spec|
spec.name = 'csv_sniffer'
spec.version = '0.2.0'
spec.date = '2018-06-15'
spec.summary = "CSV library for heuristic detection of CSV properties"
spec.description = "CSV Sniffer is a set of functions that allow a user detect the delimiter character in use, whether the values in the CSV file are quote enclosed, whether the file contains a header, and more. The library is intended to detect information to be used as configuration inputs for CSV parsers."
spec.authors = ["Chris Sandison"]
spec.email = 'chris@thinkdataworks.com'
spec.homepage = 'https://github.com/thinkdataworks/csv_sniffer'
spec.license = 'MIT'

s.files = `git ls-files`.split($/)
s.test_files = s.files.grep(/^test/)
s.add_development_dependency 'test-unit', '~> 0'
spec.files = `git ls-files`.split($/)
spec.test_files = spec.files.grep(/.*_spec\.rb/)

spec.add_development_dependency "bundler", "~> 1.7"
spec.add_development_dependency "rake", "~> 10.0"
spec.add_development_dependency "rspec"
spec.add_development_dependency "rspec-core"
spec.add_development_dependency "rspec-mocks"
spec.add_development_dependency "factory_girl"
spec.add_development_dependency "rack"
spec.add_development_dependency "rack-test"
end
16 changes: 10 additions & 6 deletions lib/csv_sniffer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
# This class contains functions to heuristically decipher certain information from a CSV file
class CsvSniffer

DEFAULT_LINES_TO_READ = 10_000

# Reads the first line of the csv and returns the endline characters used
#
# Example:
Expand All @@ -11,12 +13,13 @@ class CsvSniffer
#
# Arguments:
# filepath: (String)
# lines: (int) number of lines to read, default 10,000

def self.detect_endline(filepath)
def self.detect_endline(filepath, lines: DEFAULT_LINES_TO_READ)
begin
file = File.open(filepath, binmode: 'rt', encoding: 'bom|utf-8:utf-8')
# Limit how much readline may read so that large files with \r line endings are not read in their entirety
file.readline(10_000)[/[\r\n]+/]
file.readline(DEFAULT_LINES_TO_READ)[/[\r\n]+/]
rescue EOFError
$/
end
Expand All @@ -30,9 +33,10 @@ def self.detect_endline(filepath)
#
# Arguments:
# filepath: (String)
# lines: (int) number of lines to read, default 10,000

def self.lines(filepath, &block)
File.foreach(filepath, detect_endline(filepath), binmode: 'rt', encoding: 'bom|utf-8:utf-8', &block)
def self.lines(filepath, lines: DEFAULT_LINES_TO_READ, &block)
File.foreach(filepath, detect_endline(filepath, lines: lines), binmode: 'rt', encoding: 'bom|utf-8:utf-8', &block)
end

# Reads the first line of the csv. Returns nil if no first line exists
Expand Down Expand Up @@ -67,9 +71,9 @@ def self.first_line(filepath, cleaned = true)
# Arguments:
# filepath: (String)

def self.rows(filepath, &block)
def self.rows(filepath, lines: DEFAULT_LINES_TO_READ, &block)
delim = detect_delimiter(filepath)
endline = detect_endline(filepath)
endline = detect_endline(filepath, lines: lines)
CSV.foreach(filepath, row_sep: endline, col_sep: delim, encoding: 'bom|utf-8:utf-8', &block)
end

Expand Down
Loading

0 comments on commit 3e2bbda

Please sign in to comment.