From 2d561fd4e1beefa1dd638de45d5e6f3c5bcbc3eb Mon Sep 17 00:00:00 2001 From: chris Date: Thu, 14 Jun 2018 12:37:16 -0400 Subject: [PATCH 1/3] feature/1: Allowing for more lines to be read Fixes #1 --- lib/csv_sniffer.rb | 16 ++++++++++------ test/test_csv_sniffer.rb | 8 ++++---- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/lib/csv_sniffer.rb b/lib/csv_sniffer.rb index 263a1a4..6c04d7f 100644 --- a/lib/csv_sniffer.rb +++ b/lib/csv_sniffer.rb @@ -3,6 +3,8 @@ # This class contains functions to heuristically decipher certain information from a CSV file class CsvSniffer + DEFAULT_LINES_TO_READ = 10_000 + # Reads the first line of the csv and returns the endline characters used # # Example: @@ -11,12 +13,13 @@ class CsvSniffer # # Arguments: # filepath: (String) + # lines: (int) number of lines to read, default 10,000 - def self.detect_endline(filepath) + def self.detect_endline(filepath, lines: DEFAULT_LINES_TO_READ) begin file = File.open(filepath, binmode: 'rt', encoding: 'bom|utf-8:utf-8') # Prevent large files with \r ending from reading the entire contents by limiting - file.readline(10_000)[/[\r\n]+/] + file.readline(DEFAULT_LINES_TO_READ)[/[\r\n]+/] rescue EOFError $/ end @@ -30,9 +33,10 @@ def self.detect_endline(filepath) # # Arguments: # filepath: (String) + # lines: (int) number of lines to read, default 10,000 - def self.lines(filepath, &block) - File.foreach(filepath, detect_endline(filepath), binmode: 'rt', encoding: 'bom|utf-8:utf-8', &block) + def self.lines(filepath, lines: DEFAULT_LINES_TO_READ, &block) + File.foreach(filepath, detect_endline(filepath, lines: lines), binmode: 'rt', encoding: 'bom|utf-8:utf-8', &block) end # Reads the first line of the csv. 
Returns nil if no first line exists @@ -67,9 +71,9 @@ def self.first_line(filepath, cleaned = true) # Arguments: # filepath: (String) - def self.rows(filepath, &block) + def self.rows(filepath, lines: DEFAULT_LINES_TO_READ, &block) delim = detect_delimiter(filepath) - endline = detect_endline(filepath) + endline = detect_endline(filepath, lines: lines) CSV.foreach(filepath, row_sep: endline, col_sep: delim, encoding: 'bom|utf-8:utf-8', &block) end diff --git a/test/test_csv_sniffer.rb b/test/test_csv_sniffer.rb index 44e6425..63e56fe 100644 --- a/test/test_csv_sniffer.rb +++ b/test/test_csv_sniffer.rb @@ -73,7 +73,7 @@ class CsvSnifferTest < Minitest::Test def test_file1 assert_equal ",", CsvSniffer.detect_delimiter(@@file1.path) assert_equal false, CsvSniffer.is_quote_enclosed?(@@file1.path) - assert_equal nil, CsvSniffer.get_quote_char(@@file1.path) + assert_nil CsvSniffer.get_quote_char(@@file1.path) assert_equal true, CsvSniffer.has_header?(@@file1.path) assert_equal "Name,Number", CsvSniffer.first_line(@@file1.path) assert_equal ["Name","Number"], CsvSniffer.first_row(@@file1.path) @@ -95,7 +95,7 @@ def test_file3 def test_file4 assert_equal "\t", CsvSniffer.detect_delimiter(@@file4.path) assert_equal false, CsvSniffer.is_quote_enclosed?(@@file4.path) - assert_equal nil, CsvSniffer.get_quote_char(@@file4.path) + assert_nil CsvSniffer.get_quote_char(@@file4.path) assert_equal false, CsvSniffer.has_header?(@@file4.path) end @@ -112,7 +112,7 @@ def test_file6 def test_file7 assert_equal false, CsvSniffer.has_header?(@@file7.path) - assert_equal nil, CsvSniffer.get_quote_char(@@file7.path) + assert_nil CsvSniffer.get_quote_char(@@file7.path) assert_equal ",", CsvSniffer.detect_delimiter(@@file7.path) end @@ -133,7 +133,7 @@ def test_file9 def test_file10 assert_equal ";", CsvSniffer.detect_delimiter(@@file10.path) assert_equal false, CsvSniffer.is_quote_enclosed?(@@file10.path) - assert_equal nil, CsvSniffer.get_quote_char(@@file10.path) + assert_nil 
CsvSniffer.get_quote_char(@@file10.path) assert_equal true, CsvSniffer.has_header?(@@file10.path) end From 3819174f846e1ece76e1c3cd2adfb6d622a0ab6c Mon Sep 17 00:00:00 2001 From: Chris Sandison Date: Mon, 18 Jun 2018 13:27:38 -0400 Subject: [PATCH 2/3] feature/4: Dockerizing and moving to rspec (#5) * feature/4: Starting refactor and changing tests to rspec * feature/4: Moving all tests over to rspec Fixes #4 --- .gitignore | 17 +++ Dockerfile | 8 ++ Gemfile | 7 ++ README.md | 5 +- csv_sniffer.gemspec | 34 +++--- spec/csv_sniffer_spec.rb | 244 +++++++++++++++++++++++++++++++++++++++ spec/spec_helper.rb | 11 ++ test/test_csv_sniffer.rb | 154 ------------------------ 8 files changed, 311 insertions(+), 169 deletions(-) create mode 100644 Dockerfile create mode 100644 Gemfile create mode 100644 spec/csv_sniffer_spec.rb create mode 100644 spec/spec_helper.rb delete mode 100644 test/test_csv_sniffer.rb diff --git a/.gitignore b/.gitignore index a3a6afd..77675fd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,19 @@ *.gem +/.bundle/ +/.yardoc +/Gemfile.lock +/_yardoc/ +/doc/ +/pkg/ +/spec/reports/ +/tmp/ +/vendor/ +*.bundle +*.so +*.o +*.a +dockercfg.env + +mkmf.log +.idea diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..fa78f8e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,8 @@ +FROM brendan6/ruby:2.2.4 +MAINTAINER Chris Sandison + +ADD . $APP_HOME + +RUN bundle install + +CMD bundle exec rspec diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..5c2c5d4 --- /dev/null +++ b/Gemfile @@ -0,0 +1,7 @@ +source 'https://rubygems.org' + +gemspec + +group :development, :test do + gem 'pry-byebug' +end diff --git a/README.md b/README.md index 18f4e06..7369b2f 100644 --- a/README.md +++ b/README.md @@ -32,13 +32,14 @@ is_quote_enclosed = CsvSniffer.is_quote_enclosed?("/path/to/some_file.csv") #=> has_header = CsvSniffer.has_header?("/path/to/some_file.csv") #=> true ``` -See [`test_csv_sniffer.rb`](test/test_csv_sniffer.rb) for more examples. 
+See [`csv_sniffer_spec.rb`](spec/csv_sniffer_spec.rb) for more examples. ## Tests ``` -$ rake test +$ docker build -t csv_sniffer_container . +$ docker run -t csv_sniffer_container rspec ``` diff --git a/csv_sniffer.gemspec b/csv_sniffer.gemspec index 00c4ffa..7cd2cf6 100644 --- a/csv_sniffer.gemspec +++ b/csv_sniffer.gemspec @@ -1,15 +1,23 @@ -Gem::Specification.new do |s| - s.name = 'csv_sniffer' - s.version = '0.1.2' - s.date = '2015-12-28' - s.summary = "CSV library for heuristic detection of CSV properties" - s.description = "CSV Sniffer is a set of functions that allow a user detect the delimiter character in use, whether the values in the CSV file are quote enclosed, whether the file contains a header, and more. The library is intended to detect information to be used as configuration inputs for CSV parsers." - s.authors = ["Tim Ojo"] - s.email = 'ojo.tim@gmail.com' - s.homepage = 'https://github.com/tim-ojo/csv_sniffer' - s.license = 'MIT' +Gem::Specification.new do |spec| + spec.name = 'csv_sniffer' + spec.version = '0.2.0' + spec.date = '2018-06-15' + spec.summary = "CSV library for heuristic detection of CSV properties" + spec.description = "CSV Sniffer is a set of functions that allow a user detect the delimiter character in use, whether the values in the CSV file are quote enclosed, whether the file contains a header, and more. The library is intended to detect information to be used as configuration inputs for CSV parsers." 
+ spec.authors = ["Chris Sandison"] + spec.email = 'chris@thinkdataworks.com' + spec.homepage = 'https://github.com/thinkdataworks/csv_sniffer' + spec.license = 'MIT' - s.files = `git ls-files`.split($/) - s.test_files = s.files.grep(/^test/) - s.add_development_dependency 'test-unit', '~> 0' + spec.files = `git ls-files`.split($/) + spec.test_files = spec.files.grep(/.*_spec\.rb/) + + spec.add_development_dependency "bundler", "~> 1.7" + spec.add_development_dependency "rake", "~> 10.0" + spec.add_development_dependency "rspec" + spec.add_development_dependency "rspec-core" + spec.add_development_dependency "rspec-mocks" + spec.add_development_dependency "factory_girl" + spec.add_development_dependency "rack" + spec.add_development_dependency "rack-test" end diff --git a/spec/csv_sniffer_spec.rb b/spec/csv_sniffer_spec.rb new file mode 100644 index 0000000..dd06be3 --- /dev/null +++ b/spec/csv_sniffer_spec.rb @@ -0,0 +1,244 @@ +require 'spec_helper' + +describe CsvSniffer do + def dc + described_class + end + + UTF_16_BOM = "\xFF\xFE".force_encoding('utf-16le') + + context 'csv file' do + before(:all) do + @csv_file = Tempfile.new('fil1e', binmode: 'wt+') + @csv_file.puts "Name,Number" + @csv_file.puts "John Doe,555-123-4567" + @csv_file.puts "Jane C. 
Doe,555-000-1234" + @csv_file.rewind + end + + after(:all) { @csv_file.close } + + it 'should detect the delimiter' do + expect(dc.detect_delimiter(@csv_file)).to eq(",") + end + + it 'should detect if quote enclosed' do + expect(dc.is_quote_enclosed?(@csv_file)).to be_falsey + end + + it 'should get the quote character' do + expect(dc.get_quote_char(@csv_file)).to be_nil + end + + it 'should check if it has a header' do + expect(dc.has_header?(@csv_file)).to be_truthy + end + + it 'should get the first line' do + expect(dc.first_line(@csv_file)).to eq("Name,Number") + end + + it 'should get the first row' do + expect(dc.first_row(@csv_file)).to eq(["Name", "Number"]) + end + end + + context 'psv file' do + before(:all) do + @psv_file = Tempfile.new('file2', binmode: 'wt+') + @psv_file.puts "'Name' |'Number'\t" + @psv_file.puts "'John Doe'|'555-123-4567'" + @psv_file.puts "'Jane C. Doe'|'555-000-1234'" + @psv_file.rewind + end + + after(:all) { @psv_file.close } + + it 'should detect the delimiter' do + expect(dc.detect_delimiter(@psv_file)).to eq("|") + end + + it 'should detect if quote enclosed' do + expect(dc.is_quote_enclosed?(@psv_file)).to be_truthy + end + + it 'should get the quote character' do + expect(dc.get_quote_char(@psv_file)).to eq("'") + end + + it 'should check if it has a header' do + expect(dc.has_header?(@psv_file)).to be_truthy + end + end + + context 'semi-colon separated file' do + before(:all) do + @ssv_file = Tempfile.new('file3', binmode: 'wt+') + @ssv_file.puts "John Doe;555-123-4567;Good\tdude" + @ssv_file.puts "Jane C. 
Doe;555-000-1234 ; Great gal" + @ssv_file.puts "John Smith;555-999-1234;Don't know about him" + @ssv_file.rewind + end + + after(:all) { @ssv_file.close } + + it 'should detect the delimiter' do + expect(dc.detect_delimiter(@ssv_file)).to eq(";") + end + + it 'should detect if quote enclosed' do + expect(dc.is_quote_enclosed?(@ssv_file)).to be_falsey + end + + it 'should detect if it has a header' do + expect(dc.has_header?(@ssv_file)).to be_falsey + end + end + + context 'tab separated file without header' do + before(:all) do + @tsv = Tempfile.new('file4', binmode: 'wt+') + @tsv.puts "Doe, John\t555-123-4567" + @tsv.puts "Jane C. Doe\t555-000-1234\t" + @tsv.rewind + end + + after(:all) { @tsv.close } + + it 'should detect the delimiter' do + expect(dc.detect_delimiter(@tsv)).to eq("\t") + end + + it 'should detect if quote enclosed' do + expect(dc.is_quote_enclosed?(@tsv)).to be_falsey + end + + it 'should detect the quote character' do + expect(dc.get_quote_char(@tsv)).to be_nil + end + + it 'should detect if it has a header' do + expect(dc.has_header?(@tsv)).to be_falsey + end + end + + context 'quoted psv without header' do + before(:all) do + @psv = Tempfile.new('file5', binmode: 'wt+') + @psv.puts '"Doe,,,,,, John"|"555-123-4567"' + @psv.puts %{"Jane C. Doe"|"555-000-1234\t"} + @psv.rewind + end + + after(:all) { @psv.close } + + it 'should detect delimiter' do + expect(dc.detect_delimiter(@psv)).to eq("|") + end + + it 'should detect if quote enclosed' do + expect(dc.is_quote_enclosed?(@psv)).to be_truthy + end + + it 'should get the quote character' do + expect(dc.get_quote_char(@psv)).to eq('"') + end + end + + context 'psv with commas in cell' do + before(:all) do + @psv = Tempfile.new('file6', binmode: 'wt+') + @psv.puts 'Name|Phone No.|Age' + @psv.puts 'Doe, John|555-123-4567|31' + @psv.puts 'Doe, Jane C. 
|555-000-1234|30' + @psv.rewind + end + + after(:all) { @psv.close } + + it 'should detect the delimiter' do + expect(dc.detect_delimiter(@psv)).to eq("|") + end + + it 'should detect if it has a header' do + expect(dc.has_header?(@psv)).to be_truthy + end + end + + context 'empty file' do + before(:all) do + @empty = Tempfile.new('file7', binmode: 'wt+') + @empty.rewind + end + + after(:all) { @empty.close } + + it 'should return the default delimiater' do + expect(dc.detect_delimiter(@empty)).to eq(",") + end + + it 'should detect no header' do + expect(dc.has_header?(@empty)).to be_falsey + end + + it 'should detect no quote character' do + expect(dc.get_quote_char(@empty)).to be_nil + end + end + + context 'psv file with quotes' do + before(:all) do + @psv = Tempfile.new('file8', binmode: 'wt+') + @psv.puts '"Name"|"Phone"|"Age"' + @psv.puts '"Doe,,,,,, John"|"555-123-4567"|"31"' + @psv.puts %{"Jane C. Doe"|"555-000-1234\t"|"30"} + @psv.rewind + end + + after(:all) { @psv.close } + + it 'should detect the delimiter' do + expect(dc.detect_delimiter(@psv)).to eq("|") + end + + it 'should detect if quote enclosed' do + expect(dc.is_quote_enclosed?(@psv)).to be_truthy + end + + it 'should get the quote character' do + expect(dc.get_quote_char(@psv)).to eq('"') + end + + it 'should determine if file contains a header' do + expect(dc.has_header?(@psv)).to be_truthy + end + end + + context 'with non-standard encoding' do + before(:all) do + @file = Tempfile.new('file10', binmode: 'wt+', encoding: 'utf-16le') + @file.puts UTF_16_BOM + 'Name;Phone;Age'.encode('utf-16le') + @file.puts '"Doe John";"555-123-4567";31' + @file.puts %{"Jane C. 
Doe";"555-000-1234\t";30'} + @file.rewind + end + + after(:all) { @file.close } + + it 'should detect delimiter' do + expect(dc.detect_delimiter(@file)).to eq(';') + end + + it 'should detect if quote enclosed' do + expect(dc.is_quote_enclosed?(@file)).to be_falsey + end + + it 'should detect no quote character' do + expect(dc.get_quote_char(@file)).to be_nil + end + + it 'should detect the header' do + expect(dc.has_header?(@file)).to be_truthy + end + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 100644 index 0000000..e060ace --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1,11 @@ +ENV['RACK_ENV'] = 'test' +require 'rspec' +require 'pry' + +require_relative '../lib/csv_sniffer' + +RSpec.configure do |config| + config.filter_run_excluding broken: true, integration: true + config.filter_run focus: true + config.run_all_when_everything_filtered = true +end diff --git a/test/test_csv_sniffer.rb b/test/test_csv_sniffer.rb deleted file mode 100644 index 63e56fe..0000000 --- a/test/test_csv_sniffer.rb +++ /dev/null @@ -1,154 +0,0 @@ -require 'minitest/autorun' -require 'tempfile' -require 'csv_sniffer' - -class CsvSnifferTest < Minitest::Test - UTF_16_BOM = "\xFF\xFE".force_encoding('utf-16le') - - @@file1 = Tempfile.new('file1', binmode: 'wt+') - @@file1.puts "Name,Number" - @@file1.puts "John Doe,555-123-4567" - @@file1.puts "Jane C. Doe,555-000-1234" - @@file1.rewind - - @@file2 = Tempfile.new('file2', binmode: 'wt+') - @@file2.puts "'Name' |'Number'\t" - @@file2.puts "'John Doe'|'555-123-4567'" - @@file2.puts "'Jane C. Doe'|'555-000-1234'" - @@file2.rewind - - @@file3 = Tempfile.new('file3', binmode: 'wt+') - @@file3.puts "John Doe;555-123-4567;Good\tdude" - @@file3.puts "Jane C. Doe;555-000-1234 ; Great gal" - @@file3.puts "John Smith;555-999-1234;Don't know about him" - @@file3.rewind - - @@file4 = Tempfile.new('file4', binmode: 'wt+') - @@file4.puts "Doe, John\t555-123-4567" - @@file4.puts "Jane C. 
Doe\t555-000-1234\t" - @@file4.rewind - - @@file5 = Tempfile.new('file5', binmode: 'wt+') - @@file5.puts '"Doe,,,,,, John"|"555-123-4567"' - @@file5.puts %{"Jane C. Doe"|"555-000-1234\t"} - @@file5.rewind - - @@file6 = Tempfile.new('file6', binmode: 'wt+') - @@file6.puts 'Name|Phone No.|Age' - @@file6.puts 'Doe, John|555-123-4567|31' - @@file6.puts 'Doe, Jane C. |555-000-1234|30' - @@file6.rewind - - @@file7 = Tempfile.new('file7', binmode: 'wt+') - @@file7.rewind - - @@file8 = Tempfile.new('file8', binmode: 'wt+') - @@file8.puts '"Name"|"Phone"|"Age"' - @@file8.puts '"Doe,,,,,, John"|"555-123-4567"|"31"' - @@file8.puts %{"Jane C. Doe"|"555-000-1234\t"|"30"} - @@file8.rewind - - @@file9 = Tempfile.new('file9', binmode: 'wt+', encoding: 'utf-16le') - @@file9.puts UTF_16_BOM + '"Name"|"Phone"|"Age"'.encode('utf-16le') - @@file9.puts '"Doe,,,,,, John"|"555-123-4567"|"31"' - @@file9.puts %{"Jane C. Doe"|"555-000-1234\t"|"30"} - @@file9.rewind - - @@file10 = Tempfile.new('file10', binmode: 'wt+', encoding: 'utf-16le') - @@file10.puts UTF_16_BOM + 'Name;Phone;Age'.encode('utf-16le') - @@file10.puts '"Doe John";"555-123-4567";31' - @@file10.puts %{"Jane C. Doe";"555-000-1234\t";30'} - @@file10.rewind - - @@file11 = Tempfile.new('file11', binmode: 'wt+') - @@file11.print "\"Name\",\"Number\"\rJohn ;;;;;;;; Doe,555-123-4567\r" - @@file11.flush - @@file11.rewind - - @@file12 = Tempfile.new('file4', binmode: 'wt+') - @@file12.puts %{"Doe, John"\t"555-123-4567"} - @@file12.puts %{"Jane C. 
Doe"\t"555-000-1234\t"} - @@file12.rewind - - def test_file1 - assert_equal ",", CsvSniffer.detect_delimiter(@@file1.path) - assert_equal false, CsvSniffer.is_quote_enclosed?(@@file1.path) - assert_nil CsvSniffer.get_quote_char(@@file1.path) - assert_equal true, CsvSniffer.has_header?(@@file1.path) - assert_equal "Name,Number", CsvSniffer.first_line(@@file1.path) - assert_equal ["Name","Number"], CsvSniffer.first_row(@@file1.path) - end - - def test_file2 - assert_equal "|", CsvSniffer.detect_delimiter(@@file2.path) - assert_equal true, CsvSniffer.is_quote_enclosed?(@@file2.path) - assert_equal "'", CsvSniffer.get_quote_char(@@file2.path) - assert_equal true, CsvSniffer.has_header?(@@file2.path) - end - - def test_file3 - assert_equal ";", CsvSniffer.detect_delimiter(@@file3.path) - assert_equal false, CsvSniffer.is_quote_enclosed?(@@file3.path) - assert_equal false, CsvSniffer.has_header?(@@file3.path) - end - - def test_file4 - assert_equal "\t", CsvSniffer.detect_delimiter(@@file4.path) - assert_equal false, CsvSniffer.is_quote_enclosed?(@@file4.path) - assert_nil CsvSniffer.get_quote_char(@@file4.path) - assert_equal false, CsvSniffer.has_header?(@@file4.path) - end - - def test_file5 - assert_equal "|", CsvSniffer.detect_delimiter(@@file5.path) - assert_equal true, CsvSniffer.is_quote_enclosed?(@@file5.path) - assert_equal '"', CsvSniffer.get_quote_char(@@file5.path) - end - - def test_file6 - assert_equal "|", CsvSniffer.detect_delimiter(@@file6.path) - assert_equal true, CsvSniffer.has_header?(@@file6.path) - end - - def test_file7 - assert_equal false, CsvSniffer.has_header?(@@file7.path) - assert_nil CsvSniffer.get_quote_char(@@file7.path) - assert_equal ",", CsvSniffer.detect_delimiter(@@file7.path) - end - - def test_file8 - assert_equal "|", CsvSniffer.detect_delimiter(@@file8.path) - assert_equal true, CsvSniffer.is_quote_enclosed?(@@file8.path) - assert_equal '"', CsvSniffer.get_quote_char(@@file8.path) - assert_equal true, 
CsvSniffer.has_header?(@@file8.path) - end - - def test_file9 - assert_equal "|", CsvSniffer.detect_delimiter(@@file9.path) - assert_equal true, CsvSniffer.is_quote_enclosed?(@@file9.path) - assert_equal '"', CsvSniffer.get_quote_char(@@file9.path) - assert_equal true, CsvSniffer.has_header?(@@file9.path) - end - - def test_file10 - assert_equal ";", CsvSniffer.detect_delimiter(@@file10.path) - assert_equal false, CsvSniffer.is_quote_enclosed?(@@file10.path) - assert_nil CsvSniffer.get_quote_char(@@file10.path) - assert_equal true, CsvSniffer.has_header?(@@file10.path) - end - - def test_file11 - assert_equal "\r", CsvSniffer.detect_endline(@@file11.path) - assert_equal ",", CsvSniffer.detect_delimiter(@@file11.path) - assert_equal true, CsvSniffer.is_quote_enclosed?(@@file11.path) - assert_equal '"', CsvSniffer.get_quote_char(@@file11.path) - assert_equal false, CsvSniffer.has_header?(@@file11.path) - end - - def test_file12 - assert_equal "\t", CsvSniffer.detect_delimiter(@@file12.path) - assert_equal true, CsvSniffer.is_quote_enclosed?(@@file12.path) - assert_equal '"', CsvSniffer.get_quote_char(@@file12.path) - assert_equal false, CsvSniffer.has_header?(@@file12.path) - end -end From 411a494bcd11435f21548cc1dc7e35d84b7c246a Mon Sep 17 00:00:00 2001 From: Chris Sandison Date: Tue, 19 Jun 2018 09:45:11 -0400 Subject: [PATCH 3/3] feature/2: Adding tests for detecting newline (#7) Fixes #2 --- Gemfile.lock | 67 ++++++++++++++++++++++++++++++++++++++++ spec/csv_sniffer_spec.rb | 36 +++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 Gemfile.lock diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..37707f6 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,67 @@ +PATH + remote: . 
+ specs: + csv_sniffer (0.2.1) + +GEM + remote: https://rubygems.org/ + specs: + activesupport (5.2.0) + concurrent-ruby (~> 1.0, >= 1.0.2) + i18n (>= 0.7, < 2) + minitest (~> 5.1) + tzinfo (~> 1.1) + byebug (10.0.2) + coderay (1.1.2) + concurrent-ruby (1.0.5) + diff-lcs (1.3) + factory_girl (4.9.0) + activesupport (>= 3.0.0) + i18n (1.0.1) + concurrent-ruby (~> 1.0) + method_source (0.9.0) + minitest (5.11.3) + pry (0.11.3) + coderay (~> 1.1.0) + method_source (~> 0.9.0) + pry-byebug (3.6.0) + byebug (~> 10.0) + pry (~> 0.10) + rack (2.0.5) + rack-test (1.0.0) + rack (>= 1.0, < 3) + rake (10.5.0) + rspec (3.7.0) + rspec-core (~> 3.7.0) + rspec-expectations (~> 3.7.0) + rspec-mocks (~> 3.7.0) + rspec-core (3.7.1) + rspec-support (~> 3.7.0) + rspec-expectations (3.7.0) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.7.0) + rspec-mocks (3.7.0) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.7.0) + rspec-support (3.7.1) + thread_safe (0.3.6) + tzinfo (1.2.5) + thread_safe (~> 0.1) + +PLATFORMS + ruby + +DEPENDENCIES + bundler (~> 1.7) + csv_sniffer! 
+ factory_girl + pry-byebug + rack + rack-test + rake (~> 10.0) + rspec + rspec-core + rspec-mocks + +BUNDLED WITH + 1.16.0 diff --git a/spec/csv_sniffer_spec.rb b/spec/csv_sniffer_spec.rb index dd06be3..c7737eb 100644 --- a/spec/csv_sniffer_spec.rb +++ b/spec/csv_sniffer_spec.rb @@ -241,4 +241,40 @@ def dc expect(dc.has_header?(@file)).to be_truthy end end + + context 'with carriage return newlines' do + before(:all) do + @file = Tempfile.new('file11', binmode: 'wt+') + @file.puts( + "Name,Phone,Age\r\n" + + "John Doe,555-123-4567,19\r\n" + + "Jane Doe,555-000-1234,22\r\n" + ) + @file.rewind + end + + after(:all) { @file.close } + + it 'should detect the endline' do + expect(dc.detect_endline(@file)).to eq("\r\n") + end + end + + context 'with only carriage returns' do + before(:all) do + @file = Tempfile.new('file12', binmode: 'wt+') + @file.puts( + "Name,Phone,Age\r" + + "John Doe,555-123-4567,19\r" + + "Jane Doe,555-000-1234,22\r" + ) + @file.rewind + end + + after(:all) { @file.close } + + it 'should detect the endline' do + expect(dc.detect_endline(@file)).to eq("\r") + end + end end