Skip to content

Commit

Permalink
* 读csv会忽略BOM数据 [Issue #3137](#3137)
Browse files Browse the repository at this point in the history
* 解决csv用office打开乱码的问题,写csv默认带上BOM数据
  • Loading branch information
zhuangjiaju committed Apr 28, 2023
1 parent 0019be4 commit 8fb759a
Show file tree
Hide file tree
Showing 22 changed files with 256 additions and 267 deletions.
4 changes: 4 additions & 0 deletions easyexcel-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,9 @@
<groupId>org.ehcache</groupId>
<artifactId>ehcache</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
</dependencies>
</project>

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.alibaba.excel.analysis.csv;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.file.Files;
import java.util.ArrayList;
Expand All @@ -11,6 +12,7 @@

import com.alibaba.excel.analysis.ExcelReadExecutor;
import com.alibaba.excel.context.csv.CsvReadContext;
import com.alibaba.excel.enums.ByteOrderMarkEnum;
import com.alibaba.excel.enums.CellDataTypeEnum;
import com.alibaba.excel.enums.RowTypeEnum;
import com.alibaba.excel.exception.ExcelAnalysisException;
Expand All @@ -27,6 +29,7 @@
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.input.BOMInputStream;

/**
* read executor
Expand Down Expand Up @@ -82,17 +85,26 @@ public void execute() {
/**
 * Creates the CSV parser for the workbook currently being read.
 *
 * <p>The source is the workbook's file when one is set and use of the input stream is not
 * mandatory; otherwise it is the workbook's input stream. The byte order mark matching the
 * configured charset (if any) is resolved once and handed to {@link #buildCsvParser}.
 *
 * @return a parser over the selected source
 * @throws IOException if the file cannot be opened or the parser cannot be created
 */
private CSVParser csvParser() throws IOException {
    CsvReadWorkbookHolder csvReadWorkbookHolder = csvReadContext.csvReadWorkbookHolder();
    CSVFormat csvFormat = csvReadWorkbookHolder.getCsvFormat();
    ByteOrderMarkEnum byteOrderMark = ByteOrderMarkEnum.valueOfByCharsetName(
        csvReadWorkbookHolder.getCharset().name());

    if (!csvReadWorkbookHolder.getMandatoryUseInputStream() && csvReadWorkbookHolder.getFile() != null) {
        InputStream fileStream = Files.newInputStream(csvReadWorkbookHolder.getFile().toPath());
        try {
            return buildCsvParser(csvFormat, fileStream, byteOrderMark);
        } catch (IOException | RuntimeException e) {
            // This method opened the stream, so it must not leak when the parser cannot be built.
            try {
                fileStream.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }
    return buildCsvParser(csvFormat, csvReadWorkbookHolder.getInputStream(), byteOrderMark);
}

/**
 * Builds a CSV parser over {@code inputStream}, decoding it with the workbook's charset.
 *
 * <p>When {@code byteOrderMark} is not {@code null} the stream is wrapped in a
 * {@link BOMInputStream} configured for that mark, so a leading BOM is not handed to the parser
 * as part of the first cell.
 *
 * @param csvFormat     format used to create the parser
 * @param inputStream   raw CSV bytes
 * @param byteOrderMark BOM associated with the workbook's charset, or {@code null} if there is none
 * @return a parser reading from {@code inputStream}
 * @throws IOException if the parser cannot be created
 */
private CSVParser buildCsvParser(CSVFormat csvFormat, InputStream inputStream, ByteOrderMarkEnum byteOrderMark)
    throws IOException {
    InputStream source = byteOrderMark == null
        ? inputStream
        : new BOMInputStream(inputStream, byteOrderMark.getByteOrderMark());
    return csvFormat.parse(
        new InputStreamReader(source, csvReadContext.csvReadWorkbookHolder().getCharset()));
}

private void dealRecord(CSVRecord record, int rowIndex) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package com.alibaba.excel.enums;

import java.nio.charset.Charset;
import java.util.Locale;
import java.util.Map;

import com.alibaba.excel.util.MapUtils;

import lombok.Getter;
import org.apache.commons.io.ByteOrderMark;

/**
 * Byte order marks (BOM) that can be matched to a charset by name.
 *
 * <p>Each constant wraps a commons-io {@link ByteOrderMark} and also exposes the mark decoded
 * into a {@link String} with the mark's own charset, so it can be written to character-based
 * output.
 *
 * @author Jiaju Zhuang
 */
@Getter
public enum ByteOrderMarkEnum {

    UTF_8(ByteOrderMark.UTF_8),
    UTF_16BE(ByteOrderMark.UTF_16BE),
    UTF_16LE(ByteOrderMark.UTF_16LE),
    UTF_32BE(ByteOrderMark.UTF_32BE),
    UTF_32LE(ByteOrderMark.UTF_32LE),

    ;

    /**
     * The wrapped commons-io byte order mark.
     */
    final ByteOrderMark byteOrderMark;

    /**
     * The BOM bytes decoded with the BOM's own charset.
     */
    final String stringPrefix;

    ByteOrderMarkEnum(ByteOrderMark byteOrderMark) {
        this.byteOrderMark = byteOrderMark;
        Charset charset = Charset.forName(byteOrderMark.getCharsetName());
        this.stringPrefix = new String(byteOrderMark.getBytes(), charset);
    }

    /**
     * Lookup table from normalized charset name (canonical name and every alias reported by the
     * JVM) to the matching constant.
     */
    private static final Map<String, ByteOrderMarkEnum> CHARSET_BYTE_ORDER_MARK_MAP = MapUtils.newHashMap();

    static {
        for (ByteOrderMarkEnum value : ByteOrderMarkEnum.values()) {
            String canonicalName = value.getByteOrderMark().getCharsetName();
            CHARSET_BYTE_ORDER_MARK_MAP.put(normalize(canonicalName), value);
            // Aliases never replace an existing entry, so canonical names always win.
            for (String alias : Charset.forName(canonicalName).aliases()) {
                CHARSET_BYTE_ORDER_MARK_MAP.putIfAbsent(normalize(alias), value);
            }
        }
    }

    /**
     * Finds the byte order mark for a charset name.
     *
     * <p>The name is matched case-insensitively against the canonical name and the aliases of
     * each supported charset.
     *
     * @param charsetName charset name, for example {@code "UTF-8"}; may be {@code null}
     * @return the matching constant, or {@code null} when the name is {@code null} or no
     * constant is registered for it
     */
    public static ByteOrderMarkEnum valueOfByCharsetName(String charsetName) {
        if (charsetName == null) {
            return null;
        }
        return CHARSET_BYTE_ORDER_MARK_MAP.get(normalize(charsetName));
    }

    /**
     * Normalizes a charset name for use as a map key; charset names are not case-sensitive.
     */
    private static String normalize(String charsetName) {
        return charsetName.toUpperCase(Locale.ROOT);
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,30 @@
import java.io.Closeable;
import java.io.IOException;
import java.math.BigDecimal;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import com.alibaba.excel.constant.BuiltinFormats;
import com.alibaba.excel.enums.ByteOrderMarkEnum;
import com.alibaba.excel.enums.NumericCellTypeEnum;
import com.alibaba.excel.exception.ExcelGenerateException;
import com.alibaba.excel.util.DateUtils;
import com.alibaba.excel.util.ListUtils;
import com.alibaba.excel.util.MapUtils;
import com.alibaba.excel.util.NumberDataFormatterUtils;
import com.alibaba.excel.util.StringUtils;

import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.Setter;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.apache.commons.io.ByteOrderMark;
import org.apache.poi.ss.usermodel.AutoFilter;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellRange;
Expand Down Expand Up @@ -51,6 +57,7 @@
@Setter
@EqualsAndHashCode
public class CsvSheet implements Sheet, Closeable {

/**
* workbook
*/
Expand Down Expand Up @@ -109,6 +116,13 @@ private void initSheet() {
}
rowCache = ListUtils.newArrayListWithExpectedSize(rowCacheCount);
try {
if (csvWorkbook.getWithBom()) {
ByteOrderMarkEnum byteOrderMark = ByteOrderMarkEnum.valueOfByCharsetName(
csvWorkbook.getCharset().name());
if (byteOrderMark != null) {
out.append(byteOrderMark.getStringPrefix());
}
}
csvPrinter = csvFormat.print(out);
} catch (IOException e) {
throw new ExcelGenerateException(e);
Expand Down
Loading

0 comments on commit 8fb759a

Please sign in to comment.