Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft #1284 streamreader return cell #1291

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 39 additions & 39 deletions cell_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ func TestGetCellValue(t *testing.T) {
f.checked = nil
cells := []string{"A3", "A4", "B4", "A7", "B7"}
rows, err := f.GetRows("Sheet1")
assert.Equal(t, [][]string{nil, nil, {"A3"}, {"A4", "B4"}, nil, nil, {"A7", "B7"}, {"A8", "B8"}}, rows)
assert.Equal(t, [][]Cell{nil, nil, {Cell{Value: "A3"}}, {Cell{Value: "A4"}, Cell{Value: "B4"}}, nil, nil, {Cell{Value: "A7"}, Cell{Value: "B7"}}, {Cell{Value: "A8"}, Cell{Value: "B8"}}}, rows)
assert.NoError(t, err)
for _, cell := range cells {
value, err := f.GetCellValue("Sheet1", cell)
Expand All @@ -246,21 +246,21 @@ func TestGetCellValue(t *testing.T) {
f.Pkg.Store("xl/worksheets/sheet1.xml", []byte(fmt.Sprintf(sheetData, `<row r="2"><c r="A2" t="str"><v>A2</v></c></row><row r="2"><c r="B2" t="str"><v>B2</v></c></row>`)))
f.checked = nil
rows, err = f.GetRows("Sheet1")
assert.Equal(t, [][]string{nil, {"A2", "B2"}}, rows)
assert.Equal(t, [][]Cell{nil, {Cell{Value: "A2"}, Cell{Value: "B2"}}}, rows)
assert.NoError(t, err)

f.Sheet.Delete("xl/worksheets/sheet1.xml")
f.Pkg.Store("xl/worksheets/sheet1.xml", []byte(fmt.Sprintf(sheetData, `<row r="1"><c r="A1" t="str"><v>A1</v></c></row><row r="1"><c r="B1" t="str"><v>B1</v></c></row>`)))
f.checked = nil
rows, err = f.GetRows("Sheet1")
assert.Equal(t, [][]string{{"A1", "B1"}}, rows)
assert.Equal(t, [][]Cell{{Cell{Value: "A1"}, Cell{Value: "B1"}}}, rows)
assert.NoError(t, err)

f.Sheet.Delete("xl/worksheets/sheet1.xml")
f.Pkg.Store("xl/worksheets/sheet1.xml", []byte(fmt.Sprintf(sheetData, `<row><c t="str"><v>A3</v></c></row><row><c t="str"><v>A4</v></c><c t="str"><v>B4</v></c></row><row r="7"><c t="str"><v>A7</v></c><c t="str"><v>B7</v></c></row><row><c t="str"><v>A8</v></c><c t="str"><v>B8</v></c></row>`)))
f.checked = nil
rows, err = f.GetRows("Sheet1")
assert.Equal(t, [][]string{{"A3"}, {"A4", "B4"}, nil, nil, nil, nil, {"A7", "B7"}, {"A8", "B8"}}, rows)
assert.Equal(t, [][]Cell{{Cell{Value: "A3"}}, {Cell{Value: "A4"}, Cell{Value: "B4"}}, nil, nil, nil, nil, {Cell{Value: "A7"}, Cell{Value: "B7"}}, {Cell{Value: "A8"}, Cell{Value: "B8"}}}, rows)
assert.NoError(t, err)

f.Sheet.Delete("xl/worksheets/sheet1.xml")
Expand All @@ -270,13 +270,13 @@ func TestGetCellValue(t *testing.T) {
assert.Equal(t, "H6", cell)
assert.NoError(t, err)
rows, err = f.GetRows("Sheet1")
assert.Equal(t, [][]string{
{"A6", "B6", "C6"},
assert.Equal(t, [][]Cell{
{Cell{Value: "A6"}, Cell{Value: "B6"}, Cell{Value: "C6"}},
nil,
{"100", "B3"},
{"", "", "", "", "", "F4"},
{Cell{Value: int64(100)}, Cell{Value: "B3"}},
{Cell{}, Cell{}, Cell{}, Cell{}, Cell{}, Cell{Value: "F4"}},
nil,
{"", "", "", "", "", "", "", "H6"},
{Cell{}, Cell{}, Cell{}, Cell{}, Cell{}, Cell{}, Cell{}, Cell{Value: "H6"}},
}, rows)
assert.NoError(t, err)

Expand Down Expand Up @@ -314,36 +314,36 @@ func TestGetCellValue(t *testing.T) {
</row>`)))
f.checked = nil
rows, err = f.GetRows("Sheet1")
assert.Equal(t, [][]string{{
"2422.3",
"2422.3",
"12.4",
"964",
"1101.6",
"275.4",
"68.9",
"44385.2083333333",
"5.1",
"5.11",
"5.1",
"5.111",
"5.1111",
"2422.012345678",
"2422.0123456789",
"12.012345678901",
"964",
"1101.6",
"275.4",
"68.9",
"0.08888",
"0.00004",
"2422.3",
"1101.6",
"275.4",
"68.9",
"1.1",
"1234567890123_4",
"123456789_0123_4",
assert.Equal(t, [][]Cell{{
Cell{Value: 2422.3},
Cell{Value: 2422.3},
Cell{Value: 12.4},
Cell{Value: int64(964)},
Cell{Value: 1101.6},
Cell{Value: 275.4},
Cell{Value: 68.9},
Cell{Value: 44385.2083333333},
Cell{Value: 5.1},
Cell{Value: 5.11},
Cell{Value: 5.1},
Cell{Value: 5.111},
Cell{Value: 5.1111},
Cell{Value: 2422.012345678},
Cell{Value: 2422.0123456789},
Cell{Value: 12.012345678901},
Cell{Value: int64(964)},
Cell{Value: 1101.6},
Cell{Value: 275.4},
Cell{Value: 68.9},
Cell{Value: 0.08888},
Cell{Value: 0.00004},
Cell{Value: 2422.3},
Cell{Value: 1101.6},
Cell{Value: 275.4},
Cell{Value: 68.9},
Cell{Value: 1.1},
Cell{Value: "1234567890123_4"},
Cell{Value: "123456789_0123_4"},
}}, rows)
assert.NoError(t, err)
}
Expand Down
4 changes: 2 additions & 2 deletions excelize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1114,12 +1114,12 @@ func TestSharedStrings(t *testing.T) {
if !assert.NoError(t, err) {
t.FailNow()
}
assert.Equal(t, "A", rows[0][0])
assert.Equal(t, Cell{Value: "A"}, rows[0][0])
rows, err = f.GetRows("Sheet2")
if !assert.NoError(t, err) {
t.FailNow()
}
assert.Equal(t, "Test Weight (Kgs)", rows[0][0])
assert.Equal(t, Cell{Value: "Test Weight (Kgs)"}, rows[0][0])
assert.NoError(t, f.Close())
}

Expand Down
3 changes: 3 additions & 0 deletions lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,9 @@ func (f *File) addSheetNameSpace(sheet string, ns xml.Attr) {
// the precision for the numeric.
func isNumeric(s string) (bool, int) {
dot, e, n, p := false, false, false, 0
if s == "" {
return false, 0
}
for i, v := range s {
if v == '.' {
if dot {
Expand Down
103 changes: 93 additions & 10 deletions rows.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@ import (
// fmt.Println()
// }
//
func (f *File) GetRows(sheet string, opts ...Options) ([][]string, error) {
func (f *File) GetRows(sheet string, opts ...Options) ([][]Cell, error) {
rows, err := f.Rows(sheet)
if err != nil {
return nil, err
}
results, cur, max := make([][]string, 0, 64), 0, 0
results, cur, max := make([][]Cell, 0, 64), 0, 0
for rows.Next() {
cur++
row, err := rows.Columns(opts...)
Expand All @@ -74,12 +74,14 @@ type Rows struct {
err error
curRow, seekRow int
needClose, rawCellValue bool
sheet string
sheetPath string
sheetName string
f *File
tempFile *os.File
sst *xlsxSST
decoder *xml.Decoder
token xml.Token
rowOpts RowOpts
}

// Next will return true if find the next row element.
Expand All @@ -101,6 +103,17 @@ func (rows *Rows) Next() bool {
rows.curRow = rowNum
}
rows.token = token
ro := RowOpts{}
if styleID, err := attrValToInt("s", xmlElement.Attr); err == nil && styleID > 0 && styleID < MaxCellStyles {
ro.StyleID = styleID
}
if hidden, err := attrValToBool("hidden", xmlElement.Attr); err == nil {
ro.Hidden = hidden
}
if height, err := attrValToFloat("ht", xmlElement.Attr); err == nil {
ro.Height = height
}
rows.rowOpts = ro
return true
}
case xml.EndElement:
Expand All @@ -111,6 +124,13 @@ func (rows *Rows) Next() bool {
}
}

// GetStyleID will return the RowOpts of the current row.
//
// rowOpts := rows.GetRowOpts()
func (rows *Rows) GetRowOpts() RowOpts {
return rows.rowOpts
}

// Error will return the error when the error occurs.
func (rows *Rows) Error() error {
return rows.err
Expand All @@ -128,7 +148,7 @@ func (rows *Rows) Close() error {
// Columns return the current row's column values. This fetches the worksheet
// data as a stream, returns each cell in a row as is, and will not skip empty
// rows in the tail of the worksheet.
func (rows *Rows) Columns(opts ...Options) ([]string, error) {
func (rows *Rows) Columns(opts ...Options) ([]Cell, error) {
if rows.curRow > rows.seekRow {
return nil, nil
}
Expand Down Expand Up @@ -171,9 +191,9 @@ func (rows *Rows) Columns(opts ...Options) ([]string, error) {
}

// appendSpace append blank characters to slice by given length and source slice.
func appendSpace(l int, s []string) []string {
func appendSpace(l int, s []Cell) []Cell {
for i := 1; i < l; i++ {
s = append(s, "")
s = append(s, Cell{})
}
return s
}
Expand All @@ -192,7 +212,7 @@ type rowXMLIterator struct {
err error
inElement string
cellCol int
columns []string
columns []Cell
}

// rowXMLHandler parse the row XML element of the worksheet.
Expand All @@ -206,10 +226,18 @@ func (rows *Rows) rowXMLHandler(rowIterator *rowXMLIterator, xmlElement *xml.Sta
return
}
}
//blank := rowIterator.cellCol - len(rowIterator.columns)
//if val, _ := colCell.getValueFrom(rows.f, rows.sst, raw); val != "" || colCell.F != nil {
// rowIterator.columns = append(appendSpace(blank, rowIterator.columns), val)
//}
blank := rowIterator.cellCol - len(rowIterator.columns)
if val, _ := colCell.getValueFrom(rows.f, rows.sst, raw); val != "" || colCell.F != nil {
rowIterator.columns = append(appendSpace(blank, rowIterator.columns), val)
var formula string
if colCell.F != nil {
formula, _ = rows.f.GetCellFormula(rows.sheetName, colCell.R)
}
//if val, _ := colCell.getTypedValueFrom(rows.f, rows.sst); val != "" || colCell.F != nil {
val, _ := colCell.getTypedValueFrom(rows.f, rows.sst)
rowIterator.columns = append(appendSpace(blank, rowIterator.columns), Cell{Value: val, StyleID: colCell.S, Formula: formula})
}
}

Expand Down Expand Up @@ -249,7 +277,7 @@ func (f *File) Rows(sheet string) (*Rows, error) {
f.saveFileList(name, f.replaceNameSpaceBytes(name, output))
}
var err error
rows := Rows{f: f, sheet: name}
rows := Rows{f: f, sheetPath: name, sheetName: sheet}
rows.needClose, rows.decoder, rows.tempFile, err = f.xmlDecoder(name)
return &rows, err
}
Expand Down Expand Up @@ -475,6 +503,61 @@ func (c *xlsxC) getValueFrom(f *File, d *xlsxSST, raw bool) (string, error) {
}
}

func (c *xlsxC) getTypedValueFrom(f *File, d *xlsxSST) (interface{}, error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to take care of the performance impact of this feature, I agree with you for reused the Cell structure, and I think we needn't add the String() function on Cell, keep applying number format and for the Cell.Value by default, and not convert the string cell value by cell's data type, all the cell value will be return in string data type, add a CellType field in the Cell structure which represents the type of the cell, let user convert the data type of the cell value from string by CellType if in need.

If a cell value was numeric, which applies a text or data time number format, then get the cell's value without specifying the RawCellValue option, it will convert the cell value to string data type after applying the number format style, if no CellType field, at this time, it will be loose the original data type information, so we need to add this field instead of convert cell value to Go data type to represents the cell data type.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main reason i did the conversion is because (if i'm not mistaken) the StreamWriter API will read the Golang type in order to decide the type in the output XML. (So this way i can read a cell and convert it to Cell then use StreamWriter to write the same value.
So in order to keep the ability to do in := streamread(…); streamwriter.Write(in) (pseudocode), we'll need to change StreamWriter to support CellType?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest that add the CellType field in the type Cell which is only used for the stream reader, the stream reader always returns cell value in string data type. This design has no ability for in := streamread(…); streamwriter.Write(in), which needs the user to prepare the needed cell's value from the string data type, and use them in the stream writer. Because there are some differences with set the cell's value on the stream writer, the cell value depends on the style of the cell when reading the cell value, such as the number format, but the library doesn't support applying all number formats for the cell currently, so if we convert the cell to Go data type when reading cells, there is currently no guarantee that the values of all cells can be correctly converted to the data type of the Go language, so I don't suggest adding this ability at this time, what do you think about it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it makes more sense if i explain my use case: basically we want to write a lot of data in the worksheets (hence stream writer), but we also want to reuse the style, formula and data of an excel "template", so an Excel file we use as a reference and we read while writing to stream writer. This works fine right now for values but not for formulas and styles. I think it's possible for me to do some magic outside of Excelize (eg. use getTypedValueFrom in my own code), but you pointed out that performance can be an issue (as well as data quality) with this approach so maybe i should reconsider 🤔
So i was thinking of doing something a bit different: 1. copy the template file to use as base, 2. create new worksheets 3. delete the original worksheets. This way, the styles would already be present in the output file, and i can just copy the StyleID over from the input to the output. With that design, i think i will only need to write the StyleID and the Value without caring about the conversion, correct?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The style of the cell was storage in the workbook globally, so the same style ID represents the same style between the template and generates the workbook, but the style with number format only can be used for the cell in numeric value, so you need to convert the cell value to correct data type when streaming writing. I'm not sure where the input data come from in your case, if it comes as you say in := streamread(…); streamwriter.Write(in), because all cells value of the in input data was in string data type, if any cell's style contains a number format, which style will be invalid after streamwriter.Write(in).

f.Lock()
defer f.Unlock()
switch c.T {
case "b":
if c.V == "1" {
return true, nil
} else if c.V == "0" {
return false, nil
}
case "s":
if c.V != "" {
xlsxSI := 0
xlsxSI, _ = strconv.Atoi(c.V)
if _, ok := f.tempFiles.Load(defaultXMLPathSharedStrings); ok {
return f.getFromStringItem(xlsxSI), nil
}
if len(d.SI) > xlsxSI {
return d.SI[xlsxSI].String(), nil
}
}
case "str":
return c.V, nil
case "inlineStr":
if c.IS != nil {
return c.IS.String(), nil
}
return c.V, nil
default:
if isNum, precision := isNumeric(c.V); isNum {
var precisionV string
if precision == 0 {
precisionV = roundPrecision(c.V, 15)
} else {
precisionV = roundPrecision(c.V, -1)
}

vi, erri := strconv.ParseInt(precisionV, 10, 64)
vf, errf := strconv.ParseFloat(precisionV, 64)
if erri == nil {
return vi, nil
} else if errf == nil {
return vf, nil
} else {
return precisionV, nil
}
} else {
return nil, nil
}
// TODO: add support for other possible values of T (https://stackoverflow.com/questions/18334314/what-do-excel-xml-cell-attribute-values-mean)
}

return c.V, nil
}

// roundPrecision provides a function to format floating-point number text
// with precision, if the given text couldn't be parsed to float, this will
// return the original string.
Expand Down
Loading