Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix and improve detection and import of ods, xlsx and csv documents #1446

Merged
merged 11 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lib/Service/ColumnTypes/DatetimeDateBusiness.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class DatetimeDateBusiness extends SuperBusiness implements IColumnTypeBusiness
* @return string
*/
public function parseValue($value, ?Column $column = null): string {
return json_encode($this->isValidDate($value, 'Y-m-d') ? $value : '');
return json_encode($this->isValidDate((string)$value, 'Y-m-d') ? (string)$value : '');
}

/**
Expand All @@ -26,7 +26,7 @@ public function parseValue($value, ?Column $column = null): string {
* @return bool
*/
public function canBeParsed($value, ?Column $column = null): bool {
return $this->isValidDate($value, 'Y-m-d');
return $this->isValidDate((string)$value, 'Y-m-d');
}

}
4 changes: 2 additions & 2 deletions lib/Service/ColumnTypes/DatetimeTimeBusiness.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class DatetimeTimeBusiness extends SuperBusiness implements IColumnTypeBusiness
* @return string
*/
public function parseValue($value, ?Column $column = null): string {
return json_encode($this->isValidDate($value, 'H:i') ? $value : '');
return json_encode($this->isValidDate((string)$value, 'H:i') ? $value : '');
}

/**
Expand All @@ -26,7 +26,7 @@ public function parseValue($value, ?Column $column = null): string {
* @return bool
*/
public function canBeParsed($value, ?Column $column = null): bool {
return $this->isValidDate($value, 'H:i');
return $this->isValidDate((string)$value, 'H:i');
}

}
4 changes: 2 additions & 2 deletions lib/Service/ColumnTypes/SelectionCheckBusiness.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
use OCA\Tables\Db\Column;

class SelectionCheckBusiness extends SuperBusiness implements IColumnTypeBusiness {
public const PATTERN_POSITIVE = ['yes', '1', true, 1, 'true'];
public const PATTERN_NEGATIVE = ['no', '0', false, 0, 'false'];
public const PATTERN_POSITIVE = ['yes', '1', true, 1, 'true', 'TRUE'];
public const PATTERN_NEGATIVE = ['no', '0', false, 0, 'false', 'FALSE'];

/**
* @param mixed $value
Expand Down
105 changes: 96 additions & 9 deletions lib/Service/ImportService.php
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ private function getPreviewData(Worksheet $worksheet): array {
$columns[] = [
'title' => $title,
'type' => $this->rawColumnDataTypes[$colIndex]['type'],
'subtype' => $this->rawColumnDataTypes[$colIndex]['subtype'],
'subtype' => $this->rawColumnDataTypes[$colIndex]['subtype'] ?? null,
'numberDecimals' => $this->rawColumnDataTypes[$colIndex]['number_decimals'] ?? 0,
'numberPrefix' => $this->rawColumnDataTypes[$colIndex]['number_prefix'] ?? '',
'numberSuffix' => $this->rawColumnDataTypes[$colIndex]['number_suffix'] ?? '',
Expand All @@ -154,13 +154,26 @@ private function getPreviewData(Worksheet $worksheet): array {
$cellIterator = $row->getCellIterator();
$cellIterator->setIterateOnlyExistingCells(false);

foreach ($cellIterator as $cellIndex => $cell) {
foreach ($cellIterator as $cell) {
$value = $cell->getValue();
$colIndex = (int) $cellIndex;
// $cellIterator`s index is based on 1, not 0.
$colIndex = $cellIterator->getCurrentColumnIndex() - 1;
$column = $this->columns[$colIndex];

if (($column && $column->getType() === 'datetime') || (is_array($columns[$colIndex]) && $columns[$colIndex]['type'] === 'datetime')) {
$value = Date::excelToDateTimeObject($value)->format('Y-m-d H:i');
if (isset($columns[$colIndex]['subtype']) && $columns[$colIndex]['subtype'] === 'date') {
$format = 'Y-m-d';
} elseif (isset($columns[$colIndex]['subtype']) && $columns[$colIndex]['subtype'] === 'time') {
$format = 'H:i';
} else {
$format = 'Y-m-d H:i';
}

try {
$value = Date::excelToDateTimeObject($value)->format($format);
} catch (\TypeError) {
$value = (new \DateTimeImmutable($value))->format($format);
}
} elseif (($column && $column->getType() === 'number' && $column->getNumberSuffix() === '%')
|| (is_array($columns[$colIndex]) && $columns[$colIndex]['type'] === 'number' && $columns[$colIndex]['numberSuffix'] === '%')) {
$value = $value * 100;
Expand Down Expand Up @@ -285,8 +298,14 @@ public function import(?int $tableId, ?int $viewId, string $path, bool $createMi
* @throws PermissionError
*/
private function loop(Worksheet $worksheet): void {
$firstRow = $worksheet->getRowIterator()->current();
$secondRow = $worksheet->getRowIterator()->seek(2)->current();
$rowIterator = $worksheet->getRowIterator();
$firstRow = $rowIterator->current();
$rowIterator->next();
if (!$rowIterator->valid()) {
return;
}
$secondRow = $rowIterator->current();
unset($rowIterator);
$this->getColumns($firstRow, $secondRow);

if (empty(array_filter($this->columns))) {
Expand Down Expand Up @@ -361,8 +380,32 @@ private function createRow(Row $row): void {

$value = $cell->getValue();
$hasData = $hasData || !empty($value);

if ($column->getType() === 'datetime') {
$value = Date::excelToDateTimeObject($value)->format('Y-m-d H:i');
if ($column->getType() === 'datetime' && $column->getSubtype() === 'date') {
$format = 'Y-m-d';
} elseif ($column->getType() === 'datetime' && $column->getSubtype() === 'time') {
$format = 'H:i';
} else {
$format = 'Y-m-d H:i';
}
try {
$value = Date::excelToDateTimeObject($value)->format($format);
} catch (\TypeError) {
$value = (new \DateTimeImmutable($value))->format($format);
}
} elseif ($column->getType() === 'datetime' && $column->getSubtype() === 'date') {
try {
$value = Date::excelToDateTimeObject($value)->format('Y-m-d');
} catch (\TypeError) {
$value = (new \DateTimeImmutable($value))->format('Y-m-d');
}
} elseif ($column->getType() === 'datetime' && $column->getSubtype() === 'time') {
try {
$value = Date::excelToDateTimeObject($value)->format('H:i');
} catch (\TypeError) {
$value = (new \DateTimeImmutable($value))->format('H:i');
}
} elseif ($column->getType() === 'number' && $column->getNumberSuffix() === '%') {
$value = $value * 100;
} elseif ($column->getType() === 'selection' && $column->getSubtype() === 'check') {
Expand Down Expand Up @@ -414,6 +457,8 @@ private function getColumns(Row $firstRow, Row $secondRow): void {
$index = 0;
$countMatchingColumnsFromConfig = 0;
$countCreatedColumnsFromConfig = 0;
$lastCellWasEmpty = false;
$hasGapInTitles = false;
foreach ($cellIterator as $cell) {
if ($cell && $cell->getValue() !== null && $cell->getValue() !== '') {
$title = $cell->getValue();
Expand All @@ -437,14 +482,29 @@ private function getColumns(Row $firstRow, Row $secondRow): void {

// Convert data type to our data type
$dataTypes[] = $this->parseColumnDataType($secondRowCellIterator->current());
if ($lastCellWasEmpty) {
$hasGapInTitles = true;
}
$lastCellWasEmpty = false;
} else {
$this->logger->debug('No cell given or cellValue is empty while loading columns for importing');
if ($cell->getDataType() === 'null') {
// LibreOffice generated XLSX doc may have more empty columns in the first row.
// Continue without increasing error count, but leave a marker to detect gaps in titles.
$lastCellWasEmpty = true;
continue;
}
$this->countErrors++;
}
$secondRowCellIterator->next();
$index++;
}

if ($hasGapInTitles) {
$this->logger->info('Imported table is having a gap in column titles');
$this->countErrors++;
}

$this->rawColumnTitles = $titles;
$this->rawColumnDataTypes = $dataTypes;

Expand All @@ -468,9 +528,33 @@ private function parseColumnDataType(Cell $cell): array {
'subtype' => 'line',
];

if (Date::isDateTime($cell) || $originDataType === DataType::TYPE_ISO_DATE) {
try {
if ($value === false) {
throw new \Exception('We do not accept `false` here');
}
$dateValue = new \DateTimeImmutable($value);
} catch (\Exception) {
}

if (isset($dateValue)
|| Date::isDateTime($cell)
|| $originDataType === DataType::TYPE_ISO_DATE) {
// the formatted value stems from the office document and shows the original user intent
$dateAnalysis = date_parse($formattedValue);
$containsDate = $dateAnalysis['year'] !== false || $dateAnalysis['month'] !== false || $dateAnalysis['day'] !== false;
$containsTime = $dateAnalysis['hour'] !== false || $dateAnalysis['minute'] !== false || $dateAnalysis['second'] !== false;

if ($containsDate && !$containsTime) {
$subType = 'date';
} elseif (!$containsDate && $containsTime) {
$subType = 'time';
} else {
$subType = '';
}

$dataType = [
'type' => 'datetime',
'subtype' => $subType,
];
} elseif ($originDataType === DataType::TYPE_NUMERIC) {
if (str_contains($formattedValue, '%')) {
Expand Down Expand Up @@ -514,7 +598,10 @@ private function parseColumnDataType(Cell $cell): array {
'type' => 'number',
];
}
} elseif ($originDataType === DataType::TYPE_BOOL) {
} elseif ($originDataType === DataType::TYPE_BOOL
|| ($originDataType === DataType::TYPE_FORMULA
&& in_array($formattedValue, ['FALSE', 'TRUE'], true))
) {
$dataType = [
'type' => 'selection',
'subtype' => 'check',
Expand Down
2 changes: 1 addition & 1 deletion src/modules/modals/Import.vue
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
<p class="span">
{{ t('tables', 'Supported formats: xlsx, xls, csv, html, xml') }}
<br>
{{ t('tables', 'First row of the file must contain column headings.') }}
{{ t('tables', 'First row of the file must contain column headings without gaps.') }}
</p>
</div>
</RowFormWrapper>
Expand Down
51 changes: 28 additions & 23 deletions tests/integration/features/APIv1.feature
Original file line number Diff line number Diff line change
Expand Up @@ -187,31 +187,36 @@ Feature: APIv1
Then user deletes last created row
Then user "participant1" deletes table with keyword "Rows check"


@api1
Scenario: Import csv table
Given file "/import.csv" exists for user "participant1" with following data
| Col1 | Col2 | Col3 | num | emoji | special |
| Val1 | Val2 | Val3 | 1 | 💙 | Ä |
| great | news | here | 99 | ⚠️ | Ö |
Given table "Import test" with emoji "👨🏻‍💻" exists for user "participant1" as "base1"
When user imports file "/import.csv" into last created table
@api1 @import
Scenario Outline: Import a document file
Given user "participant1" uploads file "<importfile>"
And table "Import test" with emoji "👨🏻‍💻" exists for user "participant1" as "base1"
When user imports file "/<importfile>" into last created table
Then import results have the following data
| found_columns_count | 6 |
| created_columns_count | 6 |
| inserted_rows_count | 2 |
| errors_count | 0 |
Then table has at least following columns
| Col1 |
| Col2 |
| Col3 |
| num |
| emoji |
| special |
| found_columns_count | 10 |
| created_columns_count | 10 |
| inserted_rows_count | 2 |
| errors_count | 0 |
Then table has at least following typed columns
| Col1 | text |
| Col2 | text |
| Col3 | text |
| num | number |
| emoji | text |
| special | text |
| date | datetime |
| truth | selection |
Then table contains at least following rows
| Col1 | Col2 | Col3 | num | emoji | special |
| Val1 | Val2 | Val3 | 1 | 💙 | Ä |
| great | news | here | 99 | ⚠️ | Ö |
| Date and Time | Col1 | Col2 | Col3 | num | emoji | special | date | truth | time |
| 2022-02-20 08:42 | Val1 | Val2 | Val3 | 1 | 💙 | Ä | 2024-02-24 | false | 18:48 |
| 2016-06-01 13:37 | great | news | here | 99 | ⚠ | Ö | 2016-06-01 | true | 01:23 |

Examples:
| importfile |
| import-from-libreoffice.ods |
| import-from-libreoffice.xlsx |
| import-from-ms365.xlsx |
| import-from-libreoffice.csv |

@api1
Scenario: Create, edit and delete views
Expand Down
49 changes: 48 additions & 1 deletion tests/integration/features/bootstrap/FeatureContext.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
use GuzzleHttp\Cookie\CookieJar;
use GuzzleHttp\Exception\ClientException;
use GuzzleHttp\Exception\GuzzleException;
use GuzzleHttp\Psr7\Utils;
use PHPUnit\Framework\Assert;
use PHPUnit\Framework\ExpectationFailedException;
use Psr\Http\Message\ResponseInterface;
Expand Down Expand Up @@ -64,6 +65,8 @@ class FeatureContext implements Context {
private array $tableData = [];
private array $viewData = [];

private $importColumnData = null;

// use CommandLineTrait;
private CollectionManager $collectionManager;

Expand All @@ -89,6 +92,7 @@ public function setUp() {
* @AfterScenario
*/
public function cleanupUsers() {
$this->importColumnData = null;
$this->collectionManager->cleanUp();
foreach ($this->createdUsers as $user) {
$this->deleteUser($user);
Expand Down Expand Up @@ -467,8 +471,21 @@ public function columnsForNodeV2(string $nodeType, string $nodeName, ?TableNode
// (((((((((((((((((((((((((((( END API v2 )))))))))))))))))))))))))))))))))))


/**
* @Given user :user uploads file :file
*/
public function uploadFile(string $user, string $file): void {
$this->setCurrentUser($user);

$localFilePath = __DIR__ . '/../../resources/' . $file;

$url = sprintf('%sremote.php/dav/files/%s/%s', $this->baseUrl, $user, $file);
$body = Utils::streamFor(fopen($localFilePath, 'rb'));

$this->sendRequestFullUrl('PUT', $url, $body);

Assert::assertEquals(201, $this->response->getStatusCode());
}

// IMPORT --------------------------

Expand Down Expand Up @@ -574,7 +591,7 @@ public function checkRowsExists(TableNode $table): void {
$allValuesForColumn[] = $row[$indexForCol];
}
foreach ($table->getColumn($key) as $item) {
Assert::assertTrue(in_array($item, $allValuesForColumn));
Assert::assertTrue(in_array($item, $allValuesForColumn), sprintf('%s not in %s', $item, implode(', ', $allValuesForColumn)));
}
}
}
Expand Down Expand Up @@ -1190,6 +1207,36 @@ public function tableColumns(?TableNode $body = null): void {
}
}

/**
* @Then table has at least following typed columns
*
* @param TableNode|null $body
*/
public function tableTypedColumns(?TableNode $body = null): void {
$this->sendRequest(
'GET',
'/apps/tables/api/1/tables/'.$this->tableId.'/columns'
);

$data = $this->getDataFromResponse($this->response);
Assert::assertEquals(200, $this->response->getStatusCode());

// check if no columns exists
if ($body === null) {
Assert::assertCount(0, $data);
return;
}

$colByTitle = [];
foreach ($data as $d) {
$colByTitle[$d['title']] = $d['type'];
}
foreach ($body->getRows() as $columnData) {
Assert::assertArrayHasKey($columnData[0], $colByTitle);
Assert::assertSame($columnData[1], $colByTitle[$columnData[0]], sprintf('Column "%s" has unexpected type "%s"', $columnData[0], $colByTitle[$columnData[0]]));
}
}

/**
* @Then user deletes last created column
*/
Expand Down
3 changes: 3 additions & 0 deletions tests/integration/resources/import-from-libreoffice.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Date and Time,Col1,Col2,Col3,num,emoji,special,date,truth,time
2022-02-20 08:42,Val1,Val2,Val3,1,💙,Ä,2024-02-24,false,18:48
2016-06-01 13:37,great,news,here,99,⚠,Ö,2016-06-01,true,01:23
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
SPDX-License-Identifier: AGPL-3.0-or-later

Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
SPDX-License-Identifier: AGPL-3.0-or-later

Binary file not shown.
Loading
Loading