Skip to content

Commit

Permalink
Merge pull request #41327 from nextcloud/dont-reuse-metadata-unscanned
Browse files Browse the repository at this point in the history
dont reuse etag for folders marked explicitly unscanned
  • Loading branch information
icewind1991 authored Jan 31, 2024
2 parents 02e9cd6 + b777304 commit aff861f
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 8 deletions.
38 changes: 31 additions & 7 deletions lib/private/Files/Cache/Scanner.php
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,9 @@ public function scanFile($file, $reuseExisting = 0, $parentId = -1, $cacheData =
$fileId = $cacheData['fileid'];
$data['fileid'] = $fileId;
// only reuse data if the file hasn't explicitly changed
if (isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime']) {
$mtimeUnchanged = isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime'];
// if the folder is marked as unscanned, never reuse etags
if ($mtimeUnchanged && $cacheData['size'] !== -1) {
$data['mtime'] = $cacheData['mtime'];
if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) {
$data['size'] = $cacheData['size'];
Expand All @@ -220,6 +222,11 @@ public function scanFile($file, $reuseExisting = 0, $parentId = -1, $cacheData =

// Only update metadata that has changed
$newData = array_diff_assoc($data, $cacheData->getData());

// make it known to the caller that etag has been changed and needs propagation
if (isset($newData['etag'])) {
$data['etag_changed'] = true;
}
} else {
// we only updated unencrypted_size if it's already set
unset($data['unencrypted_size']);
Expand Down Expand Up @@ -388,16 +395,20 @@ protected function getExistingChildren($folderId) {
* @param int|float $oldSize the size of the folder before (re)scanning the children
* @return int|float the size of the scanned folder or -1 if the size is unknown at this stage
*/
protected function scanChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float $oldSize) {
protected function scanChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float $oldSize, &$etagChanged = false) {
if ($reuse === -1) {
$reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
}
$this->emit('\OC\Files\Cache\Scanner', 'scanFolder', [$path, $this->storageId]);
$size = 0;
$childQueue = $this->handleChildren($path, $recursive, $reuse, $folderId, $lock, $size);
$childQueue = $this->handleChildren($path, $recursive, $reuse, $folderId, $lock, $size, $etagChanged);

foreach ($childQueue as $child => [$childId, $childSize]) {
$childSize = $this->scanChildren($child, $recursive, $reuse, $childId, $lock, $childSize);
// "etag changed" propagates up, but not down, so we pass `false` to the children even if we already know that the etag of the current folder changed
$childEtagChanged = false;
$childSize = $this->scanChildren($child, $recursive, $reuse, $childId, $lock, $childSize, $childEtagChanged);
$etagChanged |= $childEtagChanged;

if ($childSize === -1) {
$size = -1;
} elseif ($size !== -1) {
Expand All @@ -410,8 +421,17 @@ protected function scanChildren(string $path, $recursive, int $reuse, int $folde
if ($this->storage->instanceOfStorage(Encryption::class)) {
$this->cache->calculateFolderSize($path);
} else {
if ($this->cacheActive && $oldSize !== $size) {
$this->cache->update($folderId, ['size' => $size]);
if ($this->cacheActive) {
$updatedData = [];
if ($oldSize !== $size) {
$updatedData['size'] = $size;
}
if ($etagChanged) {
$updatedData['etag'] = uniqid();
}
if ($updatedData) {
$this->cache->update($folderId, $updatedData);
}
}
}
$this->emit('\OC\Files\Cache\Scanner', 'postScanFolder', [$path, $this->storageId]);
Expand All @@ -421,7 +441,7 @@ protected function scanChildren(string $path, $recursive, int $reuse, int $folde
/**
* @param bool|IScanner::SCAN_RECURSIVE_INCOMPLETE $recursive
*/
private function handleChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float &$size): array {
private function handleChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float &$size, bool &$etagChanged): array {
// we put this in its own function so it cleans up the memory before we start recursing
$existingChildren = $this->getExistingChildren($folderId);
$newChildren = iterator_to_array($this->storage->getDirectoryContent($path));
Expand Down Expand Up @@ -469,6 +489,10 @@ private function handleChildren(string $path, $recursive, int $reuse, int $folde
} elseif ($size !== -1) {
$size += $data['size'];
}

if (isset($data['etag_changed']) && $data['etag_changed']) {
$etagChanged = true;
}
}
} catch (Exception $ex) {
// might happen if inserting duplicate while a scanning
Expand Down
2 changes: 1 addition & 1 deletion lib/private/Files/ObjectStore/ObjectStoreScanner.php
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public function scan($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $loc
return [];
}

protected function scanChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float $oldSize) {
/**
 * Child-scan hook that does no work: it returns 0 unconditionally and never
 * writes to $etagChanged, so the caller's value is left as passed in.
 *
 * NOTE(review): presumably this overrides the regular cache scanner's
 * scanChildren() because object stores have nothing to discover on storage - confirm.
 *
 * @param bool $etagChanged by-reference flag; not modified here
 * @return int always 0
 */
protected function scanChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float $oldSize, &$etagChanged = false) {
return 0;
}

Expand Down
44 changes: 44 additions & 0 deletions tests/lib/Files/Cache/ScannerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -404,4 +404,48 @@ public function dataTestIsPartialFile() {
['/sub/folder/foo.txt', false],
];
}

/**
 * A folder that is explicitly marked as unscanned (size -1 in the cache) must
 * receive a new etag on rescan, even though its mtime on storage was restored
 * to the previously cached value.
 */
public function testNoETagUnscannedFolder() {
	$this->fillTestFolders();

	$this->scanner->scan('');

	$oldFolderEntry = $this->cache->get('folder');
	// create a new file in the folder while keeping the folder mtime unchanged,
	// then mark the folder as unscanned
	$this->storage->file_put_contents('folder/new.txt', 'foo');
	$this->storage->touch('folder', $oldFolderEntry->getMTime());
	$this->cache->update($oldFolderEntry->getId(), ['size' => -1]);

	$this->scanner->scan('');

	// actually assert that the rescan picked up the new file; the bare
	// inCache() call discarded its result and verified nothing
	$this->assertTrue($this->cache->inCache('folder/new.txt'));

	$newFolderEntry = $this->cache->get('folder');
	// expected value first, actual value second
	$this->assertNotEquals($oldFolderEntry->getEtag(), $newFolderEntry->getEtag());
}

/**
 * When only a sub folder is marked as unscanned (size -1 in the cache) and its
 * mtime on storage is unchanged, a rescan must still assign a new etag to the
 * sub folder and to its parent folder.
 */
public function testNoETagUnscannedSubFolder() {
	$this->fillTestFolders();
	$this->storage->mkdir('folder/sub');

	$this->scanner->scan('');

	$oldFolderEntry1 = $this->cache->get('folder');
	$oldFolderEntry2 = $this->cache->get('folder/sub');
	// create a new file in the sub folder while keeping the sub folder mtime unchanged;
	// the mtime has to come from the sub folder's own cache entry, not from its parent
	$this->storage->file_put_contents('folder/sub/new.txt', 'foo');
	$this->storage->touch('folder/sub', $oldFolderEntry2->getMTime());

	// we only mark the direct parent as unscanned, which is the current "notify" behavior
	$this->cache->update($oldFolderEntry2->getId(), ['size' => -1]);

	$this->scanner->scan('');

	// the new file lives in folder/sub, and the result has to be asserted to mean anything
	$this->assertTrue($this->cache->inCache('folder/sub/new.txt'));

	// expected value first, actual value second
	$newFolderEntry1 = $this->cache->get('folder');
	$this->assertNotEquals($oldFolderEntry1->getEtag(), $newFolderEntry1->getEtag());
	$newFolderEntry2 = $this->cache->get('folder/sub');
	$this->assertNotEquals($oldFolderEntry2->getEtag(), $newFolderEntry2->getEtag());
}
}

0 comments on commit aff861f

Please sign in to comment.