Skip to content

Commit

Permalink
Merge pull request #121 from utopia-php/fix-resumable-upload-backport
Browse files Browse the repository at this point in the history
S3 resumable-upload retry fix backported to this branch.
  • Loading branch information
christyjacob4 authored Nov 28, 2024
2 parents 893ccf0 + 833d429 commit 0d9228f
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 10 deletions.
12 changes: 9 additions & 3 deletions src/Storage/Device/Local.php
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,14 @@ public function upload(string $source, string $path, int $chunk = 1, int $chunks
$tmp = \dirname($path).DIRECTORY_SEPARATOR.'tmp_'.\basename($path).DIRECTORY_SEPARATOR.\basename($path).'_chunks.log';

$this->createDirectory(\dirname($tmp));
if (! file_put_contents($tmp, "$chunk\n", FILE_APPEND)) {
throw new Exception('Can\'t write chunk log '.$tmp);

$chunkFilePath = dirname($tmp).DIRECTORY_SEPARATOR.pathinfo($path, PATHINFO_FILENAME).'.part.'.$chunk;

// skip writing chunk if the chunk was re-uploaded
if (! file_exists($chunkFilePath)) {
if (! file_put_contents($tmp, "$chunk\n", FILE_APPEND)) {
throw new Exception('Can\'t write chunk log '.$tmp);
}
}

$chunkLogs = file($tmp);
Expand All @@ -106,7 +112,7 @@ public function upload(string $source, string $path, int $chunk = 1, int $chunks

$chunksReceived = count(file($tmp));

if (! \rename($source, dirname($tmp).DIRECTORY_SEPARATOR.pathinfo($path, PATHINFO_FILENAME).'.part.'.$chunk)) {
if (! \rename($source, $chunkFilePath)) {
throw new Exception('Failed to write chunk '.$chunk);
}

Expand Down
14 changes: 9 additions & 5 deletions src/Storage/Device/S3.php
Original file line number Diff line number Diff line change
Expand Up @@ -315,11 +315,15 @@ public function uploadData(string $data, string $path, string $contentType, int
$metadata['uploadId'] = $uploadId;
}

$etag = $this->uploadPart($data, $path, $contentType, $chunk, $uploadId);
$metadata['parts'] ??= [];
$metadata['parts'][] = ['partNumber' => $chunk, 'etag' => $etag];
$metadata['chunks'] ??= 0;
$metadata['chunks']++;

$etag = $this->uploadPart($data, $path, $contentType, $chunk, $uploadId);
// skip incrementing if the chunk was re-uploaded
if (! array_key_exists($chunk, $metadata['parts'])) {
$metadata['chunks']++;
}
$metadata['parts'][$chunk] = $etag;
if ($metadata['chunks'] == $chunks) {
$this->completeMultipartUpload($path, $uploadId, $metadata['parts']);
}
Expand Down Expand Up @@ -430,8 +434,8 @@ protected function completeMultipartUpload(string $path, string $uploadId, array
$uri = $path !== '' ? '/'.\str_replace(['%2F', '%3F'], ['/', '?'], \rawurlencode($path)) : '/';

$body = '<CompleteMultipartUpload>';
foreach ($parts as $part) {
$body .= "<Part><ETag>{$part['etag']}</ETag><PartNumber>{$part['partNumber']}</PartNumber></Part>";
foreach ($parts as $key => $etag) {
$body .= "<Part><ETag>{$etag}</ETag><PartNumber>{$key}</PartNumber></Part>";
}
$body .= '</CompleteMultipartUpload>';

Expand Down
54 changes: 54 additions & 0 deletions tests/Storage/Device/LocalTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,60 @@ public function testPartUpload()
return $dest;
}

/**
 * Ensures that re-uploading already-received chunks (a client retry) does not
 * corrupt the assembled file or double-count chunks in the chunk log.
 *
 * First pass uploads only chunk 1 (simulating an interrupted upload), then a
 * second pass re-uploads every chunk starting from chunk 1 again; the final
 * file must still match the source byte-for-byte.
 *
 * @return string Path of the uploaded file.
 */
public function testPartUploadRetry()
{
    $source = __DIR__.'/../../resources/disk-a/large_file.mp4';
    $dest = $this->object->getPath('uploaded2.mp4');
    $totalSize = \filesize($source);

    // AWS S3 requires each part to be at least 5MB except for the last part;
    // the same chunk size is used here for parity with the S3 tests.
    $chunkSize = 5 * 1024 * 1024;
    $chunks = ceil($totalSize / $chunkSize);

    // Scratch file holding the current chunk's bytes between reads and uploads.
    $op = __DIR__.'/chunkx.part';

    // First pass: upload only the first chunk, then stop.
    $handle = fopen($source, 'rb');
    $this->assertNotFalse($handle, 'Unable to open fixture '.$source);
    $chunk = 1;
    $start = 0;
    while ($start < $totalSize) {
        $contents = fread($handle, $chunkSize);
        file_put_contents($op, $contents);
        $this->object->upload($op, $dest, $chunk, $chunks);
        $start += strlen($contents);
        $chunk++;
        if ($chunk === 2) {
            break; // simulate an interruption after chunk 1
        }
        fseek($handle, $start);
    }
    fclose($handle);

    // Retry pass: re-upload from the first chunk to make sure a duplicate
    // chunk re-upload completes without issue.
    $handle = fopen($source, 'rb');
    $this->assertNotFalse($handle, 'Unable to open fixture '.$source);
    $chunk = 1;
    $start = 0;
    while ($start < $totalSize) {
        $contents = fread($handle, $chunkSize);
        file_put_contents($op, $contents);
        $this->object->upload($op, $dest, $chunk, $chunks);
        $start += strlen($contents);
        $chunk++;
        fseek($handle, $start);
    }
    fclose($handle);

    // Local::upload() renames the scratch file into the chunk directory, so it
    // may no longer exist here — only remove it if the last rename left it behind.
    if (file_exists($op)) {
        unlink($op);
    }

    $this->assertEquals(\filesize($source), $this->object->getFileSize($dest));
    $this->assertEquals(\md5_file($source), $this->object->getFileHash($dest));

    return $dest;
}

public function testAbort()
{
$source = __DIR__.'/../../resources/disk-a/large_file.mp4';
Expand Down
72 changes: 70 additions & 2 deletions tests/Storage/S3Base.php
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,76 @@ public function testPartUpload()
$cc = fopen($op, 'wb');
fwrite($cc, $contents);
fclose($cc);
$etag = $this->object->upload($op, $dest, $chunk, $chunks, $metadata);
$parts[] = ['partNumber' => $chunk, 'etag' => $etag];
$this->object->upload($op, $dest, $chunk, $chunks, $metadata);
$start += strlen($contents);
$chunk++;
fseek($handle, $start);
}
@fclose($handle);
unlink($op);

$this->assertEquals(\filesize($source), $this->object->getFileSize($dest));

// S3 doesn't provide a method to get a proper MD5-hash of a file created using multipart upload
// https://stackoverflow.com/questions/8618218/amazon-s3-checksum
// More info on how AWS calculates ETag for multipart upload here
// https://savjee.be/2015/10/Verifying-Amazon-S3-multi-part-uploads-with-ETag-hash/
// TODO
// $this->assertEquals(\md5_file($source), $this->object->getFileHash($dest));
// $this->object->delete($dest);
return $dest;
}

public function testPartUploadRetry()
{
$source = __DIR__.'/../resources/disk-a/large_file.mp4';
$dest = $this->object->getPath('uploaded.mp4');
$totalSize = \filesize($source);
// AWS S3 requires each part to be at least 5MB except for last part
$chunkSize = 5 * 1024 * 1024;

$chunks = ceil($totalSize / $chunkSize);

$chunk = 1;
$start = 0;

$metadata = [
'parts' => [],
'chunks' => 0,
'uploadId' => null,
'content_type' => \mime_content_type($source),
];
$handle = @fopen($source, 'rb');
$op = __DIR__.'/chunk.part';
while ($start < $totalSize) {
$contents = fread($handle, $chunkSize);
$op = __DIR__.'/chunk.part';
$cc = fopen($op, 'wb');
fwrite($cc, $contents);
fclose($cc);
$this->object->upload($op, $dest, $chunk, $chunks, $metadata);
$start += strlen($contents);
$chunk++;
if ($chunk == 2) {
break;
}
fseek($handle, $start);
}
@fclose($handle);
unlink($op);

$chunk = 1;
$start = 0;
// retry from first to make sure duplicate chunk re-upload works without issue
$handle = @fopen($source, 'rb');
$op = __DIR__.'/chunk.part';
while ($start < $totalSize) {
$contents = fread($handle, $chunkSize);
$op = __DIR__.'/chunk.part';
$cc = fopen($op, 'wb');
fwrite($cc, $contents);
fclose($cc);
$this->object->upload($op, $dest, $chunk, $chunks, $metadata);
$start += strlen($contents);
$chunk++;
fseek($handle, $start);
Expand Down

0 comments on commit 0d9228f

Please sign in to comment.