Skip to content

Commit

Permalink
Split into two ranges to read messages if offsetInFileOfOldestMessage…
Browse files Browse the repository at this point in the history
… > offsetInFileAtEndOfNewestMessage to avoid possible loop (#72)

* Revert "Protect against crashes during header write (#66)"

This reverts commit 69881ef.

* Split two ranges to read messages if offsetInFileOfOldestMessage > offsetInFileAtEndOfNewestMessage to avoid possible loop

* bump version

* keep tests

* add comment

* add parameter comments

* support all the way back to Xcode 11

* Apply suggestions from code review

Co-authored-by: Dan Federman <dfed@me.com>

* Hanle cache file as empty when offsetInFileOfOldestMessage == offsetInFileAtEndOfNewestMessage

* set func nextEncodedMessage private

* handle offsetInFile > endOffset in func encodedMessagesFromOffset

Co-authored-by: Dan Federman <dfed@me.com>
  • Loading branch information
jianjunwoo and dfed authored Nov 10, 2022
1 parent 3a1ed6f commit 26a1c97
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 62 deletions.
2 changes: 1 addition & 1 deletion CacheAdvance.podspec
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Pod::Spec.new do |s|
s.name = 'CacheAdvance'
s.version = '1.2.4'
s.version = '1.2.5'
s.license = 'Apache License, Version 2.0'
s.summary = 'A performant cache for logging systems. CacheAdvance persists log events 30x faster than SQLite.'
s.homepage = 'https://github.com/dfed/CacheAdvance'
Expand Down
35 changes: 29 additions & 6 deletions Sources/CacheAdvance/CacheAdvance.swift
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ public final class CacheAdvance<T: Codable> {
self.init(
fileURL: fileURL,
writer: try FileHandle(forWritingTo: fileURL),
reader: try CacheReader(
forReadingFrom: fileURL,
maximumBytes: maximumBytes),
reader: try CacheReader(forReadingFrom: fileURL),
header: try CacheHeaderHandle(
forReadingFrom: fileURL,
maximumBytes: maximumBytes,
Expand Down Expand Up @@ -172,10 +170,35 @@ public final class CacheAdvance<T: Codable> {
try header.checkFile()

var messages = [T]()
while let encodedMessage = try reader.nextEncodedMessage() {
messages.append(try decoder.decode(T.self, from: encodedMessage))
}
if reader.offsetInFileOfOldestMessage < reader.offsetInFileAtEndOfNewestMessage {
// There is only one range: | `offsetInFileOfOldestMessage` -> `offsetInFileAtEndOfNewestMessage` |
let encodedMessages = try reader.encodedMessagesFromOffset(
reader.offsetInFileOfOldestMessage,
endOffset: reader.offsetInFileAtEndOfNewestMessage)
for encodedMessage in encodedMessages {
messages.append(try decoder.decode(T.self, from: encodedMessage))
}
} else if reader.offsetInFileOfOldestMessage == reader.offsetInFileAtEndOfNewestMessage {
// This is an empty cache.
return []
} else {
// In this case, the messages could be split to two ranges
// | First Range | (GAP: ignore) | Second Range |

// This is second range: | `offsetInFileOfOldestMessage` -> EOF |
let olderMessages = try reader.encodedMessagesFromOffset(reader.offsetInFileOfOldestMessage)
for encodedMessage in olderMessages {
messages.append(try decoder.decode(T.self, from: encodedMessage))
}

// This is first range: | `expectedEndOfHeaderInFile` -> `offsetInFileAtEndOfNewestMessage` |
let newerMessages = try reader.encodedMessagesFromOffset(
FileHeader.expectedEndOfHeaderInFile,
endOffset: reader.offsetInFileAtEndOfNewestMessage)
for encodedMessage in newerMessages {
messages.append(try decoder.decode(T.self, from: encodedMessage))
}
}
// Now that we've read all messages, seek back to the oldest message.
try reader.seekToBeginningOfOldestMessage()

Expand Down
88 changes: 40 additions & 48 deletions Sources/CacheAdvance/CacheReader.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,9 @@ final class CacheReader {

/// Creates a new instance of the receiver.
///
/// - Parameters:
/// - file: The file URL indicating the desired location of the on-disk store. This file should already exist.
/// - maximumBytes: The maximum size of the cache, in bytes.
init(forReadingFrom file: URL, maximumBytes: Bytes) throws {
/// - Parameter file: The file URL indicating the desired location of the on-disk store. This file should already exist.
init(forReadingFrom file: URL) throws {
reader = try FileHandle(forReadingFrom: file)
self.maximumBytes = maximumBytes
}

deinit {
Expand All @@ -44,51 +41,27 @@ final class CacheReader {
reader.offsetInFile
}

/// Returns the next encodable message, seeking to the beginning of the next message.
func nextEncodedMessage() throws -> Data? {
let startingOffset = offsetInFile

guard startingOffset != offsetInFileAtEndOfNewestMessage else {
// We're at the last message.
return nil
}

switch try nextEncodedMessageSpan() {
case let .span(messageLength):
// Check our assumptions before we try to read the message.
let endOfMessage = startingOffset + UInt64(MessageSpan.storageLength) + UInt64(messageLength)
let startingOffsetIsBeforeEndOfNewestMessageAndDoesNotExceedEndOfNewestMessage = startingOffset < offsetInFileAtEndOfNewestMessage && endOfMessage <= offsetInFileAtEndOfNewestMessage
let startingOffsetIsAfterEndOfNewestMessageAndDoesNotExceedEndOfFile = offsetInFileAtEndOfNewestMessage < startingOffset && endOfMessage <= maximumBytes
guard
startingOffsetIsBeforeEndOfNewestMessageAndDoesNotExceedEndOfNewestMessage
|| startingOffsetIsAfterEndOfNewestMessageAndDoesNotExceedEndOfFile
else {
// The offsetInFileAtEndOfNewestMessage is incorrect. This likely occured due to a crash when writing our header file.
throw CacheAdvanceError.fileCorrupted
}

let message = try reader.readDataUp(toLength: Int(messageLength))
guard message.count > 0 else {
throw CacheAdvanceError.fileCorrupted
}

return message

case .emptyRead:
guard offsetInFileAtEndOfNewestMessage < startingOffset else {
// We started reading before the offset of the end of the newest message, therefore we expected a message to be read. We instead read an empty space, meaning that the file is corrupt.
throw CacheAdvanceError.fileCorrupted
/// Returns the encodable messages in a range
///
/// - Parameter startOffset: the offset from which to start reading
/// - Parameter endOffset: the offset at which to stop reading. If `nil`, the end offset will be the EOF
func encodedMessagesFromOffset(_ startOffset: UInt64, endOffset: UInt64? = nil) throws -> [Data] {
var encodedMessages = [Data]()
try reader.seek(to: startOffset)
while let data = try nextEncodedMessage() {
encodedMessages.append(data)
if let endOffset = endOffset {
if offsetInFile == endOffset {
break
} else if offsetInFile > endOffset {
throw CacheAdvanceError.fileCorrupted
}
}

// We know the next message is at the end of the file header. Let's seek to it.
try reader.seek(to: FileHeader.expectedEndOfHeaderInFile)

// We know there's a message to read now that we're at the start of the file.
return try nextEncodedMessage()

case .invalidFormat:
}
if let endOffset = endOffset, offsetInFile != endOffset {
throw CacheAdvanceError.fileCorrupted
}
return encodedMessages
}

/// Seeks to the beginning of the oldest message in the file.
Expand All @@ -114,6 +87,26 @@ final class CacheReader {

// MARK: Private

/// Returns the next encodable message, seeking to the beginning of the next message.
private func nextEncodedMessage() throws -> Data? {
switch try nextEncodedMessageSpan() {
case let .span(messageLength):
let message = try reader.readDataUp(toLength: Int(messageLength))
guard message.count > 0 else {
throw CacheAdvanceError.fileCorrupted
}

return message

case .emptyRead:
// An empty read means we hit the EOF. It is the responsibility of the calling code to validate this assumption.
return nil

case .invalidFormat:
throw CacheAdvanceError.fileCorrupted
}
}

/// Returns the next encoded message span, seeking to the end the span.
private func nextEncodedMessageSpan() throws -> NextMessageSpan {
let messageSizeData = try reader.readDataUp(toLength: MessageSpan.storageLength)
Expand All @@ -132,7 +125,6 @@ final class CacheReader {
}

private let reader: FileHandle
private let maximumBytes: Bytes

}

Expand Down
11 changes: 4 additions & 7 deletions Tests/CacheAdvanceTests/CacheAdvanceTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS"BASIS,
Expand Down Expand Up @@ -110,8 +110,7 @@ final class CacheAdvanceTests: XCTestCase {
fileURL: testFileLocation,
writer: try FileHandle(forWritingTo: testFileLocation),
reader: try CacheReader(
forReadingFrom: testFileLocation,
maximumBytes: maximumBytes),
forReadingFrom: testFileLocation),
header: header,
decoder: JSONDecoder(),
encoder: JSONEncoder())
Expand Down Expand Up @@ -147,8 +146,7 @@ final class CacheAdvanceTests: XCTestCase {
fileURL: testFileLocation,
writer: try FileHandle(forWritingTo: testFileLocation),
reader: try CacheReader(
forReadingFrom: testFileLocation,
maximumBytes: maximumBytes),
forReadingFrom: testFileLocation),
header: header,
decoder: JSONDecoder(),
encoder: JSONEncoder())
Expand Down Expand Up @@ -184,8 +182,7 @@ final class CacheAdvanceTests: XCTestCase {
fileURL: testFileLocation,
writer: try FileHandle(forWritingTo: testFileLocation),
reader: try CacheReader(
forReadingFrom: testFileLocation,
maximumBytes: maximumBytes),
forReadingFrom: testFileLocation),
header: header,
decoder: JSONDecoder(),
encoder: JSONEncoder())
Expand Down

0 comments on commit 26a1c97

Please sign in to comment.