From e57e3c98ef7fca5d44a935788bf01706dbc6e62a Mon Sep 17 00:00:00 2001
From: Brent Simmons
Date: Sat, 30 Nov 2024 22:01:58 -0800
Subject: [PATCH] Use CacheControlInfo to pay attention to the Cache-Control http response header and drop requests that are made too soon. We need to be nice to servers.

---
 RSWeb/Sources/RSWeb/CacheControlInfo.swift | 77 ++++++++++++++++++++++
 RSWeb/Sources/RSWeb/DownloadSession.swift  | 35 +++++++++-
 2 files changed, 109 insertions(+), 3 deletions(-)
 create mode 100644 RSWeb/Sources/RSWeb/CacheControlInfo.swift

diff --git a/RSWeb/Sources/RSWeb/CacheControlInfo.swift b/RSWeb/Sources/RSWeb/CacheControlInfo.swift
new file mode 100644
index 000000000..c1da4f1ef
--- /dev/null
+++ b/RSWeb/Sources/RSWeb/CacheControlInfo.swift
@@ -0,0 +1,77 @@
+//
+//  CacheControlInfo.swift
+//  RSWeb
+//
+//  Created by Brent Simmons on 11/30/24.
+//
+
+import Foundation
+
+/// Basic Cache-Control handling — just the part we need,
+/// which is to know when we got the response (dateCreated)
+/// and when we can ask again (dateExpired).
+public struct CacheControlInfo: Codable, Equatable {
+
+	/// When this value was created, which stands in for when the response arrived.
+	let dateCreated: Date
+
+	/// The max-age directive’s value, in seconds.
+	let maxAge: TimeInterval
+
+	/// The earliest date at which it’s okay to ask the server again.
+	var dateExpired: Date {
+		dateCreated + maxAge
+	}
+
+	/// Returns nil if the Cache-Control header value can’t be read from the response
+	/// or if it has no usable max-age.
+	public init?(urlResponse: HTTPURLResponse) {
+		guard let cacheControlValue = urlResponse.valueForHTTPHeaderField(HTTPResponseHeader.cacheControl) else {
+			return nil
+		}
+		self.init(value: cacheControlValue)
+	}
+
+	/// Returns nil if there’s no max-age or it isn’t a finite number greater than 0.
+	public init?(value: String) {
+
+		guard let maxAge = Self.parseMaxAge(value) else {
+			return nil
+		}
+
+		self.dateCreated = Date()
+		self.maxAge = maxAge
+	}
+}
+
+private extension CacheControlInfo {
+
+	static let maxAgePrefix = "max-age="
+	static let maxAgePrefixCount = maxAgePrefix.count
+
+	/// Returns the first usable max-age, in seconds, from a Cache-Control header value.
+	///
+	/// Directive names are matched case-insensitively (RFC 9111), so `Max-Age=60` works.
+	/// Values that aren’t finite and greater than 0 are skipped: `max-age=inf` would
+	/// otherwise parse as infinity, dateExpired would never arrive, and requests
+	/// for the URL would be dropped for as long as the entry is kept.
+	static func parseMaxAge(_ s: String) -> TimeInterval? {
+
+		let components = s.components(separatedBy: ",")
+
+		for component in components {
+			// Lowercase the whole directive; the numeric value parses the same either way.
+			let directive = component.trimmingCharacters(in: .whitespaces).lowercased()
+			guard directive.hasPrefix(maxAgePrefix) else {
+				continue
+			}
+
+			let maxAgeStringValue = directive.dropFirst(maxAgePrefixCount)
+			if let timeInterval = TimeInterval(maxAgeStringValue), timeInterval.isFinite, timeInterval > 0 {
+				return timeInterval
+			}
+		}
+
+		return nil
+	}
+}
diff --git a/RSWeb/Sources/RSWeb/DownloadSession.swift b/RSWeb/Sources/RSWeb/DownloadSession.swift
index 311d744f8..7224181c9 100755
--- a/RSWeb/Sources/RSWeb/DownloadSession.swift
+++ b/RSWeb/Sources/RSWeb/DownloadSession.swift
@@ -7,6 +7,7 @@
 //
 
 import Foundation
+import os
 
 // Create a DownloadSessionDelegate, then create a DownloadSession.
 // To download things: call download with a set of URLs. DownloadSession will call the various delegate methods.
@@ -31,6 +32,7 @@ public protocol DownloadSessionDelegate {
 	private let delegate: DownloadSessionDelegate
 	private var redirectCache = [URL: URL]()
 	private var queue = [URL]()
+	private var cacheControlResponses = [URL: CacheControlInfo]()
 
 	// 429 Too Many Requests responses
 	private var retryAfterMessages = [String: HTTPResponse429]()
@@ -128,9 +130,10 @@ extension DownloadSession: URLSessionDataDelegate {
 
 		tasksInProgress.insert(dataTask)
 		tasksPending.remove(dataTask)
-		
-		if let info = infoForTask(dataTask) {
-			info.urlResponse = response
+
+		let taskInfo = infoForTask(dataTask)
+		if let taskInfo {
+			taskInfo.urlResponse = response
 		}
 
 		if !response.statusIsOK {
@@ -149,6 +152,15 @@ extension DownloadSession: URLSessionDataDelegate {
 			return
 		}
 
+		if let httpURLResponse = response as? HTTPURLResponse, let cacheControlInfo = CacheControlInfo(urlResponse: httpURLResponse) {
+			if let url = taskInfo?.url {
+				cacheControlResponses[url] = cacheControlInfo
+				if let actualURL = response.url, actualURL != url {
+					cacheControlResponses[actualURL] = cacheControlInfo
+				}
+			}
+		}
+
 		addDataTaskFromQueueIfNecessary()
 		completionHandler(.allow)
 	}
@@ -182,9 +194,15 @@ private extension DownloadSession {
 		let urlToUse = cachedRedirect(for: url) ?? url
 
 		if requestShouldBeDroppedDueToActive429(urlToUse) {
+			os_log(.debug, "Dropping request for previous 429: \(urlToUse)")
 			return
 		}
 		if requestShouldBeDroppedDueToPrevious400(urlToUse) {
+			os_log(.debug, "Dropping request for previous 400-499: \(urlToUse)")
+			return
+		}
+		if requestShouldBeDroppedDueToCacheControl(urlToUse) {
+			os_log(.debug, "Dropping request for Cache-Control reasons: \(urlToUse)")
 			return
 		}
 
@@ -380,6 +398,17 @@ private extension DownloadSession {
 
 		return false
 	}
+
+	// MARK: - Cache-Control responses
+
+	func requestShouldBeDroppedDueToCacheControl(_ url: URL) -> Bool {
+
+		guard let cacheControlInfo = cacheControlResponses[url] else {
+			return false
+		}
+
+		return cacheControlInfo.dateExpired > Date()
+	}
 }
 
 extension URLSessionTask {