-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from UCL/devel
From Devel to prepare first release
- Loading branch information
Showing
32 changed files
with
48,927 additions
and
49 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,56 @@ | ||
# takeout-app-macos | ||
# Takeout Filter | ||
|
||
macOS desktop application for processing Google Takeout export files. | ||
|
||
![GitHub release](https://img.shields.io/github/release/UCL/takeout-app-macos.svg) | ||
|
||
## Installation | ||
|
||
Download the application and move it to the `/Applications` folder. It runs on macOS Monterey 12.4. | ||
|
||
## Usage | ||
|
||
The application extracts and filters search activity data from Google Takeout exports. All exports must be present in a single folder. | ||
|
||
### Input files | ||
|
||
The application accepts Takeout export files in ZIP format, where the MyActivity report is in either JSON or HTML format. | ||
|
||
#### Takeout files | ||
|
||
All Takeout exports **must** be ZIP files in the format `[ID].zip`, where ID is an integer. | ||
|
||
#### Catalogue file | ||
|
||
The catalogue is a CSV file containing the dates of presentation and the names to be filtered out of the Takeout exports. | ||
The CSV file **must** have the following format: | ||
|
||
``` | ||
ID,DateOfPresentation,NamesToFilter | ||
1001,2022-07-28,Forename Surname | ||
1002,2022-07-29,Forename Surname | ||
``` | ||
|
||
The CSV file **must** include a header in its first line, and the date of presentation **must** be in the format `YYYY-MM-DD`. | ||
|
||
### Output files | ||
|
||
The application will generate two CSV files per ID. All output files will be saved in the `TakeoutFilter` directory created in the output folder. This folder can then be zipped and shared: | ||
|
||
- Aggregates: Named `[id]-aggregates.csv`, it contains the date of the first query, and the total number of queries before the filtering. | ||
|
||
- Queries: Named `[id]-queries.csv`, it contains the list of health-related queries after the filtering process, and their timestamps. | ||
|
||
|
||
## Reporting bugs | ||
|
||
Please use the GitHub issue tracker for any bugs or feature suggestions: | ||
|
||
[https://github.com/UCL/takeout-app-macos/issues](https://github.com/UCL/takeout-app-macos/issues) | ||
|
||
|
||
## Authors | ||
|
||
- David Guzman (Github: [@david-guzman](https://github.com/david-guzman)) | ||
|
||
Except `porterstemmer_ansi_thread_safe.c`, which is an implementation of the Porter Stemming Algorithm by Martin Porter: [https://tartarus.org/martin/PorterStemmer/](https://tartarus.org/martin/PorterStemmer/) |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// | ||
// DataAccess.swift | ||
// TakeoutFilter | ||
// | ||
// Created by David Guzman on 09/07/2022. | ||
// | ||
|
||
import Foundation | ||
|
||
/// Facade over `DataSource` that answers membership questions about terms
/// and stems stored in the bundled database.
class DataAccess {

    /// Open database connection used for every lookup.
    private let dataSource: DataSource

    /// Opens the database.
    ///
    /// The initialiser fails (returns `nil`) when `DataSource.openDatabase()`
    /// throws; the underlying error is not propagated.
    init?() {
        guard let source = try? DataSource.openDatabase() else {
            return nil
        }
        dataSource = source
    }

    /// Reports whether `term` is found by `DataSource.selectTrueWhereTerm(term:)`.
    ///
    /// - Parameter term: The term to look up.
    /// - Returns: The result of the data source lookup.
    /// - Throws: Whatever the data source lookup throws.
    func hasTerm(_ term: String) throws -> Bool {
        let found = try dataSource.selectTrueWhereTerm(term: term)
        return found
    }

    /// Reports whether `stem` is found by `DataSource.selectTrueWhereStem(stem:)`.
    ///
    /// - Parameter stem: The stem to look up.
    /// - Returns: The result of the data source lookup.
    /// - Throws: Whatever the data source lookup throws.
    func hasTermStemmed(_ stem: String) throws -> Bool {
        let found = try dataSource.selectTrueWhereStem(stem: stem)
        return found
    }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
// | ||
// DataSource.swift | ||
// TakeoutFilter | ||
// | ||
// Created by David Guzman on 09/07/2022. | ||
// | ||
|
||
import Foundation | ||
import SQLite3 | ||
|
||
/// Owner of the SQLite connection to the database shipped in the app bundle.
///
/// Instances are obtained through `openDatabase()`; the connection is closed
/// when the instance is deallocated.
class DataSource {

    /// Resource name (without extension) of the bundled database file.
    private static let databaseFile: String = "TakeoutFilter"

    /// Handle of the SQLite connection passed to the initialiser.
    private let databasePointer: OpaquePointer?

    /// Text of the error message SQLite currently reports for this
    /// connection, or a fixed fallback when SQLite returns no message.
    fileprivate var errorMessage: String {
        guard let rawMessage = sqlite3_errmsg(databasePointer) else {
            return "No error message returned by data source"
        }
        return String(cString: rawMessage)
    }

    /// Wraps an SQLite connection handle.
    private init(_ dbPointer: OpaquePointer?) {
        databasePointer = dbPointer
    }

    /// Locates `TakeoutFilter.sqlite` in the main bundle and opens it.
    ///
    /// - Returns: A data source wrapping the open connection.
    /// - Throws: `DataSourceError.Open` when the file is not in the bundle or
    ///   when SQLite fails to open it.
    static func openDatabase() throws -> DataSource {
        guard let location = Bundle.main.url(forResource: databaseFile, withExtension: "sqlite") else {
            throw DataSourceError.Open(message: "Cannot find database file")
        }

        var connection: OpaquePointer?
        if sqlite3_open(location.path, &connection) == SQLITE_OK {
            return DataSource(connection)
        }

        // SQLite may hand back a handle even when opening fails; release it
        // once the error message has been copied out.
        defer {
            if connection != nil {
                sqlite3_close(connection)
            }
        }
        guard let rawMessage = sqlite3_errmsg(connection) else {
            throw DataSourceError.Open(message: "Failed without an error message")
        }
        throw DataSourceError.Open(message: String(cString: rawMessage))
    }

    deinit {
        sqlite3_close(databasePointer)
    }
}
|
||
extension DataSource {

    /// Equivalent of the C macro `SQLITE_TRANSIENT`, which is not imported into Swift.
    ///
    /// Passing it as the destructor tells SQLite to make its own copy of the
    /// bound text during the bind call.
    private static let transientDestructor = unsafeBitCast(-1, to: sqlite3_destructor_type.self)

    /// Compiles `statement` against the open connection.
    ///
    /// - Parameter statement: SQL text to compile.
    /// - Returns: The prepared statement handle; the caller must finalize it.
    /// - Throws: `DataSourceError.Prepare` carrying SQLite's error message.
    private func prepareStatement(statement: String) throws -> OpaquePointer? {
        var stmt: OpaquePointer?
        guard sqlite3_prepare_v2(databasePointer, statement, -1, &stmt, nil) == SQLITE_OK else {
            throw DataSourceError.Prepare(message: errorMessage)
        }
        return stmt
    }

    /// Runs a single-parameter `SELECT EXISTS (...)` query and reports its result.
    ///
    /// - Parameters:
    ///   - sql: Query text containing exactly one `?` placeholder.
    ///   - value: Text bound to the placeholder.
    /// - Returns: `true` when the first column of the result row is 1;
    ///   `false` otherwise, or when no statement handle was produced.
    /// - Throws: `DataSourceError.Prepare`, `.Bind` or `.Step` for the stage that failed.
    private func selectExists(sql: String, value: String) throws -> Bool {
        guard let queryStmt = try prepareStatement(statement: sql) else {
            return false
        }
        defer {
            sqlite3_finalize(queryStmt)
        }
        // The C string Swift bridges from `value` is only valid for the
        // duration of the bind call, so SQLite must copy it (transient
        // destructor). A nil (static) destructor would leave SQLite holding
        // a dangling pointer by the time the statement is stepped.
        guard sqlite3_bind_text(queryStmt, 1, value, -1, DataSource.transientDestructor) == SQLITE_OK else {
            throw DataSourceError.Bind(message: "Failed to bind String to statement")
        }
        guard sqlite3_step(queryStmt) == SQLITE_ROW else {
            throw DataSourceError.Step(message: "Failed to run query and return row")
        }
        return sqlite3_column_int(queryStmt, 0) == 1
    }

    /// Reports whether `MEDICAL_TERMS` contains a row whose `TERM` equals `term`.
    ///
    /// - Throws: `DataSourceError` when preparing, binding or stepping fails.
    func selectTrueWhereTerm(term: String) throws -> Bool {
        return try selectExists(
            sql: "SELECT EXISTS (SELECT 1 FROM MEDICAL_TERMS WHERE TERM = ?);",
            value: term
        )
    }

    /// Reports whether `MEDICAL_TERM_STEMS` contains a row whose `STEM` equals `stem`.
    ///
    /// - Throws: `DataSourceError` when preparing, binding or stepping fails.
    func selectTrueWhereStem(stem: String) throws -> Bool {
        return try selectExists(
            sql: "SELECT EXISTS (SELECT 1 FROM MEDICAL_TERM_STEMS WHERE STEM = ?);",
            value: stem
        )
    }

}
|
||
/// Failures reported by `DataSource`, one case per SQLite stage.
/// Every case carries a human-readable description of the failure.
enum DataSourceError: Error {
    /// The database file could not be located or opened.
    case Open(message: String)

    /// The SQL text could not be compiled into a statement.
    case Prepare(message: String)

    /// Executing the statement did not yield a result row.
    case Step(message: String)

    /// A value could not be bound to a statement parameter.
    case Bind(message: String)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
// | ||
// Catalogue.swift | ||
// TakeoutFilter | ||
// | ||
// Created by David Guzman on 27/06/2022. | ||
// | ||
|
||
import Foundation | ||
|
||
/// Reader for the catalogue CSV file.
///
/// The file is expected to start with a header line followed by rows of the
/// form `ID,DateOfPresentation,NamesToFilter`, with the date written as
/// `YYYY-MM-DD`.
struct Catalogue {

    /// Location of the catalogue CSV file.
    private var catalogueUrl: URL

    /// Formatter used to parse the date-of-presentation column.
    private var iso8601DateFormatter: ISO8601DateFormatter = ISO8601DateFormatter()

    /// Creates a reader for the catalogue stored at `catalogue`.
    init(catalogue: URL) {
        self.catalogueUrl = catalogue
        // Configure formatter to not to expect time part. Expected string in YYYY-MM-DD format.
        self.iso8601DateFormatter.formatOptions = .withFullDate
    }

    /// Reads the file and returns the comma-separated fields of every data row.
    ///
    /// Blank lines are ignored and the first remaining line is treated as the
    /// header and dropped. Lines are split on any newline character, so files
    /// with `\r\n` endings do not leave a stray `\r` in the last field.
    /// An unreadable, empty or header-only file yields an empty array.
    private func dataRows() -> [[String]] {
        guard let content = try? String(contentsOfFile: catalogueUrl.path) else {
            return []
        }
        return content
            .split(whereSeparator: { $0.isNewline })
            .dropFirst()
            .map { String($0).components(separatedBy: ",") }
    }

    /// Returns the integer ID held in the first field of `fields`, or `nil`
    /// when the field is missing, not an integer, or zero.
    private func validId(in fields: [String]) -> Int? {
        guard let first = fields.first, let id = Int(first), id != 0 else {
            return nil
        }
        return id
    }

    /// Builds an entry from one data row.
    ///
    /// - Returns: `nil` for rows that have no valid ID or fewer than three fields.
    /// - Throws: `DateParsingError` when the date field cannot be parsed.
    private func makeEntry(from fields: [String]) throws -> Entry? {
        guard let id = validId(in: fields), fields.count >= 3 else {
            return nil
        }
        guard let datePresentation = iso8601DateFormatter.date(from: fields[1]) else {
            throw DateParsingError("Cannot parse date \(fields[1])")
        }
        return Entry(id: id, dateOfPresentation: datePresentation, namesToFilter: fields[2])
    }

    /// Parses the catalogue into entries.
    ///
    /// Rows without a valid non-zero integer ID, or with fewer than three
    /// fields, are skipped. Parsing stops at the first row whose date cannot
    /// be parsed; the entries collected up to that point are returned.
    ///
    /// - Returns: The parsed entries, or an empty array when the file cannot be read.
    func entries() -> [Entry] {
        var entries: [Entry] = []
        do {
            for fields in dataRows() {
                if let entry = try makeEntry(from: fields) {
                    entries.append(entry)
                }
            }
        } catch {
            // Best effort: keep the entries parsed before the failing row.
        }
        return entries
    }

    /// Returns the non-zero integer IDs found in the first column of the data rows.
    ///
    /// - Returns: The IDs in file order, or an empty array when the file cannot be read.
    func extractIds() -> [Int] {
        return dataRows().compactMap { validId(in: $0) }
    }

    /// One row of the catalogue.
    struct Entry: Equatable {

        private var id: Int

        private var dateOfPresentation: Date

        private var namesToFilter: String

        init(id: Int, dateOfPresentation: Date, namesToFilter: String) {
            self.id = id
            self.dateOfPresentation = dateOfPresentation
            self.namesToFilter = namesToFilter
        }

        /// The integer ID from the first column.
        func getId() -> Int {
            return id
        }

        /// The raw text of the names column.
        func getNamesToFilter() -> String {
            return namesToFilter
        }

        /// The parsed date of presentation.
        func getDateOfPresentation() -> Date {
            return dateOfPresentation
        }
    }

    /// Error describing a date field that could not be parsed.
    struct DateParsingError: Error {

        private let message: String

        init(_ message: String) {
            self.message = message
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
// | ||
// CsvWriter.swift | ||
// TakeoutFilter | ||
// | ||
// Created by David Guzman on 07/07/2022. | ||
// | ||
|
||
import Foundation | ||
|
||
/// Failures related to locating the CSV output destination.
enum CsvError: Error {
    /// The base output folder could not be resolved.
    case outputBaseDirNotFound

    /// No output directory has been configured.
    case outputDirNotFound

    /// The output file could not be found.
    case outputFileNotFound
}
|
||
/// Writes the per-ID aggregates and queries CSV files into a `TakeoutFilter`
/// directory created under a caller-supplied folder.
///
/// `configureOutput(_:)` must be called before any of the `write…` methods.
class CsvWriter {

    private let aggregatesHeader: String = "Total_Number_Of_Queries,First_Query_Date"
    private let aggregatesSuffix: String = "-aggregates.csv"
    private let queriesHeader: String = "Date,Query"
    private let queriesSuffix: String = "-queries.csv"
    private let newline: String = "\n"
    private let outputDir: String = "TakeoutFilter"
    /// Directory the CSV files are written to; `nil` until `configureOutput(_:)` runs.
    private var outputDirUrl: URL?
    private let fileManager: FileManager = FileManager.default

    /// File name of the aggregates CSV for `id`, e.g. `1001-aggregates.csv`.
    private func getAggregatesFileName(_ id: Int) -> String {
        return "\(id)\(aggregatesSuffix)"
    }

    /// File name of the queries CSV for `id`, e.g. `1001-queries.csv`.
    private func getQueriesFileName(_ id: Int) -> String {
        return "\(id)\(queriesSuffix)"
    }

    /// Quotes `field` when it contains a comma, a double quote or a line
    /// break, doubling embedded quotes, so the value stays within a single
    /// CSV column. Other values are returned unchanged.
    private func escapeCsvField(_ field: String) -> String {
        let needsQuoting = field.contains(where: { $0 == "," || $0 == "\"" || $0.isNewline })
        guard needsQuoting else {
            return field
        }
        let doubledQuotes = field.replacingOccurrences(of: "\"", with: "\"\"")
        return "\"" + doubledQuotes + "\""
    }

    /// Writes `csvContent` as UTF-8 to `fileName` inside the output directory.
    ///
    /// - Throws: `CsvError.outputDirNotFound` when no output directory has
    ///   been configured, or the error raised by the file write.
    private func writeString(fileName: String, csvContent: String) throws {
        guard let output = outputDirUrl else {
            throw CsvError.outputDirNotFound
        }
        let csvUrl: URL = output.appendingPathComponent(fileName)
        try csvContent.write(to: csvUrl, atomically: true, encoding: .utf8)
    }

    /// Selects `url/TakeoutFilter` as the output directory and creates it empty.
    ///
    /// An existing item at that path is removed first, so output from a
    /// previous run is discarded.
    ///
    /// - Parameter url: Folder under which the `TakeoutFilter` directory is created.
    /// - Throws: The error raised by `FileManager` when removing or creating the directory.
    func configureOutput(_ url: URL) throws {
        let output = url.appendingPathComponent(outputDir)
        outputDirUrl = output
        if fileManager.fileExists(atPath: output.path) {
            try fileManager.removeItem(at: output)
        }
        try fileManager.createDirectory(at: output, withIntermediateDirectories: true, attributes: nil)
    }

    /// Writes `[id]-aggregates.csv` containing the header and a single data row.
    ///
    /// - Parameters:
    ///   - id: ID used to name the file.
    ///   - totalNumberQueries: Value of the `Total_Number_Of_Queries` column.
    ///   - firstQueryDate: Value of the `First_Query_Date` column, written
    ///     using the date's default string interpolation.
    /// - Throws: `CsvError.outputDirNotFound` or the error raised by the file write.
    func writeAggregates(id: Int, totalNumberQueries: Int, firstQueryDate: Date) throws {
        let csvString = aggregatesHeader + newline
            + "\(totalNumberQueries),\(firstQueryDate)" + newline
        try writeString(fileName: getAggregatesFileName(id), csvContent: csvString)
    }

    /// Writes `[id]-queries.csv` containing the header and one row per query.
    ///
    /// The query text is CSV-escaped so that commas, quotes or line breaks
    /// inside a search query do not spill into extra columns or rows.
    ///
    /// - Parameters:
    ///   - id: ID used to name the file.
    ///   - queries: Queries to write, in order.
    /// - Throws: `CsvError.outputDirNotFound` or the error raised by the file write.
    func writeQueries(id: Int, queries: [Query]) throws {
        var csvString = queriesHeader + newline
        for q in queries {
            let dateField = "\(q.date)"
            let queryField = escapeCsvField("\(q.query)")
            csvString += dateField + "," + queryField + newline
        }
        try writeString(fileName: getQueriesFileName(id), csvContent: csvString)
    }
}
Oops, something went wrong.