Skip to content

Commit

Permalink
Merge pull request #1 from UCL/devel
Browse files Browse the repository at this point in the history
From Devel to prepare first release
  • Loading branch information
david-guzman authored Jul 28, 2022
2 parents 82b95ee + 60cd203 commit 449baff
Show file tree
Hide file tree
Showing 32 changed files with 48,927 additions and 49 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -131,4 +131,4 @@ iOSInjectionProject/
**/xcshareddata/WorkspaceSettings.xcsettings

# End of https://www.toptal.com/developers/gitignore/api/macos,xcode,swift

TakeoutFilter/Resources
57 changes: 56 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,56 @@
# takeout-app-macos
# Takeout Filter

macOS desktop application for processing Google Takeout export files.

![GitHub release](https://img.shields.io/github/release/UCL/takeout-app-macos.svg)

## Installation

Download the application and move it to the `/Applications` folder. It runs on macOS Monterey 12.4.

## Usage

The application extracts and filters search activity data from Google Takeout exports. All exports must be present in a single folder.

### Input files

The application accepts Takeout export files in ZIP format, where the MyActivity report is in either JSON or HTML format.

#### Takeout files

All Takeout exports **must** be ZIP files in the format `[ID].zip`, where ID is an integer.

#### Catalogue file

The catalogue is a CSV file containing, for each ID, the date of presentation and the names to be filtered out of the Takeout export.
The CSV file **must** have the following format:

```
ID,DateOfPresentation,NamesToFilter
1001,2022-07-28,Forename Surname
1002,2022-07-29,Forename Surname
```

The CSV file **must** include a header in its first line, and the date of presentation **must** be in the format `YYYY-MM-DD`.

### Output files

The application generates two CSV files per ID. All output files are saved in the `TakeoutFilter` directory created in the output folder; this directory can then be zipped and shared. The two files are:

- Aggregates: Named `[id]-aggregates.csv`, it contains the date of the first query, and the total number of queries before the filtering.

- Queries: Named `[id]-queries.csv`, it contains the list of health-related queries remaining after the filtering process, together with their timestamps.


## Reporting bugs

Please use the GitHub issue tracker for any bugs or feature suggestions:

[https://github.com/UCL/takeout-app-macos/issues](https://github.com/UCL/takeout-app-macos/issues)


## Authors

- David Guzman (GitHub: [@david-guzman](https://github.com/david-guzman))

The exception is `porterstemmer_ansi_thread_safe.c` (The Porter Stemming Algorithm), which is by Martin Porter: [https://tartarus.org/martin/PorterStemmer/](https://tartarus.org/martin/PorterStemmer/)
174 changes: 169 additions & 5 deletions TakeoutFilter.xcodeproj/project.pbxproj

Large diffs are not rendered by default.

30 changes: 30 additions & 0 deletions TakeoutFilter/Data/DataAccess.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
//
// DataAccess.swift
// TakeoutFilter
//
// Created by David Guzman on 09/07/2022.
//

import Foundation

/// Thin facade over `DataSource` that exposes the term look-ups as simple
/// boolean questions.
class DataAccess {

    /// Connection used for every look-up performed by this instance.
    private let dataSource: DataSource

    /// Creates the facade by opening the database.
    ///
    /// The initializer fails (yields `nil`) when `DataSource.openDatabase()`
    /// throws; the underlying error is discarded.
    init?() {
        guard let source = try? DataSource.openDatabase() else {
            return nil
        }
        dataSource = source
    }

    /// Forwards `term` to `DataSource.selectTrueWhereTerm(term:)`.
    ///
    /// - Parameter term: The term to look up.
    /// - Returns: The boolean reported by the data source.
    /// - Throws: Whatever the data source throws.
    func hasTerm(_ term: String) throws -> Bool {
        let found = try dataSource.selectTrueWhereTerm(term: term)
        return found
    }

    /// Forwards `stem` to `DataSource.selectTrueWhereStem(stem:)`.
    ///
    /// - Parameter stem: The stem to look up.
    /// - Returns: The boolean reported by the data source.
    /// - Throws: Whatever the data source throws.
    func hasTermStemmed(_ stem: String) throws -> Bool {
        let found = try dataSource.selectTrueWhereStem(stem: stem)
        return found
    }

}
111 changes: 111 additions & 0 deletions TakeoutFilter/Data/DataSource.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
//
// DataSource.swift
// TakeoutFilter
//
// Created by David Guzman on 09/07/2022.
//

import Foundation
import SQLite3

/// Owns a SQLite connection to the database file shipped in the main bundle.
///
/// Instances are obtained through `openDatabase()`; the connection is closed
/// when the instance is deallocated.
class DataSource {

    /// Base name (without extension) of the bundled database resource.
    private static let databaseFile: String = "TakeoutFilter"

    /// Raw SQLite connection handle.
    private let databasePointer: OpaquePointer?

    /// Text of the error SQLite currently reports for this connection, or a
    /// fixed fallback when SQLite returns no message.
    fileprivate var errorMessage: String {
        guard let rawMessage = sqlite3_errmsg(databasePointer) else {
            return "No error message returned by data source"
        }
        return String(cString: rawMessage)
    }

    /// Wraps an already opened connection handle.
    private init(_ dbPointer: OpaquePointer?) {
        databasePointer = dbPointer
    }

    /// Locates the `.sqlite` resource in the main bundle and opens it.
    ///
    /// - Returns: A data source wrapping the opened connection.
    /// - Throws: `DataSourceError.Open` when the resource is missing from the
    ///   bundle or when SQLite fails to open it.
    static func openDatabase() throws -> DataSource {
        guard let location = Bundle.main.url(forResource: databaseFile, withExtension: "sqlite") else {
            throw DataSourceError.Open(message: "Cannot find database file")
        }

        var handle: OpaquePointer?
        if sqlite3_open(location.path, &handle) == SQLITE_OK {
            return DataSource(handle)
        }

        // Failure path: SQLite may still have allocated a handle, which has to
        // be released once the error text has been copied out of it.
        defer {
            if handle != nil {
                sqlite3_close(handle)
            }
        }
        guard let rawMessage = sqlite3_errmsg(handle) else {
            throw DataSourceError.Open(message: "Failed without an error message")
        }
        throw DataSourceError.Open(message: String(cString: rawMessage))
    }

    deinit {
        sqlite3_close(databasePointer)
    }
}

extension DataSource {

    /// Swift spelling of SQLite's `SQLITE_TRANSIENT` destructor sentinel.
    ///
    /// A Swift `String` passed to a C parameter is bridged to a temporary
    /// buffer that is only guaranteed to live for the duration of that call.
    /// Binding with this sentinel makes SQLite take its own copy of the text
    /// before `sqlite3_bind_text` returns, so `sqlite3_step` never reads the
    /// released temporary.
    private static let transientDestructor = unsafeBitCast(-1, to: sqlite3_destructor_type.self)

    /// Compiles `statement` against this connection.
    ///
    /// - Parameter statement: SQL text to compile.
    /// - Returns: The prepared statement handle; the caller must finalize it.
    /// - Throws: `DataSourceError.Prepare` carrying SQLite's error message.
    private func prepareStatement(statement: String) throws -> OpaquePointer? {
        var stmt: OpaquePointer?
        guard sqlite3_prepare_v2(databasePointer, statement, -1, &stmt, nil) == SQLITE_OK else {
            throw DataSourceError.Prepare(message: errorMessage)
        }
        return stmt
    }

    /// Runs a single-parameter `SELECT EXISTS (...)` query.
    ///
    /// - Parameters:
    ///   - sql: Query text containing exactly one `?` placeholder.
    ///   - value: Text bound to the placeholder.
    /// - Returns: `true` when the first result column equals 1; `false` when
    ///   it does not, or when no statement handle was produced.
    /// - Throws: `DataSourceError.Prepare`, `.Bind` or `.Step` depending on
    ///   the stage that failed.
    private func selectExists(sql: String, boundTo value: String) throws -> Bool {
        guard let queryStmt = try prepareStatement(statement: sql) else {
            return false
        }
        defer {
            sqlite3_finalize(queryStmt)
        }
        guard sqlite3_bind_text(queryStmt, 1, value, -1, DataSource.transientDestructor) == SQLITE_OK else {
            throw DataSourceError.Bind(message: "Failed to bind String to statement")
        }
        guard sqlite3_step(queryStmt) == SQLITE_ROW else {
            throw DataSourceError.Step(message: "Failed to run query and return row")
        }
        return sqlite3_column_int(queryStmt, 0) == 1
    }

    /// Checks whether `MEDICAL_TERMS` contains a row whose `TERM` equals `term`.
    ///
    /// - Parameter term: Value compared against the `TERM` column.
    /// - Returns: `true` when at least one matching row exists.
    /// - Throws: `DataSourceError` when preparing, binding or stepping fails.
    func selectTrueWhereTerm(term: String) throws -> Bool {
        return try selectExists(
            sql: "SELECT EXISTS (SELECT 1 FROM MEDICAL_TERMS WHERE TERM = ?);",
            boundTo: term
        )
    }

    /// Checks whether `MEDICAL_TERM_STEMS` contains a row whose `STEM` equals `stem`.
    ///
    /// - Parameter stem: Value compared against the `STEM` column.
    /// - Returns: `true` when at least one matching row exists.
    /// - Throws: `DataSourceError` when preparing, binding or stepping fails.
    func selectTrueWhereStem(stem: String) throws -> Bool {
        return try selectExists(
            sql: "SELECT EXISTS (SELECT 1 FROM MEDICAL_TERM_STEMS WHERE STEM = ?);",
            boundTo: stem
        )
    }

}

/// Failures raised by `DataSource`, one case per stage of running a query.
/// Every case carries a human-readable description of what went wrong.
enum DataSourceError: Error {
    /// The database file could not be found or opened.
    case Open(message: String)
    /// An SQL statement could not be compiled.
    case Prepare(message: String)
    /// A value could not be bound to a statement parameter.
    case Bind(message: String)
    /// Stepping the statement did not produce a result row.
    case Step(message: String)
}
98 changes: 98 additions & 0 deletions TakeoutFilter/Main/Catalogue.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
//
// Catalogue.swift
// TakeoutFilter
//
// Created by David Guzman on 27/06/2022.
//

import Foundation

/// Reader for the catalogue CSV file.
///
/// The file is expected to start with a header line followed by rows of the
/// form `ID,DateOfPresentation,NamesToFilter`, with the date written as
/// `YYYY-MM-DD`.
struct Catalogue {

    /// Location of the catalogue file on disk.
    private var catalogueUrl: URL

    /// Formatter used to parse the date column.
    private var iso8601DateFormatter: ISO8601DateFormatter = ISO8601DateFormatter()

    /// Creates a reader for the catalogue stored at `catalogue`.
    ///
    /// - Parameter catalogue: File URL of the CSV file.
    init(catalogue: URL) {
        self.catalogueUrl = catalogue
        // Configure formatter to not to expect time part. Expected string in YYYY-MM-DD format.
        self.iso8601DateFormatter.formatOptions = .withFullDate
    }

    /// Reads the file and returns the comma-separated fields of every
    /// non-empty line after the header.
    ///
    /// Lines are split on any newline character so that CRLF files do not
    /// leave a stray `\r` in the last column, and every field is trimmed of
    /// surrounding whitespace. A file without any non-empty line yields an
    /// empty result instead of trapping on an invalid range.
    ///
    /// - Throws: The error raised by `String(contentsOfFile:)`.
    private func dataRows() throws -> [[String]] {
        let content = try String(contentsOfFile: catalogueUrl.path)
        return content
            .components(separatedBy: .newlines)
            .filter { !$0.isEmpty }
            .dropFirst() // header line
            .map { row in
                row.components(separatedBy: ",")
                    .map { $0.trimmingCharacters(in: .whitespaces) }
            }
    }

    /// Parses every data row into an `Entry`.
    ///
    /// Rows whose ID is not a non-zero integer, or that have fewer than three
    /// columns, are skipped. Parsing is best-effort: if the file cannot be
    /// read, or a date cannot be parsed, the entries collected up to that
    /// point are returned.
    ///
    /// - Returns: The entries parsed from the catalogue, in file order.
    func entries() -> [Entry] {
        var parsed: [Entry] = []
        do {
            for fields in try dataRows() {
                guard let id = fields.first.flatMap({ Int($0) }), id != 0 else {
                    continue
                }
                // Without this check a short row would trap on fields[1] / fields[2].
                guard fields.count >= 3 else {
                    continue
                }
                guard let datePresentation = iso8601DateFormatter.date(from: fields[1]) else {
                    throw DateParsingError("Cannot parse date \(fields[1])")
                }
                parsed.append(Entry(id: id, dateOfPresentation: datePresentation, namesToFilter: fields[2]))
            }
        } catch {
            // Deliberate best-effort: fall through and return what was parsed so far.
        }
        return parsed
    }

    /// Extracts the ID column of every data row.
    ///
    /// Rows whose first column is not a non-zero integer are skipped. An
    /// unreadable file yields an empty array.
    ///
    /// - Returns: The IDs found in the catalogue, in file order.
    func extractIds() -> [Int] {
        guard let rows = try? dataRows() else {
            return []
        }
        return rows
            .compactMap { fields in fields.first.flatMap { Int($0) } }
            .filter { $0 != 0 }
    }

    /// One row of the catalogue.
    struct Entry: Equatable {

        /// Value of the `ID` column.
        private var id: Int

        /// Parsed value of the `DateOfPresentation` column.
        private var dateOfPresentation: Date

        /// Raw value of the `NamesToFilter` column.
        private var namesToFilter: String

        init(id: Int, dateOfPresentation: Date, namesToFilter: String) {
            self.id = id
            self.dateOfPresentation = dateOfPresentation
            self.namesToFilter = namesToFilter
        }

        /// - Returns: The entry's ID.
        func getId() -> Int {
            return id
        }

        /// - Returns: The entry's names-to-filter text.
        func getNamesToFilter() -> String {
            return namesToFilter
        }

        /// - Returns: The entry's date of presentation.
        func getDateOfPresentation() -> Date {
            return dateOfPresentation
        }
    }

    /// Raised internally by `entries()` when the date column cannot be parsed.
    struct DateParsingError: Error {

        /// Description of the value that failed to parse.
        private let message: String

        init(_ message: String) {
            self.message = message
        }
    }
}
69 changes: 69 additions & 0 deletions TakeoutFilter/Main/CsvWriter.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
//
// CsvWriter.swift
// TakeoutFilter
//
// Created by David Guzman on 07/07/2022.
//

import Foundation

/// Failures related to locating the CSV output destination.
enum CsvError: Error {
    /// The base output directory could not be resolved.
    case outputBaseDirNotFound

    /// A file was written before an output directory had been configured.
    case outputDirNotFound

    /// NOTE(review): not raised anywhere in the visible code; presumably
    /// reserved for a missing output file — confirm against callers.
    case outputFileNotFound
}

/// Writes the per-ID result files (`[id]-aggregates.csv` and
/// `[id]-queries.csv`) into a `TakeoutFilter` directory under a configured
/// base folder.
///
/// `configureOutput(_:)` must be called before any of the `write…` methods.
class CsvWriter {

    private let aggregatesHeader: String = "Total_Number_Of_Queries,First_Query_Date"
    private let aggregatesSuffix: String = "-aggregates.csv"
    private let queriesHeader: String = "Date,Query"
    private let queriesSuffix: String = "-queries.csv"
    private let newline: String = "\n"
    private let outputDir: String = "TakeoutFilter"
    /// Directory receiving the files; `nil` until `configureOutput(_:)` runs.
    private var outputDirUrl: URL?
    private let fileManager: FileManager = FileManager.default
    /// Characters that force a CSV field to be quoted.
    private let charactersRequiringQuotes: CharacterSet = CharacterSet(charactersIn: ",\"\r\n")

    /// File name of the aggregates file for `id`.
    private func getAggregatesFileName(_ id: Int) -> String {
        return "\(id)\(aggregatesSuffix)"
    }

    /// File name of the queries file for `id`.
    private func getQueriesFileName(_ id: Int) -> String {
        return "\(id)\(queriesSuffix)"
    }

    /// Quotes `field` when it contains a comma, double quote or line break,
    /// doubling any embedded double quotes. Fields without such characters
    /// are returned unchanged, so previously valid output stays identical.
    private func escapeField(_ field: String) -> String {
        guard field.rangeOfCharacter(from: charactersRequiringQuotes) != nil else {
            return field
        }
        let doubledQuotes = field.replacingOccurrences(of: "\"", with: "\"\"")
        return "\"\(doubledQuotes)\""
    }

    /// Writes `csvContent` as UTF-8 to `fileName` inside the output directory.
    ///
    /// - Throws: `CsvError.outputDirNotFound` when no output directory has
    ///   been configured, or the error raised by the file write.
    private func writeString(fileName: String, csvContent: String) throws {
        guard let output = outputDirUrl else {
            throw CsvError.outputDirNotFound
        }
        let csvUrl: URL = output.appendingPathComponent(fileName)
        try csvContent.write(to: csvUrl, atomically: true, encoding: .utf8)
    }

    /// Selects `url/TakeoutFilter` as the output directory and (re)creates it.
    ///
    /// An existing directory at that path is removed first, so earlier
    /// results stored there are discarded.
    ///
    /// - Parameter url: Base folder chosen for the output.
    /// - Throws: The `FileManager` error raised while removing or creating
    ///   the directory.
    func configureOutput(_ url: URL) throws {
        let output = url.appendingPathComponent(outputDir)
        outputDirUrl = output
        if fileManager.fileExists(atPath: output.path) {
            try fileManager.removeItem(at: output)
        }
        try fileManager.createDirectory(at: output, withIntermediateDirectories: true, attributes: nil)
    }

    /// Writes the aggregates file for `id`: a header line followed by one
    /// data line.
    ///
    /// - Parameters:
    ///   - id: Identifier used to build the file name.
    ///   - totalNumberQueries: Value of the `Total_Number_Of_Queries` column.
    ///   - firstQueryDate: Value of the `First_Query_Date` column, written
    ///     using the date's default string interpolation.
    /// - Throws: See `writeString(fileName:csvContent:)`.
    func writeAggregates(id: Int, totalNumberQueries: Int, firstQueryDate: Date) throws {
        let lines = [aggregatesHeader, "\(totalNumberQueries),\(firstQueryDate)"]
        let csvString = lines.map { $0 + newline }.joined()
        try writeString(fileName: getAggregatesFileName(id), csvContent: csvString)
    }

    /// Writes the queries file for `id`: a header line followed by one
    /// `Date,Query` line per query.
    ///
    /// Both columns are CSV-escaped, so query text containing commas, quotes
    /// or line breaks no longer breaks the row structure.
    ///
    /// - Parameters:
    ///   - id: Identifier used to build the file name.
    ///   - queries: Queries to write, in order.
    /// - Throws: See `writeString(fileName:csvContent:)`.
    func writeQueries(id: Int, queries: [Query]) throws {
        let rows: [String] = queries.map { q in
            let dateField = escapeField("\(q.date)")
            let queryField = escapeField("\(q.query)")
            return "\(dateField),\(queryField)"
        }
        let csvString = ([queriesHeader] + rows).map { $0 + newline }.joined()
        try writeString(fileName: getQueriesFileName(id), csvContent: csvString)
    }
}
Loading

0 comments on commit 449baff

Please sign in to comment.