Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
tomikat committed Dec 17, 2024
1 parent 79000ec commit 4108435
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 17 deletions.
59 changes: 59 additions & 0 deletions dev-resources/sql/anonymizer-application-queries.sql
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,62 @@ WHERE group_answer_values.application_id = :application_id AND

-- name: sql-application-secret-ids
-- Returns the id column of every row in application_secrets.
SELECT secrets.id
FROM application_secrets AS secrets;

-- name: sql-update-multi-by-key!
-- Overwrites value with the val parameter on every multi_answer_values row
-- whose key equals the key parameter. There is no application filter, so the
-- update applies to all applications.
UPDATE multi_answer_values AS mav
SET value = :val
WHERE mav.key = :key;

-- name: sql-anonymize-long-textareas-group!
-- Overwrites free-text values in group_answer_values with an equally long
-- prefix of a fixed lorem ipsum filler. A row is rewritten only when
--   * a group_answers row with the same application_id and key exists and
--     field_type is 'textArea' or 'textField' (field_type is presumably a
--     group_answers column; confirm before qualifying it),
--   * the row's own value is neither a lowercase UUID nor a d.m.yyyy date, and
--   * the row's own value is at least 6 characters long.
-- The value predicates are evaluated on the row being updated, so UUID, date
-- and short values that share a key with a long text value keep their content.
-- Values longer than the filler end up with the filler's full length.
UPDATE group_answer_values gav
SET value = substring('Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris ' ||
                      'nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, ' ||
                      'sunt in culpa qui officia deserunt mollit anim id est laborum. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut ' ||
                      'enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat ' ||
                      'nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do ' ||
                      'eiusmod tempor incididunt ut labore et dolore magna aliqua.' FROM 1 FOR length(gav.value))
WHERE gav.value !~ '(^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$)|(^[0-9]{1,2}[.][0-9]{1,2}[.][0-9]{4}$)'
  AND length(gav.value) >= 6
  AND EXISTS (SELECT 1
              FROM group_answers ga
              WHERE ga.application_id = gav.application_id
                AND ga.key = gav.key
                AND field_type IN ('textArea', 'textField'));

-- name: sql-anonymize-long-textareas-multi!
-- Overwrites free-text values in multi_answer_values with an equally long
-- prefix of a fixed lorem ipsum filler. A row is rewritten only when
--   * a multi_answers row with the same application_id and key exists,
--     field_type is 'textArea' or 'textField' (field_type is presumably a
--     multi_answers column; confirm before qualifying it) and the key is not
--     one of the six guardian contact keys listed below,
--   * the row's own value is neither a lowercase UUID nor a d.m.yyyy date, and
--   * the row's own value is at least 6 characters long.
-- The value predicates are evaluated on the row being updated, so UUID, date
-- and short values that share a key with a long text value keep their content.
-- Values longer than the filler end up with the filler's full length.
UPDATE multi_answer_values mav
SET value = substring('Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris ' ||
                      'nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, ' ||
                      'sunt in culpa qui officia deserunt mollit anim id est laborum. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut ' ||
                      'enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat ' ||
                      'nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do ' ||
                      'eiusmod tempor incididunt ut labore et dolore magna aliqua.' FROM 1 FOR length(mav.value))
WHERE mav.value !~ '(^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$)|(^[0-9]{1,2}[.][0-9]{1,2}[.][0-9]{4}$)'
  AND length(mav.value) >= 6
  AND EXISTS (SELECT 1
              FROM multi_answers ma
              WHERE ma.application_id = mav.application_id
                AND ma.key = mav.key
                AND field_type IN ('textArea', 'textField')
                AND ma.key NOT IN ('guardian-phone', 'guardian-name', 'guardian-email', 'guardian-phone-secondary', 'guardian-name-secondary', 'guardian-email-secondary'));

-- name: sql-anonymize-long-textareas!
-- Overwrites free-text values in answers with an equally long prefix of a
-- fixed lorem ipsum filler. A row is rewritten only when all of these hold
--   * its value is at least 6 characters long,
--   * its value is neither a lowercase UUID nor a d.m.yyyy date,
--   * its key is not one of the personal-detail keys listed below, and
--   * field_type is 'textArea' or 'textField'.
-- Values longer than the filler end up with the filler's full length.
UPDATE answers ans
SET value = substring('Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris ' ||
                      'nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, ' ||
                      'sunt in culpa qui officia deserunt mollit anim id est laborum. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut ' ||
                      'enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat ' ||
                      'nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do ' ||
                      'eiusmod tempor incididunt ut labore et dolore magna aliqua.' FROM 1 FOR length(ans.value))
WHERE length(ans.value) >= 6
  AND ans.value !~ '(^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$)|(^[0-9]{1,2}[.][0-9]{1,2}[.][0-9]{4}$)'
  AND ans.key NOT IN ('gender', 'first-name', 'birth-date', 'home-town', 'ssn', 'email', 'preferred-name', 'last-name', 'address', 'phone', 'postal-office', 'postal-code')
  AND field_type IN ('textArea', 'textField');
42 changes: 40 additions & 2 deletions dev/clj/ataru/anonymizer/anonymizer_application_store.clj
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
[ataru.util.random :as crypto]
[cheshire.core :as json]
[clojure.java.jdbc :as jdbc]
[taoensso.timbre :as log]
[yesql.core :as sql])
(:import org.postgresql.util.PGobject))

Expand All @@ -14,6 +15,41 @@
(defn get-application [id]
(first (db/exec :db sql-get-application {:id id})))

(defn anonymize-guardian!
  "Replaces every guardian name, phone and e-mail value with a fixed test
  value by calling sql-update-multi-by-key! once per guardian key, all inside
  a single transaction. Logs before and after the work."
  []
  (log/info "Anonymize guardians")
  (jdbc/with-db-transaction [tx {:datasource (db/get-datasource :db)}]
    ;; Pairs are applied in the listed order: answer key, then replacement.
    (doseq [[answer-key replacement]
            [["guardian-name" "Testi Huoltaja"]
             ["guardian-name-secondary" "Testi Huoltaja"]
             ["guardian-phone" "0501234567"]
             ["guardian-phone-secondary" "0501234567"]
             ["guardian-email" "testi1.huoltaja@testiopintopolku.fi"]
             ["guardian-email-secondary" "testi2.huoltaja@testiopintopolku.fi"]]]
      (sql-update-multi-by-key! {:key answer-key :val replacement}
                                {:connection tx})))
  (log/info "Done anonymizing guardians"))

(defn anonymize-long-textareas-group!
  "Runs the sql-anonymize-long-textareas-group! query inside its own
  transaction, logging before and after."
  []
  (log/info "Anonymize long textareas in group answers")
  (let [db-spec {:datasource (db/get-datasource :db)}]
    (jdbc/with-db-transaction [tx db-spec]
      (sql-anonymize-long-textareas-group! {} {:connection tx})))
  (log/info "Done anonymizing long textareas in group answers"))

(defn anonymize-long-textareas-multi!
  "Runs the sql-anonymize-long-textareas-multi! query inside its own
  transaction, logging before and after."
  []
  (log/info "Anonymize long textareas in multi answers")
  (let [db-spec {:datasource (db/get-datasource :db)}]
    (jdbc/with-db-transaction [tx db-spec]
      (sql-anonymize-long-textareas-multi! {} {:connection tx})))
  (log/info "Done anonymizing long textareas in multi answers"))

(defn anonymize-long-textareas!
  "Runs the sql-anonymize-long-textareas! query inside its own transaction,
  logging before and after."
  []
  (log/info "Anonymize long textareas in answers")
  (let [db-spec {:datasource (db/get-datasource :db)}]
    (jdbc/with-db-transaction [tx db-spec]
      (sql-anonymize-long-textareas! {} {:connection tx})))
  (log/info "Done anonymizing long textareas in answers"))

(defn update-application [application]
(let [answers (:answers (:content application))
update-answers-args {:application_id (:id application)
Expand All @@ -26,7 +62,8 @@
(sql-update-application-multi-answer-values! update-answers-args {:connection connection})
(sql-update-application-group-answer-values! update-answers-args {:connection connection}))))

(defn regenerate-application-secrets []
(defn regenerate-application-secrets! []
(log/info "Regenerate application secrets")
(jdbc/with-db-transaction [connection {:datasource (db/get-datasource :db)}]
(doseq [id-chunk (->> (sql-application-secret-ids {} {:connection connection})
(map :id)
Expand All @@ -36,4 +73,5 @@
SET secret = ?
WHERE id = ?"]
(map vector (repeatedly (fn [] (crypto/url-part 34))) id-chunk))
{:multi? true}))))
{:multi? true})))
(log/info "Done regenerating application secrets"))
32 changes: 18 additions & 14 deletions dev/clj/ataru/anonymizer/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -78,18 +78,22 @@

(defn anonymize-data [& args]
(assert (not (clojure.string/blank? (second args))))
(let [executor-service (Executors/newFixedThreadPool
(.availableProcessors (Runtime/getRuntime)))
fake-persons (file->fake-persons (first args))
(let [fake-persons (file->fake-persons (first args))
attachment-key (second args)]
(doseq [id (application-store/get-all-application-ids)]
(.execute
executor-service
(fn []
(let [application (application-store/get-application id)]
(if-let [fake-person (first (get fake-persons (:person_oid application)))]
(do (application-store/update-application (anonymize fake-person attachment-key application))
(log/info "Anonymized application" (:id application)))
(log/info "Did not anonymize application" (:id application)))))))
(.shutdown executor-service)
(application-store/regenerate-application-secrets)))
(log/info "Anonymise applications")
(dorun
(pmap (fn [id]
(let [application (application-store/get-application id)]
(if-let [fake-person (first (get fake-persons (:person_oid application)))]
(do (application-store/update-application (anonymize fake-person attachment-key application))
(log/info "Anonymized application" (:id application)))
(log/info "Did not anonymize application" (:id application)))))
(application-store/get-all-application-ids)))
(log/info "Done anonymising applications")
(application-store/anonymize-guardian!)
(dorun (pcalls application-store/anonymize-long-textareas-group!
application-store/anonymize-long-textareas-multi!
application-store/anonymize-long-textareas!))
(application-store/regenerate-application-secrets!))
(log/info "Shutting down")
(shutdown-agents))
2 changes: 1 addition & 1 deletion project.clj
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@
:test-paths ["spec"]
:resource-paths ["src/sql" "resources"]
:uberjar-name "ataru.jar"
:jvm-opts ^:replace ["-Xmx2g"]
:jvm-opts ^:replace ["-Xmx10g"]

:plugins [[lein-cljsbuild "1.1.8"]
[lein-doo "0.1.11"]
Expand Down

0 comments on commit 4108435

Please sign in to comment.