diff --git a/examples/federated_learning.ipynb b/examples/federated_learning.ipynb new file mode 100644 index 0000000..f813fe1 --- /dev/null +++ b/examples/federated_learning.ipynb @@ -0,0 +1,580 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from flib.preprocess.feature_engineering import cal_features\n", + "import sys\n", + "import os\n", + "import json" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Generate data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:__main__:Random seed: 0\n", + "INFO:__main__:Simulation name: 10K_accts\n", + "INFO:__main__:Add 14791 base transactions\n", + "INFO:__main__:Generated 10000 accounts.\n", + "INFO:__main__:Generated 17170 normal models.\n", + "INFO:__main__:Normal model counts {'single': 4000, 'fan_out': 551, 'fan_in': 619, 'forward': 4000, 'mutual': 4000, 'periodical': 4000}\n", + "INFO:__main__:Exported 10000 accounts to tmp/10K_accts/accounts.csv\n", + "INFO:__main__:Exported 15090 transactions to tmp/10K_accts/transactions.csv\n", + "INFO:__main__:Output alert member list to: tmp/10K_accts/alert_members.csv\n", + "INFO:__main__:Exported 276 members for 35 AML typologies to tmp/10K_accts/alert_members.csv\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\u001b[1;34mINFO\u001b[m] Scanning for projects...\n", + "[\u001b[1;34mINFO\u001b[m] \n", + "[\u001b[1;34mINFO\u001b[m] \u001b[1m---------------------------< \u001b[0;36mamlsim:amlsim\u001b[0;1m >----------------------------\u001b[m\n", + "[\u001b[1;34mINFO\u001b[m] \u001b[1mBuilding amlsim 1.0.0\u001b[m\n", + "[\u001b[1;34mINFO\u001b[m] \u001b[1m--------------------------------[ jar ]---------------------------------\u001b[m\n", + 
"[\u001b[1;34mINFO\u001b[m] \n", + "[\u001b[1;34mINFO\u001b[m] \u001b[1m--- \u001b[0;32mexec-maven-plugin:3.1.0:java\u001b[m \u001b[1m(default-cli)\u001b[m @ \u001b[36mamlsim\u001b[0;1m ---\u001b[m\n", + "General transaction interval: 7\n", + "Base transaction amount: Normal = 1.000000, Suspicious= 150000.000000\n", + "Random seed: 0\n", + "Simulation name: 10K_accts\n", + "Working directory: tmp/10K_accts/\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Sep 03, 2024 3:03:28 PM amlsim.AMLSim initSimulatorName\n", + "INFO: Simulator Name: 10K_accts\n", + "Sep 03, 2024 3:03:28 PM amlsim.AMLSim initSimulatorName\n", + "INFO: Simulator Name: 10K_accts\n", + "Sep 03, 2024 3:03:28 PM amlsim.AMLSim initSimulatorName\n", + "WARNING: Output log directory already exists: outputs/10K_accts/\n", + "Sep 03, 2024 3:03:28 PM amlsim.AMLSim initSimulatorName\n", + "WARNING: Output log directory already exists: outputs/10K_accts/\n", + "Sep 03, 2024 3:03:28 PM amlsim.AMLSim executeSimulation\n", + "INFO: Transaction log file: outputs/10K_accts/tx_log.csv\n", + "Sep 03, 2024 3:03:28 PM amlsim.AMLSim executeSimulation\n", + "INFO: Transaction log file: outputs/10K_accts/tx_log.csv\n", + "Sep 03, 2024 3:03:28 PM amlsim.AMLSim loadAccountFile\n", + "INFO: Account CSV header: ACCOUNT_ID,CUSTOMER_ID,INIT_BALANCE,COUNTRY,ACCOUNT_TYPE,IS_SAR,BANK_ID\n", + "Sep 03, 2024 3:03:28 PM amlsim.AMLSim loadAccountFile\n", + "INFO: Account CSV header: ACCOUNT_ID,CUSTOMER_ID,INIT_BALANCE,COUNTRY,ACCOUNT_TYPE,IS_SAR,BANK_ID\n", + "Sep 03, 2024 3:03:29 PM amlsim.AMLSim loadAccountFile\n", + "INFO: Number of total accounts: 10000\n", + "Sep 03, 2024 3:03:29 PM amlsim.AMLSim loadAccountFile\n", + "INFO: Number of total accounts: 10000\n", + "Sep 03, 2024 3:03:30 PM amlsim.AMLSim loadAlertMemberFile\n", + "INFO: Load alert member list from:tmp/10K_accts/alert_members.csv\n", + "Sep 03, 2024 3:03:30 PM amlsim.AMLSim loadAlertMemberFile\n", + "INFO: Load alert member list 
from:tmp/10K_accts/alert_members.csv\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting AMLSim Running for 30 steps. Current loop:0\n", + "****************************** - Finished running 30 steps \n", + "\n", + "It took: 4.174 seconds to execute the simulation\n", + "\n", + "Simulation name: 10K_accts\n", + "[\u001b[1;34mINFO\u001b[m] \u001b[1m------------------------------------------------------------------------\u001b[m\n", + "[\u001b[1;34mINFO\u001b[m] \u001b[1;32mBUILD SUCCESS\u001b[m\n", + "[\u001b[1;34mINFO\u001b[m] \u001b[1m------------------------------------------------------------------------\u001b[m\n", + "[\u001b[1;34mINFO\u001b[m] Total time: 6.687 s\n", + "[\u001b[1;34mINFO\u001b[m] Finished at: 2024-09-03T15:03:34Z\n", + "[\u001b[1;34mINFO\u001b[m] \u001b[1m------------------------------------------------------------------------\u001b[m\n", + "txs log: outputs/10K_accts/tx_log.csv\n" + ] + } + ], + "source": [ + "pwd = os.path.join(os.path.abspath('..'), '')  # repo root with trailing separator; derived from the working directory (assumed to be examples/, as the 'cd ../flib/AMLsim' commands below already require) instead of a machine-specific path\n", + "config_path = pwd + 'flib/AMLsim/paramFiles/10K_accts/conf.json'\n", + "# config_path must be absolute: both commands below change directory before reading it. First generate the transaction graph, then run the AMLSim Java simulator on the same config.\n", + "os.system(f'cd ../flib/AMLsim && python3 scripts/transaction_graph_generator.py \"{config_path}\"')\n", + "os.system(f'cd ../flib/AMLsim && mvn exec:java -Dexec.mainClass=amlsim.AMLSim -Dexec.args=\"{config_path}\"')\n", + "# Read the config back to build the transaction-log path; the next cell resolves it against ../flib/AMLsim/.\n", + "with open(config_path, 'r') as f:\n", + " config = json.load(f)\n", + "tx_log_path = os.path.join(config['output']['directory'], config['general']['simulation_name'], config['output']['transaction_log'])\n", + "\n", + "print(f'txs log: {tx_log_path}')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "dfs = cal_features('../flib/AMLsim/' + tx_log_path, windows=(3, 10), overlap=0.9, include_edges=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | account | \n", + "bank | \n", + "sums_spending_0_9 | \n", + "means_spending_0_9 | \n", + "medians_spending_0_9 | \n", + "stds_spending_0_9 | \n", + "maxs_spending_0_9 | \n", + "mins_spending_0_9 | \n", + "counts_spending_0_9 | \n", + "sums_spending_9_18 | \n", + "... | \n", + "stds_18_27 | \n", + "maxs_18_27 | \n", + "mins_18_27 | \n", + "counts_in_18_27 | \n", + "counts_out_18_27 | \n", + "counts_unique_in_18_27 | \n", + "counts_unique_out_18_27 | \n", + "counts_days_in_bank | \n", + "counts_phone_changes | \n", + "is_sar | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "3238 | \n", + "bank_d | \n", + "5558.55 | \n", + "555.855 | \n", + "578.900 | \n", + "101.646646 | \n", + "674.31 | \n", + "338.97 | \n", + "10.0 | \n", + "4719.42 | \n", + "... | \n", + "463.975713 | \n", + "293.05 | \n", + "-637.96 | \n", + "3.0 | \n", + "1.0 | \n", + "1.0 | \n", + "1.0 | \n", + "27 | \n", + "0 | \n", + "0 | \n", + "
1 | \n", + "3927 | \n", + "bank_d | \n", + "5994.56 | \n", + "599.456 | \n", + "609.830 | \n", + "255.681170 | \n", + "992.79 | \n", + "175.06 | \n", + "10.0 | \n", + "4099.59 | \n", + "... | \n", + "952.359697 | \n", + "737.48 | \n", + "-609.36 | \n", + "1.0 | \n", + "1.0 | \n", + "1.0 | \n", + "1.0 | \n", + "28 | \n", + "0 | \n", + "0 | \n", + "
2 | \n", + "3080 | \n", + "bank_d | \n", + "5029.89 | \n", + "502.989 | \n", + "530.265 | \n", + "136.664153 | \n", + "673.19 | \n", + "284.42 | \n", + "10.0 | \n", + "3543.46 | \n", + "... | \n", + "0.000000 | \n", + "280.94 | \n", + "280.94 | \n", + "1.0 | \n", + "0.0 | \n", + "1.0 | \n", + "0.0 | \n", + "19 | \n", + "0 | \n", + "0 | \n", + "
3 | \n", + "3885 | \n", + "bank_d | \n", + "5146.61 | \n", + "514.661 | \n", + "469.990 | \n", + "150.169421 | \n", + "781.41 | \n", + "329.09 | \n", + "10.0 | \n", + "3704.01 | \n", + "... | \n", + "443.822642 | \n", + "973.90 | \n", + "346.24 | \n", + "2.0 | \n", + "0.0 | \n", + "1.0 | \n", + "0.0 | \n", + "25 | \n", + "0 | \n", + "0 | \n", + "
4 | \n", + "3845 | \n", + "bank_d | \n", + "5205.31 | \n", + "520.531 | \n", + "534.405 | \n", + "204.613208 | \n", + "773.06 | \n", + "64.82 | \n", + "10.0 | \n", + "5170.27 | \n", + "... | \n", + "137.023152 | \n", + "614.77 | \n", + "420.99 | \n", + "2.0 | \n", + "0.0 | \n", + "1.0 | \n", + "0.0 | \n", + "27 | \n", + "0 | \n", + "0 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
987 | \n", + "3823 | \n", + "bank_d | \n", + "4950.83 | \n", + "495.083 | \n", + "452.860 | \n", + "197.249510 | \n", + "836.60 | \n", + "174.16 | \n", + "10.0 | \n", + "4908.11 | \n", + "... | \n", + "0.000000 | \n", + "-715.34 | \n", + "-715.34 | \n", + "0.0 | \n", + "1.0 | \n", + "0.0 | \n", + "1.0 | \n", + "20 | \n", + "0 | \n", + "0 | \n", + "
988 | \n", + "3186 | \n", + "bank_d | \n", + "5017.46 | \n", + "501.746 | \n", + "535.755 | \n", + "201.431903 | \n", + "760.61 | \n", + "143.20 | \n", + "10.0 | \n", + "4501.18 | \n", + "... | \n", + "0.000000 | \n", + "-762.74 | \n", + "-762.74 | \n", + "0.0 | \n", + "1.0 | \n", + "0.0 | \n", + "1.0 | \n", + "21 | \n", + "0 | \n", + "0 | \n", + "
989 | \n", + "3672 | \n", + "bank_d | \n", + "4158.74 | \n", + "415.874 | \n", + "472.215 | \n", + "220.598448 | \n", + "726.44 | \n", + "48.23 | \n", + "10.0 | \n", + "5418.51 | \n", + "... | \n", + "138.988706 | \n", + "-496.08 | \n", + "-768.61 | \n", + "0.0 | \n", + "3.0 | \n", + "0.0 | \n", + "1.0 | \n", + "26 | \n", + "0 | \n", + "0 | \n", + "
990 | \n", + "3209 | \n", + "bank_d | \n", + "5211.99 | \n", + "521.199 | \n", + "497.965 | \n", + "242.298954 | \n", + "918.87 | \n", + "151.21 | \n", + "10.0 | \n", + "4769.16 | \n", + "... | \n", + "0.000000 | \n", + "-854.94 | \n", + "-854.94 | \n", + "0.0 | \n", + "1.0 | \n", + "0.0 | \n", + "1.0 | \n", + "25 | \n", + "0 | \n", + "0 | \n", + "
991 | \n", + "3295 | \n", + "bank_d | \n", + "4898.47 | \n", + "489.847 | \n", + "498.865 | \n", + "116.582627 | \n", + "640.99 | \n", + "275.00 | \n", + "10.0 | \n", + "5152.57 | \n", + "... | \n", + "0.000000 | \n", + "-465.12 | \n", + "-465.12 | \n", + "0.0 | \n", + "1.0 | \n", + "0.0 | \n", + "1.0 | \n", + "27 | \n", + "0 | \n", + "0 | \n", + "
992 rows × 62 columns
\n", + "