diff --git a/examples/federated_learning.ipynb b/examples/federated_learning.ipynb new file mode 100644 index 0000000..f813fe1 --- /dev/null +++ b/examples/federated_learning.ipynb @@ -0,0 +1,580 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from flib.preprocess.feature_engineering import cal_features\n", + "import sys\n", + "import os\n", + "import json" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Generate data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:__main__:Random seed: 0\n", + "INFO:__main__:Simulation name: 10K_accts\n", + "INFO:__main__:Add 14791 base transactions\n", + "INFO:__main__:Generated 10000 accounts.\n", + "INFO:__main__:Generated 17170 normal models.\n", + "INFO:__main__:Normal model counts {'single': 4000, 'fan_out': 551, 'fan_in': 619, 'forward': 4000, 'mutual': 4000, 'periodical': 4000}\n", + "INFO:__main__:Exported 10000 accounts to tmp/10K_accts/accounts.csv\n", + "INFO:__main__:Exported 15090 transactions to tmp/10K_accts/transactions.csv\n", + "INFO:__main__:Output alert member list to: tmp/10K_accts/alert_members.csv\n", + "INFO:__main__:Exported 276 members for 35 AML typologies to tmp/10K_accts/alert_members.csv\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[\u001b[1;34mINFO\u001b[m] Scanning for projects...\n", + "[\u001b[1;34mINFO\u001b[m] \n", + "[\u001b[1;34mINFO\u001b[m] \u001b[1m---------------------------< \u001b[0;36mamlsim:amlsim\u001b[0;1m >----------------------------\u001b[m\n", + "[\u001b[1;34mINFO\u001b[m] \u001b[1mBuilding amlsim 1.0.0\u001b[m\n", + "[\u001b[1;34mINFO\u001b[m] \u001b[1m--------------------------------[ jar ]---------------------------------\u001b[m\n", + "[\u001b[1;34mINFO\u001b[m] \n", + "[\u001b[1;34mINFO\u001b[m] \u001b[1m--- \u001b[0;32mexec-maven-plugin:3.1.0:java\u001b[m \u001b[1m(default-cli)\u001b[m @ \u001b[36mamlsim\u001b[0;1m ---\u001b[m\n", + "General transaction interval: 7\n", + "Base transaction amount: Normal = 1.000000, Suspicious= 150000.000000\n", + "Random seed: 0\n", + "Simulation name: 10K_accts\n", + "Working directory: tmp/10K_accts/\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Sep 03, 2024 3:03:28 PM amlsim.AMLSim initSimulatorName\n", + "INFO: Simulator Name: 10K_accts\n", + "Sep 03, 2024 3:03:28 PM amlsim.AMLSim initSimulatorName\n", + "INFO: Simulator Name: 10K_accts\n", + "Sep 03, 2024 3:03:28 PM amlsim.AMLSim initSimulatorName\n", + "WARNING: Output log directory already exists: outputs/10K_accts/\n", + "Sep 03, 2024 3:03:28 PM amlsim.AMLSim initSimulatorName\n", + "WARNING: Output log directory already exists: outputs/10K_accts/\n", + "Sep 03, 2024 3:03:28 PM amlsim.AMLSim executeSimulation\n", + "INFO: Transaction log file: outputs/10K_accts/tx_log.csv\n", + "Sep 03, 2024 3:03:28 PM amlsim.AMLSim executeSimulation\n", + "INFO: Transaction log file: outputs/10K_accts/tx_log.csv\n", + "Sep 03, 2024 3:03:28 PM amlsim.AMLSim loadAccountFile\n", + "INFO: Account CSV header: ACCOUNT_ID,CUSTOMER_ID,INIT_BALANCE,COUNTRY,ACCOUNT_TYPE,IS_SAR,BANK_ID\n", + "Sep 03, 2024 3:03:28 PM amlsim.AMLSim loadAccountFile\n", + "INFO: Account CSV header: ACCOUNT_ID,CUSTOMER_ID,INIT_BALANCE,COUNTRY,ACCOUNT_TYPE,IS_SAR,BANK_ID\n", + "Sep 03, 2024 3:03:29 PM amlsim.AMLSim loadAccountFile\n", + "INFO: Number of total accounts: 10000\n", + "Sep 03, 2024 3:03:29 PM amlsim.AMLSim loadAccountFile\n", + "INFO: Number of total accounts: 10000\n", + "Sep 03, 2024 3:03:30 PM amlsim.AMLSim loadAlertMemberFile\n", + "INFO: Load alert member list from:tmp/10K_accts/alert_members.csv\n", + "Sep 03, 2024 3:03:30 PM amlsim.AMLSim loadAlertMemberFile\n", + "INFO: Load alert member list from:tmp/10K_accts/alert_members.csv\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting AMLSim Running for 30 steps. Current loop:0\n", + "****************************** - Finished running 30 steps \n", + "\n", + "It took: 4.174 seconds to execute the simulation\n", + "\n", + "Simulation name: 10K_accts\n", + "[\u001b[1;34mINFO\u001b[m] \u001b[1m------------------------------------------------------------------------\u001b[m\n", + "[\u001b[1;34mINFO\u001b[m] \u001b[1;32mBUILD SUCCESS\u001b[m\n", + "[\u001b[1;34mINFO\u001b[m] \u001b[1m------------------------------------------------------------------------\u001b[m\n", + "[\u001b[1;34mINFO\u001b[m] Total time: 6.687 s\n", + "[\u001b[1;34mINFO\u001b[m] Finished at: 2024-09-03T15:03:34Z\n", + "[\u001b[1;34mINFO\u001b[m] \u001b[1m------------------------------------------------------------------------\u001b[m\n", + "txs log: outputs/10K_accts/tx_log.csv\n" + ] + } + ], + "source": [ + "pwd = '/home/edvin/Desktop/flib/'\n", + "config_path = pwd + 'flib/AMLsim/paramFiles/10K_accts/conf.json'\n", + "\n", + "os.system(f'cd ../flib/AMLsim && python3 scripts/transaction_graph_generator.py \"{config_path}\"')\n", + "os.system(f'cd ../flib/AMLsim && mvn exec:java -Dexec.mainClass=amlsim.AMLSim -Dexec.args=\"{config_path}\"')\n", + "\n", + "with open(config_path, 'r') as f:\n", + " config = json.load(f)\n", + "tx_log_path = os.path.join(config['output']['directory'], config['general']['simulation_name'], config['output']['transaction_log'])\n", + "\n", + "print(f'txs log: {tx_log_path}')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "dfs = cal_features('../flib/AMLsim/' + tx_log_path, windows=(3, 10), overlap=0.9, include_edges=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
accountbanksums_spending_0_9means_spending_0_9medians_spending_0_9stds_spending_0_9maxs_spending_0_9mins_spending_0_9counts_spending_0_9sums_spending_9_18...stds_18_27maxs_18_27mins_18_27counts_in_18_27counts_out_18_27counts_unique_in_18_27counts_unique_out_18_27counts_days_in_bankcounts_phone_changesis_sar
03238bank_d5558.55555.855578.900101.646646674.31338.9710.04719.42...463.975713293.05-637.963.01.01.01.02700
13927bank_d5994.56599.456609.830255.681170992.79175.0610.04099.59...952.359697737.48-609.361.01.01.01.02800
23080bank_d5029.89502.989530.265136.664153673.19284.4210.03543.46...0.000000280.94280.941.00.01.00.01900
33885bank_d5146.61514.661469.990150.169421781.41329.0910.03704.01...443.822642973.90346.242.00.01.00.02500
43845bank_d5205.31520.531534.405204.613208773.0664.8210.05170.27...137.023152614.77420.992.00.01.00.02700
..................................................................
9873823bank_d4950.83495.083452.860197.249510836.60174.1610.04908.11...0.000000-715.34-715.340.01.00.01.02000
9883186bank_d5017.46501.746535.755201.431903760.61143.2010.04501.18...0.000000-762.74-762.740.01.00.01.02100
9893672bank_d4158.74415.874472.215220.598448726.4448.2310.05418.51...138.988706-496.08-768.610.03.00.01.02600
9903209bank_d5211.99521.199497.965242.298954918.87151.2110.04769.16...0.000000-854.94-854.940.01.00.01.02500
9913295bank_d4898.47489.847498.865116.582627640.99275.0010.05152.57...0.000000-465.12-465.120.01.00.01.02700
\n", + "

992 rows × 62 columns

\n", + "
" + ], + "text/plain": [ + " account bank sums_spending_0_9 means_spending_0_9 \\\n", + "0 3238 bank_d 5558.55 555.855 \n", + "1 3927 bank_d 5994.56 599.456 \n", + "2 3080 bank_d 5029.89 502.989 \n", + "3 3885 bank_d 5146.61 514.661 \n", + "4 3845 bank_d 5205.31 520.531 \n", + ".. ... ... ... ... \n", + "987 3823 bank_d 4950.83 495.083 \n", + "988 3186 bank_d 5017.46 501.746 \n", + "989 3672 bank_d 4158.74 415.874 \n", + "990 3209 bank_d 5211.99 521.199 \n", + "991 3295 bank_d 4898.47 489.847 \n", + "\n", + " medians_spending_0_9 stds_spending_0_9 maxs_spending_0_9 \\\n", + "0 578.900 101.646646 674.31 \n", + "1 609.830 255.681170 992.79 \n", + "2 530.265 136.664153 673.19 \n", + "3 469.990 150.169421 781.41 \n", + "4 534.405 204.613208 773.06 \n", + ".. ... ... ... \n", + "987 452.860 197.249510 836.60 \n", + "988 535.755 201.431903 760.61 \n", + "989 472.215 220.598448 726.44 \n", + "990 497.965 242.298954 918.87 \n", + "991 498.865 116.582627 640.99 \n", + "\n", + " mins_spending_0_9 counts_spending_0_9 sums_spending_9_18 ... \\\n", + "0 338.97 10.0 4719.42 ... \n", + "1 175.06 10.0 4099.59 ... \n", + "2 284.42 10.0 3543.46 ... \n", + "3 329.09 10.0 3704.01 ... \n", + "4 64.82 10.0 5170.27 ... \n", + ".. ... ... ... ... \n", + "987 174.16 10.0 4908.11 ... \n", + "988 143.20 10.0 4501.18 ... \n", + "989 48.23 10.0 5418.51 ... \n", + "990 151.21 10.0 4769.16 ... \n", + "991 275.00 10.0 5152.57 ... \n", + "\n", + " stds_18_27 maxs_18_27 mins_18_27 counts_in_18_27 counts_out_18_27 \\\n", + "0 463.975713 293.05 -637.96 3.0 1.0 \n", + "1 952.359697 737.48 -609.36 1.0 1.0 \n", + "2 0.000000 280.94 280.94 1.0 0.0 \n", + "3 443.822642 973.90 346.24 2.0 0.0 \n", + "4 137.023152 614.77 420.99 2.0 0.0 \n", + ".. ... ... ... ... ... \n", + "987 0.000000 -715.34 -715.34 0.0 1.0 \n", + "988 0.000000 -762.74 -762.74 0.0 1.0 \n", + "989 138.988706 -496.08 -768.61 0.0 3.0 \n", + "990 0.000000 -854.94 -854.94 0.0 1.0 \n", + "991 0.000000 -465.12 -465.12 0.0 1.0 \n", + "\n", + " counts_unique_in_18_27 counts_unique_out_18_27 counts_days_in_bank \\\n", + "0 1.0 1.0 27 \n", + "1 1.0 1.0 28 \n", + "2 1.0 0.0 19 \n", + "3 1.0 0.0 25 \n", + "4 1.0 0.0 27 \n", + ".. ... ... ... \n", + "987 0.0 1.0 20 \n", + "988 0.0 1.0 21 \n", + "989 0.0 1.0 26 \n", + "990 0.0 1.0 25 \n", + "991 0.0 1.0 27 \n", + "\n", + " counts_phone_changes is_sar \n", + "0 0 0 \n", + "1 0 0 \n", + "2 0 0 \n", + "3 0 0 \n", + "4 0 0 \n", + ".. ... ... \n", + "987 0 0 \n", + "988 0 0 \n", + "989 0 0 \n", + "990 0 0 \n", + "991 0 0 \n", + "\n", + "[992 rows x 62 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = dfs[1]\n", + "display(df[0][0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/flib/AMLsim/paramFiles/10K_accts/accounts.csv b/flib/AMLsim/paramFiles/10K_accts/accounts.csv index 1af549e..e4ccbe6 100644 --- a/flib/AMLsim/paramFiles/10K_accts/accounts.csv +++ b/flib/AMLsim/paramFiles/10K_accts/accounts.csv @@ -1,2 +1,11 @@ count,min_balance,max_balance,country,business_type,bank_id -10000,10000,100000,SWE,I,bank \ No newline at end of file +1000,10000,100000,SWE,I,bank_a +1000,10000,100000,SWE,I,bank_b +1000,10000,100000,SWE,I,bank_c +1000,10000,100000,SWE,I,bank_d +1000,10000,100000,SWE,I,bank_e +1000,10000,100000,SWE,I,bank_f +1000,10000,100000,SWE,I,bank_g +1000,10000,100000,SWE,I,bank_h +1000,10000,100000,SWE,I,bank_i +1000,10000,100000,SWE,I,bank_j \ No newline at end of file diff --git a/flib/AMLsim/paramFiles/10K_accts/alertPatterns.csv b/flib/AMLsim/paramFiles/10K_accts/alertPatterns.csv index 864f37d..fc75df2 100755 --- a/flib/AMLsim/paramFiles/10K_accts/alertPatterns.csv +++ b/flib/AMLsim/paramFiles/10K_accts/alertPatterns.csv @@ -1,8 +1,8 @@ count,type,schedule_id,min_accounts,max_accounts,min_amount,max_amount,min_period,max_period,bank_id,is_sar,source_type -1,fan_out,2,5,5,100,1000,2,28,bank,True,TRANSFER -1,fan_in,2,5,5,100,1000,2,28,bank,True,TRANSFER -1,cycle,2,5,5,100,1000,2,28,bank,True,TRANSFER -1,bipartite,2,5,5,100,1000,2,28,bank,True,TRANSFER -1,stack,2,5,5,100,1000,2,28,bank,True,TRANSFER -1,gather_scatter,2,6,6,100,1000,2,28,bank,True,TRANSFER -1,scatter_gather,2,6,6,100,1000,2,28,bank,True,TRANSFER +5,fan_out,2,5,10,100,1000,2,28,,True,CASH +5,fan_in,2,5,10,100,1000,2,28,,True,CASH +5,cycle,2,5,10,100,1000,2,28,,True,CASH +5,bipartite,2,5,10,100,1000,2,28,,True,CASH +5,stack,2,5,10,100,1000,2,28,,True,CASH +5,gather_scatter,2,6,12,100,1000,2,28,,True,CASH +5,scatter_gather,2,6,12,100,1000,2,28,,True,CASH diff --git a/flib/AMLsim/paramFiles/10K_accts/conf.json b/flib/AMLsim/paramFiles/10K_accts/conf.json index bfa89d4..8905d4e 100644 --- a/flib/AMLsim/paramFiles/10K_accts/conf.json +++ b/flib/AMLsim/paramFiles/10K_accts/conf.json @@ -9,7 +9,7 @@ "max_amount": 150000, "mean_amount": 637, "std_amount": 300, - "mean_amount_sar": 637, + "mean_amount_sar": 737, "std_amount_sar": 300, "prob_income": 0.0, "mean_income": 0.0, @@ -18,21 +18,21 @@ "mean_income_sar": 0.0, "std_income_sar": 0.0, "mean_outcome": 500.0, - "std_outcome": 100.0, - "mean_outcome_sar": 500.0, - "std_outcome_sar": 100.0, + "std_outcome": 200.0, + "mean_outcome_sar": 400.0, + "std_outcome_sar": 200.0, "prob_spend_cash": 0.0, "n_steps_balance_history": 7, "mean_phone_change_frequency": 1460, "std_phone_change_frequency": 365, - "mean_phone_change_frequency_sar": 1460, + "mean_phone_change_frequency_sar": 1260, "std_phone_change_frequency_sar": 365, "mean_bank_change_frequency": 1460, "std_bank_change_frequency": 365, - "mean_bank_change_frequency_sar": 1460, + "mean_bank_change_frequency_sar": 1260, "std_bank_change_frequency_sar": 365, "margin_ratio": 0.1, - "prob_participate_in_multiple_sars": 0.0 + "prob_participate_in_multiple_sars": 0.2 }, "input": { "directory": "paramFiles/10K_accts",