diff --git a/lwr_notebook.ipynb b/MFG-Pure-PIDL-LWR.ipynb similarity index 100% rename from lwr_notebook.ipynb rename to MFG-Pure-PIDL-LWR.ipynb diff --git a/non-sep_notebook.ipynb b/MFG-Pure-PIDL-Non-Separable.ipynb similarity index 100% rename from non-sep_notebook.ipynb rename to MFG-Pure-PIDL-Non-Separable.ipynb diff --git a/sep_notebook.ipynb b/MFG-Pure-PIDL-Separable.ipynb similarity index 100% rename from sep_notebook.ipynb rename to MFG-Pure-PIDL-Separable.ipynb diff --git a/MFG_VI.py b/MFG-RL-PIDL.py similarity index 83% rename from MFG_VI.py rename to MFG-RL-PIDL.py index 5bc94e3..db92c25 100644 --- a/MFG_VI.py +++ b/MFG-RL-PIDL.py @@ -10,5 +10,4 @@ option = options[2] d = np.loadtxt(f"data/rho-{option}.txt")[:, 0].flatten('F') train_ddpg(option, n_cell, T_terminal, d, fake_critic=True, pidl=True, surf_plot=True, smooth_plot=False, - diff_plot=True) - # plot_diff("./diff/", smooth=False) \ No newline at end of file + diff_plot=True) \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..18daaa1 --- /dev/null +++ b/README.md @@ -0,0 +1,12 @@ +# MFG-RL-PIDL + +This is the source code for the paper "A Hybrid Framework of Reinforcement Learning and Physics-Informed Deep Learning for Spatiotemporal Mean Field Games". + +## File Structure + +- `data` folder includes the numerical results of MFGs. +- `MFG-RL-PIDL.py` is the runner of Alg. 1 MFG RL-PIDL in our paper. +- `value_iteration_DDPG.py` is the training function of Alg. 1 MFG RL-PIDL. +- `MFG-Pure-PIDL-*.ipynb` are the source code of Alg. 2 MFG-Pure-PIDL. +- `model.py` includes the PyTorch network models of $\rho$ -Net, V-Net and u-net. +- `utils.py` contains the implementations of auxiliary functions. 
\ No newline at end of file diff --git a/utils.py b/utils.py index e3a5922..a8e15b2 100644 --- a/utils.py +++ b/utils.py @@ -237,8 +237,8 @@ def plot_diff(fig_path=None, smooth=False): u_diff_plot = u_diff_hist rho_diff_plot = rho_diff_hist - plt.plot(u_diff_plot, lw=3, label=r"$|u^{(k)} - u^{(k-1)}|$", c='steelblue', ls='--') - plt.plot(rho_diff_plot, lw=3, label=r"$|\rho^{(k)} - \rho^{(k-1)}|$", c='indianred', alpha=.8) + plt.plot(u_diff_plot, lw=3, label=r"$|u^{(i)} - u^{(i-1)}|$", c='steelblue', ls='--') + plt.plot(rho_diff_plot, lw=3, label=r"$|\rho^{(i)} - \rho^{(i-1)}|$", c='indianred', alpha=.8) plt.xlabel("iterations", fontsize=18, labelpad=6) plt.xticks(fontsize=18) plt.ylabel("convergence gap", fontsize=18, labelpad=6) @@ -263,8 +263,8 @@ def plot_diff(fig_path=None, smooth=False): u_diff_plot = u_diff_hist rho_diff_plot = rho_diff_hist - plt.plot(u_diff_plot, lw=3, label=r"$|u^{(k)} - u^*|$", c='steelblue', ls='--') - plt.plot(rho_diff_plot, lw=3, label=r"$|\rho^{(k)} - \rho^*|$", c='indianred', alpha=.8) + plt.plot(u_diff_plot, lw=3, label=r"$|u^{(i)} - u^*|$", c='steelblue', ls='--') + plt.plot(rho_diff_plot, lw=3, label=r"$|\rho^{(i)} - \rho^*|$", c='indianred', alpha=.8) plt.xlabel("iterations", fontsize=18, labelpad=6) plt.xticks(fontsize=18) plt.ylabel("loss", fontsize=18, labelpad=6)