Benchmark Tests for FNO and DeepONets #17

Open · wants to merge 9 commits into main
8 changes: 8 additions & 0 deletions bench/Project.toml
@@ -0,0 +1,8 @@
[deps]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
NeuralOperators = "ea5c82af-86e5-48da-8ee1-382d6ad7af4b"
Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
ThreadPinning = "811555cd-349b-4f26-b7bc-1f208b848042"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
65 changes: 65 additions & 0 deletions bench/comparison.md
@@ -0,0 +1,65 @@
# NeuralOperators.jl Benchmarks

## Fourier Neural Operators

### Lux.jl (Julia)

| #layers | Forward | Train: 10 epochs |
| ------- | --------------- | -------------------- |
| 1 | 13.215269 ms | 857.77145 ms |
| 2 | 21.864486 ms | 1509.3964 ms |
| 3 | 29.911771 ms | 2039.654284 ms |
| 4 | 37.942865 ms | 2417.136518 ms |
| 5 | 43.498116 ms | 2822.866553 ms |

### Flux.jl (Julia)

| #layers | Forward | Train: 10 epochs |
| ------- | --------------- | -------------------- |
| 1 | 78.4451 ms | 2993.7750 ms |
| 2 | 101.2194 ms | 3572.4716 ms |
| 3 | 119.4590 ms | 4533.3477 ms |
| 4 | 147.1671 ms | 5232.9407 ms |
| 5 | 176.3559 ms | 6744.5985 ms |

### neuraloperator (Python)

| #layers | Forward | Train: 10 epochs |
| ------- | --------------- | -------------------- |
| 1 | 5.5957 ms | 18.8053 ms |
| 2 | 7.6544 ms | 25.8644 ms |
| 3 | 10.1342 ms | 33.6305 ms |
| 4 | 12.5349 ms | 42.4017 ms |
| 5 | 15.7246 ms | 52.0539 ms |

## DeepONet

### Lux.jl (Julia)

| #layers | Forward | Train: 10 epochs |
| ------- | --------------- | -------------------- |
| 1 | 0.0772 ms | 2.5204 ms |
| 2 | 0.1040 ms | 3.5234 ms |
| 3 | 0.1313 ms | 4.4715 ms |
| 4 | 0.1582 ms | 5.5147 ms |
| 5 | 0.1857 ms | 6.5496 ms |

### Flux.jl (Julia)

| #layers | Forward | Train: 10 epochs |
| ------- | --------------- | -------------------- |
| 1 | 0.1970 ms | 8.3795 ms |
| 2 | 0.2288 ms | 9.2419 ms |
| 3 | 0.2573 ms | 11.7600 ms |
| 4 | 0.2982 ms | 13.0193 ms |
| 5 | 0.3296 ms | 14.0488 ms |

### deepxde (Python)

| #layers | Forward | Train: 10 epochs |
| ------- | --------------- | -------------------- |
| 1 | 0.9240 ms | 19.5328 ms |
| 2 | 0.5718 ms | 25.1960 ms |
| 3 | 0.7124 ms | 31.2356 ms |
| 4 | 0.9271 ms | 37.0238 ms |
| 5 | 1.0191 ms | 43.1386 ms |
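
## Methodology

All numbers are single-machine CPU wall-clock times. The Julia timings come from BenchmarkTools.jl's `@belapsed` (minimum observed time) with threads pinned to physical cores via ThreadPinning.jl; the Python timings are `timeit` totals divided by the iteration count. As a minimal sketch of how a Julia forward-pass number is produced, using the Lux-based NeuralOperators.jl API from `bench/lux.jl`:

```julia
using BenchmarkTools, Lux, NeuralOperators, Random

rng = Xoshiro(1234)
x = rand(Float32, 128, 1, 64)  # (n_points, channels, batch_size)

# One hidden layer: lift 1 -> 128, one width-64 Fourier layer, project 128 -> 1.
model = FourierNeuralOperator(gelu; chs=(1, 128, 64, 128, 1), modes=(16,),
                              permuted=Val(true))
ps, st = Lux.setup(rng, model)
model(x, ps, st)  # warm-up call so compilation is excluded from the timing

t_fwd = @belapsed $model($x, $ps, $st)  # seconds
println("forward: $(t_fwd * 1000) ms")
```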
87 changes: 87 additions & 0 deletions bench/flux.jl
@@ -0,0 +1,87 @@
using ThreadPinning
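# Pin Julia threads to physical cores so timings are not skewed by thread
# migration; threadinfo() prints the resulting layout for the benchmark log.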
pinthreads(:cores)
threadinfo()

using BenchmarkTools, NeuralOperators, Random, Zygote
using Flux
using Optimisers: Adam

# Train an FNO with Flux: each batch in `data` is an (x, y) pair.
function train!(model::L, data, opt_state; epochs=10, loss=Flux.Losses.mse) where {L <: FourierNeuralOperator}
    for epoch in 1:epochs
        Flux.train!(model, data, opt_state) do m, x, y
            loss(m(x), y)
        end
    end
end


# Train a DeepONet with Flux: each batch is a (u, y, g) triple
# (branch input, trunk input, target).
function train!(model::L, data, opt_state; epochs=10, loss=Flux.Losses.mse) where {L <: DeepONet}
    for epoch in 1:epochs
        Flux.train!(model, data, opt_state) do m, u, y, g
            loss(m(u, y), g)
        end
    end
end

# FNO
n_points = 128
batch_size = 64

x = rand(Float32, 1, n_points, batch_size);
y = rand(Float32, 1, n_points, batch_size);
data = [(x, y)];
t_fwd = zeros(5)
t_train = zeros(5)

for i in 1:5
    # Channels: lift 1 -> 128, i hidden layers of width 64, project 128 -> 1.
    chs = (1, 128, fill(64, i)..., 128, 1)
    model = FourierNeuralOperator(; ch=chs, modes=(16,), σ=gelu)
    model(x) # TTFX (first call triggers compilation)

    t_fwd[i] = @belapsed $model($x)

    opt_state = Flux.setup(Adam(), model)

    t_train[i] = @belapsed train!($model, $data, $opt_state)
end

println("\n## FNO (Flux NeuralOperators.jl)")
print("| #layers | Forward | Train: 10 epochs | \n")
print("| --- | --- | --- | \n")
for i in 1:5
    print("| $i | $(t_fwd[i] * 1000) ms | $(t_train[i] * 1000) ms | \n")
end

# DeepONets
eval_points = 64
batch_size = 64
dim_y = 1
m = 32
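
# Data shapes for the Flux DeepONet: u is (m, batch_size), y is
# (dim_y, eval_points), g is (batch_size, eval_points).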

u = rand(Float32, m, batch_size);
y = rand(Float32, dim_y, eval_points);

g = rand(Float32, batch_size, eval_points);

data = [(u, y, g)]
t_fwd = zeros(5)
t_train = zeros(5)
for i in 1:5
    ch_branch = (m, fill(64, i)..., 128)
    ch_trunk = (dim_y, fill(64, i)..., 128)
    model = DeepONet(ch_branch, ch_trunk)
    model(u, y) # TTFX

    t_fwd[i] = @belapsed $model($u, $y)

    opt_state = Flux.setup(Adam(), model)

    t_train[i] = @belapsed train!($model, $data, $opt_state)
end

println("\n## DeepONet (Flux NeuralOperators.jl)")
print("| #layers | Forward | Train: 10 epochs | \n")
print("| --- | --- | --- | \n")
for i in 1:5
    print("| $i | $(t_fwd[i] * 1000) ms | $(t_train[i] * 1000) ms | \n")
end
7 changes: 7 additions & 0 deletions bench/flux/Project.toml
@@ -0,0 +1,7 @@
[deps]
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
NeuralOperators = "ea5c82af-86e5-48da-8ee1-382d6ad7af4b"
Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
ThreadPinning = "811555cd-349b-4f26-b7bc-1f208b848042"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
82 changes: 82 additions & 0 deletions bench/lux.jl
@@ -0,0 +1,82 @@
using ThreadPinning
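# Pin threads to physical cores, as in the Flux script, so timings are comparable.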
pinthreads(:cores)
threadinfo()

using BenchmarkTools, NeuralOperators, Random, Optimisers, Zygote, Lux

rng = Xoshiro(1234)

train!(args...; kwargs...) = train!(MSELoss(), AutoZygote(), args...; kwargs...)

# Run `epochs` passes of single-gradient-step updates over `data`, returning
# the loss after and before training as a quick sanity check.
function train!(loss, backend, model, ps, st, data; epochs=10)
    l1 = loss(model, ps, st, first(data))

    tstate = Training.TrainState(model, ps, st, Adam(0.01f0))
    for _ in 1:epochs, (x, y) in data
        _, _, _, tstate = Training.single_train_step!(backend, loss, (x, y), tstate)
    end

    l2 = loss(model, ps, st, first(data))

    return l2, l1
end

# FNO
n_points = 128
batch_size = 64

x = rand(Float32, n_points, 1, batch_size);
y = rand(Float32, n_points, 1, batch_size);
data = [(x, y)];
t_fwd = zeros(5)
t_train = zeros(5)
for i in 1:5
    chs = (1, 128, fill(64, i)..., 128, 1)
    model = FourierNeuralOperator(gelu; chs, modes=(16,), permuted=Val(true))
    ps, st = Lux.setup(rng, model)
    model(x, ps, st) # TTFX

    t_fwd[i] = @belapsed $model($x, $ps, $st)

    t_train[i] = @belapsed train!($model, $ps, $st, $data; epochs=10)
end

println("\n## FNO")
print("| #layers | Forward | Train: 10 epochs | \n")
print("| --- | --- | --- | \n")
for i in 1:5
    print("| $i | $(t_fwd[i] * 1000) ms | $(t_train[i] * 1000) ms | \n")
end

# DeepONets
eval_points = 1
batch_size = 64
dim_y = 1
m = 32
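
# Data shapes for the Lux DeepONet: u is (m, batch_size), y is
# (dim_y, eval_points, batch_size), g is (eval_points, batch_size);
# the model is called on the tuple (u, y).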

u = rand(Float32, m, batch_size);
y = rand(Float32, dim_y, eval_points, batch_size);

g = rand(Float32, eval_points, batch_size);

data = [((u, y), g)]
t_fwd = zeros(5)
t_train = zeros(5)
for i in 1:5
    ch_branch = (m, fill(64, i)..., 128)
    ch_trunk = (dim_y, fill(64, i)..., 128)
    model = DeepONet(; branch=ch_branch, trunk=ch_trunk)
    ps, st = Lux.setup(rng, model)
    model((u, y), ps, st) # TTFX

    t_fwd[i] = @belapsed $model(($u, $y), $ps, $st)

    t_train[i] = @belapsed train!($model, $ps, $st, $data; epochs=10)
end

println("\n## DeepONet")
print("| #layers | Forward | Train: 10 epochs | \n")
print("| --- | --- | --- | \n")
for i in 1:5
    print("| $i | $(t_fwd[i] * 1000) ms | $(t_train[i] * 1000) ms | \n")
end
14 changes: 14 additions & 0 deletions bench/pyproject.toml
@@ -0,0 +1,14 @@
[tool.poetry]
package-mode = false

[tool.poetry.dependencies]
python = "^3.10"
torch = "^2.4.0"
deepxde = "^1.12.0"
neuraloperator = "^0.3.0"
wandb = "^0.17.5"
ipython = "^8.26.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
130 changes: 130 additions & 0 deletions bench/pytorch.py
@@ -0,0 +1,130 @@
import timeit


# DeepONet

import_code = """import deepxde as dde
import torch
"""

n_iters = 100
fwd_timed_arr = [0.0] * 5
training_timed_arr = [0.0] * 5
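
# timeit executes `setup` once per measurement and `stmt` `number` times;
# totals are divided by n_iters when printing to get average ms per call.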

for i in range(1, 6):
    setup_code = f"""eval_points = 64
batch_size = 64
dim_y = 1
m = 32

ch_branch = [m] + [64] * {i} + [128]
ch_trunk = [dim_y] + [64] * {i} + [128]
u = torch.rand(batch_size, m)
y = torch.rand(batch_size, dim_y)
output = torch.rand(batch_size, dim_y)

u_ = torch.rand(8, m)
y_ = torch.rand(8, dim_y)
output_ = torch.rand(8, dim_y)

data = dde.data.Triple(
    X_train=(u, y), y_train=output, X_test=(u_, y_), y_test=output_
)

net = dde.nn.DeepONet(
    ch_branch,
    ch_trunk,
    "tanh",
    "Glorot normal",
)

model = dde.Model(data, net)
model.compile("adam", lr=0.001, metrics=["mean l2 relative error"])
"""

test_code = "model.predict((u,y))"
timed = timeit.timeit(
setup=import_code + setup_code, stmt=test_code, number=n_iters
)
fwd_timed_arr[i - 1] = timed

test_code = "model.train(epochs = 10)"
timed = timeit.timeit(
setup=import_code + setup_code, stmt=test_code, number=n_iters
)
training_timed_arr[i - 1] = timed
# print(i, "\t", timed/n_iters * 1000, " ms \n")

print("## DeepONet")
print("| #layers | Forward | Train: 10 epochs |")
print("| --- | --- | --- |")
for i in range(1, 6):
    print(
        "| ",
        i,
        " | ",
        fwd_timed_arr[i - 1] / n_iters * 1000,
        " ms | ",
        training_timed_arr[i - 1] / n_iters * 1000,
        " ms |",
    )


# FNO

import_code = """from neuralop.models import FNO1d
import torch
"""

n_iters = 1000
fwd_timed_arr = [0.0] * 5
training_timed_arr = [0.0] * 5

for i in range(1, 6):
    setup_code = f"""operator1d = FNO1d(n_modes_height=16,
                   hidden_channels=64,
                   in_channels=1,
                   out_channels=3,
                   n_layers={i},
                   lifting_channels=128,
                   projection_channels=128)

batch_size = 64
n_points = 128
x = torch.rand(batch_size, 1, n_points)
y = torch.rand(batch_size, 3, n_points)
optimiser = torch.optim.Adam(operator1d.parameters(), lr=1e-4)

def train_model(model, data, y, optimiser, epochs):
    # Take one full-batch MSE gradient step per epoch.
    for _ in range(epochs):
        optimiser.zero_grad()
        loss = torch.mean((y - model(data)) ** 2)
        loss.backward()
        optimiser.step()
"""
test_code = "y = operator1d(x)"
timed = timeit.timeit(
setup=import_code + setup_code, stmt=test_code, number=n_iters
)
fwd_timed_arr[i - 1] = timed

test_code = """train_model(operator1d, x, y, optimiser, 10)
"""
timed = timeit.timeit(
setup=import_code + setup_code, stmt=test_code, number=n_iters
)
training_timed_arr[i - 1] = timed


print("## FNO ")
print("| #layers | Forward | Train: 10 epochs | ")
print("| --- | --- | --- | ")
for i in range(1, 6):
    print(
        "| ",
        i,
        " | ",
        fwd_timed_arr[i - 1] / n_iters * 1000,
        " ms | ",
        training_timed_arr[i - 1] / n_iters * 1000,
        " ms |",
    )