-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDifferential Evolution Protein Structure Prediction.py
107 lines (84 loc) · 3.79 KB
/
Differential Evolution Protein Structure Prediction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
from scipy.interpolate import splprep, splev
# Define the Enhanced Energy Function
def energy_function(positions):
"""Calculate the energy with Lennard-Jones potential and harmonic bonds."""
distances = pdist(positions) # Pairwise distances
distances_matrix = squareform(distances)
# Lennard-Jones potential for non-adjacent residues
lj_energy = np.sum(4 * ((1 / distances[distances > 0])**12 - (1 / distances[distances > 0])**6))
# Harmonic bond potential for adjacent residues
bond_energy = 0.5 * np.sum((distances_matrix[np.arange(len(positions)-1), np.arange(1, len(positions))] - 1)**2)
return lj_energy + bond_energy
# Initialize DE Parameters
num_particles = 40
num_dimensions = 3 # 3D space
num_amino_acids = 15 # Number of residues in the protein
num_iterations = 400
# Differential Evolution hyperparameters
mutation_factor = 0.5 # Controls the step size
crossover_probability = 0.9 # Probability of crossover
# Initialize particle positions
positions = np.random.uniform(-5, 5, (num_particles, num_amino_acids, num_dimensions))
# Evaluate initial fitness
fitness_scores = np.array([energy_function(p) for p in positions])
# Track convergence
convergence = []
# DE Main Loop
for iteration in range(num_iterations):
for i in range(num_particles):
# Mutation: Create a donor vector
indices = np.random.choice(np.delete(np.arange(num_particles), i), 3, replace=False)
a, b, c = positions[indices]
donor_vector = a + mutation_factor * (b - c)
# Crossover: Create a trial vector
trial_vector = np.copy(positions[i])
for j in range(num_amino_acids):
if np.random.rand() < crossover_probability:
trial_vector[j] = donor_vector[j]
# Selection: Compare trial vector with target vector
trial_fitness = energy_function(trial_vector)
if trial_fitness < fitness_scores[i]:
positions[i] = trial_vector
fitness_scores[i] = trial_fitness
# Track global best
global_best_index = np.argmin(fitness_scores)
global_best_position = positions[global_best_index]
global_best_score = fitness_scores[global_best_index]
# Track convergence
convergence.append(global_best_score)
print(f"Iteration {iteration + 1}/{num_iterations}, Best Score: {global_best_score:.4f}")
# Plot the convergence
plt.figure(figsize=(12, 6))
plt.plot(convergence, marker='o', linewidth=2)
plt.title("Convergence of DE on Enhanced Energy Function")
plt.xlabel("Iteration")
plt.ylabel("Best Energy")
plt.grid()
plt.show()
# Visualize the final protein structure
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(figsize=(14, 10))
ax = fig.add_subplot(111, projection='3d')
# Smooth the backbone with splines
tck, u = splprep([global_best_position[:, 0], global_best_position[:, 1], global_best_position[:, 2]], s=2)
smoothed_coords = splev(np.linspace(0, 1, 100), tck)
# Plot amino acids
ax.scatter(global_best_position[:, 0], global_best_position[:, 1], global_best_position[:, 2], c='r', s=100, label='Amino Acids')
# Plot smoothed backbone
ax.plot(smoothed_coords[0], smoothed_coords[1], smoothed_coords[2], c='b', linewidth=2, label='Backbone')
# Annotate amino acids
for i, (x, y, z) in enumerate(global_best_position):
ax.text(x, y, z, str(i), color='black', fontsize=10)
ax.set_title("Optimized Protein Structure with Enhanced DE")
ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.set_zlabel("Z")
ax.legend()
plt.show()
# Print final details
print("Final Optimized Amino Acid Positions:")
print(global_best_position)
print(f"Final Optimized Energy: {global_best_score:.4f}")