-
Notifications
You must be signed in to change notification settings - Fork 7
/
optperfprofpy.py
215 lines (187 loc) · 7.57 KB
/
optperfprofpy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
"""
source: https://github.com/Jvanschoubroeck/performance-profiles-in-python
"""
import warnings
import numpy as np
import pandas as pd
from math import isclose
import matplotlib.pyplot as plt
def calc_perprof(
    df, problem_def, perf_meas, solver_char, inv_perf_meas=False, tau_val=None
):
    """Generate the arrays needed to draw performance profiles.

    Notes
    -----
    For a detailed description of performance profiles see the publication:
    Benchmarking Optimization Software with Performance Profiles by
    E. D. Dolan, and J. J. More.

    If ``df`` has a column named ``"feas"``, rows where it equals ``True``
    are normalized against the best value among the feasible rows of the
    same problem. Rows where it equals ``False`` are assigned the ratio of
    the worst value of the problem plus ``0.05``, so that they rank behind
    every feasible row of that problem. Without a ``"feas"`` column every
    row is normalized against the best value of its problem.

    Parameters
    ----------
    df : pandas DataFrame
        Data containing the problem definition, performance measure,
        and solver characteristics. It is not modified.
    problem_def : list
        Labels that define the unique problems.
        The use of multiple labels is supported.
    perf_meas : list containing string
        Label that indicates the performance measure. The first label is
        the one used for the minimum/maximum and for the tau values.
    solver_char : list
        Labels that define the unique solvers. When several labels are
        given, their values are joined with ``"_"`` and stored in the
        column of the first label.
    inv_perf_meas : bool, optional
        If False (standard), the ratio is the value divided by the
        smallest value of the problem; if True, the inverse of this
        operation is used and a warning is emitted.
    tau_val : numpy.ndarray, optional
        If supplied, the fraction of problems solved by each solver is
        evaluated at these values of tau instead of at the unique ratios.

    Returns
    -------
    unique_taus : numpy.ndarray
        Sorted unique ratios found in the data, or ``tau_val`` unchanged
        when it was supplied.
    solver_taus : numpy.ndarray
        Array of shape ``(len(solvers), len(unique_taus))`` holding, per
        solver, the fraction of problems with a ratio ``<= tau``.
        Row ``i`` belongs to ``solvers[i]``.
    solvers : numpy.ndarray
        Unique solver names, in order of first appearance in the sorted
        data.
    data : pandas DataFrame
        Sorted copy of ``df`` where the performance measure values have
        been normalized (stored as floats).

    Raises
    ------
    TypeError
        If `problem_def`, `perf_meas`, or `solver_char` is not a list.
    ValueError
        If the solver characteristics and problem definition share a
        label, or if the problems do not all have the same number of rows.
    """
    # Validate first: the labels are concatenated and indexed right below.
    if not all(
        isinstance(arg, list) for arg in (problem_def, perf_meas, solver_char)
    ):
        raise TypeError(
            "`problem_def`, `perf_meas`, and `solver_char` should be lists"
        )
    shared_labels = set(solver_char) & set(problem_def)
    if shared_labels:
        raise ValueError(
            "Solver characteristic and problem definition share characteristic: ",
            list(shared_labels),
        )

    data = df.sort_values(by=problem_def + solver_char).copy()
    meas_col = perf_meas[0]
    solver_col = solver_char[0]

    if len(solver_char) > 1:
        # Merge all solver characteristics into one label stored in the
        # first solver column.
        merged = data[solver_col].map(str)
        for label in solver_char[1:]:
            merged = merged + "_" + data[label].map(str)
        data[solver_col] = merged

    # Ratios are fractional; cast up front so integer measures can hold them.
    for col in perf_meas:
        data[col] = data[col].astype(float)

    solvers = data[solver_col].unique()
    grouped_by_problem = data.groupby(problem_def)
    n_problems = len(grouped_by_problem)
    has_feas = "feas" in data.columns

    # Ratios are collected separately so `data` is not mutated while its
    # groups are being iterated.
    normalized = data[perf_meas].copy()
    group_size = None
    for i, (prob, gr) in enumerate(grouped_by_problem):
        # Every problem must have the same number of rows (solvers).
        if group_size is None:
            group_size = len(gr)
        elif len(gr) != group_size:
            raise ValueError("Problem group lengths not equal! Problem gr:", prob)
        if inv_perf_meas and i == 0:
            warnings.warn("Performance ratio calculated using inverse.")

        values = gr[meas_col]
        if has_feas:
            # Element-wise comparison on purpose: rows whose flag is neither
            # True nor False are left untouched.
            is_feas = (gr["feas"] == True).to_numpy()  # noqa: E712
            is_infeas = (gr["feas"] == False).to_numpy()  # noqa: E712
            feas_idx = gr.index[is_feas]
            infeas_idx = gr.index[is_infeas]
            # NaN when no row of this problem is feasible.
            best = values[is_feas].min()
            worst = values.max()
            if not inv_perf_meas:
                feas_ratio = gr.loc[feas_idx, perf_meas] / best
                infeas_ratio = worst / best + 0.05
            else:
                feas_ratio = best / gr.loc[feas_idx, perf_meas]
                infeas_ratio = best / worst + 0.05
            if len(feas_idx):
                normalized.loc[feas_idx, perf_meas] = feas_ratio
            if len(infeas_idx):
                # The small offset separates infeasible rows from feasible
                # rows that ended on the worst value.
                normalized.loc[infeas_idx, perf_meas] = infeas_ratio
        else:
            best = values.min()
            if not inv_perf_meas:
                normalized.loc[gr.index, perf_meas] = gr[perf_meas] / best
            else:
                normalized.loc[gr.index, perf_meas] = best / gr[perf_meas]

    for col in perf_meas:
        data[col] = normalized[col]

    if (df[meas_col] < 0).any():
        warnings.warn(
            "Negative objective function value detected, this may "
            "cause unwanted scaling of problems."
        )
    if (len(data) // len(solvers)) != n_problems:
        warnings.warn(
            "Combination of problem and solver characteristic "
            "cause, possibly unwanted, aggregation of problems."
        )

    # `tau_val` may be an array, so it must be compared by identity.
    if tau_val is None:
        unique_taus = np.sort(data[meas_col].unique())
    else:
        unique_taus = tau_val

    # Fraction of problems each solver solved within tau. Rows follow the
    # order of `solvers` so labels and curves always match.
    solver_taus = np.zeros((len(solvers), len(unique_taus)))
    for i, solver in enumerate(solvers):
        ratios = data.loc[data[solver_col] == solver, meas_col].to_numpy()
        if i == 0:
            print("Number of problems per solver: ", len(ratios))
        for n, tau in enumerate(unique_taus):
            solver_taus[i, n] = np.count_nonzero(ratios <= tau) / n_problems

    if len(unique_taus) and not isclose(
        solver_taus[:, 0].sum(), 1, rel_tol=1e-3
    ):
        warnings.warn(
            "Solvers do not solve 100% of problems. "
            "Total amount of problems solved: {}".format(100 * solver_taus[:, 0].sum())
        )
    return unique_taus, solver_taus, solvers, data
def draw_simple_pp(taus, solver_vals, solvers):
    """Draw a basic step plot of performance profiles.

    A new figure with a single axes is created and one step curve is
    added per entry of ``solvers``. The function returns nothing.

    Parameters
    ----------
    taus : numpy.ndarray
        x values shared by all curves; its maximum is the upper x limit.
    solver_vals : numpy.ndarray
        y values; row ``n`` is drawn for ``solvers[n]``.
    solvers : list
        Labels of the curves, used for the legend.
    """
    figure = plt.figure()
    axes = figure.add_subplot(111)

    # One step() call per solver so every curve carries its own label.
    for row, name in enumerate(solvers):
        axes.step(taus, solver_vals[row, :], label=name)

    axes.legend(loc=4)
    # The x axis starts at 1 and ends at the largest tau.
    axes.set_xlim(1, taus.max())
    axes.set_xlabel("Tau")
    axes.set_ylabel("Fraction of problems")
    # NOTE(review): plot() is called without data, so it adds no curve;
    # possibly plt.show() was intended - confirm before changing.
    plt.plot()