from math import ceil
import numpy as np
import torch
from helpers import load_model, load_scaler, reverse_transform
from logs.Logger import Logger
from prediction.lstm import LOOKBACK
# Check whether a logical volume's usage exceeds 80% of its file system size.
# If so, give it a higher priority and return the updated logical volume;
# otherwise, return the logical volume as is.
# Also return a boolean indicating whether the calculations should be run.
def check_logical_volume_usage_thresholds(lvm_logger: Logger, logical_volume: dict):
    # This variable determines whether the calculations/adjustments
    # should be run after processing each logical volume
    run_calculations = False
    # Step 1: Check the usage threshold
    usage_serie = logical_volume["usage_serie"]
file_system_size = logical_volume["file_system_size"]
lvm_logger.get_logger().info(
f"Checking usage threshold for {logical_volume['lv_name']}..."
)
# logical volume current usage percentage
current_used_volume_percentage = int(ceil(usage_serie[-1] *
100 / file_system_size))
lvm_logger.get_logger().info(
f"Current usage percentage for {logical_volume['lv_name']}: {current_used_volume_percentage}%"
)
if current_used_volume_percentage > 80:
run_calculations = True
        # give the logical volume a higher priority (a lower number means higher priority)
if logical_volume["priority"] > 1:
logical_volume["priority"] = logical_volume["priority"] - 1
lvm_logger.get_logger().info(
f"Giving more priority to {logical_volume['lv_name']}: {logical_volume['priority']}"
)
return logical_volume, run_calculations
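# Illustrative example (hypothetical values): for a logical volume whose
# usage_serie ends at 820 MiB with file_system_size = 1000 MiB, the usage is
# ceil(820 * 100 / 1000) = 82% > 80%, so a priority of 3 is bumped to 2 and
# run_calculations comes back True. At 700 MiB (70%), the volume is returned
# unchanged with run_calculations = False.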
# Calculate the mean proportions for each logical volume.
# A logical volume's proportion at a given time step is its usage divided by
# the total usage of all logical volumes at that step; its mean proportion is
# the average of those proportions over the whole usage series.
def calculate_mean_proportions(logical_volumes_info_list: list[dict[str, str | int | list[float]]]) -> list[dict[str, str | int | float]]:
    usage_series = np.array(
        [lv_info["usage_serie"] for lv_info in logical_volumes_info_list])
    column_sums = usage_series.sum(axis=0)
    # Guard against division by zero when the total usage at a step is 0
    proportions = np.where(column_sums == 0, 0, usage_series / column_sums)
    mean_proportions: list[float] = np.mean(
        proportions, axis=1).tolist()
    return [
        {
            "lv_uuid": lv_info["lv_uuid"],
            "priority": lv_info["priority"],
            "mean_proportion": mean_proportions[index]
        }
        for index, lv_info
        in enumerate(logical_volumes_info_list)
    ]
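# Illustrative example (hypothetical values): with two logical volumes whose
# usage series are [100, 200] and [300, 200], the column sums are [400, 400],
# so the per-step proportions are [0.25, 0.50] and [0.75, 0.50], giving mean
# proportions of 0.375 and 0.625 respectively.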
# Calculate the mean proportions scaled by each logical volume's priority.
# Scaling adjusts the mean proportions so that higher-priority volumes
# (lower priority numbers) receive a larger share.
def calculate_proportions_by_priority(logical_volumes_info_list: list[dict[str, int | str | list[float]]]) -> list[float]:
    mean_proportions: list[dict[str, str | int | float]] = calculate_mean_proportions(
        logical_volumes_info_list.copy())
    # Step 1: Scale mean proportions by the logical volumes' priorities
    mean_proportions_scaled = np.zeros(len(logical_volumes_info_list))
    for index in range(len(mean_proportions_scaled)):
        priority: int = mean_proportions[index]["priority"]
        mean_proportion: float = mean_proportions[index]["mean_proportion"]
        priority_count: int = [element["priority"]
                               for element in logical_volumes_info_list].count(priority)
        mean_proportions_scaled[index] = mean_proportion / \
            priority * priority_count
    # Step 2: Normalize the scaled proportions so they sum up to 1
    sum_mean_proportions_scaled = np.sum(mean_proportions_scaled)
    normalized_mean_proportions = mean_proportions_scaled / sum_mean_proportions_scaled
    return normalized_mean_proportions.tolist()
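# Illustrative example (hypothetical values): with mean proportions 0.375 and
# 0.625 for volumes of priority 1 and 2 (each priority occurring once), the
# scaled values are 0.375 / 1 * 1 = 0.375 and 0.625 / 2 * 1 = 0.3125.
# Normalizing by their sum (0.6875) yields ~0.545 and ~0.455.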
# Calculate the total allocation/reclaim volume needed.
# This is the total over all logical volumes, obtained by summing, per volume,
# 20% of its prediction plus (its allocation/reclaim size * its mean proportion).
# It pre-estimates the total consumption/reclaim of the free allocation volume.
def sum_of_allocations_reclaims_per_volume_group(twenty_percent_of_predictions, allocation_reclaim_sizes, mean_proportions):
    total_allocation_needed = []
    for twenty_percent_of_prediction, allocation_reclaim_size, mean_proportion in zip(twenty_percent_of_predictions, allocation_reclaim_sizes, mean_proportions):
        if allocation_reclaim_size >= 0:
            # The logical volume requests more space
            total_allocation_needed.append(
                (mean_proportion * allocation_reclaim_size) + twenty_percent_of_prediction)
        else:
            # The logical volume gives space back
            total_allocation_needed.append(
                (mean_proportion * twenty_percent_of_prediction) +
                twenty_percent_of_prediction
            )
    return sum(total_allocation_needed)
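# Illustrative example (hypothetical values): with
# twenty_percent_of_predictions = [20, 30], allocation_reclaim_sizes = [100, -50]
# and mean_proportions = [0.6, 0.4], the first volume (requesting space)
# contributes 0.6 * 100 + 20 = 80 and the second (giving space back)
# contributes 0.4 * 30 + 30 = 42, for a total of 122 MiB.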
# Calculate the allocation volume size for each logical volume.
# This is done by calculating the mean proportions of the logical volume scaled by its priority.
# The result is then scaled by the allocation factor, which helps avoid a situation where one
# logical volume uses an excessive portion of the available allocation volume.
def calculate_allocations_volumes(lvm_logger: Logger, volume_group_informations: dict):
# Step 0: Get the free allocation volume size for the VG
allocation_volume = volume_group_informations["vg_free"]
# Step 1: process logical volumes for (predictions, possible allocations/reclaims)
lvm_logger.get_logger().debug(
f"************************************** {volume_group_informations['vg_name']} *****************************************")
    # Extract the logical volumes information list
    logical_volumes_informations_list = list(
        volume_group_informations["logical_volumes"])
    # Extract the allocation/reclaim sizes
    allocation_reclaim_sizes = [
        logical_volume_informations["allocation_reclaim_size"]
        for logical_volume_informations in logical_volumes_informations_list]
    # Extract the predictions
    predictions = [
        logical_volume_informations["prediction_size"]
        for logical_volume_informations in logical_volumes_informations_list]
    # Extract the 20% of predictions
    twenty_percent_of_predictions = [
        logical_volume_informations["twenty_percent_of_prediction"]
        for logical_volume_informations in logical_volumes_informations_list]
    # Step 2: Calculate the mean proportions scaled by each logical volume's priority.
    # Scaling by priority ensures that priority levels are taken into account
    # when distributing the available allocation among the logical volumes
mean_proportions: list[float] = calculate_proportions_by_priority(
[
{
"lv_uuid": logical_volume_informations["lv_uuid"],
"priority": logical_volume_informations["priority"],
"usage_serie": logical_volume_informations["usage_serie"]
}
for logical_volume_informations
in logical_volumes_informations_list
]
)
    # Step 3: Calculate the total allocation/reclaims needed
total_allocations_reclaims_needed = sum_of_allocations_reclaims_per_volume_group(
twenty_percent_of_predictions.copy(),
allocation_reclaim_sizes.copy(),
mean_proportions.copy()
)
# the allocation factor helps avoid a situation where one logical volume
# uses an excessive portion of the available allocation volume.
# It prevents any single logical volume from monopolizing resources,
# ensuring a fair and balanced distribution of the allocation among all logical volumes.
if total_allocations_reclaims_needed == 0:
allocation_factor = 1
else:
allocation_factor = allocation_volume / total_allocations_reclaims_needed
lvm_logger.get_logger().debug(f"Allocation factor: {allocation_factor}")
    # Step 4: Calculate the allocation volume size for each logical volume
logical_volumes_adjustments_sizes: list[dict[str, int]] = []
for logical_volume_informations, prediction, twenty_percent_of_prediction, allocation_reclaim_size, mean_proportion in zip(logical_volumes_informations_list, predictions, twenty_percent_of_predictions, allocation_reclaim_sizes, mean_proportions):
lvm_logger.get_logger().debug(
f"----------------------------------------------- ({volume_group_informations['vg_name']}/{logical_volume_informations['lv_name']}) -----------------------------------------------")
file_system_size = logical_volume_informations["file_system_size"]
prediction_size = int(prediction)
        if allocation_reclaim_size < 0:
            # logical volume prediction < file system size:
            # the volume is giving up space
            # logical volume future size
            size = int(np.round(
                (twenty_percent_of_prediction * mean_proportion) + twenty_percent_of_prediction
            )) + allocation_reclaim_size
            # logical volume future size, scaled by the allocation factor
            size_scaled = int(np.round(
                (twenty_percent_of_prediction * mean_proportion * allocation_factor) + twenty_percent_of_prediction
            )) + allocation_reclaim_size
        else:
            # logical volume prediction > file system size:
            # the volume is requesting more space
            size = int(np.round(
                (mean_proportion * allocation_reclaim_size) + twenty_percent_of_prediction
            )) + allocation_reclaim_size
            size_scaled = int(np.round(
                (allocation_factor * mean_proportion * allocation_reclaim_size) + twenty_percent_of_prediction
            )) + allocation_reclaim_size
        if size >= 0:
            # When the future size is positive,
            # take the smaller of it and the scaled future size,
            # since an allocation factor greater than 1
            # would otherwise lead to over-sizing
            free_space_addition = int(min(
                size, size_scaled))
        else:
            # When the future size is negative,
            # take the larger of it and the scaled future size,
            # since an allocation factor greater than 1
            # would otherwise lead to over-shrinking
            free_space_addition = int(max(
                size, size_scaled))
if free_space_addition >= 0:
logical_volumes_adjustment_size = int(prediction_size +
free_space_addition - file_system_size)
logical_volume_future_size = prediction_size + \
free_space_addition
else:
logical_volumes_adjustment_size = int(free_space_addition)
logical_volume_future_size = int(abs(
allocation_reclaim_size) - abs(free_space_addition) + prediction_size)
# Update allocation_volume by subtracting the allocated/reclaimed volume
allocation_volume -= logical_volumes_adjustment_size
lvm_logger.get_logger().debug(
f"File system size : {file_system_size} MiB"
)
lvm_logger.get_logger().debug(
f"Prediction: {prediction_size} MiB"
)
lvm_logger.get_logger().debug(
f"Future LV size: {logical_volume_future_size} MiB"
)
lvm_logger.get_logger().debug(
f"Adjustment size: {logical_volumes_adjustment_size} MiB"
)
lvm_logger.get_logger().debug(
f"Priority: {logical_volume_informations['priority']}"
)
lvm_logger.get_logger().debug(
f"Mean proportion: {mean_proportion}"
)
lvm_logger.get_logger().debug(
f"VG Free: {allocation_volume} MiB"
)
logical_volumes_adjustments_sizes.append(
{"lv_uuid": logical_volume_informations["lv_uuid"],
"size": logical_volumes_adjustment_size}
)
lvm_logger.get_logger().debug(
"----------------------------------------------------------------------------------")
lvm_logger.get_logger().debug(
"----------------------------------------------------------------------------------")
return logical_volumes_adjustments_sizes
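# Sketch of the expected shapes (assumed from the keys accessed above, not an
# authoritative schema): `volume_group_informations` should look like
#   {
#       "vg_name": "vg0",
#       "vg_free": 2048,            # MiB
#       "logical_volumes": [
#           {"lv_uuid": "...", "lv_name": "lv_data", "priority": 2,
#            "file_system_size": 1000, "usage_serie": [...],
#            "prediction_size": 1200, "twenty_percent_of_prediction": 240,
#            "allocation_reclaim_size": 200},
#           ...
#       ],
#   }
# and the function returns [{"lv_uuid": ..., "size": ...}, ...], where a
# positive "size" consumes free space (extend) and a negative one releases it.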
# Make predictions for a logical volume based on its usage series
# and return the logical volume with the prediction attached
def predict_logical_volume_future_usage(lvm_logger: Logger, hostname: str, vg_name: str, logical_volume: dict):
lv_uuid = logical_volume["lv_uuid"]
usage_serie = logical_volume["usage_serie"]
# load model
try:
lvm_logger.get_logger().info(
f"Loading model for {hostname}/{vg_name}/{logical_volume['lv_name']}..."
)
model = load_model(lv_uuid=lv_uuid)
lvm_logger.get_logger().info(
f"Model for {hostname}/{vg_name}/{logical_volume['lv_name']} loaded"
)
except FileNotFoundError:
lvm_logger.get_logger().error(
f"Model for {hostname}/{vg_name}/{logical_volume['lv_name']} not found"
)
# return None to indicate that the model was not found
return None
# load scaler
try:
lvm_logger.get_logger().info(
f"Loading scaler for {hostname}/{vg_name}/{logical_volume['lv_name']}..."
)
scaler = load_scaler(lv_uuid=lv_uuid)
lvm_logger.get_logger().info(
f"Scaler for {hostname}/{vg_name}/{logical_volume['lv_name']} loaded"
)
    except FileNotFoundError:
        lvm_logger.get_logger().error(
            f"Scaler for {hostname}/{vg_name}/{logical_volume['lv_name']} not found"
        )
        # return None to indicate that the scaler was not found
        return None
    # Append a dummy value to the usage series, since the model expects a sequence
    # of length LOOKBACK+1; that extra value stands in for the label (real value)
usage_serie.append(
sum(usage_serie)/len(usage_serie)
)
    # Transform (scale) the LV file system usage
    lvm_logger.get_logger().info(
        f"Transforming usage series for {hostname}/{vg_name}/{logical_volume['lv_name']}..."
    )
    lvs_latest_usage_series = np.array(usage_serie).reshape(1, LOOKBACK + 1)
    lvs_latest_usage_series_scaled = scaler.transform(
        lvs_latest_usage_series)
    lv_fs_usage_tensor = torch.tensor(
        lvs_latest_usage_series_scaled[:, :-1].reshape(-1, LOOKBACK, 1)).float()
    lvm_logger.get_logger().info(
        f"Usage series for {hostname}/{vg_name}/{logical_volume['lv_name']} transformed"
    )
    lvm_logger.get_logger().info(
        f"Predicting future usage for {hostname}/{vg_name}/{logical_volume['lv_name']}..."
    )
# make prediction
prediction = model(lv_fs_usage_tensor).to(
"cpu").detach().numpy().flatten()
# reverse transform logical volume prediction
reverse_transformed_prediction = np.round(
reverse_transform(scaler, prediction).flatten()[0]
)
lvm_logger.get_logger().info(
f"Future usage for {hostname}/{vg_name}/{logical_volume['lv_name']} predicted: {reverse_transformed_prediction} MiB"
)
    # Difference between the prediction and the current file system size,
    # to observe whether the prediction calls for extending or reducing
    allocation_reclaim_size = reverse_transformed_prediction - \
        logical_volume["file_system_size"]
logical_volume["twenty_percent_of_prediction"] = int(
np.round(0.2 * reverse_transformed_prediction))
logical_volume["prediction_size"] = reverse_transformed_prediction
logical_volume["allocation_reclaim_size"] = allocation_reclaim_size
return logical_volume
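# Minimal usage sketch (assumes a trained model and fitted scaler exist on disk
# for the given lv_uuid; the hostname/vg_name values are hypothetical):
#   lv = {"lv_uuid": "abc-123", "lv_name": "lv_data",
#         "file_system_size": 1000, "usage_serie": [...LOOKBACK values...]}
#   lv = predict_logical_volume_future_usage(logger, "host1", "vg0", lv)
#   if lv is not None:
#       # lv now carries "prediction_size", "twenty_percent_of_prediction"
#       # and "allocation_reclaim_size"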