
Release :) #35

Merged · 15 commits · May 27, 2024
10 changes: 10 additions & 0 deletions depth_stats.csv
@@ -0,0 +1,10 @@
depth total all_traffic_nodes_reachable
1 1,534,791 False
2 20,867,094 False
3 99,345,984 False
4 208,286,238 False
5 274,915,690 False
6 294,770,278 False
7 298,131,875 False
8 298,459,823 True
9 298,471,755 True
1 change: 1 addition & 0 deletions p6/calc_type_enum.py
@@ -6,4 +6,5 @@ class CalcType(Enum):
AVERAGE = "average"
MAX = "max"
SQUARED = "squared"
PATHS = "paths"
RATIOS = "ratios"
210 changes: 210 additions & 0 deletions p6/linear_optimization/netflow.py
@@ -0,0 +1,210 @@
import os
import gurobipy as gp
import pandas as pd
from collections import deque

from dotenv import load_dotenv
from p6.utils import log
from p6.utils import data as dataUtils


logger = log.setupCustomLogger(__name__)
load_dotenv("variables.env")

options = {
"WLSACCESSID": os.getenv("WLSACCESSID"),
"WLSSECRET": os.getenv("WLSSECRET"),
"LICENSEID": int(os.getenv("LICENSEID")),
}


def optMC(parserArgs, links, flowTraffic, timestamp):
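    """Solve a multi-commodity flow problem over the given links.

    Keeps only the largest flows covering the first `percentage` share of total
    demand, minimizes the sum of squared link utilizations, prunes negligible
    per-flow edge values, and writes the resulting path ratios to file.
    """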
    # Create the model inside the licensed environment (both context-managed)
    with gp.Env(params=options) as env, gp.Model("netflow", env=env) as m:

nodes = []
edges = []
for link in links:
split = link.split(";")
edges.append((split[0], split[1]))

if split[0] not in nodes:
nodes.append(split[0])
if split[1] not in nodes:
nodes.append(split[1])

traffic = {}
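        # Keep only the largest flows: sort by demand, then take flows until
        # their cumulative demand reaches `percentage` of the total demand.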
sorted_flowTraffic = sorted(
flowTraffic.items(), key=lambda item: item[1], reverse=True
)
total_demand = sum(flowTraffic.values())
percentage = 0.2 # TODO: Add this as a parameter
demand_threshold = total_demand * percentage
cumulative_demand = 0
significant_flowTraffic = {}
for flow, value in sorted_flowTraffic:
if cumulative_demand <= demand_threshold:
significant_flowTraffic[flow] = value
cumulative_demand += value
else:
break # Stop adding values once the threshold is reached

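        # Net injection per node: +demand at the flow's source, -demand at its
        # destination; intermediate nodes have no entry and are pure transit.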
for flow in significant_flowTraffic:
split = flow.split(";")
traffic[flow, split[0]] = significant_flowTraffic[flow]
traffic[flow, split[1]] = -significant_flowTraffic[flow]

utilization = m.addVars(links, vtype=gp.GRB.CONTINUOUS, name="Utilization")
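        # Quadratic objective: squaring link utilization penalizes heavily
        # loaded links disproportionately, nudging the solver to balance load.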
m.setObjective(
gp.quicksum((utilization[link] ** 2 for link in links)),
gp.GRB.MINIMIZE,
)

logger.info(
f"adding vars for flow: {len(significant_flowTraffic):,} and edges: {len(edges):,} so {len(significant_flowTraffic) * len(edges):,} flowVars in total"
)

flowVars = m.addVars(significant_flowTraffic.keys(), edges, name="flow")

logger.info(f"adding capacity constraints for {len(edges):,} edges")

m.addConstrs(
(
flowVars.sum("*", start, end) <= links[start + ";" + end]["capacity"]
for start, end in edges
),
"cap",
)

logger.info(
f"adding flow constraints for {len(significant_flowTraffic):,} flows and {len(nodes):,} nodes"
)

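        # Flow conservation: inflow plus injected demand equals outflow at
        # sources and sinks; plain inflow equals outflow at transit nodes.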
m.addConstrs(
(
(
flowVars.sum(flow, "*", node) + traffic[flow, node]
== flowVars.sum(flow, node, "*")
if (flow, node) in traffic
else flowVars.sum(flow, "*", node) == flowVars.sum(flow, node, "*")
)
for flow in significant_flowTraffic
for node in nodes
),
"flow",
)

logger.info(f"adding utilization constraints for {len(edges):,} edges")

# Constraints to set the flow through each link as the sum of flows for all traffic pairs
for start, end in edges:
linkFlow = gp.quicksum(
flowVars[flow, start, end]
for flow in significant_flowTraffic
if (flow, start, end) in flowVars
)

m.addConstr(
linkFlow
== utilization[f"{start};{end}"] * links[f"{start};{end}"]["capacity"],
f"util_{start};{end}",
)

m.write("multiCommodityFlowProblem.lp")
m.optimize()

        # Per-flow cut-off fraction (0.001 = 0.1% of each flow's demand)
        threshold_percentage = 0.001

if m.Status == gp.GRB.OPTIMAL:
solution = m.getAttr("X", flowVars)
flow_values = {
(flow, i, j): solution[flow, i, j]
for flow in significant_flowTraffic
for i, j in edges
if solution[flow, i, j] > 0
}

unique_flows = set()

# New dictionary to hold the threshold values for each flow
threshold_values = {
flow: flowTraffic[flow] * threshold_percentage for flow in flowTraffic
}

new_flow_values = {}
for (flow, start, end), value in flow_values.items():
# Get the threshold value for the current flow
current_threshold_value = threshold_values[flow]

# Compare each flow value with its corresponding threshold value
if value >= current_threshold_value:
                    # Track flows that keep at least one edge above the threshold
if flow not in unique_flows:
unique_flows.add(flow)
new_flow_values[(flow, start, end)] = value

logger.info(f"Flows before paths cut-off: {len(significant_flowTraffic)}")
logger.info(f"Flows after paths cut-off: {len(unique_flows)}")

flow_values = new_flow_values

        # Calculate path ratios for all flows, timing the computation
startTime = pd.Timestamp.now()
all_paths_with_ratios = calculate_ratios_for_all_flows(
flow_values, significant_flowTraffic, timestamp
)
endTime = pd.Timestamp.now()

logger.info(f"Time taken to calculate ratios: {endTime - startTime}")

dataUtils.writeDataToFile(
pd.DataFrame(
all_paths_with_ratios,
columns=["timestamp", "flowName", "path", "ratio"],
),
"ratioData",
parserArgs,
)

return


def find_paths(flow_values, flowName, source, target):
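    """Breadth-first search over the solved flow values for a single flow.

    Follows only edges that carry positive flow for `flowName`, tracking the
    bottleneck (minimum edge flow) along each path from source to target.
    """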
paths = []
queue = deque([(source, [source], float("inf"))])
visited = set()
while queue:
node, path, flow = queue.popleft()

if (node, flow) in visited:
continue
visited.add((node, flow))

if node == target:
paths.append((path, flow))
continue

for (flow_id, start, end), f in flow_values.items():
if start == node and flow_id == flowName and (end, f) not in visited:
new_flow = min(flow, f)
if new_flow > 0:
queue.append((end, path + [end], new_flow))

return paths


def calculate_ratios_for_all_flows(flow_values, flowTraffic, timestamp):
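    """Split each flow's demand across its discovered paths, proportionally to
    each path's bottleneck flow."""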
all_paths_with_ratios = []
for flow_id, flow_amount in flowTraffic.items():
source, target = flow_id.split(";")
paths = find_paths(flow_values, flow_id, source, target)
total_flow = sum(flow for _, flow in paths)
        # One output row per path, each tagged with the flow's ID
for path, flow in paths:
all_paths_with_ratios.append(
[timestamp, flow_id, (";".join(path)), flow / total_flow]
)
return all_paths_with_ratios
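
For illustration, a minimal sketch (hypothetical data, not part of the diff) of how find_paths interprets the solver output:

# Flow "A;C" is split across a direct edge and a two-hop path via B.
flow_values = {
    ("A;C", "A", "B"): 6.0,
    ("A;C", "B", "C"): 6.0,
    ("A;C", "A", "C"): 4.0,
}
paths = find_paths(flow_values, "A;C", "A", "C")
# -> [(["A", "C"], 4.0), (["A", "B", "C"], 6.0)]
# calculate_ratios_for_all_flows would then assign ratios 0.4 and 0.6.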
4 changes: 1 addition & 3 deletions p6/linear_optimization/optimizer.py
@@ -70,9 +70,7 @@ def runLinearOptimizationModel(
case CalcType.AVERAGE.value:
utilization = m.addVars(links, vtype=GRB.CONTINUOUS, name="Utilization")
m.setObjective(
-                gp.quicksum(
-                    (utilization[link] / links[link]["capacity"] for link in links)
-                ),
+                gp.quicksum((utilization[link] for link in links)),
GRB.MINIMIZE,
)
case CalcType.MAX.value:
75 changes: 57 additions & 18 deletions p6/main.py
@@ -10,7 +10,7 @@
from p6.utils import data as dataUtils
from p6.utils import network as nwUtils
from p6.utils import log
from p6.linear_optimization import optimizer as linOpt
from p6.linear_optimization import netflow, optimizer as linOpt

logger = log.setupCustomLogger(__name__)

@@ -31,12 +31,26 @@ def calcLinkUtil(links):
def process_flows_hour(timestamp, flows, traffic, args, links):
logger.info(f"Processing {timestamp} with {len(flows)} flows...")
ratios = None
useRatios = False

# Read ratios if specified
if args.use_ratios:
hour = timestamp[4:6]
day, ratioType, date = args.use_ratios
-        ratios = dataUtils.readRatios(date, ratioType, day, hour)
+        useRatios = True
+        ratios = dataUtils.readPathRatios(date, day, hour, ratioType)
elif args.use_paths:
hour = timestamp[4:6]
day, date, useRatios = args.use_paths
useRatios = useRatios == "True"
ratios = dataUtils.readPathRatios(date, day, hour)
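        # Replace each flow's candidate paths with the optimizer-derived paths;
        # flows the optimizer never routed keep their original paths.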
i = 0
for flow in flows:
if flow not in ratios:
continue
i += 1
flows[flow] = list(ratios[flow].keys())
logger.info(f"Updated {i} flows with paths!")

# Initialize totalTraffic and listFlows for all links
for linkKey in links:
@@ -46,11 +60,11 @@ def process_flows_hour(timestamp, flows, traffic, args, links):
for flow in flows:
# Get all links in the flow
linksFlow = nwUtils.getLinksFromFlow(flows[flow])

identicalPaths = True

if ratios is not None:
for path in flows[flow]:
-                if (flow, path) not in ratios:
+                if flow not in ratios or path not in ratios[flow]:
identicalPaths = False
break

@@ -70,17 +84,19 @@
totalTraffic = 0
for path in flows[flow]:
if link in path:
-                    if ratios is not None and identicalPaths:
-                        totalTraffic += traffic[flow] * float(ratios[flow, path])
+                    if useRatios and identicalPaths:
+                        totalTraffic += traffic[flow] * float(ratios[flow][path])
else:
totalTraffic += traffic[flow] * (1 / len(flows[flow]))

links[link]["totalTraffic"] += totalTraffic
links[link]["listFlows"].append(flow)

# Run linear optimization or baseline calculations
if args.model_type == CalcType.BASELINE.value:
linkUtil = calcLinkUtil(links)
elif args.model_type == CalcType.PATHS.value:
netflow.optMC(args, links, traffic, timestamp)
return None
else:
linkUtil = linOpt.runLinearOptimizationModel(
args, links, flows, traffic, timestamp, args.save_lp_models
@@ -103,6 +119,7 @@ def main():
CalcType.AVERAGE.value,
CalcType.MAX.value,
CalcType.SQUARED.value,
CalcType.PATHS.value,
],
help="type of calculation to run",
)
@@ -126,6 +143,14 @@
metavar=("DAY", "TYPE", "DATE"),
help="use existing path ratios for calculations",
)
parser.add_argument(
"-up",
"--use-paths",
nargs=3,
metavar=("DAY", "DATE", "USERATIOS?"),
help="use existing paths for calculations",
)

args = parser.parse_args()

if args.use_ratios:
@@ -143,15 +168,30 @@
CalcType.AVERAGE.value,
CalcType.MAX.value,
CalcType.SQUARED.value,
CalcType.PATHS.value,
]:
parser.error(
"Invalid ratio type. Please use 'average', 'max' or 'squared'."
"Invalid ratio type. Please use 'average', 'path', 'max' or 'squared'."
)
if args.model_type != CalcType.BASELINE.value:
parser.error(
"Cannot use existing path ratios with the specified model type."
)

if args.use_paths:
day, date, useratios = args.use_paths
if not day.isdigit():
parser.error("Invalid day number.")
if (
not date.isdigit()
or len(date) != 8
or int(date[4:6]) > 12
or int(date[6:]) > 31
):
parser.error("Invalid date. Please use a date in the format YYYYMMDD.")
if useratios not in ["True", "False"]:
parser.error("Invalid useratios value. Please use 'True' or 'False'.")

# Set start method to spawn to avoid issues with multiprocessing on Windows
set_start_method("spawn")

@@ -167,21 +207,20 @@
process_flows_hour,
[
(timestamp, flows[timestamp], traffic[timestamp], args, links.copy())
# for timestamp in list(flows.keys())[:1]
for timestamp in flows
],
)

logger.info("Finished processing all timestamps!")
-    results.sort()
-    dataUtils.writeDataToFile(
-        data=pd.DataFrame(
-            results, columns=["timestamp", "min_util", "max_util", "avg_util"]
-        ),
-        parserArgs=args,
-        outputFile="overviewData",
-    )
+    if results[0] is not None:
+        results.sort()
+        dataUtils.writeDataToFile(
+            data=pd.DataFrame(
+                results, columns=["timestamp", "min_util", "max_util", "avg_util"]
+            ),
+            parserArgs=args,
+            outputFile="overviewData",
+        )

endTime = pd.Timestamp.now()
