
Release :) #35

Merged · 15 commits · May 27, 2024
10 changes: 10 additions & 0 deletions depth_stats.csv
@@ -0,0 +1,10 @@
depth total all_traffic_nodes_reachable
1 1,534,791 False
2 20,867,094 False
3 99,345,984 False
4 208,286,238 False
5 274,915,690 False
6 294,770,278 False
7 298,131,875 False
8 298,459,823 True
9 298,471,755 True
1 change: 1 addition & 0 deletions p6/calc_type_enum.py
@@ -6,4 +6,5 @@ class CalcType(Enum):
AVERAGE = "average"
MAX = "max"
SQUARED = "squared"
PATHS = "paths"
RATIOS = "ratios"
210 changes: 210 additions & 0 deletions p6/linear_optimization/netflow.py
@@ -0,0 +1,210 @@
import os
import gurobipy as gp
import pandas as pd
from collections import deque

from dotenv import load_dotenv
from p6.utils import log
from p6.utils import data as dataUtils


logger = log.setupCustomLogger(__name__)
load_dotenv("variables.env")

options = {
"WLSACCESSID": os.getenv("WLSACCESSID"),
"WLSSECRET": os.getenv("WLSSECRET"),
"LICENSEID": int(os.getenv("LICENSEID")),
}


def optMC(parserArgs, links, flowTraffic, timestamp):
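    """Solve a multi-commodity flow problem over the given links.

    Keeps only the largest flows covering the first `percentage` share of total
    demand, minimizes the sum of squared link utilizations, prunes negligible
    per-flow edge values, and writes the resulting path ratios to file.
    """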
    # Create the model inside the licensed environment (both context-managed)
    with gp.Env(params=options) as env, gp.Model("netflow", env=env) as m:

nodes = []
edges = []
for link in links:
split = link.split(";")
edges.append((split[0], split[1]))

if split[0] not in nodes:
nodes.append(split[0])
if split[1] not in nodes:
nodes.append(split[1])

traffic = {}
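        # Keep only the largest flows: sort by demand, then take flows until
        # their cumulative demand reaches `percentage` of the total demand.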
sorted_flowTraffic = sorted(
flowTraffic.items(), key=lambda item: item[1], reverse=True
)
total_demand = sum(flowTraffic.values())
percentage = 0.2 # TODO: Add this as a parameter
demand_threshold = total_demand * percentage
cumulative_demand = 0
significant_flowTraffic = {}
for flow, value in sorted_flowTraffic:
if cumulative_demand <= demand_threshold:
significant_flowTraffic[flow] = value
cumulative_demand += value
else:
break # Stop adding values once the threshold is reached

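        # Net injection per node: +demand at the flow's source, -demand at its
        # destination; intermediate nodes have no entry and are pure transit.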
for flow in significant_flowTraffic:
split = flow.split(";")
traffic[flow, split[0]] = significant_flowTraffic[flow]
traffic[flow, split[1]] = -significant_flowTraffic[flow]

utilization = m.addVars(links, vtype=gp.GRB.CONTINUOUS, name="Utilization")
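        # Quadratic objective: squaring link utilization penalizes heavily
        # loaded links disproportionately, nudging the solver to balance load.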
m.setObjective(
gp.quicksum((utilization[link] ** 2 for link in links)),
gp.GRB.MINIMIZE,
)

logger.info(
f"adding vars for flow: {len(significant_flowTraffic):,} and edges: {len(edges):,} so {len(significant_flowTraffic) * len(edges):,} flowVars in total"
)

flowVars = m.addVars(significant_flowTraffic.keys(), edges, name="flow")

logger.info(f"adding capacity constraints for {len(edges):,} edges")

m.addConstrs(
(
flowVars.sum("*", start, end) <= links[start + ";" + end]["capacity"]
for start, end in edges
),
"cap",
)

logger.info(
f"adding flow constraints for {len(significant_flowTraffic):,} flows and {len(nodes):,} nodes"
)

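        # Flow conservation: inflow plus injected demand equals outflow at
        # sources and sinks; plain inflow equals outflow at transit nodes.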
m.addConstrs(
(
(
flowVars.sum(flow, "*", node) + traffic[flow, node]
== flowVars.sum(flow, node, "*")
if (flow, node) in traffic
else flowVars.sum(flow, "*", node) == flowVars.sum(flow, node, "*")
)
for flow in significant_flowTraffic
for node in nodes
),
"flow",
)

logger.info(f"adding utilization constraints for {len(edges):,} edges")

# Constraints to set the flow through each link as the sum of flows for all traffic pairs
for start, end in edges:
linkFlow = gp.quicksum(
flowVars[flow, start, end]
for flow in significant_flowTraffic
if (flow, start, end) in flowVars
)

m.addConstr(
linkFlow
== utilization[f"{start};{end}"] * links[f"{start};{end}"]["capacity"],
f"util_{start};{end}",
)

m.write("multiCommodityFlowProblem.lp")
m.optimize()

        # Per-flow cut-off fraction (0.001 = 0.1% of each flow's demand)
        threshold_percentage = 0.001

if m.Status == gp.GRB.OPTIMAL:
solution = m.getAttr("X", flowVars)
flow_values = {
(flow, i, j): solution[flow, i, j]
for flow in significant_flowTraffic
for i, j in edges
if solution[flow, i, j] > 0
}

unique_flows = set()

# New dictionary to hold the threshold values for each flow
threshold_values = {
flow: flowTraffic[flow] * threshold_percentage for flow in flowTraffic
}

new_flow_values = {}
for (flow, start, end), value in flow_values.items():
# Get the threshold value for the current flow
current_threshold_value = threshold_values[flow]

# Compare each flow value with its corresponding threshold value
if value >= current_threshold_value:
                    # Track flows that keep at least one edge above the threshold
if flow not in unique_flows:
unique_flows.add(flow)
new_flow_values[(flow, start, end)] = value

logger.info(f"Flows before paths cut-off: {len(significant_flowTraffic)}")
logger.info(f"Flows after paths cut-off: {len(unique_flows)}")

flow_values = new_flow_values

        # Calculate path ratios for all flows, timing the computation
startTime = pd.Timestamp.now()
all_paths_with_ratios = calculate_ratios_for_all_flows(
flow_values, significant_flowTraffic, timestamp
)
endTime = pd.Timestamp.now()

logger.info(f"Time taken to calculate ratios: {endTime - startTime}")

dataUtils.writeDataToFile(
pd.DataFrame(
all_paths_with_ratios,
columns=["timestamp", "flowName", "path", "ratio"],
),
"ratioData",
parserArgs,
)

return


def find_paths(flow_values, flowName, source, target):
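    """Breadth-first search over the solved flow values for a single flow.

    Follows only edges that carry positive flow for `flowName`, tracking the
    bottleneck (minimum edge flow) along each path from source to target.
    """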
paths = []
queue = deque([(source, [source], float("inf"))])
visited = set()
while queue:
node, path, flow = queue.popleft()

if (node, flow) in visited:
continue
visited.add((node, flow))

if node == target:
paths.append((path, flow))
continue

for (flow_id, start, end), f in flow_values.items():
if start == node and flow_id == flowName and (end, f) not in visited:
new_flow = min(flow, f)
if new_flow > 0:
queue.append((end, path + [end], new_flow))

return paths


def calculate_ratios_for_all_flows(flow_values, flowTraffic, timestamp):
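    """Split each flow's demand across its discovered paths, proportionally to
    each path's bottleneck flow."""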
all_paths_with_ratios = []
for flow_id, flow_amount in flowTraffic.items():
source, target = flow_id.split(";")
paths = find_paths(flow_values, flow_id, source, target)
total_flow = sum(flow for _, flow in paths)
        # One output row per path, each tagged with the flow's ID
for path, flow in paths:
all_paths_with_ratios.append(
[timestamp, flow_id, (";".join(path)), flow / total_flow]
)
return all_paths_with_ratios
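
For illustration, a minimal sketch (hypothetical data, not part of the diff) of how find_paths interprets the solver output:

# Flow "A;C" is split across a direct edge and a two-hop path via B.
flow_values = {
    ("A;C", "A", "B"): 6.0,
    ("A;C", "B", "C"): 6.0,
    ("A;C", "A", "C"): 4.0,
}
paths = find_paths(flow_values, "A;C", "A", "C")
# -> [(["A", "C"], 4.0), (["A", "B", "C"], 6.0)]
# calculate_ratios_for_all_flows would then assign ratios 0.4 and 0.6.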
4 changes: 1 addition & 3 deletions p6/linear_optimization/optimizer.py
@@ -70,9 +70,7 @@ def runLinearOptimizationModel(
case CalcType.AVERAGE.value:
utilization = m.addVars(links, vtype=GRB.CONTINUOUS, name="Utilization")
m.setObjective(
-                gp.quicksum(
-                    (utilization[link] / links[link]["capacity"] for link in links)
-                ),
+                gp.quicksum((utilization[link] for link in links)),
GRB.MINIMIZE,
)
case CalcType.MAX.value:
75 changes: 57 additions & 18 deletions p6/main.py
@@ -10,7 +10,7 @@
from p6.utils import data as dataUtils
from p6.utils import network as nwUtils
from p6.utils import log
from p6.linear_optimization import optimizer as linOpt
from p6.linear_optimization import netflow, optimizer as linOpt

logger = log.setupCustomLogger(__name__)

@@ -31,12 +31,26 @@ def calcLinkUtil(links):
def process_flows_hour(timestamp, flows, traffic, args, links):
logger.info(f"Processing {timestamp} with {len(flows)} flows...")
ratios = None
useRatios = False

# Read ratios if specified
if args.use_ratios:
hour = timestamp[4:6]
day, ratioType, date = args.use_ratios
-        ratios = dataUtils.readRatios(date, ratioType, day, hour)
+        useRatios = True
+        ratios = dataUtils.readPathRatios(date, day, hour, ratioType)
elif args.use_paths:
hour = timestamp[4:6]
day, date, useRatios = args.use_paths
useRatios = useRatios == "True"
ratios = dataUtils.readPathRatios(date, day, hour)
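        # Replace each flow's candidate paths with the optimizer-derived paths;
        # flows the optimizer never routed keep their original paths.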
i = 0
for flow in flows:
if flow not in ratios:
continue
i += 1
flows[flow] = list(ratios[flow].keys())
logger.info(f"Updated {i} flows with paths!")

# Initialize totalTraffic and listFlows for all links
for linkKey in links:
@@ -46,11 +60,11 @@ def process_flows_hour(timestamp, flows, traffic, args, links):
for flow in flows:
# Get all links in the flow
linksFlow = nwUtils.getLinksFromFlow(flows[flow])

identicalPaths = True

if ratios is not None:
for path in flows[flow]:
-                if (flow, path) not in ratios:
+                if flow not in ratios or path not in ratios[flow]:
identicalPaths = False
break

@@ -70,17 +84,19 @@
totalTraffic = 0
for path in flows[flow]:
if link in path:
-                    if ratios is not None and identicalPaths:
-                        totalTraffic += traffic[flow] * float(ratios[flow, path])
+                    if useRatios and identicalPaths:
+                        totalTraffic += traffic[flow] * float(ratios[flow][path])
else:
totalTraffic += traffic[flow] * (1 / len(flows[flow]))

links[link]["totalTraffic"] += totalTraffic
links[link]["listFlows"].append(flow)

# Run linear optimization or baseline calculations
if args.model_type == CalcType.BASELINE.value:
linkUtil = calcLinkUtil(links)
elif args.model_type == CalcType.PATHS.value:
netflow.optMC(args, links, traffic, timestamp)
return None
else:
linkUtil = linOpt.runLinearOptimizationModel(
args, links, flows, traffic, timestamp, args.save_lp_models
@@ -103,6 +119,7 @@ def main():
CalcType.AVERAGE.value,
CalcType.MAX.value,
CalcType.SQUARED.value,
CalcType.PATHS.value,
],
help="type of calculation to run",
)
@@ -126,6 +143,14 @@
metavar=("DAY", "TYPE", "DATE"),
help="use existing path ratios for calculations",
)
parser.add_argument(
"-up",
"--use-paths",
nargs=3,
metavar=("DAY", "DATE", "USERATIOS?"),
help="use existing paths for calculations",
)

args = parser.parse_args()

if args.use_ratios:
@@ -143,15 +168,30 @@
CalcType.AVERAGE.value,
CalcType.MAX.value,
CalcType.SQUARED.value,
CalcType.PATHS.value,
]:
parser.error(
"Invalid ratio type. Please use 'average', 'max' or 'squared'."
"Invalid ratio type. Please use 'average', 'path', 'max' or 'squared'."
)
if args.model_type != CalcType.BASELINE.value:
parser.error(
"Cannot use existing path ratios with the specified model type."
)

if args.use_paths:
day, date, useratios = args.use_paths
if not day.isdigit():
parser.error("Invalid day number.")
if (
not date.isdigit()
or len(date) != 8
or int(date[4:6]) > 12
or int(date[6:]) > 31
):
parser.error("Invalid date. Please use a date in the format YYYYMMDD.")
if useratios not in ["True", "False"]:
parser.error("Invalid useratios value. Please use 'True' or 'False'.")

# Set start method to spawn to avoid issues with multiprocessing on Windows
set_start_method("spawn")

@@ -167,21 +207,20 @@
process_flows_hour,
[
(timestamp, flows[timestamp], traffic[timestamp], args, links.copy())
# for timestamp in list(flows.keys())[:1]
for timestamp in flows
],
)

logger.info("Finished processing all timestamps!")
-    results.sort()
-    dataUtils.writeDataToFile(
-        data=pd.DataFrame(
-            results, columns=["timestamp", "min_util", "max_util", "avg_util"]
-        ),
-        parserArgs=args,
-        outputFile="overviewData",
-    )
+    if results[0] is not None:
+        results.sort()
+        dataUtils.writeDataToFile(
+            data=pd.DataFrame(
+                results, columns=["timestamp", "min_util", "max_util", "avg_util"]
+            ),
+            parserArgs=args,
+            outputFile="overviewData",
+        )

endTime = pd.Timestamp.now()
