
Commit

Merge pull request #25 from sabotack/multiprocessing
Use multiprocessing for grouping and processing timestamps
sabotack authored Apr 29, 2024
2 parents 33e0614 + f1f1deb commit 5d71c96
Showing 11 changed files with 589 additions and 193 deletions.
67 changes: 67 additions & 0 deletions .github/workflows/black.yml
@@ -0,0 +1,67 @@
+name: Python QA
+
+on:
+  push:
+    branches: ["main"]
+  pull_request:
+    branches: ["main"]
+
+permissions:
+  contents: read
+
+jobs:
+  black:
+    runs-on: ubuntu-latest
+    name: Python QA
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Python 3.12
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.12"
+
+      # Cache the installation of Poetry itself (i.e. the next step). This prevents the workflow
+      # from reinstalling Poetry every time, which can be slow.
+      - name: cache poetry install
+        uses: actions/cache@v3
+        with:
+          path: ~/.local
+          key: poetry-1.8.2
+
+      # Install Poetry.
+      # The key configuration value here is `virtualenvs-in-project: true`: this creates the
+      # venv as a `.venv` in the project directory, which allows the next step to easily
+      # cache it.
+      - uses: snok/install-poetry@v1.3.4
+        with:
+          version: 1.8.2
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          installer-parallel: true
+
+      # Cache the dependencies (i.e. everything in `pyproject.toml`). Note the cache
+      # key: if you're using multiple Python versions or multiple OSes, you'd need to include
+      # them in the cache key. We aren't, so it can simply depend on poetry.lock.
+      - name: cache deps
+        id: cache-deps
+        uses: actions/cache@v3
+        with:
+          path: .venv
+          key: pydeps-${{ hashFiles('**/poetry.lock') }}
+
+      # Install dependencies. `--no-root` means "install all dependencies but not the project itself".
+      - run: poetry install --no-interaction --no-root
+        if: steps.cache-deps.outputs.cache-hit != 'true'
+
+      # Now install the project itself.
+      - run: poetry install --no-interaction
+
+      ################################################################
+      # Now finally run the code quality tools
+      ################################################################
+
+      - name: Format with black
+        run: |
+          poetry run black 'p6' --check
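
Note: the only gate this workflow enforces is `black --check`; the single-quote to double-quote churn in the two Python diffs below is exactly what black's string normalization produces. A minimal local reproduction (hypothetical snippet, not part of this commit; assumes black is installed in the Poetry venv):

# Hypothetical demo of black's quote normalization (not part of this commit).
import black

src = "BASELINE = 'baseline'\n"
print(black.format_str(src, mode=black.Mode()))  # -> BASELINE = "baseline"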
12 changes: 6 additions & 6 deletions p6/calc_type_enum.py
@@ -1,9 +1,9 @@
 from enum import Enum
 
 
 class CalcType(Enum):
-    BASELINE = 'baseline'
-    AVERAGE = 'average'
-    MAX = 'max'
-    SQUARED = 'squared'
-    RATIOS = 'ratios'
+    BASELINE = "baseline"
+    AVERAGE = "average"
+    MAX = "max"
+    SQUARED = "squared"
+    RATIOS = "ratios"
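
For context on where these values are used: the enum members round-trip the lowercase strings that optimizer.py matches on (`CalcType.AVERAGE.value`, etc.). A small usage sketch (illustrative only, not from this commit):

from p6.calc_type_enum import CalcType

# String -> member lookup, mirroring how a CLI argument like "average"
# selects the optimization model in optimizer.py's match statement.
model = CalcType("average")
assert model is CalcType.AVERAGE
assert model.value == "average"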
145 changes: 95 additions & 50 deletions p6/linear_optimization/optimizer.py
@@ -1,113 +1,155 @@
+from datetime import datetime
 import os
 import pandas as pd
 import gurobipy as gp
 
 from gurobipy import GRB
-from datetime import datetime
 from dotenv import load_dotenv
 
 from p6.calc_type_enum import CalcType
 from p6.utils import log
 from p6.utils import data as dataUtils
 
 logger = log.setupCustomLogger(__name__)
 
-load_dotenv('variables.env')
+load_dotenv("variables.env")
 
+# Environment variables
+OPT_MODELS_OUTPUT_DIR = os.getenv("OPT_MODELS_OUTPUT_DIR")
+
+# Environment gurobi license variables
 options = {
     "WLSACCESSID": os.getenv("WLSACCESSID"),
     "WLSSECRET": os.getenv("WLSSECRET"),
     "LICENSEID": int(os.getenv("LICENSEID")),
 }
 
-LOGGING_DIR = os.getenv('LOGGING_DIR')
-
-def runLinearOptimizationModel(model, links, flows, traffic, timestamp):
+
+def runLinearOptimizationModel(model, links, flows, traffic, timestamp, savelp=False):
     """
     Runs the linear optimization model to calculate the link utilization and the average link utilization.
     ### Parameters:
     ----------
     #### model: string
     The optimization model to run; one of the CalcType values 'average', 'max', or 'squared'.
     #### links: dict
     The links in the network, indexed by linkName.
     #### flows: dict
     The paths for each source-destination pair, with the paths split into a list of paths.
     #### traffic: dict
     The traffic for each source-destination pair.
     #### timestamp: string
     The timestamp of the traffic snapshot being optimized.
     #### savelp: bool
     Whether to write the model to an .lp file in OPT_MODELS_OUTPUT_DIR.
     ### Returns:
     ----------
     The average, minimum, and maximum link utilization.
     """
-    logger.info('Started running linear optimization model...')
+    logger.info("Started running linear optimization model...")
 
     with gp.Env(params=options) as env, gp.Model(env=env) as m:
         # Create optimization model based on the input model
         m = gp.Model("network_optimization", env=env)
 
         # Decision variables for path ratios for each source-destination pair
-        path_ratios = m.addVars([(sd, pathNum) for sd in flows for pathNum in range(len(flows[sd]))], vtype=GRB.CONTINUOUS, name="PathRatios")
+        path_ratios = m.addVars(
+            [(sd, pathNum) for sd in flows for pathNum in range(len(flows[sd]))],
+            vtype=GRB.CONTINUOUS,
+            name="PathRatios",
+        )
         match model:
             case CalcType.AVERAGE.value:
                 utilization = m.addVars(links, vtype=GRB.CONTINUOUS, name="Utilization")
-                m.setObjective(gp.quicksum((utilization[link]/links[link]['capacity'] for link in links)), GRB.MINIMIZE)
+                m.setObjective(
+                    gp.quicksum(
+                        (utilization[link] / links[link]["capacity"] for link in links)
+                    ),
+                    GRB.MINIMIZE,
+                )
             case CalcType.MAX.value:
                 max_utilization = m.addVar(vtype=GRB.CONTINUOUS, name="MaxUtilization")
                 m.setObjective(max_utilization, GRB.MINIMIZE)
             case CalcType.SQUARED.value:
                 utilization = m.addVars(links, vtype=GRB.CONTINUOUS, name="Utilization")
-                m.setObjective(gp.quicksum((utilization[link]**2 for link in links)), GRB.MINIMIZE)
+                m.setObjective(
+                    gp.quicksum((utilization[link] ** 2 for link in links)),
+                    GRB.MINIMIZE,
+                )
             case _:
-                raise ValueError(f'Invalid model: {model}')
+                raise ValueError(f"Invalid model: {model}")
 
         # Constraints for each link's utilization
         # Consists of the sum of ratios and traffic for each path related to the link
         for link in links:
             linkTuple = tuple((link[:5], link[5:]))
             link_flow = gp.quicksum(
-                path_ratios[sd, pathNum] * traffic[sd]
-                if linkTuple in zip(flows[sd][pathNum][:-1], flows[sd][pathNum][1:])
-                else 0
-                for sd in links[link]['listFlows'] for pathNum in range(len(flows[sd]))
+                (
+                    path_ratios[sd, pathNum] * traffic[sd]
+                    if linkTuple in zip(flows[sd][pathNum][:-1], flows[sd][pathNum][1:])
+                    else 0
+                )
+                for sd in links[link]["listFlows"]
+                for pathNum in range(len(flows[sd]))
             )
 
-            m.addConstr(link_flow <= links[link]['capacity'], name=f"cap_{link}")
+            m.addConstr(link_flow <= links[link]["capacity"], name=f"cap_{link}")
 
             match model:
-                case CalcType.AVERAGE.value:
-                    m.addConstr(link_flow == links[link]['capacity'] * utilization[link], name=f"util_{link}")
+                case CalcType.AVERAGE.value:
+                    m.addConstr(
+                        link_flow == links[link]["capacity"] * utilization[link],
+                        name=f"util_{link}",
+                    )
                 case CalcType.MAX.value:
-                    m.addConstr(link_flow / links[link]['capacity'] <= max_utilization, name=f"util_{link}")
+                    m.addConstr(
+                        link_flow / links[link]["capacity"] <= max_utilization,
+                        name=f"util_{link}",
+                    )
                 case CalcType.SQUARED.value:
-                    m.addConstr(link_flow == utilization[link] * links[link]['capacity'], name=f"util_{link}")
+                    m.addConstr(
+                        link_flow == utilization[link] * links[link]["capacity"],
+                        name=f"util_{link}",
+                    )
                 case _:
-                    raise ValueError(f'Invalid model: {model}')
+                    raise ValueError(f"Invalid model: {model}")
 
         for sd in traffic:
-            m.addConstr(path_ratios.sum(sd, '*') == 1, name=f"traffic_split_{sd}")
+            m.addConstr(path_ratios.sum(sd, "*") == 1, name=f"traffic_split_{sd}")
 
-        m.write(f"{model}.lp")
+        if savelp:
+            if not os.path.exists(OPT_MODELS_OUTPUT_DIR):
+                os.makedirs(OPT_MODELS_OUTPUT_DIR)
 
-        logger.info('Started optimization...')
+            ts = datetime.now().strftime("%Y%m%d")
+            time = (timestamp[:3] + timestamp[4:-6]).lower()
+            m.write(f"{OPT_MODELS_OUTPUT_DIR}/{ts}_{model}_{time}.lp")
+
+        logger.info("Started optimization...")
         m.optimize()
-        logger.info('Finished optimization')
+        logger.info("Finished optimization")
 
         # Output the results
         ratioData = []
         if m.status == GRB.OPTIMAL:
-            #debug and save optimal path ratios
+            # debug and save optimal path ratios
             for sd in flows:
                 logger.debug(f"Optimal path ratios for {sd}:")
                 for pathNum in range(len(flows[sd])):
-                    ratioData.append([timestamp, sd, pathNum, path_ratios[sd, pathNum].x])
-                    logger.debug(f"  Path {pathNum}: {path_ratios[sd, pathNum].x * 100} %")
-
-            dataUtils.writeDataToFile(pd.DataFrame(ratioData, columns=['timestamp', 'flowName', 'pathNum', 'ratio']), model, True)
+                    ratioData.append(
+                        [timestamp, sd, pathNum, path_ratios[sd, pathNum].x]
+                    )
+                    logger.debug(
+                        f"  Path {pathNum}: {path_ratios[sd, pathNum].x * 100} %"
+                    )
+
+            dataUtils.writeDataToFile(
+                pd.DataFrame(
+                    ratioData, columns=["timestamp", "flowName", "pathNum", "ratio"]
+                ),
+                model,
+                True,
+            )
 
             # Calculate average, min and max link utilization
             totalLinkUtil = 0
@@ -116,31 +158,34 @@ def runLinearOptimizationModel(model, links, flows, traffic, timestamp):
             for link in links:
                 linkTuple = tuple((link[:5], link[5:]))
                 link_flow = sum(
-                    path_ratios[sd, pathNum].x * traffic[sd]
-                    if linkTuple in zip(flows[sd][pathNum][:-1], flows[sd][pathNum][1:])
-                    else 0
-                    for sd in links[link]['listFlows'] for pathNum in range(len(flows[sd]))
+                    (
+                        path_ratios[sd, pathNum].x * traffic[sd]
+                        if linkTuple
+                        in zip(flows[sd][pathNum][:-1], flows[sd][pathNum][1:])
+                        else 0
+                    )
+                    for sd in links[link]["listFlows"]
+                    for pathNum in range(len(flows[sd]))
                 )
-                totalLinkUtil += link_flow / links[link]['capacity'] * 100
+                totalLinkUtil += link_flow / links[link]["capacity"] * 100
 
                 # Update min and max link utilization
-                if (link_flow / links[link]['capacity'] * 100) < minLinkUtil:
-                    minLinkUtil = link_flow / links[link]['capacity'] * 100
-                if (link_flow / links[link]['capacity'] * 100) > maxLinkUtil:
-                    maxLinkUtil = link_flow / links[link]['capacity'] * 100
+                if (link_flow / links[link]["capacity"] * 100) < minLinkUtil:
+                    minLinkUtil = link_flow / links[link]["capacity"] * 100
+                if (link_flow / links[link]["capacity"] * 100) > maxLinkUtil:
+                    maxLinkUtil = link_flow / links[link]["capacity"] * 100
 
             avgLinkUtil = totalLinkUtil / len(links)
             logger.info(f"Average link utilization: {avgLinkUtil}% for model {model}")
 
             return avgLinkUtil, minLinkUtil, maxLinkUtil
 
         elif m.status == GRB.INFEASIBLE:
-            logger.error('Model is infeasible')
+            logger.error("Model is infeasible")
             m.computeIIS()
-            logger.error('The following constraints cannot be satisfied:')
+            logger.error("The following constraints cannot be satisfied:")
             for c in m.getConstrs():
                 if c.IISConstr:
                     logger.error(c.constrName)
         else:
-            logger.error('Optimization ended with status %d' % m.status)
-
+            logger.error("Optimization ended with status %d" % m.status)
(Diffs for the remaining 8 changed files are not shown.)
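
The commit title says grouping and per-timestamp processing now run under multiprocessing, but the code that does so is in those remaining files. Purely as an illustrative sketch of the pattern (all names here are assumptions, not taken from this PR):

# Hypothetical sketch only -- the real driver lives in the files not shown
# above. It illustrates the pattern in the commit title: group work by
# timestamp, then process each timestamp in a separate worker process.
from multiprocessing import Pool


def processTimestamp(item):
    timestamp, rows = item
    # ... build links/flows/traffic for this timestamp, then e.g.:
    # runLinearOptimizationModel(model, links, flows, traffic, timestamp)
    return timestamp


def main(rowsByTimestamp):
    # rowsByTimestamp: dict mapping timestamp -> traffic rows at that time
    with Pool() as pool:
        return pool.map(processTimestamp, rowsByTimestamp.items())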
