Skip to content

Commit

Permalink
Merge pull request #24 from sabotack/DataWrite
Browse files Browse the repository at this point in the history
Write data to files
  • Loading branch information
sabotack authored Apr 29, 2024
2 parents c18f551 + 7c23bac commit 33e0614
Show file tree
Hide file tree
Showing 12 changed files with 107 additions and 127 deletions.
Binary file removed .DS_Store
Binary file not shown.
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -183,10 +183,14 @@ internal-dataset-week.tar

# data
output/
ratios/

# logs
log/

# Mac
.DS_Store
*.DS_Store

# code runner
tempCodeRunnerFile.py
tempCodeRunnerFile.py
9 changes: 9 additions & 0 deletions p6/calc_type_enum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from enum import Enum

# Enumeration of the calculation models the pipeline can run. The member
# values are the lowercase identifiers used on the command line and when
# dispatching on ``model_type`` (e.g. ``CalcType.MAX.value == 'max'``).
# Built via the functional Enum API: each member's value is its name
# lowercased, which keeps the name/value pairing impossible to mistype.
CalcType = Enum(
    "CalcType",
    [(member, member.lower()) for member in ("BASELINE", "AVERAGE", "MAX", "SQUARED", "RATIOS")],
)

File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,26 @@
import gurobipy as gp

from gurobipy import GRB
from datetime import datetime
from dotenv import load_dotenv
from enum import Enum

from p6.calc_type_enum import CalcType
from p6.utils import log
from p6.utils import data as dataUtils
logger = log.setupCustomLogger(__name__)

load_dotenv('variables.env')

#environment variables
# Environment variables
options = {
"WLSACCESSID": os.getenv("WLSACCESSID"),
"WLSSECRET": os.getenv("WLSSECRET"),
"LICENSEID": int(os.getenv("LICENSEID")),
}

class LinearOptimizationModel(Enum):
"""
Enum class for the linear optimization models.
"""
averageUtilization = 'averageUtilization'
maxUtilization = 'maxUtilization'
squaredUtilization = 'squaredUtilization'
LOGGING_DIR = os.getenv('LOGGING_DIR')


def runLinearOptimizationModel(model, links, flows, traffic):
def runLinearOptimizationModel(model, links, flows, traffic, timestamp):
"""
Runs the linear optimization model to calculate the link utilization and the average link utilization.
Expand Down Expand Up @@ -55,41 +50,40 @@ def runLinearOptimizationModel(model, links, flows, traffic):
# Create optimization model based on the input model
m = gp.Model("network_optimization", env=env)

m.setParam('logFile', 'gurobi.log')

# Decision variables for path ratios for each source-destination pair
path_ratios = m.addVars([(sd, pathNum) for sd in flows for pathNum in range(len(flows[sd]))], vtype=GRB.CONTINUOUS, name="PathRatios")
match model:
case LinearOptimizationModel.averageUtilization:
case CalcType.AVERAGE.value:
utilization = m.addVars(links, vtype=GRB.CONTINUOUS, name="Utilization")
m.setObjective(gp.quicksum((utilization[link]/links[link]['capacity'] for link in links)), GRB.MINIMIZE)
case LinearOptimizationModel.maxUtilization:
case CalcType.MAX.value:
max_utilization = m.addVar(vtype=GRB.CONTINUOUS, name="MaxUtilization")
m.setObjective(max_utilization, GRB.MINIMIZE)
case LinearOptimizationModel.squaredUtilization:
case CalcType.SQUARED.value:
utilization = m.addVars(links, vtype=GRB.CONTINUOUS, name="Utilization")
m.setObjective(gp.quicksum((utilization[link]**2 for link in links)), GRB.MINIMIZE)
case _:
raise ValueError(f'Invalid model: {model}')

# Constraints for each link's utilization
# Consists of the sum of ratios and traffic for each path related to the link
for link in links:
linkTuple = tuple((link[:5], link[5:]))
link_flow = gp.quicksum(
path_ratios[sd, pathNum] * traffic[sd]
if linkTuple in zip(flows[sd][pathNum][:-1], flows[sd][pathNum][1:])
else 0
for sd in flows for pathNum in range(len(flows[sd]))
for sd in links[link]['listFlows'] for pathNum in range(len(flows[sd]))
)

m.addConstr(link_flow <= links[link]['capacity'], name=f"cap_{link}")

match model:
case LinearOptimizationModel.averageUtilization:
case CalcType.AVERAGE.value:
m.addConstr(link_flow == links[link]['capacity'] * utilization[link], name=f"util_{link}")
case LinearOptimizationModel.maxUtilization:
case CalcType.MAX.value:
m.addConstr(link_flow / links[link]['capacity'] <= max_utilization, name=f"util_{link}")
case LinearOptimizationModel.squaredUtilization:
case CalcType.SQUARED.value:
m.addConstr(link_flow == utilization[link] * links[link]['capacity'], name=f"util_{link}")
case _:
raise ValueError(f'Invalid model: {model}')
Expand All @@ -100,45 +94,46 @@ def runLinearOptimizationModel(model, links, flows, traffic):
m.write(f"{model}.lp")

logger.info('Started optimization...')

m.optimize()

logger.info('Finished optimization')

# Output the results
ratioData = []
if m.status == GRB.OPTIMAL:
#find largest util and print
match model:
case LinearOptimizationModel.averageUtilization:
totalLinkUtil = m.getObjective().getValue() / len(links) * 100
case LinearOptimizationModel.maxUtilization:
totalLinkUtil = max_utilization.x * 100
case LinearOptimizationModel.squaredUtilization:
totalLinkUtil = m.getObjective().getValue() / len(links) * 100
case _:
raise ValueError(f'Invalid model: {model}')
#debug and save optimal path ratios
for sd in flows:
logger.info(f"Optimal path ratios for {sd}:")
logger.debug(f"Optimal path ratios for {sd}:")
for pathNum in range(len(flows[sd])):
logger.info(f" Path {pathNum}: {path_ratios[sd, pathNum].x * 100} %")

logger.info("")
ratioData.append([timestamp, sd, pathNum, path_ratios[sd, pathNum].x])
logger.debug(f" Path {pathNum}: {path_ratios[sd, pathNum].x * 100} %")

dataUtils.writeDataToFile(pd.DataFrame(ratioData, columns=['timestamp', 'flowName', 'pathNum', 'ratio']), model, True)

# Calculate average link utilization
# Calculate average, min and max link utilization
totalLinkUtil = 0
minLinkUtil = 0
maxLinkUtil = 0
for link in links:
linkTuple = tuple((link[:5], link[5:]))
link_flow = sum(
path_ratios[sd, pathNum].x * traffic[sd]
if linkTuple in zip(flows[sd][pathNum][:-1], flows[sd][pathNum][1:])
else 0
for sd in flows for pathNum in range(len(flows[sd]))
for sd in links[link]['listFlows'] for pathNum in range(len(flows[sd]))
)
if (link_flow / links[link]['capacity'] * 100) >= 10:
logger.info(f'Link {link} has utilization: {link_flow / links[link]["capacity"] * 100}%')
totalLinkUtil += link_flow / links[link]['capacity'] * 100
totalLinkUtil = totalLinkUtil / len(links)
logger.info(f"Average link utilization: {totalLinkUtil}% for model {model}")

# Update min and max link utilization
if (link_flow / links[link]['capacity'] * 100) < minLinkUtil:
minLinkUtil = link_flow / links[link]['capacity'] * 100
if (link_flow / links[link]['capacity'] * 100) > maxLinkUtil:
maxLinkUtil = link_flow / links[link]['capacity'] * 100

avgLinkUtil = totalLinkUtil / len(links)
logger.info(f"Average link utilization: {avgLinkUtil}% for model {model}")

return avgLinkUtil, minLinkUtil, maxLinkUtil

elif m.status == GRB.INFEASIBLE:
logger.error('Model is infeasible')
m.computeIIS()
Expand All @@ -147,4 +142,5 @@ def runLinearOptimizationModel(model, links, flows, traffic):
if c.IISConstr:
logger.error(c.constrName)
else:
logger.error('Optimization ended with status %d' % m.status)
logger.error('Optimization ended with status %d' % m.status)

99 changes: 30 additions & 69 deletions p6/main.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,18 @@
import gurobipy as gp
from gurobipy import GRB

import argparse
import statistics as stats

from p6.calc_type_enum import CalcType
from p6.utils import data as dataUtils
from p6.utils import network as nwUtils
from p6.utils import log
from p6.linearOptimization import LinearOptimization as linOpt
from p6.linearOptimization.LinearOptimization import LinearOptimizationModel
from p6.linear_optimization import optimizer as linOpt

logger = log.setupCustomLogger(__name__)

import pandas as pd

DATA_DAY = 2

# --- FUNCTIONS ---
def calcLinkUtil(links):
util = {}

Expand All @@ -25,24 +22,30 @@ def calcLinkUtil(links):
return util

def main():
logger.info('Started')
parser = argparse.ArgumentParser()
parser.add_argument('model_type', choices=[CalcType.BASELINE.value, CalcType.AVERAGE.value, CalcType.MAX.value, CalcType.SQUARED.value], help='type of calculation to run')
args = parser.parse_args()

logger.info('Started, model_type: ' + str(args.model_type))

flows = dataUtils.readFlows(DATA_DAY)
links = dataUtils.readLinks()
traffic = dataUtils.readTraffic(DATA_DAY)

dailyUtil = pd.DataFrame(columns=['timestamp', 'min_util', 'max_util', 'avg_util'])
utilStats = []

for timestamp in flows:
# Reset totalTraffic for all links in this timestamp
# Reset totalTraffic and listFlows for all links in this timestamp
for linkKey in links:
links[linkKey]['totalTraffic'] = 0
links[linkKey]['listFlows'] = []

for i, flow in enumerate(flows[timestamp]):
logger.info(f'Processing {timestamp} with {len(flows[timestamp])} flows...')
for flow in flows[timestamp]:
routers = nwUtils.getRoutersHashFromFlow(flows[timestamp][flow])
flowLinks = nwUtils.getFlowLinks(routers, links)

# Update links with traffic
# Update links with traffic, and if link is new, add it to links
for linkKey in flowLinks:
if(linkKey in links):
links[linkKey]['totalTraffic'] += traffic[timestamp][flow] * flowLinks[linkKey].trafficRatio
Expand All @@ -53,61 +56,19 @@ def main():
'capacity': flowLinks[linkKey].capacity,
'totalTraffic': traffic[timestamp][flow] * flowLinks[linkKey].trafficRatio
}

# Log number of processed flows
if(i % 10000 == 0):
logger.info(f'Processed {timestamp} {i+1} flows of {len(flows[timestamp])}...')
if(i == len(flows[timestamp]) - 1):
logger.info(f'Processed {timestamp} {i+1} flows of {len(flows[timestamp])}...')


linkUtil = calcLinkUtil(links)
dailyUtil.loc[len(dailyUtil.index)] = [timestamp, min(linkUtil.values()), max(linkUtil.values()), stats.mean(linkUtil.values())]

#run linear optimization model
#linOpt.runLinearOptimizationModel(LinearOptimizationModel.averageUtilization, links, flows[timestamp], traffic[timestamp])
#linOpt.runLinearOptimizationModel(LinearOptimizationModel.maxUtilization, links, flows[timestamp], traffic[timestamp])
#linOpt.runLinearOptimizationModel(LinearOptimizationModel.squaredUtilization, links, flows[timestamp], traffic[timestamp])

dataUtils.writeDataToFile(dailyUtil)



# logger.debug(f"Flows: {len(flows)}")

# for flow in flows:
# print(f"Flow: {flow}")
# for path in flows[flow]:
# print(f"-: {path}")
# print("\n")

# # --- LINKS ---
# linksCapacity = {}
# linksCapacity['AB'] = 600
# linksCapacity['AC'] = 2000
# linksCapacity['BD'] = 500
# linksCapacity['BE'] = 600
# linksCapacity['CF'] = 1500
# linksCapacity['DG'] = 400
# linksCapacity['EG'] = 600
# linksCapacity['FG'] = 1500

# # --- PATHS ---
# flows = {}
# flows['AG'] = {}
# flows['AG'][0] = ['A', 'B', 'D', 'G']
# flows['AG'][1] = ['A', 'B', 'E', 'G']
# flows['AG'][2] = ['A', 'C', 'F', 'G']

# # --- TRAFFIC ---
# traffic = {}
# traffic['AG'] = 100

# # --- RATIOS ---

# logger.info('Populating routers hash from flows')
# routersHash = nwUtils.getRoutersHashFromFlows(flows)

# logger.info('Calculating ratios')
# links = {}
# nwUtils.recCalcRatios(links, routersHash['G'], linksCapacity)
links[linkKey]['listFlows'] = []

# Add this flow to the list of flows for this link
links[linkKey]['listFlows'].append(flow)

# Run linear optimization or baseline calculations
if (args.model_type == CalcType.BASELINE.value):
linkUtil = calcLinkUtil(links)
utilStats.append([timestamp, min(linkUtil.values()), max(linkUtil.values()), stats.mean(linkUtil.values())])
else:
avgLinkUtil, minLinkUtil, maxLinkUtil = linOpt.runLinearOptimizationModel(args.model_type, links, flows[timestamp], traffic[timestamp], timestamp)
utilStats.append([timestamp, minLinkUtil, maxLinkUtil, avgLinkUtil])

dataUtils.writeDataToFile(pd.DataFrame(utilStats, columns=['timestamp', 'min_util', 'max_util', 'avg_util']), args.model_type)

logger.info('Finished')
File renamed without changes.
26 changes: 18 additions & 8 deletions p6/utils/data.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import datetime
import os
import sys
import pandas as pd

from p6.utils import log
from datetime import datetime
from dotenv import load_dotenv

load_dotenv('variables.env')
Expand All @@ -14,7 +15,7 @@
DATASET_LINKS_NAME = os.getenv("DATASET_LINKS_NAME")

DATA_OUTPUT_DIR = os.getenv("DATA_OUTPUT_DIR")
DATA_OUTPUT_NAME = os.getenv("DATA_OUTPUT_NAME")
RATIO_OUTPUT_DIR = os.getenv("RATIO_OUTPUT_DIR")

def readFlows(day):
"""
Expand Down Expand Up @@ -143,25 +144,34 @@ def readTraffic(day):
return traffic


def writeDataToFile(dailyUtil):
def writeDataToFile(data, type, ratioData=None):
"""
Writes the daily utilization data to a CSV file.
### Parameters:
----------
#### dailyUtil: pandas.DataFrame
#### data: pandas.DataFrame
The daily utilization data to write to a file.
"""

try:
if not os.path.exists(DATA_OUTPUT_DIR):
os.makedirs(DATA_OUTPUT_DIR)
if not os.path.exists(RATIO_OUTPUT_DIR):
os.makedirs(RATIO_OUTPUT_DIR)

filePath = ''
timestamp = datetime.now().strftime("%Y%m%d")

if ratioData is not None:
time = (data['timestamp'][0][:3] + data['timestamp'][0][4:-6]).lower()
filePath = f'{RATIO_OUTPUT_DIR}/{timestamp}_{type}_{time}_ratios.csv'
else:
filePath = f'{DATA_OUTPUT_DIR}/{timestamp}_{type}.csv'

timestamp = datetime.datetime.now().strftime("%Y%m%d")

logger.info(f'Writing data to file...')
dailyUtil.to_csv(f'{DATA_OUTPUT_DIR}/{DATA_OUTPUT_NAME}_{timestamp}.csv', mode='w', header=True, index=False)
data.to_csv(filePath, mode='w', header=True, index=False)
logger.info(f'Finished writing data to file')
except Exception as e:
logger.error(f'Error writing data to file: {e}')
sys.exit(1)
sys.exit(1)
Loading

0 comments on commit 33e0614

Please sign in to comment.