Assistance with gym environment code #647

Sorour332000 · 2024-10-13T06:41:23Z

Sorour332000
Oct 13, 2024

Hi everyone, I am trying to create an inventory management program using proximal policy optimization (PPO). The environment I have created is shown below. The problem is that the reward graphs show no improvement in the reward: the agents just fill the inventory instantly without considering transportation, delays, or even the carried-over unsatisfied demand. I would appreciate any assistance with this.

The environment:
import numpy as np
import gym
from gym import spaces
import matplotlib.pyplot as plt
import pandas as pd

class InventoryMgmt(gym.Env):
    """Single-item inventory environment driven by a per-day data table.

    Each call to ``step`` simulates one day: due deliveries are received,
    today's demand plus any carried-over backorder is served from stock,
    an order may be placed, and costs/emissions are accounted for.

    ``data`` is indexed positionally (``.iloc``) and must provide the columns
    'Demand', 'Carrying Cost', 'Item Cost', 'Order Cost',
    'Transport Emission Rate', 'Holding Emission Rate', 'Lead Time' and
    'Backorder Cost'.

    Notes for reviewers (behaviour kept as-is, but relevant to training):
      * ``step`` returns the reward as a ``(cost_reward, emission_reward)``
        tuple; ``weights_cost`` / ``weights_emission`` are stored but never
        applied here. A trainer that expects a scalar reward has to combine
        the two components itself.
      * The action is only used when on-hand inventory is at or below the
        reorder point; on all other days it has no effect on the transition.
      * The order size is capped by ``max_inventory_capacity`` minus on-hand
        stock only; orders already in transit are not counted, and stock
        above capacity is discarded on delivery.
      * The demand and reorder-point observations are not clipped, so they
        can exceed the declared ``[0, 1]`` bounds (e.g. when backorders
        accumulate beyond the largest single-day demand).
    """

    def __init__(self, data, weights_cost, weights_emission, initial_inventory,
                 max_transport_capacity, max_inventory_capacity):
        """Store the configuration, declare the spaces and reset the episode.

        Args:
            data: Per-day table (see class docstring for required columns).
            weights_cost: Weight for the cost objective (stored only).
            weights_emission: Weight for the emission objective (stored only).
            initial_inventory: On-hand stock at the start of every episode.
            max_transport_capacity: Upper bound of the action space; also the
                per-shipment size used to count transport trips.
            max_inventory_capacity: Storage cap applied to on-hand stock.
        """
        super().__init__()
        self.data = data
        self.weights_cost = weights_cost
        self.weights_emission = weights_emission
        self.initial_inventory = initial_inventory
        self.max_transport_capacity = max_transport_capacity
        self.max_inventory_capacity = max_inventory_capacity

        # Continuous action space to control order quantity
        self.action_space = spaces.Box(low=0, high=self.max_transport_capacity, shape=(1,), dtype=np.float32)

        # Observation space: normalized inventory level, demand, and reorder point
        self.observation_space = spaces.Box(low=0, high=1, shape=(3,), dtype=np.float32)

        # Per-day result rows; exposed as a DataFrame through ``results_df``.
        # Kept as a list because appending is O(1), whereas concatenating a
        # DataFrame on every step copies all previous rows.
        self._result_rows = []
        self.pending_orders = []  # Track pending orders (quantity and delivery day)
        self.carry_over_backorder = 0  # Track backordered demand carried over
        self.reset()

    @property
    def results_df(self):
        """Per-day results as a DataFrame, one row per call to ``step``."""
        return pd.DataFrame(self._result_rows)

    @results_df.setter
    def results_df(self, frame):
        # Allows callers to replace or clear the log (e.g. with pd.DataFrame()).
        self._result_rows = frame.to_dict('records')

    def _refresh_demand_stats(self):
        """Cache the dataset-wide statistics used by every step."""
        self._max_demand = self.data['Demand'].max()
        self._avg_demand = self.data['Demand'].mean()
        avg_lead_time = self.data['Lead Time'].mean()
        self._safety_stock = (self._max_demand * avg_lead_time) - (self._avg_demand * avg_lead_time)

    def _reorder_point(self, lead_time):
        """Return the safety stock plus average demand over ``lead_time`` days."""
        return self._safety_stock + (self._avg_demand * lead_time)

    def _observation(self, demand, reorder_point):
        """Build the observation from the current inventory level.

        Inventory and reorder point are divided by the inventory capacity,
        demand by the largest single-day demand in the data.
        """
        return np.array(
            [
                self.inventory_level / self.max_inventory_capacity,
                demand / self._max_demand,
                reorder_point / self.max_inventory_capacity,
            ],
            dtype=np.float32,
        )

    def step(self, action):
        """
        Step through one day in the environment. Process the current day's demand, satisfy it using available
        inventory, and carry over unmet demand to the next day if necessary.

        Args:
            action: Requested order quantity; its first element is used. A
                bare scalar is accepted as well.

        Returns:
            A 4-tuple ``(observation, (reward_cost, reward_emission), done, info)``
            where ``done`` is True once every row of the data has been
            consumed and ``info`` is an empty dict.
        """
        row = self.data.iloc[self.current_day]
        actual_demand = row['Demand']

        # Add any carried-over backorder to today's demand
        total_demand = actual_demand + self.carry_over_backorder

        item_cost = row['Item Cost']
        lead_time = row['Lead Time']
        backorder_cost = row['Backorder Cost']
        holding_cost = item_cost * row['Carrying Cost']
        reorder_point = self._reorder_point(lead_time)

        # Receive every order that is due; keep the rest in the pipeline.
        still_pending = []
        for order in self.pending_orders:
            if self.current_day >= order['delivery_day']:
                self.inventory_level += order['quantity']
                print(f"Order delivered: {order['quantity']} units on day {self.current_day}")
            else:
                still_pending.append(order)
        self.pending_orders = still_pending

        # Ensure inventory level does not exceed capacity
        self.inventory_level = min(self.inventory_level, self.max_inventory_capacity)

        # Serve demand before placing new orders; the shortfall is carried over.
        if total_demand > self.inventory_level:
            backorder_quantity = total_demand - self.inventory_level
            self.inventory_level = 0
        else:
            self.inventory_level -= total_demand
            backorder_quantity = 0
        backorder_penalty = backorder_quantity * backorder_cost
        self.carry_over_backorder = backorder_quantity

        # An order is only placed when inventory is at or below the reorder point.
        order_quantity = 0
        if self.inventory_level <= reorder_point:
            requested = np.asarray(action).reshape(-1)[0]
            order_quantity = max(0, min(requested, self.max_inventory_capacity - self.inventory_level))
            if order_quantity > 0:
                self.pending_orders.append({'quantity': order_quantity, 'delivery_day': self.current_day + lead_time})
                print(f"Order placed: {order_quantity} units on day {self.current_day}")

        # Calculate daily costs and emissions
        holding_cost_total = max(0, self.inventory_level * holding_cost)
        if order_quantity > 0:
            order_cost_total = max(0, order_quantity * item_cost + row['Order Cost'])
        else:
            # The fixed ordering cost only applies on days an order is placed.
            order_cost_total = 0
        daily_cost = holding_cost_total + order_cost_total + backorder_penalty

        holding_emissions = max(0, self.inventory_level * row['Holding Emission Rate'])
        # One transport trip per started max_transport_capacity units.
        trips = np.ceil(order_quantity / self.max_transport_capacity)
        transport_emissions = max(0, trips * row['Transport Emission Rate'])
        daily_emissions = holding_emissions + transport_emissions

        # Update totals
        self.total_cost += daily_cost
        self.total_emissions += daily_emissions
        self.daily_costs.append(daily_cost)
        self.daily_emissions.append(daily_emissions)
        self.inventory_levels.append(self.inventory_level)

        # Reward shaping on top of the raw daily cost. Note that the backorder
        # cost is counted both inside daily_cost and again (x5) here.
        backorder_penalty_reward = -5.0 * backorder_quantity * backorder_cost
        inventory_penalty = -0.5 * self.inventory_level
        efficiency_reward = -abs(self.inventory_level - reorder_point) * 0.2
        order_incentive = 2.0 if self.inventory_level <= reorder_point else -0.5

        reward_cost = -daily_cost + backorder_penalty_reward + inventory_penalty + efficiency_reward + order_incentive
        reward_emission = -daily_emissions

        normalized_observation = self._observation(total_demand, reorder_point)

        self._result_rows.append({
            'Day': self.current_day + 1,
            'Actual Demand': actual_demand,
            'Carried Over Demand': self.carry_over_backorder,
            'Inventory Level': self.inventory_level,
            'Order Quantity': order_quantity,
            'Holding Cost': holding_cost_total,
            'Order Cost': order_cost_total,
            'Backorder Penalty': backorder_penalty,
            'Total Daily Cost': daily_cost,
            'Daily Holding Emissions': holding_emissions,
            'Daily Transport Emissions': transport_emissions,
            'Total Daily Emissions': daily_emissions,
        })

        self.current_day += 1
        done = self.current_day >= len(self.data)
        self.rewards.append((reward_cost, reward_emission))

        return normalized_observation, (reward_cost, reward_emission), done, {}

    def reset(self):
        """Start a new episode at day 0 and return the initial observation.

        Clears the per-episode histories, pending orders and backorder. The
        per-day results log behind ``results_df`` is NOT cleared, so it keeps
        accumulating across episodes.
        """
        self.current_day = 0
        self.inventory_level = self.initial_inventory
        self.total_cost = 0
        self.total_emissions = 0
        self.daily_costs = []
        self.daily_emissions = []
        self.inventory_levels = []
        self.rewards = []
        self.pending_orders = []
        self.carry_over_backorder = 0

        # Recomputed once per episode so that a replaced ``data`` is picked up.
        self._refresh_demand_stats()

        first_row = self.data.iloc[self.current_day]
        return self._observation(first_row['Demand'], self._reorder_point(first_row['Lead Time']))

    def render(self):
        """Print the episode totals and plot the per-day histories."""
        print(f"Final Total Cost: {self.total_cost}")
        print(f"Final Total Emissions: {self.total_emissions}")

        days = range(1, len(self.inventory_levels) + 1)

        # (series, title, y-axis label, extra plot keyword arguments)
        panels = [
            (self.inventory_levels, 'Inventory Level Over Time', 'Inventory Level', {}),
            (self.daily_costs, 'Daily Costs Over Time', 'Costs', {}),
            (self.daily_emissions, 'Daily Emissions Over Time', 'Emissions', {}),
            ([r[0] for r in self.rewards], 'Cost Rewards Over Time', 'Rewards', {'label': 'Cost Rewards'}),
            ([r[1] for r in self.rewards], 'Emission Rewards Over Time', 'Rewards', {'label': 'Emission Rewards'}),
        ]

        plt.figure(figsize=(20, 10))
        for position, (series, title, y_label, extra) in enumerate(panels, start=1):
            plt.subplot(len(panels), 1, position)
            plt.plot(days, series, marker='o', **extra)
            plt.title(title)
            plt.xlabel('Day')
            plt.ylabel(y_label)

        plt.tight_layout()
        plt.show()

    def save_to_excel(self, file_name='inventory_output.xlsx'):
        """Save the results DataFrame to an Excel file."""
        results = self.results_df
        if not results.empty:
            results.to_excel(file_name, index=False)
            print(f"Results saved to {file_name}")
        else:
            print("No results to save.")

# Register the environment

# Register the environment under the id 'InventoryMgmt-v1'; the entry point
# names the InventoryMgmt class in module InvEnv_multi_v1.
gym.envs.registration.register(
    id='InventoryMgmt-v1',
    entry_point='InvEnv_multi_v1:InventoryMgmt',
)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Assistance with gym environment code #647

{{title}}

Replies: 0 comments

Select a reply

Assistance with gym environment code #647

Sorour332000 Oct 13, 2024

Register the environment

Replies: 0 comments

Sorour332000
Oct 13, 2024