Is tupledict() really faster than Python Dictionary?
AnsweredI recently found out the tupledict data-type for model building. It definitely improves the readability of the code. But does it really speed up the modelling process?
To test it out, I built 3-tier Network optimization model, where there is a need to create constraints faster. Below is the code with dictionary implementation.
import gurobipy as gp
from gurobipy import GRB
# Connect tier1 nodes to tier2 nodes
tier1_to_tier2_arcs = [(i, j) for i in range(1000) for j in range(1000)]
# Connect tier2 nodes to tier3 nodes
tier2_to_tier3_arcs = [(i, j) for i in range(1000) for j in range(1000)]
# Create supply and demand at tier 1 and tier 3 nodes
supply = {i: np.random.randint(1, 10) for i in tier1_nodes}
demand = {j: np.random.randint(1, 10) for j in tier3_nodes}
# Create a Gurobi model
model = gp.Model()
# Create flow variables using a dictionary
flow_tier1_to_tier2 = {(i, j): model.addVar(name=f"flow_{i}_{j}") for i, j in tier1_to_tier2_arcs}
flow_tier2_to_tier3 = {(i, j): model.addVar(name=f"flow_{i}_{j}") for i, j in tier2_to_tier3_arcs}
# Create constraints for supply at tier1 nodes
start_time = time.time()
for i in tier1_nodes:
model.addConstr(gp.quicksum(flow_tier1_to_tier2[i, j] for j in tier2_nodes if (i, j) in flow_tier1_to_tier2) <= supply[i], name=f"supply_{i}")
end_time = time.time()
print(f"Supply constraints creation time: {end_time - start_time:.4f} seconds")
# Create constraints for demand at tier3 nodes
start_time = time.time()
for j in tier3_nodes:
model.addConstr(gp.quicksum(flow_tier2_to_tier3[i, j] for i in tier2_nodes if (i, j) in flow_tier2_to_tier3) >= demand[j], name=f"demand_{j}")
end_time = time.time()
print(f"Demand constraints creation time: {end_time - start_time:.4f} seconds")
# Create node balancing constraints for tier2 nodes
start_time = time.time()
for k in tier2_nodes:
inflow = gp.quicksum(flow_tier1_to_tier2[i, k] for i in tier1_nodes if (i, k) in flow_tier1_to_tier2)
outflow = gp.quicksum(flow_tier2_to_tier3[k, j] for j in tier3_nodes if (k, j) in flow_tier2_to_tier3)
model.addConstr(inflow == outflow, name=f"node_balance_{k}")
end_time = time.time()
print(f"Node balancing constraints creation time with dictionary: {end_time - start_time:.4f} seconds")
Below is the tupledict implementation, with same initial data preparation part.
# Create a Gurobi model
model = gp.Model()
# Create flow variables using tupledict
flow_tier1_to_tier2 = model.addVars(tier1_to_tier2_arcs, name="flow")
flow_tier2_to_tier3 = model.addVars(tier2_to_tier3_arcs, name="flow")
# Update model to integrate new variables
model.update()
# Create constraints for supply at tier1 nodes
start_time = time.time()
for i in tier1_nodes:
model.addConstr(flow_tier1_to_tier2.sum(i, '*') <= supply[i], name=f"supply_{i}")
end_time = time.time()
print(f"Supply constraints creation time: {end_time - start_time:.4f} seconds")
start_time = time.time()
# Create constraints for demand at tier3 nodes
for j in tier3_nodes:
model.addConstr(flow_tier2_to_tier3.sum('*', j) >= demand[j], name=f"demand_{j}")
end_time = time.time()
print(f"Demand constraints creation time: {end_time - start_time:.4f} seconds")
# Create node balancing constraints for tier2 nodes
start_time = time.time()
for k in tier2_nodes:
inflow = flow_tier1_to_tier2.sum('*', k)
outflow = flow_tier2_to_tier3.sum(k, '*')
model.addConstr(inflow == outflow, name=f"node_balance_{k}")
end_time = time.time()
print(f"Node balancing constraints creation time with tupledict: {end_time - start_time:.4f} seconds")
In my test, tupledict(8.3 seconds) was quite slower than traditional dictionary(6.26 seconds). Is it true or am I missing something here?
-
Hi Devarshi,
It may depend how you're using it (I can't see your notebook).
For the following example, using tupledicts runs about 200 times faster:
import numpy as np
import timeit
setup = """
import gurobipy as gp
m = gp.Model()
x = m.addVars(100,100,100, vtype="B")
"""
statement1= """
m.addConstr(x.sum("*", 42, "*") == 1)
"""
statement2= """
m.addConstr(gp.quicksum(x[i,42,j] for i in range(100) for j in range(100)) == 1)
"""
factor = np.divide(
np.min(timeit.repeat(stmt=statement1, setup=setup, number=1, repeat=10)),
np.min(timeit.repeat(stmt=statement2, setup=setup, number=1, repeat=10)),
)
print("Using tupledicts is", factor, "times faster.")- Riley
0 -
Hi Riley Clement, I couldn't attach notebook here. So, I updated the question with both versions of code.
Also, even if we consider your code, as you're dividing run-time of statement 1 by statement 2, it looks like tupledicts runs about 200 times slower, and not faster.0 -
Hi Devarshi,
it looks like tupledicts runs about 200 times slower, and not faster.
You are right. I did not check properly and rushed in with pre-conceived notions. If I recall correctly when tupledicts were introduced they were designed to be faster than the Python dictionary, but this was back in the time of Python 2, and the dictionary has been overhauled, so it looks like it is simply not true anymore. There may be situations where tupledict is faster but I think one of the dev team would need to explain. Perhaps the only advantage of tupledicts now is readability.
I translated your code into the timeit/numpy framework - using time.time() leaves the result susceptible to machine noise, which is why you want to run it many times (the more the better) and compare minimums (see Raymond Hettinger's comment on this stackoverflow post).
import numpy
setup = """
import gurobipy as gp
import numpy as np
from gurobipy import GRB
# Connect tier1 nodes to tier2 nodes
tier1_to_tier2_arcs = [(i, j) for i in range(1000) for j in range(1000)]
# Connect tier2 nodes to tier3 nodes
tier2_to_tier3_arcs = [(i, j) for i in range(1000) for j in range(1000)]
# Create supply and demand at tier 1 and tier 3 nodes
supply = {i: np.random.randint(1, 10) for i in range(1000)}
demand = {j: np.random.randint(1, 10) for j in range(1000)}
# Create a Gurobi model
model = gp.Model()
# Create flow variables using a dictionary
flow_tier1_to_tier2 = {(i, j): model.addVar(name=f"flow_{i}_{j}") for i, j in tier1_to_tier2_arcs}
flow_tier2_to_tier3 = {(i, j): model.addVar(name=f"flow_{i}_{j}") for i, j in tier2_to_tier3_arcs}
# Create flow variables using tupledict
flow_tier1_to_tier2 = model.addVars(tier1_to_tier2_arcs, name="flow")
flow_tier2_to_tier3 = model.addVars(tier2_to_tier3_arcs, name="flow")
"""
tier1_supply_v1 = """
for i in range(1000):
model.addConstr(gp.quicksum(flow_tier1_to_tier2[i, j] for j in range(1000) if (i, j) in flow_tier1_to_tier2) <= supply[i], name=f"supply_{i}")
"""
# Create constraints for demand at tier3 nodes
tier3_demand_v1 = """
for j in range(1000):
model.addConstr(gp.quicksum(flow_tier2_to_tier3[i, j] for i in range(1000) if (i, j) in flow_tier2_to_tier3) >= demand[j], name=f"demand_{j}")
"""
# Create node balancing constraints for tier2 nodes
node_balance_v1 = """
for k in range(1000):
inflow = gp.quicksum(flow_tier1_to_tier2[i, k] for i in range(1000) if (i, k) in flow_tier1_to_tier2)
outflow = gp.quicksum(flow_tier2_to_tier3[k, j] for j in range(1000) if (k, j) in flow_tier2_to_tier3)
model.addConstr(inflow == outflow, name=f"node_balance_{k}")
"""
# Create constraints for supply at tier1 nodes
tier1_supply_v2 = """
for i in range(1000):
model.addConstr(flow_tier1_to_tier2.sum(i, '*') <= supply[i], name=f"supply_{i}")
"""
# Create constraints for demand at tier3 nodes
tier3_demand_v2 = """
for j in range(1000):
model.addConstr(flow_tier2_to_tier3.sum('*', j) >= demand[j], name=f"demand_{j}")
"""
# Create node balancing constraints for tier2 nodes
node_balance_v2 = """
for k in range(1000):
inflow = flow_tier1_to_tier2.sum('*', k)
outflow = flow_tier2_to_tier3.sum(k, '*')
model.addConstr(inflow == outflow, name=f"node_balance_{k}")
"""
t1 = np.min(timeit.repeat(stmt=tier1_supply_v1, setup=setup, number=1, repeat=10))
t2 = np.min(timeit.repeat(stmt=tier1_supply_v2, setup=setup, number=1, repeat=10))
factor = np.divide(
t2,t1
)
print("Dictionary is", factor, "times faster.")
t1 = np.min(timeit.repeat(stmt=tier3_demand_v1, setup=setup, number=1, repeat=10))
t2 = np.min(timeit.repeat(stmt=tier3_demand_v2, setup=setup, number=1, repeat=10))
factor = np.divide(
t2,t1
)
print("Dictionary is", factor, "times faster.")
t1 = np.min(timeit.repeat(stmt=node_balance_v1, setup=setup, number=1, repeat=10))
t2 = np.min(timeit.repeat(stmt=node_balance_v2, setup=setup, number=1, repeat=10))
factor = np.divide(
t2,t1
)
print("Dictionary is", factor, "times faster.")Using the dictionary was:
- 2.7x faster on the tier 1 supply constraints
- 2x faster on the tier 3 demand constraints
- 2.2x faster on the node balancing constraintsSo, to answer your original question, you are not missing anything :D
- Riley
0 -
Thanks Riley for the clarity. I really appreciate your quick response on this.
0
Please sign in to leave a comment.
Comments
4 comments