Skip to content

Commit

Permalink
Merge pull request verilog-to-routing#2516 from verilog-to-routing/pa…
Browse files Browse the repository at this point in the history
…rtition_subtree

Net decomposition: tuning and polishing
  • Loading branch information
vaughnbetz authored Nov 26, 2024
2 parents 798811b + 40814f8 commit c246f54
Show file tree
Hide file tree
Showing 15 changed files with 240 additions and 72 deletions.
18 changes: 14 additions & 4 deletions vpr/src/route/DecompNetlistRouter.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

/** @file Parallel and net-decomposing case for NetlistRouter. Works like
* \see ParallelNetlistRouter, but tries to "decompose" nets and assign them to
* the next level of the partition tree where possible. */
* the next level of the partition tree where possible.
* See "Parallel FPGA Routing with On-the-Fly Net Decomposition", FPT'24 */
#include "netlist_routers.h"

#include <tbb/task_group.h>
Expand Down Expand Up @@ -57,6 +58,8 @@ class DecompNetlistRouter : public NetlistRouter {
* \ref route_net for each net, which will handle other global updates.
* \return RouteIterResults for this iteration. */
RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack);
/** Inform the PartitionTree of the nets with updated bounding boxes */
void handle_bb_updated_nets(const std::vector<ParentNetId>& nets);
/** Set RCV enable flag for all routers managed by this netlist router.
* Net decomposition does not work with RCV, so calling this fn with x=true is a fatal error. */
void set_rcv_enabled(bool x);
Expand All @@ -65,10 +68,14 @@ class DecompNetlistRouter : public NetlistRouter {
private:
/** Should we decompose this net? */
bool should_decompose_net(ParentNetId net_id, const PartitionTreeNode& node);
/** Get a bitset with sinks to route before net decomposition */
/** Get a bitset of sinks to route before net decomposition. Output bitset is
* [1..num_sinks] where the corresponding index is set to 1 if the sink needs to
* be routed */
vtr::dynamic_bitset<> get_decomposition_mask(ParentNetId net_id, const PartitionTreeNode& node);
/** Get a bitset with sinks to route before virtual net decomposition */
vtr::dynamic_bitset<> get_vnet_decomposition_mask(const VirtualNet& vnet, const PartitionTreeNode& node);
/** Get a bitset of sinks to route before virtual net decomposition. Output bitset is
* [1..num_sinks] where the corresponding index is set to 1 if the sink needs to
* be routed */
vtr::dynamic_bitset<> get_decomposition_mask_vnet(const VirtualNet& vnet, const PartitionTreeNode& node);
/** Decompose and route a regular net. Output the resulting vnets to \p left and \p right.
* \return Success status: true if routing is successful and left and right now contain valid virtual nets: false otherwise. */
bool decompose_and_route_net(ParentNetId net_id, const PartitionTreeNode& node, VirtualNet& left, VirtualNet& right);
Expand Down Expand Up @@ -115,6 +122,9 @@ class DecompNetlistRouter : public NetlistRouter {
float _pres_fac;
float _worst_neg_slack;

/** The partition tree. Holds the groups of nets for each partition */
vtr::optional<PartitionTree> _tree;

/** Sinks to be always sampled for decomposition for each net: [0.._net_list.size()-1]
* (i.e. when routing fails after decomposition for a sink, sample it on next iteration) */
vtr::vector<ParentNetId, vtr::dynamic_bitset<>> _net_known_samples;
Expand Down
61 changes: 45 additions & 16 deletions vpr/src/route/DecompNetlistRouter.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
/** @file Impls for DecompNetlistRouter */

#include "DecompNetlistRouter.h"
#include "globals.h"
#include "netlist_routers.h"
#include "route_net.h"
#include "sink_sampling.h"
Expand All @@ -21,25 +22,44 @@ inline RouteIterResults DecompNetlistRouter<HeapType>::route_netlist(int itry, f
_pres_fac = pres_fac;
_worst_neg_slack = worst_neg_slack;

vtr::Timer timer;

/* Organize netlist into a PartitionTree.
* Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */
PartitionTree tree(_net_list);
if(!_tree){
_tree = PartitionTree(_net_list);
PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(timer.elapsed_sec()) + " s");
}

/* Remove all virtual nets: we will create them for each iteration.
* This needs to be done because the partition tree can change between iterations
* due to bounding box updates, which invalidates virtual nets */
_tree->clear_vnets();

/* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. */
tbb::task_group g;
route_partition_tree_node(g, tree.root());
g.wait();
tbb::task_group group;
route_partition_tree_node(group, _tree->root());
group.wait();
PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s");

/* Combine results from threads */
RouteIterResults out;
for (auto& results : _results_th) {
out.stats.combine(results.stats);
out.rerouted_nets.insert(out.rerouted_nets.end(), results.rerouted_nets.begin(), results.rerouted_nets.end());
out.bb_updated_nets.insert(out.bb_updated_nets.end(), results.bb_updated_nets.begin(), results.bb_updated_nets.end());
out.is_routable &= results.is_routable;
}

return out;
}

/** Forward the nets whose bounding boxes were updated to the partition tree.
 * The tree must already have been built by route_netlist(); this is asserted.
 * \param nets Nets with updated bounding boxes. */
template<typename HeapType>
void DecompNetlistRouter<HeapType>::handle_bb_updated_nets(const std::vector<ParentNetId>& nets) {
    /* _tree is only populated once route_netlist() has run */
    VTR_ASSERT(_tree);

    auto& partition_tree = *_tree;
    partition_tree.update_nets(nets);
}

template<typename HeapType>
void DecompNetlistRouter<HeapType>::set_rcv_enabled(bool x) {
if (x)
Expand Down Expand Up @@ -120,6 +140,10 @@ inline bool should_decompose_vnet(const VirtualNet& vnet, const PartitionTreeNod
template<typename HeapType>
void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node) {
auto& route_ctx = g_vpr_ctx.mutable_routing();
vtr::Timer timer;

/* node.nets is an unordered set, copy into vector to sort */
std::vector<ParentNetId> nets(node.nets.begin(), node.nets.end());

/* Sort so that nets with the most sinks are routed first.
* We want to interleave virtual nets with regular ones, so sort an "index vector"
Expand All @@ -129,15 +153,14 @@ void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g
std::vector<size_t> order(node.nets.size() + node.vnets.size());
std::iota(order.begin(), order.end(), 0);
std::stable_sort(order.begin(), order.end(), [&](size_t i, size_t j) -> bool {
ParentNetId id1 = i < node.nets.size() ? node.nets[i] : node.vnets[i - node.nets.size()].net_id;
ParentNetId id2 = j < node.nets.size() ? node.nets[j] : node.vnets[j - node.nets.size()].net_id;
ParentNetId id1 = i < node.nets.size() ? nets[i] : node.vnets[i - nets.size()].net_id;
ParentNetId id2 = j < node.nets.size() ? nets[j] : node.vnets[j - nets.size()].net_id;
return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size();
});

vtr::Timer t;
for (size_t i : order) {
if (i < node.nets.size()) { /* Regular net (not decomposed) */
ParentNetId net_id = node.nets[i];
if (i < nets.size()) { /* Regular net (not decomposed) */
ParentNetId net_id = nets[i];
if (!should_route_net(_net_list, net_id, _connections_inf, _budgeting_inf, _worst_neg_slack, true))
continue;
/* Setup the net (reset or prune) only once here in the flow. Then all calls to route_net turn off auto-setup */
Expand Down Expand Up @@ -188,6 +211,7 @@ void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g
if (flags.retry_with_full_bb) {
/* ConnectionRouter thinks we should grow the BB. Do that and leave this net unrouted for now */
route_ctx.route_bb[net_id] = full_device_bb();
_results_th.local().bb_updated_nets.push_back(net_id);
/* Disable decomposition for nets like this: they're already problematic */
_is_decomp_disabled[net_id] = true;
continue;
Expand All @@ -206,7 +230,7 @@ void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g
continue;
}
}
/* Route the full vnet. Again we don't care about the flags, they should be handled by the regular path */
/* Route the full vnet. We don't care about the flags, they should be handled by the regular path */
auto sink_mask = get_vnet_sink_mask(vnet);
route_net(
_routers_th.local(),
Expand Down Expand Up @@ -234,7 +258,7 @@ void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g

PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size())
+ " nets and " + std::to_string(node.vnets.size())
+ " virtual nets routed in " + std::to_string(t.elapsed_sec())
+ " virtual nets routed in " + std::to_string(timer.elapsed_sec())
+ " s");

/* This node is finished: add left & right branches to the task queue */
Expand Down Expand Up @@ -277,7 +301,7 @@ inline void make_vnet_pair(ParentNetId net_id, const t_bb& bb, Axis cutline_axis

template<typename HeapType>
bool DecompNetlistRouter<HeapType>::decompose_and_route_net(ParentNetId net_id, const PartitionTreeNode& node, VirtualNet& left, VirtualNet& right) {
auto& route_ctx = g_vpr_ctx.routing();
auto& route_ctx = g_vpr_ctx.mutable_routing();
auto& net_bb = route_ctx.route_bb[net_id];

/* Sample enough sinks to provide branch-off points to the virtual nets we create */
Expand Down Expand Up @@ -382,7 +406,7 @@ inline std::string describe_vnet(const VirtualNet& vnet) {
template<typename HeapType>
bool DecompNetlistRouter<HeapType>::decompose_and_route_vnet(VirtualNet& vnet, const PartitionTreeNode& node, VirtualNet& left, VirtualNet& right) {
/* Sample enough sinks to provide branch-off points to the virtual nets we create */
auto sink_mask = get_vnet_decomposition_mask(vnet, node);
auto sink_mask = get_decomposition_mask_vnet(vnet, node);

/* Route the *parent* net with the given mask: only the sinks we ask for will be routed */
auto flags = route_net(
Expand Down Expand Up @@ -499,6 +523,7 @@ inline bool get_reduction_mask(ParentNetId net_id, Axis cutline_axis, int cutlin
template<typename HeapType>
vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_decomposition_mask(ParentNetId net_id, const PartitionTreeNode& node) {
const auto& route_ctx = g_vpr_ctx.routing();

const RouteTree& tree = route_ctx.route_trees[net_id].value();
size_t num_sinks = tree.num_sinks();

Expand All @@ -512,6 +537,7 @@ vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_decomposition_mask(Pare
bool is_reduced = get_reduction_mask(net_id, node.cutline_axis, node.cutline_pos, out);

bool source_on_cutline = is_close_to_cutline(tree.root().inode, node.cutline_axis, node.cutline_pos, 1);

if (!is_reduced || source_on_cutline)
convex_hull_downsample(net_id, route_ctx.route_bb[net_id], out);

Expand Down Expand Up @@ -638,7 +664,7 @@ inline bool get_reduction_mask_vnet_with_source(const VirtualNet& vnet, Axis cut
}

template<typename HeapType>
vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_vnet_decomposition_mask(const VirtualNet& vnet, const PartitionTreeNode& node) {
vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_decomposition_mask_vnet(const VirtualNet& vnet, const PartitionTreeNode& node) {
const auto& route_ctx = g_vpr_ctx.routing();
const RouteTree& tree = route_ctx.route_trees[vnet.net_id].value();
int num_sinks = tree.num_sinks();
Expand All @@ -652,8 +678,9 @@ vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_vnet_decomposition_mask
if (inside_bb(tree.root().inode, vnet.clipped_bb)) { /* We have source, no need to sample after reduction in most cases */
bool is_reduced = get_reduction_mask_vnet_with_source(vnet, node.cutline_axis, node.cutline_pos, out);
bool source_on_cutline = is_close_to_cutline(tree.root().inode, node.cutline_axis, node.cutline_pos, 1);
if (!is_reduced || source_on_cutline)
if (!is_reduced || source_on_cutline){
convex_hull_downsample(vnet.net_id, vnet.clipped_bb, out);
}
} else {
int reduced_sides = get_reduction_mask_vnet_no_source(vnet, node.cutline_axis, node.cutline_pos, out);
if (reduced_sides < 2) {
Expand All @@ -666,9 +693,11 @@ vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_vnet_decomposition_mask
/* Sample if a sink is too close to the cutline (and unreached).
* Those sinks are likely to fail routing */
for (size_t isink : isinks) {
RRNodeId rr_sink = route_ctx.net_rr_terminals[vnet.net_id][isink];
if (!inside_bb(rr_sink, vnet.clipped_bb))
continue;
if (is_isink_reached.get(isink))
continue;
RRNodeId rr_sink = route_ctx.net_rr_terminals[vnet.net_id][isink];
if (is_close_to_cutline(rr_sink, node.cutline_axis, node.cutline_pos, 1)) {
out.set(isink, true);
continue;
Expand Down
8 changes: 7 additions & 1 deletion vpr/src/route/ParallelNetlistRouter.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
*
* Note that the parallel router does not support graphical router breakpoints.
*
* [0]: F. Koşar, "A net-decomposing parallel FPGA router", MS thesis, UofT ECE, 2023 */
* [0]: "Parallel FPGA Routing with On-the-Fly Net Decomposition", FPT'24 */
#include "netlist_routers.h"
#include "vtr_optional.h"

#include <tbb/task_group.h>

Expand Down Expand Up @@ -52,6 +53,8 @@ class ParallelNetlistRouter : public NetlistRouter {
* \ref route_net for each net, which will handle other global updates.
* \return RouteIterResults for this iteration. */
RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack);
/** Inform the PartitionTree of the nets with updated bounding boxes */
void handle_bb_updated_nets(const std::vector<ParentNetId>& nets);
void set_rcv_enabled(bool x);
void set_timing_info(std::shared_ptr<SetupHoldTimingInfo> timing_info);

Expand Down Expand Up @@ -95,6 +98,9 @@ class ParallelNetlistRouter : public NetlistRouter {
int _itry;
float _pres_fac;
float _worst_neg_slack;

/** The partition tree. Holds the groups of nets for each partition */
vtr::optional<PartitionTree> _tree;
};

#include "ParallelNetlistRouter.tpp"
37 changes: 29 additions & 8 deletions vpr/src/route/ParallelNetlistRouter.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

/** @file Impls for ParallelNetlistRouter */

#include <string>
#include "netlist_routers.h"
#include "route_net.h"
#include "vtr_time.h"
Expand All @@ -20,18 +21,24 @@ inline RouteIterResults ParallelNetlistRouter<HeapType>::route_netlist(int itry,

/* Organize netlist into a PartitionTree.
* Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */
PartitionTree tree(_net_list);
vtr::Timer timer;
if(!_tree){
_tree = PartitionTree(_net_list);
PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(timer.elapsed_sec()) + " s");
}

/* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. */
tbb::task_group g;
route_partition_tree_node(g, tree.root());
g.wait();
tbb::task_group group;
route_partition_tree_node(group, _tree->root());
group.wait();
PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s");

/* Combine results from threads */
RouteIterResults out;
for (auto& results : _results_th) {
out.stats.combine(results.stats);
out.rerouted_nets.insert(out.rerouted_nets.end(), results.rerouted_nets.begin(), results.rerouted_nets.end());
out.bb_updated_nets.insert(out.bb_updated_nets.end(), results.bb_updated_nets.begin(), results.bb_updated_nets.end());
out.is_routable &= results.is_routable;
}
return out;
Expand All @@ -41,13 +48,16 @@ template<typename HeapType>
void ParallelNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node) {
auto& route_ctx = g_vpr_ctx.mutable_routing();

/* node.nets is an unordered set, copy into vector to sort */
std::vector<ParentNetId> nets(node.nets.begin(), node.nets.end());

/* Sort so net with most sinks is routed first. */
std::stable_sort(node.nets.begin(), node.nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool {
std::stable_sort(nets.begin(), nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool {
return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size();
});

vtr::Timer t;
for (auto net_id : node.nets) {
vtr::Timer timer;
for (auto net_id : nets) {
auto flags = route_net(
_routers_th.local(),
_net_list,
Expand Down Expand Up @@ -76,13 +86,18 @@ void ParallelNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group&
if (flags.retry_with_full_bb) {
/* ConnectionRouter thinks we should grow the BB. Do that and leave this net unrouted for now */
route_ctx.route_bb[net_id] = full_device_bb();
_results_th.local().bb_updated_nets.push_back(net_id);
continue;
}
if (flags.was_rerouted) {
_results_th.local().rerouted_nets.push_back(net_id);
}
}
PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size()) + " nets routed in " + std::to_string(t.elapsed_sec()) + " s");

PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size())
+ " nets and " + std::to_string(node.vnets.size())
+ " virtual nets routed in " + std::to_string(timer.elapsed_sec())
+ " s");

/* This node is finished: add left & right branches to the task queue */
if (node.left && node.right) {
Expand All @@ -97,6 +112,12 @@ void ParallelNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group&
}
}

/** Forward the nets whose bounding boxes were updated to the partition tree.
 * The tree must already have been built by route_netlist(); this is asserted.
 * \param nets Nets with updated bounding boxes. */
template<typename HeapType>
void ParallelNetlistRouter<HeapType>::handle_bb_updated_nets(const std::vector<ParentNetId>& nets) {
    /* _tree is only populated once route_netlist() has run */
    VTR_ASSERT(_tree);

    auto& partition_tree = *_tree;
    partition_tree.update_nets(nets);
}

template<typename HeapType>
void ParallelNetlistRouter<HeapType>::set_rcv_enabled(bool x) {
for (auto& router : _routers_th) {
Expand Down
1 change: 1 addition & 0 deletions vpr/src/route/SerialNetlistRouter.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class SerialNetlistRouter : public NetlistRouter {
~SerialNetlistRouter() {}

RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack);
void handle_bb_updated_nets(const std::vector<ParentNetId>& nets);
void set_rcv_enabled(bool x);
void set_timing_info(std::shared_ptr<SetupHoldTimingInfo> timing_info);

Expand Down
12 changes: 11 additions & 1 deletion vpr/src/route/SerialNetlistRouter.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@

#include "SerialNetlistRouter.h"
#include "route_net.h"
#include "vtr_time.h"

template<typename HeapType>
inline RouteIterResults SerialNetlistRouter<HeapType>::route_netlist(int itry, float pres_fac, float worst_neg_slack) {
auto& route_ctx = g_vpr_ctx.mutable_routing();
RouteIterResults out;

vtr::Timer timer;

/* Sort so net with most sinks is routed first */
auto sorted_nets = std::vector<ParentNetId>(_net_list.nets().begin(), _net_list.nets().end());
std::stable_sort(sorted_nets.begin(), sorted_nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool {
Expand Down Expand Up @@ -45,7 +48,9 @@ inline RouteIterResults SerialNetlistRouter<HeapType>::route_netlist(int itry, f
}

if (flags.retry_with_full_bb) {
/* Grow the BB and retry this net right away. */
/* Grow the BB and retry this net right away.
* We don't populate out.bb_updated_nets for the serial router, since
* there is no partition tree to update. */
route_ctx.route_bb[net_id] = full_device_bb();
inet--;
continue;
Expand All @@ -59,9 +64,14 @@ inline RouteIterResults SerialNetlistRouter<HeapType>::route_netlist(int itry, f
}
}

PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s");
return out;
}

/** Intentionally a no-op: the serial router has no partition tree to update,
 * so the list of nets with updated bounding boxes is ignored. */
template<typename HeapType>
void SerialNetlistRouter<HeapType>::handle_bb_updated_nets(const std::vector<ParentNetId>& /* nets */) {
}

template<typename HeapType>
void SerialNetlistRouter<HeapType>::set_rcv_enabled(bool x) {
_router.set_rcv_enabled(x);
Expand Down
Loading

0 comments on commit c246f54

Please sign in to comment.