| /* |
| Given a group of atom blocks and a partially-packed complex block, find placement for group of atom blocks in complex block |
| To use, keep "cluster_placement_stats" data structure throughout packing |
| cluster_placement_stats undergoes these major states: |
| Initialization - performed once at beginning of packing |
| Reset - reset state in between packing of clusters |
| In flight - Speculatively place |
| Finalized - Commit or revert placements |
| Freed - performed once at end of packing |
| |
| Author: Jason Luu |
| March 12, 2012 |
| */ |
| |
| #include <cstdio> |
| #include <cstring> |
| #include <queue> |
| using namespace std; |
| |
| #include "vtr_assert.h" |
| #include "vtr_memory.h" |
| |
| #include "read_xml_arch_file.h" |
| #include "vpr_types.h" |
| #include "globals.h" |
| #include "atom_netlist.h" |
| #include "vpr_utils.h" |
| #include "hash.h" |
| #include "cluster_placement.h" |
| #include "pack_molecules.h" |
| |
| /****************************************/ |
| /*Local Datastrcture Declaration */ |
| /****************************************/ |
| struct MoleculePlaceInfo { |
| bool legal = false; |
| std::map<MoleculeBlockId,t_pb_graph_node*> block_locations; |
| std::set<t_pb_graph_node*> used_pb_nodes; |
| float cost = HUGE_POSITIVE_FLOAT; |
| }; |
| |
| /****************************************/ |
| /*Local Function Declaration */ |
| /****************************************/ |
| static void load_cluster_placement_stats_for_pb_graph_node( |
| t_cluster_placement_stats& cluster_placement_stats, |
| t_pb_graph_node *pb_graph_node); |
| static void requeue_primitive( |
| t_cluster_placement_stats& cluster_placement_stats, |
| t_cluster_placement_primitive *cluster_placement_primitive); |
| static void update_primitive_cost_or_status(const t_pb_graph_node *pb_graph_node, |
| const float incremental_cost, const bool valid); |
| static float try_place_molecule( |
| const PackMolecules& molecules, |
| const PackMoleculeId molecule_id, |
| const MoleculeStats& molecule_stats, |
| t_pb_graph_node *root, |
| vtr::vector<MoleculeBlockId,t_pb_graph_node*>& primitives_list); |
| |
| static MoleculePlaceInfo try_place_molecule_recurr(const PackMolecule& molecule, |
| const MoleculeBlockId molecule_block, |
| MoleculePlaceInfo partial_placement, |
| t_pb_graph_node* pb_node, |
| int depth=0); |
| |
| static t_pb_graph_node* try_place_molecule_block(const PackMolecule& molecule, const MoleculeBlockId molecule_blk, |
| const MoleculePlaceInfo& partial_placement, |
| t_pb_graph_node* pb_node); |
| |
| static bool molecule_block_upstream_connections_feasible(const PackMolecule& molecule, const MoleculeBlockId molecule_blk, |
| const MoleculePlaceInfo& partial_placement, t_pb_graph_node* pb_node); |
| |
| static bool cluster_routing_path_feasible(const t_pb_graph_pin* driver, const t_pb_graph_pin* sink); |
| |
| static bool expand_forced_pack_molecule_placement( |
| const t_pack_molecule *molecule, |
| const t_pack_pattern_block *pack_pattern_block, |
| vtr::vector<MoleculeBlockId,t_pb_graph_node*>& primitives_list, |
| float *cost); |
| |
| static t_pb_graph_pin *expand_pack_molecule_pin_edge(const int pattern_id, |
| const t_pb_graph_pin *cur_pin, const bool forward); |
| static void flush_intermediate_queues( |
| t_cluster_placement_stats& cluster_placement_stats); |
| |
| static bool is_force_pack_molecule(const MoleculeStats& molecule_stats, const PackMoleculeId molecule_id); |
| |
| /****************************************/ |
| /*Function Definitions */ |
| /****************************************/ |
| |
| /** |
| * [0..num_pb_types-1] array of cluster placement stats, one for each device_ctx.block_types |
| */ |
| std::vector<t_cluster_placement_stats> alloc_and_load_cluster_placement_stats() { |
| auto& device_ctx = g_vpr_ctx.device(); |
| |
| std::vector<t_cluster_placement_stats> cluster_placement_stats(device_ctx.num_block_types); |
| |
| for (int i = 0; i < device_ctx.num_block_types; i++) { |
| if (device_ctx.EMPTY_TYPE != &device_ctx.block_types[i]) { |
| cluster_placement_stats[i].valid_primitives = (t_cluster_placement_primitive **) vtr::calloc( |
| get_max_primitives_in_pb_type(device_ctx.block_types[i].pb_type) + 1, |
| sizeof(t_cluster_placement_primitive*)); /* too much memory allocated but shouldn't be a problem */ |
| load_cluster_placement_stats_for_pb_graph_node( |
| cluster_placement_stats[i], |
| device_ctx.block_types[i].pb_graph_head); |
| } |
| } |
| return cluster_placement_stats; |
| } |
| |
| /** |
| * get next list of primitives for list of atom blocks |
| * |
| * primitives is the list of ptrs to primitives that matches with the list of atom block |
| * - if this is a new block, requeue tried primitives and return a in-flight primitive list to try |
| * - if this is an old block, put root primitive to tried queue, requeue rest of primitives. try another set of primitives |
| * |
| * return true if can find next primitive, false otherwise |
| * |
| * cluster_placement_stats - ptr to the current cluster_placement_stats of open complex block |
| * molecule - molecule to pack into open complex block |
| * primitives_list - a list of primitives indexed to match atom_block_ids of molecule. |
| * Expects an allocated array of primitives ptrs as inputs. |
| * This function loads the array with the lowest cost primitives that implement molecule |
| */ |
| bool get_next_primitive_list( |
| t_cluster_placement_stats& cluster_placement_stats, |
| const PackMolecules& molecules, |
| const PackMoleculeId molecule_id, |
| const MoleculeStats& molecule_stats, |
| vtr::vector<MoleculeBlockId,t_pb_graph_node*>& primitives_list) { |
| t_cluster_placement_primitive *cur, *next, *best, *before_best, *prev; |
| int i; |
| float cost, lowest_cost; |
| best = nullptr; |
| before_best = nullptr; |
| |
| if (cluster_placement_stats.curr_molecule != molecule_id) { |
| /* New block, requeue tried primitives and in-flight primitives */ |
| flush_intermediate_queues(cluster_placement_stats); |
| |
| cluster_placement_stats.curr_molecule = molecule_id; |
| } else { |
| /* Hack! Same failed molecule may re-enter if upper stream functions suck, |
| * I'm going to make the molecule selector more intelligent. |
| * TODO: Remove later |
| */ |
| if (cluster_placement_stats.in_flight != nullptr) { |
| /* Hack end */ |
| |
| /* old block, put root primitive currently inflight to tried queue */ |
| cur = cluster_placement_stats.in_flight; |
| next = cur->next_primitive; |
| cur->next_primitive = cluster_placement_stats.tried; |
| cluster_placement_stats.tried = cur; |
| /* should have only one block in flight at any point in time */ |
| VTR_ASSERT(next == nullptr); |
| cluster_placement_stats.in_flight = nullptr; |
| } |
| } |
| |
| /* find next set of blocks |
| 1. Remove invalid blocks to invalid queue |
| 2. Find lowest cost array of primitives that implements blocks |
| 3. When found, move current blocks to in-flight, return lowest cost array of primitives |
| 4. Return NULL if not found |
| */ |
| lowest_cost = HUGE_POSITIVE_FLOAT; |
| for (i = 0; i < cluster_placement_stats.num_pb_types; i++) { |
| if (cluster_placement_stats.valid_primitives[i]->next_primitive == nullptr) { |
| continue; /* no more primitives of this type available */ |
| } |
| t_pb_type* pb_type = cluster_placement_stats.valid_primitives[i]->next_primitive->pb_graph_node->pb_type; |
| vtr::printf("Checking if molecule '%s' is feasible rooted at primitive '%s'\n", molecules.pack_molecules[molecule_id].name().c_str(), pb_type->name); |
| prev = cluster_placement_stats.valid_primitives[i]; |
| cur = cluster_placement_stats.valid_primitives[i]->next_primitive; |
| while (cur) { |
| /* remove invalid nodes lazily when encountered */ |
| while (cur && cur->valid == false) { |
| prev->next_primitive = cur->next_primitive; |
| cur->next_primitive = cluster_placement_stats.invalid; |
| cluster_placement_stats.invalid = cur; |
| cur = prev->next_primitive; |
| } |
| if (cur == nullptr) { |
| break; |
| } |
| /* try place molecule at root location cur */ |
| cost = try_place_molecule( |
| molecules, |
| molecule_id, |
| molecule_stats, |
| cur->pb_graph_node, |
| primitives_list); |
| if (cost < lowest_cost) { |
| lowest_cost = cost; |
| best = cur; |
| before_best = prev; |
| } |
| prev = cur; |
| cur = cur->next_primitive; |
| } |
| } |
| if (best == nullptr) { |
| /* failed to find a placement */ |
| primitives_list.assign(primitives_list.size(), nullptr); |
| } else { |
| /* populate primitive list with best */ |
| cost = try_place_molecule( |
| molecules, |
| molecule_id, |
| molecule_stats, |
| best->pb_graph_node, |
| primitives_list); |
| VTR_ASSERT(cost == lowest_cost); |
| |
| /* take out best node and put it in flight */ |
| cluster_placement_stats.in_flight = best; |
| before_best->next_primitive = best->next_primitive; |
| best->next_primitive = nullptr; |
| } |
| |
| if (best == nullptr) { |
| return false; |
| } |
| return true; |
| } |
| |
| /** |
| * Resets one cluster placement stats by clearing incremental costs and returning all primitives to valid queue |
| */ |
| void reset_cluster_placement_stats( |
| t_cluster_placement_stats& cluster_placement_stats) { |
| t_cluster_placement_primitive *cur, *next; |
| int i; |
| |
| /* Requeue primitives */ |
| flush_intermediate_queues(cluster_placement_stats); |
| cur = cluster_placement_stats.invalid; |
| while (cur != nullptr) { |
| next = cur->next_primitive; |
| requeue_primitive(cluster_placement_stats, cur); |
| cur = next; |
| } |
| cur = cluster_placement_stats.invalid = nullptr; |
| /* reset flags and cost */ |
| for (i = 0; i < cluster_placement_stats.num_pb_types; i++) { |
| VTR_ASSERT( |
| cluster_placement_stats.valid_primitives[i] != nullptr && cluster_placement_stats.valid_primitives[i]->next_primitive != nullptr); |
| cur = cluster_placement_stats.valid_primitives[i]->next_primitive; |
| while (cur != nullptr) { |
| cur->incremental_cost = 0; |
| cur->valid = true; |
| cur = cur->next_primitive; |
| } |
| } |
| cluster_placement_stats.curr_molecule = PackMoleculeId::INVALID(); |
| } |
| |
| /** |
| * Free linked lists found in cluster_placement_stats_list |
| */ |
| void free_cluster_placement_stats(std::vector<t_cluster_placement_stats>& cluster_placement_stats_list) { |
| t_cluster_placement_primitive *cur, *next; |
| int i, j; |
| auto& device_ctx = g_vpr_ctx.device(); |
| |
| for (i = 0; i < device_ctx.num_block_types; i++) { |
| cur = cluster_placement_stats_list[i].tried; |
| while (cur != nullptr) { |
| next = cur->next_primitive; |
| free(cur); |
| cur = next; |
| } |
| cur = cluster_placement_stats_list[i].in_flight; |
| while (cur != nullptr) { |
| next = cur->next_primitive; |
| free(cur); |
| cur = next; |
| } |
| cur = cluster_placement_stats_list[i].invalid; |
| while (cur != nullptr) { |
| next = cur->next_primitive; |
| free(cur); |
| cur = next; |
| } |
| for (j = 0; j < cluster_placement_stats_list[i].num_pb_types; j++) { |
| cur = |
| cluster_placement_stats_list[i].valid_primitives[j]->next_primitive; |
| while (cur != nullptr) { |
| next = cur->next_primitive; |
| free(cur); |
| cur = next; |
| } |
| free(cluster_placement_stats_list[i].valid_primitives[j]); |
| } |
| free(cluster_placement_stats_list[i].valid_primitives); |
| } |
| } |
| |
| /** |
| * Put primitive back on queue of valid primitives |
| * Note that valid status is not changed because if the primitive is not valid, it will get properly collected later |
| */ |
| static void requeue_primitive( |
| t_cluster_placement_stats& cluster_placement_stats, |
| t_cluster_placement_primitive *cluster_placement_primitive) { |
| int i; |
| int null_index; |
| bool success; |
| null_index = OPEN; |
| |
| success = false; |
| for (i = 0; i < cluster_placement_stats.num_pb_types; i++) { |
| if (cluster_placement_stats.valid_primitives[i]->next_primitive == nullptr) { |
| null_index = i; |
| continue; |
| } |
| if (cluster_placement_primitive->pb_graph_node->pb_type |
| == cluster_placement_stats.valid_primitives[i]->next_primitive->pb_graph_node->pb_type) { |
| success = true; |
| cluster_placement_primitive->next_primitive = |
| cluster_placement_stats.valid_primitives[i]->next_primitive; |
| cluster_placement_stats.valid_primitives[i]->next_primitive = |
| cluster_placement_primitive; |
| } |
| } |
| if (success == false) { |
| VTR_ASSERT(null_index != OPEN); |
| cluster_placement_primitive->next_primitive = |
| cluster_placement_stats.valid_primitives[null_index]->next_primitive; |
| cluster_placement_stats.valid_primitives[null_index]->next_primitive = |
| cluster_placement_primitive; |
| } |
| } |
| |
| /** |
| * Add any primitives found in pb_graph_nodes to cluster_placement_stats |
| * Adds backward link from pb_graph_node to cluster_placement_primitive |
| */ |
| static void load_cluster_placement_stats_for_pb_graph_node( |
| t_cluster_placement_stats& cluster_placement_stats, |
| t_pb_graph_node *pb_graph_node) { |
| int i, j, k; |
| t_cluster_placement_primitive *placement_primitive; |
| const t_pb_type *pb_type = pb_graph_node->pb_type; |
| bool success; |
| if (pb_type->modes == nullptr) { |
| placement_primitive = (t_cluster_placement_primitive *) vtr::calloc(1, sizeof(t_cluster_placement_primitive)); |
| placement_primitive->pb_graph_node = pb_graph_node; |
| placement_primitive->valid = true; |
| pb_graph_node->cluster_placement_primitive = placement_primitive; |
| placement_primitive->base_cost = compute_primitive_base_cost(pb_graph_node); |
| success = false; |
| i = 0; |
| while (success == false) { |
| if (cluster_placement_stats.valid_primitives[i] == nullptr |
| || cluster_placement_stats.valid_primitives[i]->next_primitive->pb_graph_node->pb_type == pb_graph_node->pb_type) { |
| if (cluster_placement_stats.valid_primitives[i] == nullptr) { |
| cluster_placement_stats.valid_primitives[i] = (t_cluster_placement_primitive *) vtr::calloc(1, sizeof(t_cluster_placement_primitive)); |
| /* head of linked list is empty, makes it easier to remove nodes later */ |
| cluster_placement_stats.num_pb_types++; |
| } |
| success = true; |
| placement_primitive->next_primitive = cluster_placement_stats.valid_primitives[i]->next_primitive; |
| cluster_placement_stats.valid_primitives[i]->next_primitive = placement_primitive; |
| } |
| i++; |
| } |
| } else { |
| for (i = 0; i < pb_type->num_modes; i++) { |
| for (j = 0; j < pb_type->modes[i].num_pb_type_children; j++) { |
| for (k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb; k++) { |
| load_cluster_placement_stats_for_pb_graph_node( |
| cluster_placement_stats, |
| &pb_graph_node->child_pb_graph_nodes[i][j][k]); |
| } |
| } |
| } |
| } |
| } |
| |
| /** |
| * Commit primitive, invalidate primitives blocked by mode assignment and update costs for primitives in same cluster as current |
| * Costing is done to try to pack blocks closer to existing primitives |
| * actual value based on closest common ancestor to committed placement, the farther the ancestor, the less reduction in cost there is |
| * Side effects: All cluster_placement_primitives may be invalidated/costed in this algorithm |
| * All intermediate queues are requeued |
| */ |
| void commit_primitive(t_cluster_placement_stats& cluster_placement_stats, |
| const t_pb_graph_node *primitive) { |
| t_pb_graph_node *pb_graph_node, *skip; |
| float incr_cost; |
| int i, j, k; |
| int valid_mode; |
| t_cluster_placement_primitive *cur; |
| |
| /* Clear out intermediate queues */ |
| flush_intermediate_queues(cluster_placement_stats); |
| |
| /* commit primitive as used, invalidate it */ |
| cur = primitive->cluster_placement_primitive; |
| VTR_ASSERT(cur->valid == true); |
| |
| cur->valid = false; |
| incr_cost = -0.01; /* cost of using a node drops as its neighbours are used, this drop should be small compared to scarcity values */ |
| |
| pb_graph_node = cur->pb_graph_node; |
| /* walk up pb_graph_node and update primitives of children */ |
| while (pb_graph_node->parent_pb_graph_node != nullptr) { |
| skip = pb_graph_node; /* do not traverse stuff that's already traversed */ |
| valid_mode = pb_graph_node->pb_type->parent_mode->index; |
| pb_graph_node = pb_graph_node->parent_pb_graph_node; |
| for (i = 0; i < pb_graph_node->pb_type->num_modes; i++) { |
| for (j = 0; j < pb_graph_node->pb_type->modes[i].num_pb_type_children; j++) { |
| for (k = 0; k < pb_graph_node->pb_type->modes[i].pb_type_children[j].num_pb; k++) { |
| if (&pb_graph_node->child_pb_graph_nodes[i][j][k] != skip) { |
| update_primitive_cost_or_status( |
| &pb_graph_node->child_pb_graph_nodes[i][j][k], |
| incr_cost, (bool)(i == valid_mode)); |
| } |
| } |
| } |
| } |
| incr_cost /= 10; /* blocks whose ancestor is further away in tree should be affected less than blocks closer in tree */ |
| } |
| } |
| |
| /** |
| * Set mode of cluster |
| */ |
| void set_mode_cluster_placement_stats(const t_pb_graph_node *pb_graph_node, |
| int mode) { |
| int i, j, k; |
| for (i = 0; i < pb_graph_node->pb_type->num_modes; i++) { |
| if (i != mode) { |
| for (j = 0; j < pb_graph_node->pb_type->modes[i].num_pb_type_children; j++) { |
| for (k = 0; k < pb_graph_node->pb_type->modes[i].pb_type_children[j].num_pb; k++) { |
| update_primitive_cost_or_status(&pb_graph_node->child_pb_graph_nodes[i][j][k], 0, false); |
| } |
| } |
| } |
| } |
| } |
| |
| /** |
| * For sibling primitives of pb_graph node, decrease cost |
| * For modes invalidated by pb_graph_node, invalidate primitive |
| * int distance is the distance of current pb_graph_node from original |
| */ |
| static void update_primitive_cost_or_status(const t_pb_graph_node *pb_graph_node, |
| const float incremental_cost, const bool valid) { |
| int i, j, k; |
| t_cluster_placement_primitive *placement_primitive; |
| if (pb_graph_node->pb_type->num_modes == 0) { |
| /* is primitive */ |
| placement_primitive = |
| (t_cluster_placement_primitive*) pb_graph_node->cluster_placement_primitive; |
| if (valid) { |
| placement_primitive->incremental_cost += incremental_cost; |
| } else { |
| placement_primitive->valid = false; |
| } |
| } else { |
| for (i = 0; i < pb_graph_node->pb_type->num_modes; i++) { |
| for (j = 0; j < pb_graph_node->pb_type->modes[i].num_pb_type_children; j++) { |
| for (k = 0; k < pb_graph_node->pb_type->modes[i].pb_type_children[j].num_pb; k++) { |
| update_primitive_cost_or_status( |
| &pb_graph_node->child_pb_graph_nodes[i][j][k], |
| incremental_cost, valid); |
| } |
| } |
| } |
| } |
| } |
| |
| /** |
| * Try place molecule at root location, populate primitives list with locations of placement if successful |
| */ |
| static float try_place_molecule( |
| const PackMolecules& molecules, |
| const PackMoleculeId molecule_id, |
| const MoleculeStats& /*molecule_stats*/, |
| t_pb_graph_node *root, |
| vtr::vector<MoleculeBlockId,t_pb_graph_node*>& primitives_list) { |
| |
| const PackMolecule& molecule = molecules.pack_molecules[molecule_id]; |
| |
| MoleculePlaceInfo empty_placement; |
| empty_placement.legal = true; |
| empty_placement.cost = 0.; |
| |
| vtr::printf(" Try place molecule at root %s\n", describe_pb_graph_node_hierarchy(root).c_str()); |
| |
| MoleculePlaceInfo molecule_placement = try_place_molecule_recurr(molecule, |
| molecule.root_block(), |
| empty_placement, |
| root, |
| 1); |
| |
| vtr::printf(" Proposed molecule placement: (cost %f)\n", molecule_placement.cost); |
| if (molecule_placement.legal) { |
| primitives_list.assign(molecule.blocks().size(), nullptr); |
| |
| for (auto molecule_blk : molecule.blocks()) { |
| if (molecule_placement.block_locations.count(molecule_blk)) { |
| t_pb_graph_node* loc = molecule_placement.block_locations[molecule_blk]; |
| VTR_ASSERT(loc); |
| |
| primitives_list[molecule_blk] = loc; |
| |
| vtr::printf(" %zu: %s\n", size_t(molecule_blk), describe_pb_graph_node_hierarchy(primitives_list[molecule_blk]).c_str()); |
| } else { |
| vtr::printf(" %zu: N/A\n", size_t(molecule_blk)); |
| } |
| } |
| } else { |
| VTR_ASSERT(!molecule_placement.legal); |
| VTR_ASSERT(molecule_placement.cost == HUGE_POSITIVE_FLOAT); |
| vtr::printf(" NONE\n"); |
| } |
| |
| return molecule_placement.cost; |
| } |
| |
| static MoleculePlaceInfo try_place_molecule_recurr(const PackMolecule& molecule, const MoleculeBlockId molecule_block, |
| MoleculePlaceInfo partial_placement, |
| t_pb_graph_node* pb_node, |
| int depth) { |
| |
| MoleculePlaceInfo placement = partial_placement; |
| |
| if (molecule.block_type(molecule_block) == PackMolecule::BlockType::EXTERNAL) { |
| //Recurse down to internal blocks |
| |
| //For every molecule block subtree fanning out from the current external molecule block |
| vtr::printf("%sPlacing sub-tree rooted at external molecule block %zu\n", vtr::indent(depth).c_str(), size_t(molecule_block)); |
| for (auto molecule_driver_pin_id : molecule.block_output_pins(molecule_block)) { |
| auto molecule_edge_id = molecule.pin_edge(molecule_driver_pin_id); |
| |
| for (auto molecule_sink_pin_id : molecule.edge_sinks(molecule_edge_id)) { |
| MoleculeBlockId molecule_subtree_block = molecule.pin_block(molecule_sink_pin_id); |
| |
| //See if the subtree is valid at the current pb_node |
| MoleculePlaceInfo subtree_placement = try_place_molecule_recurr(molecule, molecule_subtree_block, placement, pb_node, depth+1); |
| if (subtree_placement.legal) { |
| placement = subtree_placement; |
| } else { |
| VTR_ASSERT(!subtree_placement.legal); |
| //No legal location found for the next molecule block subtree, |
| //so the entire molecule is illegal |
| return MoleculePlaceInfo(); |
| } |
| } |
| } |
| } else { |
| VTR_ASSERT(molecule.block_type(molecule_block) == PackMolecule::BlockType::INTERNAL); |
| |
| AtomBlockId atom_blk = molecule.block_atom(molecule_block); |
| |
| vtr::printf("%sTrying to place internal molecule block %zu (%s) in %s\n", vtr::indent(depth).c_str(), size_t(molecule_block), |
| g_vpr_ctx.atom().nlist.block_model(atom_blk)->name, |
| describe_pb_graph_node_hierarchy(pb_node).c_str()); |
| |
| //Find a valid placement location for the atom associated with this block in the molecule |
| t_pb_graph_node* atom_loc = try_place_molecule_block(molecule, molecule_block, placement, pb_node); |
| |
| if (atom_loc) { |
| //Recursively try to place the sub-tree rooted at the current molecule block |
| |
| VTR_ASSERT(atom_loc->is_primitive()); |
| |
| placement.legal = true; |
| placement.block_locations.emplace(molecule_block,atom_loc); |
| placement.used_pb_nodes.insert(atom_loc); |
| |
| vtr::printf("%sFound legal placement location of molecule block %zu in %s\n", vtr::indent(depth).c_str(), size_t(molecule_block), describe_pb_graph_node_hierarchy(atom_loc).c_str()); |
| vtr::printf("%sPlacing sub-tree rooted at internal molecule block %zu\n", vtr::indent(depth).c_str(), size_t(molecule_block)); |
| for (auto molecule_driver_pin_id : molecule.block_output_pins(molecule_block)) { |
| auto molecule_edge_id = molecule.pin_edge(molecule_driver_pin_id); |
| |
| for (auto molecule_sink_pin_id : molecule.edge_sinks(molecule_edge_id)) { |
| MoleculeBlockId molecule_subtree_block = molecule.pin_block(molecule_sink_pin_id); |
| |
| //Try placing the molecule sub-tree within the same parent as the current atom, |
| //if it isn't valid, retry placing into the grand-parent pb node (this is done |
| //repeatedly up to the root). |
| // |
| //This ensures that atoms prefer to stay in the same parent pb_node if possible |
| t_pb_graph_node* subtree_pb_node = atom_loc->parent_pb_graph_node; |
| MoleculePlaceInfo subtree_placement; |
| while (!subtree_placement.legal && subtree_pb_node != nullptr) { |
| subtree_placement = try_place_molecule_recurr(molecule, molecule_subtree_block, placement, subtree_pb_node, depth+1); |
| subtree_pb_node = subtree_pb_node->parent_pb_graph_node; //Next parent |
| } |
| |
| if (subtree_placement.legal) { |
| placement = subtree_placement; |
| } else { |
| //No valid placement possible for this subtree. |
| //This implies the molecule placement at the current atom location is illegal. |
| return MoleculePlaceInfo(); |
| } |
| } |
| } |
| } else { |
| //No valid internal block location |
| placement = MoleculePlaceInfo(); //Invalid |
| } |
| } |
| return placement; |
| } |
| |
| static t_pb_graph_node* try_place_molecule_block(const PackMolecule& molecule, const MoleculeBlockId molecule_blk, |
| const MoleculePlaceInfo& partial_placement, |
| t_pb_graph_node* pb_node) { |
| if (partial_placement.used_pb_nodes.count(pb_node)) { |
| return nullptr; //Already in use |
| } |
| |
| if (pb_node->is_primitive()) { |
| if (pb_node->cluster_placement_primitive->valid |
| && primitive_type_feasible(molecule.block_atom(molecule_blk), pb_node->pb_type) |
| && molecule_block_upstream_connections_feasible(molecule, molecule_blk, partial_placement, pb_node)) { |
| return pb_node; //Primitive match |
| } |
| |
| return nullptr; //Primitive mismatch |
| } |
| |
| //Not primitive, recursively search children |
| // |
| //TODO: Could speed-up by tracing pack pattern annotations in arch, |
| // instead of enumerating legal placements (?) |
| VTR_ASSERT(!pb_node->is_primitive()); |
| |
| for (int imode = 0; imode < pb_node->num_modes(); ++imode) { |
| for (int ichild_type = 0; ichild_type < pb_node->num_pb_type_children(imode); ++ichild_type) { |
| for (int ichild = 0; ichild < pb_node->num_pb_type_child_pb(imode, ichild_type); ++ichild) { |
| t_pb_graph_node *child_pb_node = &pb_node->child_pb_graph_nodes[imode][ichild_type][ichild]; |
| t_pb_graph_node* legal_child = try_place_molecule_block(molecule, molecule_blk, partial_placement, child_pb_node); |
| if (legal_child) { //Legal within child sub-nodes |
| return legal_child; |
| } |
| } |
| } |
| } |
| |
| return nullptr; //No match within children |
| } |
| |
| static bool molecule_block_upstream_connections_feasible(const PackMolecule& molecule, const MoleculeBlockId molecule_blk, |
| const MoleculePlaceInfo& partial_placement, t_pb_graph_node* pb_node) { |
| for (auto molecule_sink_pin : molecule.block_input_pins(molecule_blk)) { |
| auto molecule_edge = molecule.pin_edge(molecule_sink_pin); |
| auto molecule_driver_pin = molecule.edge_driver(molecule_edge); |
| auto molecule_driver_blk = molecule.pin_block(molecule_driver_pin); |
| |
| const t_pb_graph_pin* sink_pin = find_corresponding_pb_graph_pin(pb_node, molecule.pin_atom(molecule_sink_pin)); |
| VTR_ASSERT(sink_pin); |
| |
| //Verify there is a path from src to sink |
| const t_pb_graph_pin* driver_pin = nullptr; |
| auto itr = partial_placement.block_locations.find(molecule_driver_blk); |
| if (itr == partial_placement.block_locations.end()) { |
| //External block connection |
| |
| //TODO: instead of skipping check, verify that 'correct' top-level pin can reach the corresponding sink |
| continue; |
| } else { |
| VTR_ASSERT(itr != partial_placement.block_locations.end()); |
| t_pb_graph_node* driver_loc = itr->second; |
| |
| driver_pin = find_corresponding_pb_graph_pin(driver_loc, molecule.pin_atom(molecule_driver_pin)); |
| } |
| VTR_ASSERT(driver_pin); |
| |
| if (!cluster_routing_path_feasible(driver_pin, sink_pin)) { |
| return false; //Infeasible |
| } |
| } |
| |
| return true; //Feasible |
| } |
| |
| static bool cluster_routing_path_feasible(const t_pb_graph_pin* driver, const t_pb_graph_pin* sink) { |
| //Simple BFS (for now) from driver to sink |
| std::queue<const t_pb_graph_pin*> q; |
| q.push(driver); |
| |
| while (!q.empty()) { |
| const t_pb_graph_pin* pin = q.front(); |
| q.pop(); |
| |
| if (pin == sink) return true; |
| |
| //Expand neighbours |
| for (int iedge = 0; iedge < pin->num_output_edges; ++iedge) { |
| t_pb_graph_edge* edge = pin->output_edges[iedge]; |
| |
| for (int ipin = 0; ipin < edge->num_output_pins; ++ipin) { |
| t_pb_graph_pin* sink_pin = edge->output_pins[ipin]; |
| q.push(sink_pin); |
| } |
| } |
| } |
| |
| return false; //No path |
| } |
| |
| /** |
| * Expand molecule at pb_graph_node |
| * Assumes molecule and pack pattern connections have fan-out 1 |
| */ |
| static bool expand_forced_pack_molecule_placement( |
| const t_pack_molecule *molecule, |
| const t_pack_pattern_block *pack_pattern_block, |
| t_pb_graph_node **primitives_list, float *cost) { |
| t_pb_graph_node *pb_graph_node = |
| primitives_list[pack_pattern_block->block_id]; |
| t_pb_graph_node *next_primitive; |
| t_pack_pattern_connections *cur; |
| t_pb_graph_pin *cur_pin, *next_pin; |
| t_pack_pattern_block *next_block; |
| |
| cur = pack_pattern_block->connections; |
| while (cur) { |
| if (cur->from_block == pack_pattern_block) { |
| next_block = cur->to_block; |
| } else { |
| next_block = cur->from_block; |
| } |
| if (primitives_list[next_block->block_id] == nullptr && molecule->atom_block_ids[next_block->block_id]) { |
| /* first time visiting location */ |
| |
| /* find next primitive based on pattern connections, expand next primitive if not visited */ |
| if (cur->from_block == pack_pattern_block) { |
| /* forward expand to find next block */ |
| int from_pin, from_port; |
| from_pin = cur->from_pin->pin_number; |
| from_port = cur->from_pin->port->port_index_by_type; |
| cur_pin = &pb_graph_node->output_pins[from_port][from_pin]; |
| next_pin = expand_pack_molecule_pin_edge( |
| pack_pattern_block->pattern_index, cur_pin, true); |
| } else { |
| /* backward expand to find next block */ |
| VTR_ASSERT(cur->to_block == pack_pattern_block); |
| int to_pin, to_port; |
| to_pin = cur->to_pin->pin_number; |
| to_port = cur->to_pin->port->port_index_by_type; |
| |
| if (cur->from_pin->port->is_clock) { |
| cur_pin = &pb_graph_node->clock_pins[to_port][to_pin]; |
| } else { |
| cur_pin = &pb_graph_node->input_pins[to_port][to_pin]; |
| } |
| next_pin = expand_pack_molecule_pin_edge( |
| pack_pattern_block->pattern_index, cur_pin, false); |
| } |
| /* found next primitive */ |
| if (next_pin != nullptr) { |
| next_primitive = next_pin->parent_node; |
| /* Check for legality of placement, if legal, expand from legal placement, if not, return false */ |
| if (molecule->atom_block_ids[next_block->block_id] && primitives_list[next_block->block_id] == nullptr) { |
| if (next_primitive->cluster_placement_primitive->valid |
| == true |
| && primitive_type_feasible( |
| molecule->atom_block_ids[next_block->block_id], |
| next_primitive->pb_type)) { |
| primitives_list[next_block->block_id] = next_primitive; |
| *cost += |
| next_primitive->cluster_placement_primitive->base_cost |
| + next_primitive->cluster_placement_primitive->incremental_cost; |
| if (!expand_forced_pack_molecule_placement(molecule, |
| next_block, primitives_list, cost)) { |
| return false; |
| } |
| } else { |
| return false; |
| } |
| } |
| } else { |
| return false; |
| } |
| } |
| cur = cur->next; |
| } |
| |
| return true; |
| } |
| |
| /** |
| * Find next primitive pb_graph_pin |
| */ |
| static t_pb_graph_pin *expand_pack_molecule_pin_edge(const int pattern_id, |
| const t_pb_graph_pin *cur_pin, const bool forward) { |
| int i, j, k; |
| t_pb_graph_pin *temp_pin, *dest_pin; |
| temp_pin = nullptr; |
| dest_pin = nullptr; |
| if (forward) { |
| for (i = 0; i < cur_pin->num_output_edges; i++) { |
| /* one fanout assumption */ |
| if (cur_pin->output_edges[i]->infer_pattern) { |
| for (k = 0; k < cur_pin->output_edges[i]->num_output_pins; |
| k++) { |
| if (cur_pin->output_edges[i]->output_pins[k]->parent_node->pb_type->num_modes |
| == 0) { |
| temp_pin = cur_pin->output_edges[i]->output_pins[k]; |
| } else { |
| temp_pin = expand_pack_molecule_pin_edge(pattern_id, |
| cur_pin->output_edges[i]->output_pins[k], |
| forward); |
| } |
| } |
| if (temp_pin != nullptr) { |
| VTR_ASSERT(dest_pin == nullptr || dest_pin == temp_pin); |
| dest_pin = temp_pin; |
| } |
| } else { |
| for (j = 0; j < cur_pin->output_edges[i]->num_pack_patterns; |
| j++) { |
| if (cur_pin->output_edges[i]->pack_pattern_indices[j] |
| == pattern_id) { |
| for (k = 0; |
| k < cur_pin->output_edges[i]->num_output_pins; |
| k++) { |
| if (cur_pin->output_edges[i]->output_pins[k]->parent_node->pb_type->num_modes |
| == 0) { |
| temp_pin = |
| cur_pin->output_edges[i]->output_pins[k]; |
| } else { |
| temp_pin = |
| expand_pack_molecule_pin_edge( |
| pattern_id, |
| cur_pin->output_edges[i]->output_pins[k], |
| forward); |
| } |
| } |
| if (temp_pin != nullptr) { |
| VTR_ASSERT(dest_pin == nullptr || dest_pin == temp_pin); |
| dest_pin = temp_pin; |
| } |
| } |
| } |
| } |
| } |
| } else { |
| for (i = 0; i < cur_pin->num_input_edges; i++) { |
| /* one fanout assumption */ |
| if (cur_pin->input_edges[i]->infer_pattern) { |
| for (k = 0; k < cur_pin->input_edges[i]->num_input_pins; k++) { |
| if (cur_pin->input_edges[i]->input_pins[k]->parent_node->pb_type->num_modes |
| == 0) { |
| temp_pin = cur_pin->input_edges[i]->input_pins[k]; |
| } else { |
| temp_pin = expand_pack_molecule_pin_edge(pattern_id, |
| cur_pin->input_edges[i]->input_pins[k], |
| forward); |
| } |
| } |
| if (temp_pin != nullptr) { |
| VTR_ASSERT(dest_pin == nullptr || dest_pin == temp_pin); |
| dest_pin = temp_pin; |
| } |
| } else { |
| for (j = 0; j < cur_pin->input_edges[i]->num_pack_patterns; |
| j++) { |
| if (cur_pin->input_edges[i]->pack_pattern_indices[j] |
| == pattern_id) { |
| for (k = 0; k < cur_pin->input_edges[i]->num_input_pins; |
| k++) { |
| if (cur_pin->input_edges[i]->input_pins[k]->parent_node->pb_type->num_modes |
| == 0) { |
| temp_pin = |
| cur_pin->input_edges[i]->input_pins[k]; |
| } else { |
| temp_pin = expand_pack_molecule_pin_edge( |
| pattern_id, |
| cur_pin->input_edges[i]->input_pins[k], |
| forward); |
| } |
| } |
| if (temp_pin != nullptr) { |
| VTR_ASSERT(dest_pin == nullptr || dest_pin == temp_pin); |
| dest_pin = temp_pin; |
| } |
| } |
| } |
| } |
| } |
| } |
| return dest_pin; |
| } |
| |
| static void flush_intermediate_queues( |
| t_cluster_placement_stats& cluster_placement_stats) { |
| t_cluster_placement_primitive *cur, *next; |
| cur = cluster_placement_stats.tried; |
| while (cur != nullptr) { |
| next = cur->next_primitive; |
| requeue_primitive(cluster_placement_stats, cur); |
| cur = next; |
| } |
| cluster_placement_stats.tried = nullptr; |
| |
| cur = cluster_placement_stats.in_flight; |
| if (cur != nullptr) { |
| next = cur->next_primitive; |
| requeue_primitive(cluster_placement_stats, cur); |
| /* should have at most one block in flight at any point in time */ |
| VTR_ASSERT(next == nullptr); |
| } |
| cluster_placement_stats.in_flight = nullptr; |
| } |
| |
| /* Determine max index + 1 of molecule */ |
| int get_array_size_of_molecule(const t_pack_molecule *molecule) { |
| if (molecule->type == MOLECULE_FORCED_PACK) { |
| return molecule->pack_pattern->num_blocks; |
| } else { |
| return molecule->num_blocks; |
| } |
| } |
| |
| /* Given atom block, determines if a free primitive exists for it */ |
| bool exists_free_primitive_for_atom_block( |
| t_cluster_placement_stats& cluster_placement_stats, |
| const AtomBlockId blk_id) { |
| int i; |
| t_cluster_placement_primitive *cur, *prev; |
| |
| /* might have a primitive in flight that's still valid */ |
| if (cluster_placement_stats.in_flight) { |
| if (primitive_type_feasible(blk_id, cluster_placement_stats.in_flight->pb_graph_node->pb_type)) { |
| return true; |
| } |
| } |
| |
| /* Look through list of available primitives to see if any valid */ |
| for (i = 0; i < cluster_placement_stats.num_pb_types; i++) { |
| if (cluster_placement_stats.valid_primitives[i]->next_primitive == nullptr) { |
| continue; /* no more primitives of this type available */ |
| } |
| if (primitive_type_feasible(blk_id, cluster_placement_stats.valid_primitives[i]->next_primitive->pb_graph_node->pb_type)) { |
| prev = cluster_placement_stats.valid_primitives[i]; |
| cur = cluster_placement_stats.valid_primitives[i]->next_primitive; |
| while (cur) { |
| /* remove invalid nodes lazily when encountered */ |
| while (cur && cur->valid == false) { |
| prev->next_primitive = cur->next_primitive; |
| cur->next_primitive = cluster_placement_stats.invalid; |
| cluster_placement_stats.invalid = cur; |
| cur = prev->next_primitive; |
| } |
| if (cur == nullptr) { |
| break; |
| } |
| return true; |
| } |
| } |
| } |
| |
| return false; |
| } |
| |
| |
| void reset_tried_but_unused_cluster_placements( |
| t_cluster_placement_stats& cluster_placement_stats) { |
| flush_intermediate_queues(cluster_placement_stats); |
| } |
| |
| static bool is_force_pack_molecule(const MoleculeStats& molecule_stats, const PackMoleculeId molecule_id) { |
| return molecule_stats.num_blocks(molecule_id) > 1; |
| } |