| // Copyright 2017-2020 The Verible Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #ifndef VERIBLE_VERILOG_ANALYSIS_SYMBOL_TABLE_H_ |
| #define VERIBLE_VERILOG_ANALYSIS_SYMBOL_TABLE_H_ |
| |
| #include <functional> |
| #include <iosfwd> |
| #include <map> |
| |
| #include "absl/status/status.h" |
| #include "absl/strings/string_view.h" |
| #include "common/strings/compare.h" |
| #include "common/text/symbol.h" |
| #include "common/util/map_tree.h" |
| #include "common/util/vector_tree.h" |
| #include "verilog/analysis/verilog_project.h" |
| |
| namespace verilog { |
| |
| struct SymbolInfo; // forward declaration, defined below |
| |
| // SymbolTableNode represents a named element in the syntax. |
| // When it represents a scope, it may have named subtrees. |
| // The string_view key carries positional information, it corresponds to a |
| // substring owned by a VerilogSourceFile (which must outlive the symbol table), |
| // and can be used to look up file origin and position within file. |
| using SymbolTableNode = |
| verible::MapTree<absl::string_view, SymbolInfo, verible::StringViewCompare>; |
| |
| std::ostream& SymbolTableNodeFullPath(std::ostream&, const SymbolTableNode&); |
| |
| // Classify what type of element a particular symbol is defining. |
| enum class SymbolMetaType { |
| kRoot, |
| kClass, |
| kModule, |
| kGenerate, // loop or conditional generate block |
| kPackage, |
| kParameter, |
| kTypeAlias, // typedef |
| kDataNetVariableInstance, |
| kFunction, // includes constructors |
| kTask, |
| kStruct, |
| kEnumType, |
| kEnumConstant, |
| kInterface, |
| |
| // The following enums represent classes/groups of the above types, |
| // and are used for validating metatypes of symbol references. |
| |
| kUnspecified, // matches all of the above (any type) |
| kCallable, // matches only kFunction or kTask |
| }; |
| |
| std::ostream& operator<<(std::ostream&, SymbolMetaType); |
| |
| // This classifies the type of reference that a single identifier is. |
| enum class ReferenceType { |
| // The base identifier in any chain. |
| // These symbols are resolved by potentially searching up-scope from the |
| // current context. |
| kUnqualified, |
| |
| // Similar to kUnqualified in that it is in base position, however, this |
| // symbol must be resolved only locally in the current context, without any |
| // upward search. This is suitable for out-of-line definitions, where the |
| // base (in "base::member") must be resolved in only the enclosing scope. |
| kImmediate, |
| |
| // ::id (for packages, and class static members) |
| // These symbols are resolved by searching in the parent symbol's |
| // context (or its imported/inherited namespaces). |
| kDirectMember, |
| |
| // .id (for object of struct/class type members) |
| // These symbols are resolved by searching in the parent object's *type* scope |
| // (or its imported/inherited namespaces). |
| kMemberOfTypeOfParent, |
| }; |
| |
| std::ostream& operator<<(std::ostream&, ReferenceType); |
| |
| // References may form "trees" of dependencies (ReferenceComponentNode). |
| // ReferenceComponent is the data portion of each node an a reference tree. |
| // The overall tree structure drives the order in which references are resolved. |
| // |
| // Examples: |
| // * "a::b" and "a.b", resolving "b" depends on resolving "a" first. |
| // * "foo bar(.x(y));" resolving "x" depends on typeof(bar) -> resolving |
| // "foo", but "y" is only resolved using local context, and forms its own |
| // independent reference chain. |
| struct ReferenceComponent { |
| // This is the token substring of the identifier being referenced. |
| // String memory is expected to be owned by a VerilogSourceFile. |
| // This string_view also carries positional information: it can be used to |
| // locate the originating VerilogSourceFile via |
| // VerilogProject::LookupFileOrigin() and in-file position from the file's |
| // LineColumnMap. |
| const absl::string_view identifier; |
| |
| // What kind of reference is this, and how should it be resolved? |
| // See enum definition above. |
| const ReferenceType ref_type; |
| |
| // Inform the symbol resolver that this symbol must be of a certain metatype. |
| // SymbolInfo::kUnspecified is interpreted as "any metatype" for cases |
| // where it is not known in advance. |
| const SymbolMetaType required_metatype = SymbolMetaType::kUnspecified; |
| |
| // This points to the definition with which this symbol was resolved. |
| // During symbol table construction, this remains nullptr. |
| // If symbol resolution succeeds, this will be updated to non-nullptr. |
| // This may remain as nullptr if symbol resolution fails. |
| // Symbol table merge operations may invalidate these pointers, so make sure |
| // merges are done before attempting symbol resolution. |
| // This should only be set by ResolveSymbol(). |
| // TODO: privatize this member. |
| const SymbolTableNode* resolved_symbol = nullptr; |
| |
| public: |
| absl::Status MatchesMetatype(SymbolMetaType) const; |
| |
| // Resolves this symbol and verifies that metatypes are compatible, which is |
| // reflected in the returned Status. |
| absl::Status ResolveSymbol(const SymbolTableNode&); |
| |
| // Only print ref_type and identifier. |
| std::ostream& PrintPathComponent(std::ostream&) const; |
| |
| // Print everything, showing symbol path if it is resolved. |
| std::ostream& PrintVerbose(std::ostream&) const; |
| |
| // Structural consistency check. |
| void VerifySymbolTableRoot(const SymbolTableNode* root) const; |
| }; |
| |
| std::ostream& operator<<(std::ostream&, const ReferenceComponent&); |
| |
| // A node in a tree of *dependent* hierchical references. |
| // An expression like "x.y.z" will form a linear chain, where resolving 'y' |
| // depends on 'x', and resolving 'z' depends on 'y'. Named ports are manifest as |
| // wide nodes: in "f(.a(...), .b(...))", both 'a' and 'b' depend on resolving |
| // 'f' (and thus are siblings). |
| using ReferenceComponentNode = verible::VectorTree<ReferenceComponent>; |
| |
| // Human-readable representation of a node's path from root. |
| std::ostream& ReferenceNodeFullPath(std::ostream&, |
| const ReferenceComponentNode&); |
| |
| // View a ReferenceComponentNode's children as an ordered map, keyed by |
| // reference string. A single node may have multiple references to the same |
| // child id, and they are expected to resolve the same way, so one of them is |
| // arbitrarily chosen to be included in the map, while the others are dropped. |
| // Primarily for debugging and visualization. |
| using ReferenceComponentMap = |
| std::map<absl::string_view, const ReferenceComponentNode*, |
| verible::StringViewCompare>; |
| ReferenceComponentMap ReferenceComponentNodeMapView( |
| const ReferenceComponentNode&); |
| |
| // Represents any (chained) qualified or unqualified reference. |
| struct DependentReferences { |
| // Sequence of identifiers in a chain like "a.b.c", or "x::y::z". |
| // The first element always has ReferenceType::kUnqualified. |
| // This is wrapped in a unique_ptr to guarantee address stability on-move. |
| std::unique_ptr<ReferenceComponentNode> components; |
| |
| public: |
| DependentReferences() = default; |
| explicit DependentReferences( |
| std::unique_ptr<ReferenceComponentNode> components) |
| : components(std::move(components)) {} |
| // move-only |
| DependentReferences(const DependentReferences&) = delete; |
| DependentReferences(DependentReferences&&) = default; |
| DependentReferences& operator=(const DependentReferences&) = delete; |
| DependentReferences& operator=(DependentReferences&&) = delete; |
| |
| // Returns true of no references were collected. |
| bool Empty() const { return components == nullptr; } |
| |
| // Returns the current terminal descendant. |
| const ReferenceComponentNode* LastLeaf() const; |
| |
| // Returns the last type component of a reference tree. |
| // e.g. from "A#(.B())::C#(.D())" -> "C" |
| const ReferenceComponentNode* LastTypeComponent() const; |
| ReferenceComponentNode* LastTypeComponent(); |
| |
| // Structural consistency check. |
| // When traversing an unqualified or qualified reference, use this to grow a |
| // new leaf node in the reference tree. |
| // Returns a pointer to the new node. |
| ReferenceComponentNode* PushReferenceComponent( |
| const ReferenceComponent& component); |
| |
| // Structural consistency check. |
| void VerifySymbolTableRoot(const SymbolTableNode* root) const; |
| |
| // Attempt to resolve all symbol references. |
| void Resolve(const SymbolTableNode& context, |
| std::vector<absl::Status>* diagnostics) const; |
| |
| // Attempt to resolve only local symbol references. |
| void ResolveLocally(const SymbolTableNode& context) const; |
| |
| // Attempt to only resolve the base of the reference (the first component). |
| absl::StatusOr<SymbolTableNode*> ResolveOnlyBaseLocally( |
| SymbolTableNode* context); |
| }; |
| |
| std::ostream& operator<<(std::ostream&, const DependentReferences&); |
| |
| // Contains information about a type used to declare data/instances/variables. |
| struct DeclarationTypeInfo { |
| // Pointer to the syntax tree origin, e.g. a NodeEnum::kDataType node. |
| // This is useful for diagnostic that detail the relevant text, |
| // which can be recovered by StringSpanOfSymbol(const verible::Symbol&). |
| const verible::Symbol* syntax_origin = nullptr; |
| |
| // Pointer to the reference node that represents a user-defined type, if |
| // applicable. |
| // For built-in and primitive types, this is left as nullptr. |
| // This will be used to lookup named members of instances/objects of this |
| // type. |
| // |
| // These pointers should point to an element inside a |
| // DependentReferences::components in the same SymbolTable. |
| // (Note: to verify this membership at run-time could be expensive because it |
| // would require checking all DeclarationTypeInfos in the SymbolTable |
| // hierarchy.) |
| // |
| // This pointer must remain stable, even as reference trees grow, which |
| // mandates reserve()-ing ReferenceComponentNodes' children one-time in |
| // advance, and only ever moving ReferenceComponents, never copying them. |
| const ReferenceComponentNode* user_defined_type = nullptr; |
| |
| // Indicates that this is implicit declaration. |
| // FIXME(ldk): Check if this could be replaced by user_defined_type pointing |
| // to default implicit type. |
| bool implicit = false; |
| |
| public: |
| // Structural consistency check. |
| void VerifySymbolTableRoot(const SymbolTableNode* root) const; |
| }; |
| |
| std::ostream& operator<<(std::ostream&, const DeclarationTypeInfo&); |
| |
| // This data type holds information about what each SystemVerilog symbol is. |
| // An alternative implementation could be done using an abstract base class, |
| // and subclasses for each element type. |
| struct SymbolInfo { |
| // What is this symbol? (package, module, type, variable, etc.) |
| SymbolMetaType metatype; |
| |
| // This symbol's name is stored in the .Key() of the SymbolTableNode that |
| // encloses this structure. (This is SymbolTableNode::Value().) |
| |
| // In which file is this considered "defined"? |
| // Technically, this can be recovered by looking up a string_view range of |
| // text in VerilogProject (StringSpanOfSymbol(*syntax_origin)). |
| const VerilogSourceFile* file_origin = nullptr; |
| |
| // Pointer to the syntax tree origin. |
| // An easy way to view this text is StringSpanOfSymbol(*syntax_origin). |
| // Reminder: Parts of the syntax tree may originate from included files. |
| const verible::Symbol* syntax_origin = nullptr; |
| |
| // What is the type associated with this symbol? |
| // Only applicable to typed elements: variables, nets, instances, |
| // typedefs, etc. |
| // This is only set after resolving type references. |
| DeclarationTypeInfo declared_type; |
| |
| // Collection of generated scope names that exists for the sake of persistent |
| // string memory storage (since all other symbol table node keys rely on |
| // string_views that belong the source file's string memory buffer). |
| // We cannot use std::string directly due to the move-ability requirements |
| // along with the possibility of short-string-optimization. |
| // std::vector is move-able on reallocation and unique_ptr guarantees |
| // move-stability. |
| std::vector<std::unique_ptr<const std::string>> anonymous_scope_names; |
| |
| // TODO(fangism): symbol attributes |
| // visibility: is this symbol (as a member of its parent) public? |
| // ports and parameters can be public, whereas local variables are not. |
| // Is this class-member static or non-static? |
| // Is this definition complete or only a forward declaration? |
| |
| // TODO(fangism): imported symbols and namespaces |
| // Applicable to packages, classes, modules. |
| // These should be looked up when searching for symbols (without copying). |
| |
| // For elements with inheritance only: this points to a base class. |
| // Currently limited to single-inheritance. |
| // TODO: extend to support multiple-implementation and interface-class |
| // multiple inheritance. |
| DeclarationTypeInfo parent_type; |
| |
| // Collection of references to resolve and bind that appear in the same |
| // context. There is no sequential ordering dependency among these references, |
| // theoretically, they could all be resolved in parallel. |
| // This does not need to be insertion-iterator-stable. |
| std::vector<DependentReferences> local_references_to_bind; |
| // TODO(fangism): make this searchable by substring offsets. |
| |
| // TODO(fangism): cache/memoize the resolution of this node so that resolution |
| // can be triggered on-demand, and also avoid duplicate work. |
| |
| public: // methods |
| SymbolInfo() = default; |
| SymbolInfo(SymbolMetaType metatype, const VerilogSourceFile* file_origin = {}, |
| const verible::Symbol* syntax_origin = {}, |
| DeclarationTypeInfo declared_type = {}) |
| : metatype(metatype), |
| file_origin(file_origin), |
| syntax_origin(syntax_origin), |
| declared_type(declared_type) {} |
| |
| // move-only |
| SymbolInfo(const SymbolInfo&) = delete; |
| SymbolInfo(SymbolInfo&&) = default; |
| SymbolInfo& operator=(const SymbolInfo&) = delete; |
| SymbolInfo& operator=(SymbolInfo&&) = delete; |
| |
| // Generate a scope name whose string memory lives and moves with this object. |
| // 'base' is used as part of the generated name. |
| absl::string_view CreateAnonymousScope(absl::string_view base); |
| |
| // Attempt to resolve all symbol references. |
| void Resolve(const SymbolTableNode& context, |
| std::vector<absl::Status>* diagnostics); |
| |
| // Attempt to resolve only symbols local to 'context' (no upward search). |
| void ResolveLocally(const SymbolTableNode& context); |
| |
| // Internal consistency check. |
| void VerifySymbolTableRoot(const SymbolTableNode* root) const; |
| |
| // Show definition info of this symbol. |
| std::ostream& PrintDefinition(std::ostream& stream, size_t indent = 0) const; |
| |
| // Show references that are to be resolved starting with this node's scope. |
| std::ostream& PrintReferences(std::ostream& stream, size_t indent = 0) const; |
| |
| // Functor to compare string starting address, for positional sorting. |
| struct StringAddressCompare { |
| using is_transparent = void; // heterogeneous lookup |
| |
| static absl::string_view ToString(absl::string_view s) { return s; } |
| static absl::string_view ToString(const DependentReferences* ref) { |
| return ref->components->Value().identifier; |
| } |
| |
| template <typename L, typename R> |
| bool operator()(L l, R r) const { |
| static constexpr std::less<const void*> compare_address; |
| return compare_address(ToString(l).begin(), ToString(r).begin()); |
| } |
| }; |
| |
| typedef std::set<const DependentReferences*, StringAddressCompare> |
| address_ordered_set_type; |
| typedef std::map<absl::string_view, address_ordered_set_type, |
| verible::StringViewCompare> |
| references_map_view_type; |
| |
| // For testing only, quickly find reference candidates by name, and positional |
| // occurence. The outer map is ordered by string contents, and the inner |
| // associative container is ordered by substring memory address as a secondary |
| // key. The nested map is storage-inefficient (compared to a flat |
| // std::vector), and is only suitable for testing. |
| references_map_view_type LocalReferencesMapViewForTesting() const; |
| }; |
| |
| // This map type represents the global namespace of preprocessor macro |
| // definitions. |
| // The string_view key should be a substring of text whose memory is owned by |
| // any VerilogSourceFile that defines the macro. |
| // TODO(fangism): This should come from a preprocessor and possibly maintained |
| // per translation-unit in multi-file compilation mode. |
| using MacroSymbolMap = |
| std::map<absl::string_view, SymbolInfo, verible::StringViewCompare>; |
| |
| // SymbolTable maintains a named hierarchy of named symbols and scopes for |
| // SystemVerilog. This can be built up separately per translation unit, |
| // or in a unified table across all translation units. |
| // |
| // Typical usage: |
| // VerilogProject project(...); |
| // project.OpenTranslationUnit(...); // open files in loop |
| // |
| // SymbolTable symbol_table(&project); |
| // |
| // std::vector<absl::Status> diagnostics; |
| // symbol_table.Build(&diagnostics); |
| // // ... optional work ... |
| // |
| // symbol_table.Resolve(&diagnostics); |
| // // report diagnostics |
| // // navigate results from symbol_table.Root(). |
| // |
| class SymbolTable { |
| public: |
| class Builder; // implementation detail |
| class Tester; // test-only |
| |
| public: |
| // If 'project' is nullptr, caller assumes responsibility for managing files |
| // and string memory, otherwise string memory is owned by 'project'. |
| explicit SymbolTable(VerilogProject* project) |
| : project_(project), |
| symbol_table_root_(SymbolInfo{SymbolMetaType::kRoot}) {} |
| |
| // can become move-able when needed |
| SymbolTable(const SymbolTable&) = delete; |
| SymbolTable(SymbolTable&&) = delete; |
| SymbolTable& operator=(const SymbolTable&) = delete; |
| SymbolTable& operator=(SymbolTable&&) = delete; |
| |
| ~SymbolTable() { CheckIntegrity(); } |
| |
| const SymbolTableNode& Root() const { return symbol_table_root_; } |
| |
| const VerilogProject* Project() const { return project_; } |
| |
| // TODO(fangism): multi-translation-unit merge operation, |
| // to be done before any symbol resolution |
| |
| // Incrementally construct the symbol table from one translation unit. |
| // This gives the user control over the ordering of processing. |
| // It is safe to build the same unit multiple times, subsequent invocations |
| // will not change the symbol table structure, but will give duplicate symbol |
| // diagnostics. |
| void BuildSingleTranslationUnit(absl::string_view referenced_file_name, |
| std::vector<absl::Status>* diagnostics); |
| |
| // Construct symbol table definitions and references hierarchically, but do |
| // not attempt to resolve the symbols. |
| // The ordering of translation units processing is implementation defined, |
| // and should not be relied upon, but this only maatters when there are |
| // duplicate definitions among translation units. |
| void Build(std::vector<absl::Status>* diagnostics); |
| |
| // Lookup all symbol references, and bind references where successful. |
| // Only attempt to resolve after merging symbol tables. |
| void Resolve(std::vector<absl::Status>* diagnostics); |
| |
| // A "weaker" version of Resolve() that only attempts to resolve symbol |
| // references to definitions belonging to the same scope as the reference |
| // (without upward search). |
| // This can dramatically prune the number of unresolved references that |
| // require root-level resolution. |
| // No diagnostics are collected because silent no-change-on-failure behavior |
| // is intended. |
| void ResolveLocallyOnly(); |
| |
| // Print only the information about symbols defined (no references). |
| // This will print the results of Build(). |
| std::ostream& PrintSymbolDefinitions(std::ostream&) const; |
| |
| // Print only the information about symbol references, and possibly resolved |
| // links to definitions. |
| // This will print the results of Build() or Resolve(). |
| std::ostream& PrintSymbolReferences(std::ostream&) const; |
| |
| protected: // methods |
| // Direct mutation is only intended for the Builder implementation. |
| SymbolTableNode& MutableRoot() { return symbol_table_root_; } |
| |
| // Verify internal structural and pointer consistency. |
| void CheckIntegrity() const; |
| |
| private: // data |
| // This owns all files used to construct the symbol table and therefore, |
| // owns all string_views inside the symbol table and outlives objects of |
| // this class. |
| // The project needs to be mutable for the sake of opening `include-d files |
| // encountered during tree traversal. |
| // TODO(fangism): once a preprocessor is implemented, includes can be expanded |
| // before symbol table construction, and this can become read-only. |
| VerilogProject* const project_; |
| |
| // Global symbol table root for SystemVerilog language elements: |
| // modules, packages, classes, tasks, functions, interfaces, etc. |
| // Known limitation: All of the above elements share the same namespace, |
| // but the language actually compartmentalizes namespaces of some element |
| // types. |
| SymbolTableNode symbol_table_root_; |
| |
| // All macro definitions/references interact through this global namespace. |
| MacroSymbolMap macro_symbols_; |
| }; |
| |
| // Construct a partial symbol table and bindings locations from a single source |
| // file. This does not actually resolve symbol references, there is an |
| // opportunity to merge symbol tables across files before resolving references. |
| // 'source' should already be Parse()d in advance, so that its syntax tree can |
| // be accessed. |
| // If 'project' is provided, then it can be used to open preprocessing-included |
| // files, otherwise include directives will be ignored. |
| std::vector<absl::Status> BuildSymbolTable(const VerilogSourceFile& source, |
| SymbolTable* symbol_table, |
| VerilogProject* project = nullptr); |
| |
| } // namespace verilog |
| |
| #endif // VERIBLE_VERILOG_ANALYSIS_SYMBOL_TABLE_H_ |