// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef VERIBLE_VERILOG_ANALYSIS_SYMBOL_TABLE_H_
#define VERIBLE_VERILOG_ANALYSIS_SYMBOL_TABLE_H_

#include <functional>
#include <iosfwd>
#include <map>

#include "absl/status/status.h"
#include "absl/strings/string_view.h"
#include "common/strings/compare.h"
#include "common/text/symbol.h"
#include "common/util/map_tree.h"
#include "common/util/vector_tree.h"
#include "verilog/analysis/verilog_project.h"

namespace verilog {

struct SymbolInfo;  // forward declaration, defined below

// SymbolTableNode represents a named element in the syntax.
// When it represents a scope, it may have named subtrees.
// The string_view key carries positional information, it corresponds to a
// substring owned by a VerilogSourceFile (which must outlive the symbol table),
// and can be used to look up file origin and position within file.
using SymbolTableNode =
    verible::MapTree<absl::string_view, SymbolInfo, verible::StringViewCompare>;

std::ostream& SymbolTableNodeFullPath(std::ostream&, const SymbolTableNode&);

// Classify what type of element a particular symbol is defining.
enum class SymbolMetaType {
  kRoot,
  kClass,
  kModule,
  kGenerate,  // loop or conditional generate block
  kPackage,
  kParameter,
  kTypeAlias,  // typedef
  kDataNetVariableInstance,
  kFunction,  // includes constructors
  kTask,
  kStruct,
  kEnumType,
  kEnumConstant,
  kInterface,

  // The following enums represent classes/groups of the above types,
  // and are used for validating metatypes of symbol references.

  kUnspecified,  // matches all of the above (any type)
  kCallable,     // matches only kFunction or kTask
};

std::ostream& operator<<(std::ostream&, SymbolMetaType);

// This classifies the type of reference that a single identifier is.
enum class ReferenceType {
  // The base identifier in any chain.
  // These symbols are resolved by potentially searching up-scope from the
  // current context.
  kUnqualified,

  // Similar to kUnqualified in that it is in base position, however, this
  // symbol must be resolved only locally in the current context, without any
  // upward search.  This is suitable for out-of-line definitions, where the
  // base (in "base::member") must be resolved in only the enclosing scope.
  kImmediate,

  // ::id (for packages, and class static members)
  // These symbols are resolved by searching in the parent symbol's
  // context (or its imported/inherited namespaces).
  kDirectMember,

  // .id (for object of struct/class type members)
  // These symbols are resolved by searching in the parent object's *type* scope
  // (or its imported/inherited namespaces).
  kMemberOfTypeOfParent,
};

std::ostream& operator<<(std::ostream&, ReferenceType);

// References may form "trees" of dependencies (ReferenceComponentNode).
// ReferenceComponent is the data portion of each node an a reference tree.
// The overall tree structure drives the order in which references are resolved.
//
// Examples:
//   * "a::b" and "a.b", resolving "b" depends on resolving "a" first.
//   * "foo bar(.x(y));" resolving "x" depends on typeof(bar) -> resolving
//        "foo", but "y" is only resolved using local context, and forms its own
//        independent reference chain.
struct ReferenceComponent {
  // This is the token substring of the identifier being referenced.
  // String memory is expected to be owned by a VerilogSourceFile.
  // This string_view also carries positional information: it can be used to
  // locate the originating VerilogSourceFile via
  // VerilogProject::LookupFileOrigin() and in-file position from the file's
  // LineColumnMap.
  const absl::string_view identifier;

  // What kind of reference is this, and how should it be resolved?
  // See enum definition above.
  const ReferenceType ref_type;

  // Inform the symbol resolver that this symbol must be of a certain metatype.
  // SymbolInfo::kUnspecified is interpreted as "any metatype" for cases
  // where it is not known in advance.
  const SymbolMetaType required_metatype = SymbolMetaType::kUnspecified;

  // This points to the definition with which this symbol was resolved.
  // During symbol table construction, this remains nullptr.
  // If symbol resolution succeeds, this will be updated to non-nullptr.
  // This may remain as nullptr if symbol resolution fails.
  // Symbol table merge operations may invalidate these pointers, so make sure
  // merges are done before attempting symbol resolution.
  // This should only be set by ResolveSymbol().
  // TODO: privatize this member.
  const SymbolTableNode* resolved_symbol = nullptr;

 public:
  absl::Status MatchesMetatype(SymbolMetaType) const;

  // Resolves this symbol and verifies that metatypes are compatible, which is
  // reflected in the returned Status.
  absl::Status ResolveSymbol(const SymbolTableNode&);

  // Only print ref_type and identifier.
  std::ostream& PrintPathComponent(std::ostream&) const;

  // Print everything, showing symbol path if it is resolved.
  std::ostream& PrintVerbose(std::ostream&) const;

  // Structural consistency check.
  void VerifySymbolTableRoot(const SymbolTableNode* root) const;
};

std::ostream& operator<<(std::ostream&, const ReferenceComponent&);

// A node in a tree of *dependent* hierchical references.
// An expression like "x.y.z" will form a linear chain, where resolving 'y'
// depends on 'x', and resolving 'z' depends on 'y'. Named ports are manifest as
// wide nodes: in "f(.a(...), .b(...))", both 'a' and 'b' depend on resolving
// 'f' (and thus are siblings).
using ReferenceComponentNode = verible::VectorTree<ReferenceComponent>;

// Human-readable representation of a node's path from root.
std::ostream& ReferenceNodeFullPath(std::ostream&,
                                    const ReferenceComponentNode&);

// View a ReferenceComponentNode's children as an ordered map, keyed by
// reference string. A single node may have multiple references to the same
// child id, and they are expected to resolve the same way, so one of them is
// arbitrarily chosen to be included in the map, while the others are dropped.
// Primarily for debugging and visualization.
using ReferenceComponentMap =
    std::map<absl::string_view, const ReferenceComponentNode*,
             verible::StringViewCompare>;
ReferenceComponentMap ReferenceComponentNodeMapView(
    const ReferenceComponentNode&);

// Represents any (chained) qualified or unqualified reference.
struct DependentReferences {
  // Sequence of identifiers in a chain like "a.b.c", or "x::y::z".
  // The first element always has ReferenceType::kUnqualified.
  // This is wrapped in a unique_ptr to guarantee address stability on-move.
  std::unique_ptr<ReferenceComponentNode> components;

 public:
  DependentReferences() = default;
  explicit DependentReferences(
      std::unique_ptr<ReferenceComponentNode> components)
      : components(std::move(components)) {}
  // move-only
  DependentReferences(const DependentReferences&) = delete;
  DependentReferences(DependentReferences&&) = default;
  DependentReferences& operator=(const DependentReferences&) = delete;
  DependentReferences& operator=(DependentReferences&&) = delete;

  // Returns true of no references were collected.
  bool Empty() const { return components == nullptr; }

  // Returns the current terminal descendant.
  const ReferenceComponentNode* LastLeaf() const;

  // Returns the last type component of a reference tree.
  // e.g. from "A#(.B())::C#(.D())" -> "C"
  const ReferenceComponentNode* LastTypeComponent() const;
  ReferenceComponentNode* LastTypeComponent();

  // Structural consistency check.
  // When traversing an unqualified or qualified reference, use this to grow a
  // new leaf node in the reference tree.
  // Returns a pointer to the new node.
  ReferenceComponentNode* PushReferenceComponent(
      const ReferenceComponent& component);

  // Structural consistency check.
  void VerifySymbolTableRoot(const SymbolTableNode* root) const;

  // Attempt to resolve all symbol references.
  void Resolve(const SymbolTableNode& context,
               std::vector<absl::Status>* diagnostics) const;

  // Attempt to resolve only local symbol references.
  void ResolveLocally(const SymbolTableNode& context) const;

  // Attempt to only resolve the base of the reference (the first component).
  absl::StatusOr<SymbolTableNode*> ResolveOnlyBaseLocally(
      SymbolTableNode* context);
};

std::ostream& operator<<(std::ostream&, const DependentReferences&);

// Contains information about a type used to declare data/instances/variables.
struct DeclarationTypeInfo {
  // Pointer to the syntax tree origin, e.g. a NodeEnum::kDataType node.
  // This is useful for diagnostic that detail the relevant text,
  // which can be recovered by StringSpanOfSymbol(const verible::Symbol&).
  const verible::Symbol* syntax_origin = nullptr;

  // Pointer to the reference node that represents a user-defined type, if
  // applicable.
  // For built-in and primitive types, this is left as nullptr.
  // This will be used to lookup named members of instances/objects of this
  // type.
  //
  // These pointers should point to an element inside a
  // DependentReferences::components in the same SymbolTable.
  // (Note: to verify this membership at run-time could be expensive because it
  // would require checking all DeclarationTypeInfos in the SymbolTable
  // hierarchy.)
  //
  // This pointer must remain stable, even as reference trees grow, which
  // mandates reserve()-ing ReferenceComponentNodes' children one-time in
  // advance, and only ever moving ReferenceComponents, never copying them.
  const ReferenceComponentNode* user_defined_type = nullptr;

  // Indicates that this is implicit declaration.
  // FIXME(ldk): Check if this could be replaced by user_defined_type pointing
  //    to default implicit type.
  bool implicit = false;

 public:
  // Structural consistency check.
  void VerifySymbolTableRoot(const SymbolTableNode* root) const;
};

std::ostream& operator<<(std::ostream&, const DeclarationTypeInfo&);

// This data type holds information about what each SystemVerilog symbol is.
// An alternative implementation could be done using an abstract base class,
// and subclasses for each element type.
struct SymbolInfo {
  // What is this symbol? (package, module, type, variable, etc.)
  SymbolMetaType metatype;

  // This symbol's name is stored in the .Key() of the SymbolTableNode that
  // encloses this structure.  (This is SymbolTableNode::Value().)

  // In which file is this considered "defined"?
  // Technically, this can be recovered by looking up a string_view range of
  // text in VerilogProject (StringSpanOfSymbol(*syntax_origin)).
  const VerilogSourceFile* file_origin = nullptr;

  // Pointer to the syntax tree origin.
  // An easy way to view this text is StringSpanOfSymbol(*syntax_origin).
  // Reminder: Parts of the syntax tree may originate from included files.
  const verible::Symbol* syntax_origin = nullptr;

  // What is the type associated with this symbol?
  // Only applicable to typed elements: variables, nets, instances,
  // typedefs, etc.
  // This is only set after resolving type references.
  DeclarationTypeInfo declared_type;

  // Collection of generated scope names that exists for the sake of persistent
  // string memory storage (since all other symbol table node keys rely on
  // string_views that belong the source file's string memory buffer).
  // We cannot use std::string directly due to the move-ability requirements
  // along with the possibility of short-string-optimization.
  // std::vector is move-able on reallocation and unique_ptr guarantees
  // move-stability.
  std::vector<std::unique_ptr<const std::string>> anonymous_scope_names;

  // TODO(fangism): symbol attributes
  // visibility: is this symbol (as a member of its parent) public?
  //   ports and parameters can be public, whereas local variables are not.
  // Is this class-member static or non-static?
  // Is this definition complete or only a forward declaration?

  // TODO(fangism): imported symbols and namespaces
  // Applicable to packages, classes, modules.
  // These should be looked up when searching for symbols (without copying).

  // For elements with inheritance only: this points to a base class.
  // Currently limited to single-inheritance.
  // TODO: extend to support multiple-implementation and interface-class
  // multiple inheritance.
  DeclarationTypeInfo parent_type;

  // Collection of references to resolve and bind that appear in the same
  // context. There is no sequential ordering dependency among these references,
  // theoretically, they could all be resolved in parallel.
  // This does not need to be insertion-iterator-stable.
  std::vector<DependentReferences> local_references_to_bind;
  // TODO(fangism): make this searchable by substring offsets.

  // TODO(fangism): cache/memoize the resolution of this node so that resolution
  // can be triggered on-demand, and also avoid duplicate work.

 public:  // methods
  SymbolInfo() = default;
  SymbolInfo(SymbolMetaType metatype, const VerilogSourceFile* file_origin = {},
             const verible::Symbol* syntax_origin = {},
             DeclarationTypeInfo declared_type = {})
      : metatype(metatype),
        file_origin(file_origin),
        syntax_origin(syntax_origin),
        declared_type(declared_type) {}

  // move-only
  SymbolInfo(const SymbolInfo&) = delete;
  SymbolInfo(SymbolInfo&&) = default;
  SymbolInfo& operator=(const SymbolInfo&) = delete;
  SymbolInfo& operator=(SymbolInfo&&) = delete;

  // Generate a scope name whose string memory lives and moves with this object.
  // 'base' is used as part of the generated name.
  absl::string_view CreateAnonymousScope(absl::string_view base);

  // Attempt to resolve all symbol references.
  void Resolve(const SymbolTableNode& context,
               std::vector<absl::Status>* diagnostics);

  // Attempt to resolve only symbols local to 'context' (no upward search).
  void ResolveLocally(const SymbolTableNode& context);

  // Internal consistency check.
  void VerifySymbolTableRoot(const SymbolTableNode* root) const;

  // Show definition info of this symbol.
  std::ostream& PrintDefinition(std::ostream& stream, size_t indent = 0) const;

  // Show references that are to be resolved starting with this node's scope.
  std::ostream& PrintReferences(std::ostream& stream, size_t indent = 0) const;

  // Functor to compare string starting address, for positional sorting.
  struct StringAddressCompare {
    using is_transparent = void;  // heterogeneous lookup

    static absl::string_view ToString(absl::string_view s) { return s; }
    static absl::string_view ToString(const DependentReferences* ref) {
      return ref->components->Value().identifier;
    }

    template <typename L, typename R>
    bool operator()(L l, R r) const {
      static constexpr std::less<const void*> compare_address;
      return compare_address(ToString(l).begin(), ToString(r).begin());
    }
  };

  typedef std::set<const DependentReferences*, StringAddressCompare>
      address_ordered_set_type;
  typedef std::map<absl::string_view, address_ordered_set_type,
                   verible::StringViewCompare>
      references_map_view_type;

  // For testing only, quickly find reference candidates by name, and positional
  // occurence.  The outer map is ordered by string contents, and the inner
  // associative container is ordered by substring memory address as a secondary
  // key. The nested map is storage-inefficient (compared to a flat
  // std::vector), and is only suitable for testing.
  references_map_view_type LocalReferencesMapViewForTesting() const;
};

// This map type represents the global namespace of preprocessor macro
// definitions.
// The string_view key should be a substring of text whose memory is owned by
// any VerilogSourceFile that defines the macro.
// TODO(fangism): This should come from a preprocessor and possibly maintained
// per translation-unit in multi-file compilation mode.
using MacroSymbolMap =
    std::map<absl::string_view, SymbolInfo, verible::StringViewCompare>;

// SymbolTable maintains a named hierarchy of named symbols and scopes for
// SystemVerilog.  This can be built up separately per translation unit,
// or in a unified table across all translation units.
//
// Typical usage:
//   VerilogProject project(...);
//   project.OpenTranslationUnit(...);  // open files in loop
//
//   SymbolTable symbol_table(&project);
//
//   std::vector<absl::Status> diagnostics;
//   symbol_table.Build(&diagnostics);
//   // ... optional work ...
//
//   symbol_table.Resolve(&diagnostics);
//   // report diagnostics
//   // navigate results from symbol_table.Root().
//
class SymbolTable {
 public:
  class Builder;  // implementation detail
  class Tester;   // test-only

 public:
  // If 'project' is nullptr, caller assumes responsibility for managing files
  // and string memory, otherwise string memory is owned by 'project'.
  explicit SymbolTable(VerilogProject* project)
      : project_(project),
        symbol_table_root_(SymbolInfo{SymbolMetaType::kRoot}) {}

  // can become move-able when needed
  SymbolTable(const SymbolTable&) = delete;
  SymbolTable(SymbolTable&&) = delete;
  SymbolTable& operator=(const SymbolTable&) = delete;
  SymbolTable& operator=(SymbolTable&&) = delete;

  ~SymbolTable() { CheckIntegrity(); }

  const SymbolTableNode& Root() const { return symbol_table_root_; }

  const VerilogProject* Project() const { return project_; }

  // TODO(fangism): multi-translation-unit merge operation,
  // to be done before any symbol resolution

  // Incrementally construct the symbol table from one translation unit.
  // This gives the user control over the ordering of processing.
  // It is safe to build the same unit multiple times, subsequent invocations
  // will not change the symbol table structure, but will give duplicate symbol
  // diagnostics.
  void BuildSingleTranslationUnit(absl::string_view referenced_file_name,
                                  std::vector<absl::Status>* diagnostics);

  // Construct symbol table definitions and references hierarchically, but do
  // not attempt to resolve the symbols.
  // The ordering of translation units processing is implementation defined,
  // and should not be relied upon, but this only maatters when there are
  // duplicate definitions among translation units.
  void Build(std::vector<absl::Status>* diagnostics);

  // Lookup all symbol references, and bind references where successful.
  // Only attempt to resolve after merging symbol tables.
  void Resolve(std::vector<absl::Status>* diagnostics);

  // A "weaker" version of Resolve() that only attempts to resolve symbol
  // references to definitions belonging to the same scope as the reference
  // (without upward search).
  // This can dramatically prune the number of unresolved references that
  // require root-level resolution.
  // No diagnostics are collected because silent no-change-on-failure behavior
  // is intended.
  void ResolveLocallyOnly();

  // Print only the information about symbols defined (no references).
  // This will print the results of Build().
  std::ostream& PrintSymbolDefinitions(std::ostream&) const;

  // Print only the information about symbol references, and possibly resolved
  // links to definitions.
  // This will print the results of Build() or Resolve().
  std::ostream& PrintSymbolReferences(std::ostream&) const;

 protected:  // methods
  // Direct mutation is only intended for the Builder implementation.
  SymbolTableNode& MutableRoot() { return symbol_table_root_; }

  // Verify internal structural and pointer consistency.
  void CheckIntegrity() const;

 private:  // data
  // This owns all files used to construct the symbol table and therefore,
  // owns all string_views inside the symbol table and outlives objects of
  // this class.
  // The project needs to be mutable for the sake of opening `include-d files
  // encountered during tree traversal.
  // TODO(fangism): once a preprocessor is implemented, includes can be expanded
  // before symbol table construction, and this can become read-only.
  VerilogProject* const project_;

  // Global symbol table root for SystemVerilog language elements:
  // modules, packages, classes, tasks, functions, interfaces, etc.
  // Known limitation: All of the above elements share the same namespace,
  // but the language actually compartmentalizes namespaces of some element
  // types.
  SymbolTableNode symbol_table_root_;

  // All macro definitions/references interact through this global namespace.
  MacroSymbolMap macro_symbols_;
};

// Construct a partial symbol table and bindings locations from a single source
// file.  This does not actually resolve symbol references, there is an
// opportunity to merge symbol tables across files before resolving references.
// 'source' should already be Parse()d in advance, so that its syntax tree can
// be accessed.
// If 'project' is provided, then it can be used to open preprocessing-included
// files, otherwise include directives will be ignored.
std::vector<absl::Status> BuildSymbolTable(const VerilogSourceFile& source,
                                           SymbolTable* symbol_table,
                                           VerilogProject* project = nullptr);

}  // namespace verilog

#endif  // VERIBLE_VERILOG_ANALYSIS_SYMBOL_TABLE_H_
