// blob: 841eaf46b34f7e4032b6651552c2e94b690fb269 [file] [log] [blame] [edit]
// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef VERIBLE_VERILOG_PARSER_VERILOG_LEXICAL_CONTEXT_H_
#define VERIBLE_VERILOG_PARSER_VERILOG_LEXICAL_CONTEXT_H_
#include <iosfwd>
#include <iterator>
#include <stack>
#include <vector>
#include "common/text/token_info.h"
#include "common/text/token_stream_view.h"
namespace verilog {
// Helper state machine to parse optional labels after certain keywords.
class _KeywordLabelStateMachine {
 public:
  // Updates the state machine, by looking ahead at the next token's enum.
  // (Declared here only; the definition is not part of this header.)
  void UpdateState(int);

  // Returns true if lexical stream is not in any of the internal keyword-label
  // states, i.e. only while the machine sits in kNone.  Both kGotKeyword and
  // kGotColonExpectingLabel are keyword-label states and must report false;
  // testing against kGotColonExpectingLabel alone would wrongly report "done"
  // right after a label-accepting keyword.
  bool Done() const { return state_ == kNone; }

 private:
  enum State {
    kNone,        // Not looking at any keyword that can accept a label.
    kGotKeyword,  // Seen a keyword that can accept a label.
    // NOTE(review): presumably entered on ':' following such a keyword, with
    // the label itself expected next -- confirm against UpdateState().
    kGotColonExpectingLabel,
  };

  // Current state; a freshly constructed machine starts in kNone.
  State state_ = kNone;
};
// Helper state machine for tracking constraint_block and constraint_set in the
// grammar.
class _ConstraintBlockStateMachine {
 public:
  _ConstraintBlockStateMachine() = default;

  // The machine is active for as long as its state stack holds at least one
  // entry; a default-constructed machine is therefore inactive.
  bool IsActive() const {
    const bool idle = states_.empty();
    return !idle;
  }

  // Advances the state machine using the enum of the next token (look-ahead).
  void UpdateState(int);

  // Yields the disambiguated enum for a '->' token.
  int InterpretToken(int token_enum) const;

  // Writes a debugging representation of this machine to the given stream.
  std::ostream& Dump(std::ostream&) const;

 private:
  void DeferInvalidToken(int token_enum);

  // The states below follow the grammar rules for constraint_block_item and
  // constraint_expression.
  enum State {
    // Home state: at the start of a list item.
    kBeginningOfBlockItemOrExpression,

    // Skip everything up to the next ';'.  Used for:
    //   "soft ...;"
    //   "unique { ... };"
    //   "disable soft ...;"
    //   "solve ... before ...;"  (from constraint_block_item)
    kIgnoreUntilSemicolon,

    // constraint_expression
    //   : expression_or_dist ;
    //   | expression -> constraint_set
    kExpectingExpressionOrImplication,

    kGotIf,       // if ...
    kGotForeach,  // foreach ...

    // constraint_set
    //   : constraint_expression
    //   | '{' { constraint_expression , }** '}'
    //
    // Final nonterminal of: if-clause, else-clause, foreach-body, and the
    // right-hand side of "expression -> constraint_set"
    // (constraint-implication).
    kExpectingConstraintSet,

    kInParenExpression,  // balance until ')'
    kInBraceExpression,  // balance until '}'
  };

  // Constraint sets nest, hence a stack rather than a single state.  One
  // stack level corresponds to one level of constraint block or constraint
  // set, each of which is wrapped in { }.
  std::stack<State> states_;
};
inline std::ostream& operator<<(std::ostream& os,
const _ConstraintBlockStateMachine& s) {
return s.Dump(os);
}
// Helper state machine to parse randomize calls.
class _RandomizeCallStateMachine {
 public:
  // Active means the machine has left its idle state (kNone).
  bool IsActive() const {
    const bool idle = (state_ == kNone);
    return !idle;
  }

  // Advances the state machine using the enum of the next token (look-ahead).
  void UpdateState(int);

  // Maps a token enum to another token enum; declared here only, see the
  // out-of-header definition for the exact rules.
  int InterpretToken(int) const;

 private:
  enum State {
    kNone,  // Not in a randomize call.
    kGotRandomizeKeyword,
    kOpenedVariableList,
    kClosedVariableList,
    kGotWithKeyword,
    kInsideWithIdentifierList,
    kExpectConstraintBlock,
    kInsideConstraintBlock,
  };

  // TODO(fangism): do we need a stack?  can randomize appear inside a
  // randomize_call?
  State state_ = kNone;

  // Nested state machine for constraint blocks.
  _ConstraintBlockStateMachine constraint_block_tracker_;
};
// Helper state machine to parse (non-extern) constraint declarations.
class _ConstraintDeclarationStateMachine {
 public:
  // Active means the machine has left its idle state (kNone).
  bool IsActive() const {
    const bool idle = (state_ == kNone);
    return !idle;
  }

  // Advances the state machine using the enum of the next token (look-ahead).
  void UpdateState(int);

  // Maps a token enum to another token enum; declared here only, see the
  // out-of-header definition for the exact rules.
  int InterpretToken(int) const;

 private:
  enum State {
    kNone,  // Idle: the initial state, for which IsActive() is false.
    kGotConstraintKeyword,
    kGotConstraintIdentifier,
    // TODO(fangism): handle out-of-line definitions: constraint foo::bar ...
    kInsideConstraintBlock,
  };

  State state_ = kNone;

  // Nested state machine for constraint blocks.
  _ConstraintBlockStateMachine constraint_block_tracker_;
};
// This state machine keeps track of semicolons in a range enclosed by
// a pair of (keyword) tokens. This is useful in disambiguating between
// grammatical constructs that can conflict due to optionality of a former
// list. See the definition bodies of property_declaration and
// sequence_declaration for examples.
// For additional fun, both declarations accept an optional ';' right before
// the terminating keyword, but that one should *not* count as the 'last'.
class _LastSemicolonStateMachine {
 public:
  // trigger:     token enum that activates this state machine.
  // stop:        token enum that de-activates this state machine.
  // replacement: token enum that should replace the last ';'.
  _LastSemicolonStateMachine(int trigger, int stop, int replacement)
      : trigger_token_enum_{trigger},
        finish_token_enum_{stop},
        semicolon_replacement_{replacement} {}

  // Feeds one token to the state machine.  The token is taken by mutable
  // pointer; the definition lives outside this header.
  void UpdateState(verible::TokenInfo*);

 protected:
  enum State {
    kNone,
    kActive,  // in between two keywords
  };

  // Token enum that switches this state machine on.
  const int trigger_token_enum_;

  // Token enum that switches this state machine off.
  const int finish_token_enum_;

  // Token enum substituted for the last ';'.
  const int semicolon_replacement_;

  State state_ = kNone;

  // Remembers the most recent semicolons; on de-activation the last one gets
  // replaced.  Technically a two-slot queue would suffice, but a
  // CircularBuffer is overkill.
  std::stack<verible::TokenInfo*> semicolons_;

  // One token look-back.
  verible::TokenInfo* previous_token_ = nullptr;
};
// A structure for tracking context needed to disambiguate tokens.
// The main input is a token stream coming from a lexer, and the main consumer
// is a parser that accepts a token stream.
// The vast majority of tokens should pass through unchanged.
// The ones that are changed are those that require context-based
// disambiguation.
// This should be designed in a manner that is forgiving of invalid inputs,
// i.e. improperly balanced code should never cause fatal errors.
// This class should maintain just enough state to correctly
// transform token enums on *valid* lexical streams.
//
// Design philosophy: This class itself is a state machine while employing
// smaller, simpler, concurrent state machines.
// The constituent state machines also scan the input token stream and
// update their states accordingly.
// The smaller state machines will be inactive most of the time, and activated
// on certain keywords in certain states.
class LexicalContext {
public:
LexicalContext();
~LexicalContext() = default;
// Not copy-able.
LexicalContext(const LexicalContext&) = delete;
LexicalContext& operator=(const LexicalContext&) = delete;
// Re-writes some token enums in-place using context-sensitivity.
// This function must re-tag tokens enumerated (_TK_*), see verilog.y and
// verilog.lex for all such enumerations.
// This function must accept both valid and invalid inputs, but is only
// required to operate correctly on valid inputs.
// Postcondition: tokens_view's tokens must not be tagged with (_TK_*)
// enumerations.
void TransformVerilogSymbols(
const verible::TokenStreamReferenceView& tokens_view) {
// TODO(fangism): Using a stream interface would further decouple the input
// iteration from output iteration.
for (auto iter : tokens_view) {
_AdvanceToken(&*iter);
}
}
protected: // Allow direct testing of some methods.
// Reads a single token, and may alter it depending on internal state.
void _AdvanceToken(verible::TokenInfo*);
// Changes the enum of a token where disambiguation is needed.
int _InterpretToken(int token_enum) const;
// Changes the enum of a token (in-place) without changing internal state.
void _MutateToken(verible::TokenInfo* token) const {
token->set_token_enum(_InterpretToken(token->token_enum()));
}
// Updates the internally tracked state without touching the token.
void _UpdateState(const verible::TokenInfo& token);
// State functions:
bool ExpectingStatement() const;
bool ExpectingBodyItemStart() const;
bool InFlowControlHeader() const;
bool InModuleDeclarationHeader() const {
return in_module_declaration_ && !in_module_body_;
}
bool InFunctionDeclarationHeader() const {
return in_function_declaration_ && !in_function_body_;
}
bool InTaskDeclarationHeader() const {
return in_task_declaration_ && !in_task_body_;
}
bool InAnyDeclaration() const;
bool InAnyDeclarationHeader() const;
const verible::TokenInfo* previous_token_ = nullptr;
// Non-nestable states can be represented without a stack.
// Do not bother trying to accommodate malformed input token sequences.
bool in_module_declaration_ = false;
bool in_module_body_ = false;
bool in_initial_always_final_construct_ = false;
bool in_function_declaration_ = false;
bool in_function_body_ = false;
bool in_task_declaration_ = false;
bool in_task_body_ = false;
// TODO(fangism): class_declaration, interface_declaration, udp_declaration...
// Extern declarations cannot be nested, so a single bool suffices.
bool in_extern_declaration_ = false;
bool previous_token_finished_header_ = true;
// Nestable states need to be tracked with a stack.
// Tracks if, for, case blocks.
struct FlowControlState {
const verible::TokenInfo* start;
// When this is false, the state is still in the header, which is:
// if (...)
// for (...)
// case (...) (including other case-variants)
bool in_body = false; // starts in header state
explicit FlowControlState(const verible::TokenInfo* token) : start(token) {}
};
std::vector<FlowControlState> flow_control_stack_;
// Tracks optional labels after certain keywords.
_KeywordLabelStateMachine keyword_label_tracker_;
// Tracks parsing state inside randomize_call.
_RandomizeCallStateMachine randomize_call_tracker_;
// Tracks parsing state inside randomize_call.
_ConstraintDeclarationStateMachine constraint_declaration_tracker_;
// Tracks last semicolon in property_declarations so that it can be
// re-enumerated to help disambiguate.
_LastSemicolonStateMachine property_declaration_tracker_;
// Tracks last semicolon in sequence_declarations so that it can be
// re-enumerated to help disambiguate.
_LastSemicolonStateMachine sequence_declaration_tracker_;
// Tracks begin-end paired sequence blocks in all contexts (generate blocks,
// function/task statements, flow-control constructs...).
// Every 'begin' token will be pushed onto this stack.
// Every 'end' token will pop the stack (safely).
// Accepts invalid input, which does not guarantee begin-end balancing.
// Does not care about optional labels after these keywords.
//
// e.g.
// ... // stack initially empty
// begin // pushes onto this stack
// ...
// begin // pushes onto this stack
// ...
// end // pops off of this stack
// ...
// end // pops off of this stack
//
std::vector<const verible::TokenInfo*> block_stack_;
// Tracks open-close paired tokens like parentheses and brackets and braces.
std::vector<const verible::TokenInfo*> balance_stack_;
};
} // namespace verilog
#endif // VERIBLE_VERILOG_PARSER_VERILOG_LEXICAL_CONTEXT_H_