// blob: e1d2eba52b5341411167de46dee9ad5b748de53b [file] [log] [blame]
// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// lexer_test_util.h defines some templates for testing lexers by
// comparing individual tokens or sequences of tokens.
// By declaring test data as one of the following structures:
// SimpleTestData, GenericTestDataSequence, SynthesizedLexerTestData
// the TestLexer function template will select the appropriate
// implementation to run the tests (using function overloading).
#ifndef VERIBLE_COMMON_LEXER_LEXER_TEST_UTIL_H_
#define VERIBLE_COMMON_LEXER_LEXER_TEST_UTIL_H_
#include <cstddef>
#include <initializer_list>
#include <iosfwd>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "absl/strings/string_view.h"
#include "common/text/constants.h"
#include "common/text/token_info.h"
#include "common/text/token_info_test_util.h"
namespace verible {
// Stand-in lexer whose shape follows the Lexer base class.
// Both member functions are only declared here; their definitions live
// outside this header.
class FakeLexer {
 public:
  // Yields a reference to a token.
  // NOTE(review): presumably walks tokens_ via tokens_iter_ -- confirm
  // against the out-of-line definition.
  const TokenInfo& DoNextToken();

 protected:
  // Only subclasses may construct a FakeLexer.
  explicit FakeLexer() {}

  // Hands the lexer the tokens it should work with.
  // NOTE(review): presumably populates tokens_ and resets tokens_iter_ --
  // confirm against the out-of-line definition.
  void SetTokensData(const std::vector<TokenInfo>& tokens);

  // Token storage, plus an iterator into that storage.
  std::vector<TokenInfo> tokens_;
  std::vector<TokenInfo>::const_iterator tokens_iter_;
};
// Thin wrapper that lets a piece of code be streamed into an error message:
//   stream << ShowCode{text};
// Treat as an implementation detail of this library.
struct ShowCode {
  // The code to display.  This is only a view; the text is not copied.
  absl::string_view text;
};

// Stream insertion for ShowCode (defined outside this header).
std::ostream& operator<<(std::ostream& stream, const ShowCode& code);
// SimpleTestData is used to verify single token values.
struct SimpleTestData {
const char* code;
// Check for ignored token, that is, EOF.
template <class Lexer>
void testIgnored() const {
Lexer lexer(code);
auto token = lexer.DoNextToken();
EXPECT_EQ(TK_EOF, token.token_enum) << ShowCode{code};
}
// Check for a single-character token, then EOF.
template <class Lexer>
void testSingleChar() const {
Lexer lexer(code);
EXPECT_EQ(code[0], lexer.DoNextToken().token_enum()) << ShowCode{code};
EXPECT_EQ(TK_EOF, lexer.DoNextToken().token_enum()) << ShowCode{code};
}
// Check for a single token, then EOF.
template <class Lexer>
void testSingleToken(const int expected_token) const {
Lexer lexer(code);
const TokenInfo& next_token(lexer.DoNextToken());
EXPECT_EQ(expected_token, next_token.token_enum()) << ShowCode{code};
const TokenInfo& last_token(lexer.DoNextToken());
EXPECT_EQ(TK_EOF, last_token.token_enum()) << ShowCode{code};
}
};
// GenericTestDataSequence tests multiple tokens in single string.
// This is useful for testing tokens sensitive to lexer start-conditions.
// TODO(b/139743437): phase this out in favor of SynthesizedLexerTestData.
struct GenericTestDataSequence {
  // Text handed to the lexer under test.
  const char* code;
  // Token enums the lexer must produce, in order, before reaching EOF.
  const std::initializer_list<int> expected_tokens;

  // Lexes 'code' with a fresh Lexer, compares each token's enum against
  // expected_tokens in order, and finally requires TK_EOF.
  template <class Lexer>
  void test() const {
    Lexer lexer(code);
    int i = 0;  // position of the token being checked; diagnostics only
    for (const auto& expected_token_enum : expected_tokens) {
      const TokenInfo& next_token(lexer.DoNextToken());
      EXPECT_EQ(expected_token_enum, next_token.token_enum())
          << " Code[" << i << "]:" << ShowCode{code}
          << "\n Last token text: \"" << next_token.text() << "\"";
      ++i;
    }
    const TokenInfo& last_token(lexer.DoNextToken());
    // Every expected token has been consumed at this point, so a failure
    // here means the lexer produced an extra token, not that tokens are
    // missing.  Report the count that was expected and the surplus token.
    EXPECT_EQ(TK_EOF, last_token.token_enum())
        << " expected EOF after " << expected_tokens.size()
        << " tokens, but lexer produced another token: \""
        << last_token.text() << "\": " << ShowCode{code};
  }
};
// Encapsulates both input code and expected tokens by concatenating
// expected tokens' text into a single string.
struct SynthesizedLexerTestData : public TokenInfoTestData {
SynthesizedLexerTestData(std::initializer_list<ExpectedTokenInfo> fragments)
: TokenInfoTestData(fragments) {}
// Runs the given Lexer on the synthesized code of this test case.
template <class Lexer>
void test() const {
Lexer lexer(code);
int i = 0;
for (const auto& expected_token : expected_tokens) {
VerifyExpectedToken(&lexer, expected_token);
++i;
}
const TokenInfo& final_token(lexer.DoNextToken());
EXPECT_EQ(TK_EOF, final_token.token_enum())
<< " expecting " << (expected_tokens.size() - i) << " more tokens"
<< ShowCode{code};
}
private:
// A single expected_text can span multiple tokens, when we're only checking
// string contents, and not checking *how* this excerpt is tokenized.
template <class Lexer>
void DontCareMultiTokens(Lexer* lexer,
absl::string_view expected_text) const {
// Consume tokens and compare string fragments against the
// expected_text until the text is fully matched.
while (!expected_text.empty()) {
const TokenInfo& next_token = lexer->DoNextToken();
const size_t token_length = next_token.text().length();
ASSERT_LE(token_length, expected_text.length())
<< "\nlast token: " << next_token << ShowCode{code};
// Verify that the remaining expected_text starts with token's text.
EXPECT_EQ(expected_text.substr(0, token_length), next_token.text())
<< ShowCode{code};
// Trim from the front the token that was just consumed.
expected_text.remove_prefix(token_length);
}
}
// Check lexer output against a single expected_token.
template <class Lexer>
void VerifyExpectedToken(Lexer* lexer,
const TokenInfo& expected_token) const {
switch (expected_token.token_enum()) {
case ExpectedTokenInfo::kDontCare:
DontCareMultiTokens(lexer, expected_token.text());
break;
case ExpectedTokenInfo::kNoToken:
return;
default:
// Compare full TokenInfo, enum, text (exact range).
const TokenInfo& next_token = lexer->DoNextToken();
EXPECT_EQ(expected_token, next_token) << ShowCode{code};
}
}
};
// Empty tag types, each paired with a constant, whose only job is to select
// the matching TestLexer overload at the call site.
struct IgnoredText {};
struct SingleCharTok {};

static const IgnoredText Ignored = {};
static const SingleCharTok SingleChar = {};
// Test for ignored tokens.
template <class Lexer>
void TestLexer(std::initializer_list<SimpleTestData> test_data,
const IgnoredText& not_used) {
for (const auto& test_case : test_data) {
test_case.testIgnored<Lexer>();
}
}
// Test for single-character tokens (returned value == that character).
template <class Lexer>
void TestLexer(std::initializer_list<SimpleTestData> test_data,
const SingleCharTok& not_used) {
for (const auto& test_case : test_data) {
test_case.testSingleChar<Lexer>();
}
}
// Test for the same kind of token (passed in arg).
template <class Lexer>
void TestLexer(std::initializer_list<SimpleTestData> test_data,
const int expected_token) {
for (const auto& test_case : test_data) {
test_case.testSingleToken<Lexer>(expected_token);
}
}
// Test for sequences of expected tokens.
template <class Lexer>
void TestLexer(std::initializer_list<GenericTestDataSequence> test_data) {
for (const auto& test_case : test_data) {
test_case.test<Lexer>();
}
}
// Test for sequences of expected tokens.
template <class Lexer>
void TestLexer(std::initializer_list<SynthesizedLexerTestData> test_data) {
for (const auto& test_case : test_data) {
test_case.test<Lexer>();
}
}
} // namespace verible
#endif // VERIBLE_COMMON_LEXER_LEXER_TEST_UTIL_H_