forked from jasder/antlr
Clean up Go runtime char stream, common token, and dfa code
This commit is contained in:
parent c8a9d75cfe
commit 3e621970f5
@@ -2,7 +2,6 @@ package antlr
type CharStream interface {
	IntStream

	GetText(int, int) string
	GetTextFromTokens(start, end Token) string
	GetTextFromInterval(*Interval) string
@@ -1,61 +1,46 @@
//
// This default implementation of {@link TokenFactory} creates
// {@link CommonToken} objects.
//

package antlr

import "fmt"

// TokenFactory creates CommonToken objects.
type TokenFactory interface {
	Create(source *TokenSourceCharStreamPair, ttype int, text string, channel, start, stop, line, column int) Token
}

// CommonTokenFactory is the default TokenFactory implementation.
type CommonTokenFactory struct {
	// copyText indicates whether CommonToken.SetText should be called after
	// constructing tokens to explicitly set the text. This is useful for cases
	// where the input stream might not be able to provide arbitrary substrings of
	// text from the input after the lexer creates a token (e.g. the
	// implementation of CharStream.GetText in UnbufferedCharStream panics with an
	// UnsupportedOperationException). Explicitly setting the token text allows
	// Token.GetText to be called at any time regardless of the input stream
	// implementation.
	//
	// The default value is false to avoid the performance and memory overhead of
	// copying text for every token unless explicitly requested.
	copyText bool
}

func NewCommonTokenFactory(copyText bool) *CommonTokenFactory {

	tf := new(CommonTokenFactory)

	// Indicates whether {@link CommonToken//setText} should be called after
	// constructing tokens to explicitly set the text. This is useful for cases
	// where the input stream might not be able to provide arbitrary substrings
	// of text from the input after the lexer creates a token (e.g. the
	// implementation of {@link CharStream//GetText} in
	// {@link UnbufferedCharStream} panics an
	// {@link UnsupportedOperationException}). Explicitly setting the token text
	// allows {@link Token//GetText} to be called at any time regardless of the
	// input stream implementation.
	//
	// <p>
	// The default value is {@code false} to avoid the performance and memory
	// overhead of copying text for every token unless explicitly requested.</p>
	//
	tf.copyText = copyText

	return tf
	return &CommonTokenFactory{copyText: copyText}
}

//
// The default {@link CommonTokenFactory} instance.
//
// <p>
// This token factory does not explicitly copy token text when constructing
// tokens.</p>
//
// CommonTokenFactoryDEFAULT is the default CommonTokenFactory. It does not
// explicitly copy token text when constructing tokens.
var CommonTokenFactoryDEFAULT = NewCommonTokenFactory(false)

func (c *CommonTokenFactory) Create(source *TokenSourceCharStreamPair, ttype int, text string, channel, start, stop, line, column int) Token {

	if PortDebug {
		fmt.Println("Token factory creating: " + text)
	}

	var t = NewCommonToken(source, ttype, channel, start, stop)

	t.line = line
	t.column = column

	if text != "" {
		t.SetText(text)
	} else if c.copyText && source.charStream != nil {

@@ -63,16 +48,16 @@ func (c *CommonTokenFactory) Create(source *TokenSourceCharStreamPair, ttype int
	}

	return t

}

func (c *CommonTokenFactory) createThin(ttype int, text string) Token {

	if PortDebug {
		fmt.Println("Token factory creating: " + text)
	}

	var t = NewCommonToken(nil, ttype, TokenDefaultChannel, -1, -1)

	t.SetText(text)

	return t
}
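To make the copyText behavior above concrete, here is a hedged sketch (not part of this commit; it passes a nil source pair the way createThin does, which assumes the token constructor tolerates that):

	// With copyText=true the factory would pull text from the char stream
	// when no explicit text is given; an explicit text argument always wins.
	factory := NewCommonTokenFactory(true)
	tok := factory.Create(nil, 1, "ident", TokenDefaultChannel, 0, 4, 1, 0)
	fmt.Println(tok.GetText()) // "ident", with no trip back to the input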
@@ -1,14 +1,3 @@
// This implementation of {@link TokenStream} loads tokens from a
// {@link TokenSource} on-demand, and places the tokens in a buffer to provide
// access to any previous token by index.
//
// <p>
// This token stream ignores the value of {@link Token//getChannel}. If your
// parser requires the token stream filter tokens to only those on a particular
// channel, such as {@link Token//DEFAULT_CHANNEL} or
// {@link Token//HIDDEN_CHANNEL}, use a filtering token stream such a
// {@link CommonTokenStream}.</p>

package antlr

import (
@@ -16,58 +5,49 @@ import (
	"strconv"
)

// CommonTokenStream is an implementation of TokenStream that loads tokens from
// a TokenSource on-demand and places the tokens in a buffer to provide access
// to any previous token by index. This token stream ignores the value of
// Token.getChannel. If your parser requires the token stream to filter tokens
// to only those on a particular channel, such as Token.DEFAULT_CHANNEL or
// Token.HIDDEN_CHANNEL, use a filtering token stream such as CommonTokenStream.
type CommonTokenStream struct {
	channel int

	// fetchedEOF indicates whether the Token.EOF token has been fetched from
	// tokenSource and added to tokens. This field improves performance for the
	// following cases:
	//
	// consume: The lookahead check in consume to prevent consuming the EOF symbol
	// is optimized by checking the values of fetchedEOF and p instead of calling LA.
	//
	// fetch: The check to prevent adding multiple EOF symbols into tokens is
	// trivial with this field.
	fetchedEOF bool

	// index indexes into tokens of the current token (next token to consume).
	// tokens[p] should be LT(1). It is set to -1 when the stream is first
	// constructed or when SetTokenSource is called, indicating that the first token
	// has not yet been fetched from the token source. For additional information,
	// see the documentation of IntStream for a description of initializing methods.
	index int

	// tokenSource is the TokenSource from which tokens for this stream are
	// fetched.
	tokenSource TokenSource

	tokens []Token
	index int
	fetchedEOF bool
	channel int
	// tokens is all tokens fetched from the token source. The list is considered a
	// complete view of the input once fetchedEOF is set to true.
	tokens []Token
}

func NewCommonTokenStream(lexer Lexer, channel int) *CommonTokenStream {

	ts := new(CommonTokenStream)

	// The {@link TokenSource} from which tokens for bt stream are fetched.
	ts.tokenSource = lexer

	// A collection of all tokens fetched from the token source. The list is
	// considered a complete view of the input once {@link //fetchedEOF} is set
	// to {@code true}.
	ts.tokens = make([]Token, 0)

	// The index into {@link //tokens} of the current token (next token to
	// {@link //consume}). {@link //tokens}{@code [}{@link //p}{@code ]} should
	// be
	// {@link //LT LT(1)}.
	//
	// <p>This field is set to -1 when the stream is first constructed or when
	// {@link //SetTokenSource} is called, indicating that the first token has
	// not yet been fetched from the token source. For additional information,
	// see the documentation of {@link IntStream} for a description of
	// Initializing Methods.</p>
	ts.index = -1

	// Indicates whether the {@link Token//EOF} token has been fetched from
	// {@link //tokenSource} and added to {@link //tokens}. This field improves
	// performance for the following cases:
	//
	// <ul>
	// <li>{@link //consume}: The lookahead check in {@link //consume} to
	// prevent
	// consuming the EOF symbol is optimized by checking the values of
	// {@link //fetchedEOF} and {@link //p} instead of calling {@link
	// //LA}.</li>
	// <li>{@link //fetch}: The check to prevent adding multiple EOF symbols
	// into
	// {@link //tokens} is trivial with bt field.</li>
	// <ul>
	ts.fetchedEOF = false

	ts.channel = channel

	return ts
	return &CommonTokenStream{
		channel:     channel,
		index:       -1,
		tokenSource: lexer,
		tokens:      make([]Token, 0),
	}
}
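A minimal usage sketch of the cleaned-up constructor (illustrative, not from the diff; NewMyLexer stands in for any ANTLR-generated lexer constructor):

	// Buffer every token from the lexer, filtering lookahead to the default
	// channel, then walk the complete buffer.
	input := NewInputStream("1 + 2")
	lexer := NewMyLexer(input) // hypothetical generated lexer
	stream := NewCommonTokenStream(lexer, TokenDefaultChannel)
	stream.Fill() // fetch everything through EOF
	for _, t := range stream.GetAllTokens() {
		fmt.Println(t.GetText())
	}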
func (c *CommonTokenStream) GetAllTokens() []Token {

@@ -78,9 +58,7 @@ func (c *CommonTokenStream) Mark() int {
	return 0
}

func (c *CommonTokenStream) Release(marker int) {
	// no resources to release
}
func (c *CommonTokenStream) Release(marker int) {}

func (c *CommonTokenStream) reset() {
	c.Seek(0)

@@ -93,61 +71,64 @@ func (c *CommonTokenStream) Seek(index int) {

func (c *CommonTokenStream) Get(index int) Token {
	c.lazyInit()

	return c.tokens[index]
}

func (c *CommonTokenStream) Consume() {
	var SkipEOFCheck = false

	if c.index >= 0 {
		if c.fetchedEOF {
			// the last token in tokens is EOF. Skip check if p indexes any
			// fetched token except the last.
			// The last token in tokens is EOF. Skip the check if p indexes any fetched
			// token except the last.
			SkipEOFCheck = c.index < len(c.tokens)-1
		} else {
			// no EOF token in tokens. Skip check if p indexes a fetched token.
			// No EOF token in tokens. Skip the check if p indexes a fetched token.
			SkipEOFCheck = c.index < len(c.tokens)
		}
	} else {
		// not yet initialized
		// Not yet initialized
		SkipEOFCheck = false
	}

	if PortDebug {
		fmt.Println("Consume 1")
	}

	if !SkipEOFCheck && c.LA(1) == TokenEOF {
		panic("cannot consume EOF")
	}

	if c.Sync(c.index + 1) {
		if PortDebug {
			fmt.Println("Consume 2")
		}

		c.index = c.adjustSeekIndex(c.index + 1)
	}
}

// Make sure index {@code i} in tokens has a token.
//
// @return {@code true} if a token is located at index {@code i}, otherwise
// {@code false}.
// @see //Get(int i)
// /
// Sync makes sure index i in tokens has a token and returns true if a token is
// located at index i, and false otherwise.
func (c *CommonTokenStream) Sync(i int) bool {
	var n = i - len(c.tokens) + 1 // how many more elements we need?
	var n = i - len(c.tokens) + 1 // TODO: How many more elements do we need?

	if n > 0 {
		var fetched = c.fetch(n)

		if PortDebug {
			fmt.Println("Sync done")
		}

		return fetched >= n
	}

	return true
}

// Add {@code n} elements to buffer.
//
// @return The actual number of elements added to the buffer.
// /
// fetch adds n elements to the buffer and returns the actual number of
// elements added to the buffer.
func (c *CommonTokenStream) fetch(n int) int {
	if c.fetchedEOF {
		return 0

@@ -155,13 +136,17 @@ func (c *CommonTokenStream) fetch(n int) int {

	for i := 0; i < n; i++ {
		var t = c.tokenSource.NextToken()

		if PortDebug {
			fmt.Println("fetch loop")
		}

		t.SetTokenIndex(len(c.tokens))
		c.tokens = append(c.tokens, t)

		if t.GetTokenType() == TokenEOF {
			c.fetchedEOF = true

			return i + 1
		}
	}

@@ -169,29 +154,36 @@ func (c *CommonTokenStream) fetch(n int) int {
	if PortDebug {
		fmt.Println("fetch done")
	}

	return n
}

// Get all tokens from start..stop inclusively///
// GetTokens gets all tokens from start to stop inclusive.
func (c *CommonTokenStream) GetTokens(start int, stop int, types *IntervalSet) []Token {

	if start < 0 || stop < 0 {
		return nil
	}

	c.lazyInit()

	var subset = make([]Token, 0)

	if stop >= len(c.tokens) {
		stop = len(c.tokens) - 1
	}

	for i := start; i < stop; i++ {
		var t = c.tokens[i]

		if t.GetTokenType() == TokenEOF {
			break
		}

		if types == nil || types.contains(t.GetTokenType()) {
			subset = append(subset, t)
		}
	}

	return subset
}

@@ -214,86 +206,103 @@ func (c *CommonTokenStream) GetTokenSource() TokenSource {
	return c.tokenSource
}

// Reset c token stream by setting its token source.///
// SetTokenSource resets the token stream by setting its token source.
func (c *CommonTokenStream) SetTokenSource(tokenSource TokenSource) {
	c.tokenSource = tokenSource
	c.tokens = make([]Token, 0)
	c.index = -1
}

// Given a starting index, return the index of the next token on channel.
// Return i if tokens[i] is on channel. Return -1 if there are no tokens
// on channel between i and EOF.
// /
// NextTokenOnChannel returns the index of the next token on channel given a
// starting index. Returns i if tokens[i] is on channel. Returns -1 if there are
// no tokens on channel between i and EOF.
func (c *CommonTokenStream) NextTokenOnChannel(i, channel int) int {
	c.Sync(i)

	if i >= len(c.tokens) {
		return -1
	}

	var token = c.tokens[i]

	for token.GetChannel() != c.channel {
		if token.GetTokenType() == TokenEOF {
			return -1
		}

		i++
		c.Sync(i)
		token = c.tokens[i]
	}

	return i
}

// Given a starting index, return the index of the previous token on channel.
// Return i if tokens[i] is on channel. Return -1 if there are no tokens
// on channel between i and 0.
// previousTokenOnChannel returns the index of the previous token on channel
// given a starting index. Returns i if tokens[i] is on channel. Returns -1 if
// there are no tokens on channel between i and 0.
func (c *CommonTokenStream) previousTokenOnChannel(i, channel int) int {
	for i >= 0 && c.tokens[i].GetChannel() != channel {
		i--
	}

	return i
}

// Collect all tokens on specified channel to the right of
// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or
// EOF. If channel is -1, find any non default channel token.
// getHiddenTokensToRight collects all tokens on a specified channel to the
// right of the current token up until we see a token on DEFAULT_TOKEN_CHANNEL
// or EOF. If channel is -1, it finds any non-default channel token.
func (c *CommonTokenStream) getHiddenTokensToRight(tokenIndex, channel int) []Token {
	c.lazyInit()

	if tokenIndex < 0 || tokenIndex >= len(c.tokens) {
		panic(strconv.Itoa(tokenIndex) + " not in 0.." + strconv.Itoa(len(c.tokens)-1))
	}

	var nextOnChannel = c.NextTokenOnChannel(tokenIndex+1, LexerDefaultTokenChannel)
	var from = tokenIndex + 1
	// if none onchannel to right, nextOnChannel=-1 so set to = last token

	// If there are no on-channel tokens to the right, nextOnChannel == -1, so
	// set to to the last token
	var to int

	if nextOnChannel == -1 {
		to = len(c.tokens) - 1
	} else {
		to = nextOnChannel
	}

	return c.filterForChannel(from, to, channel)
}

// Collect all tokens on specified channel to the left of
// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL.
// If channel is -1, find any non default channel token.
// getHiddenTokensToLeft collects all tokens on channel to the left of the
// current token until we see a token on DEFAULT_TOKEN_CHANNEL. If channel is
// -1, it finds any non-default channel token.
func (c *CommonTokenStream) getHiddenTokensToLeft(tokenIndex, channel int) []Token {
	c.lazyInit()

	if tokenIndex < 0 || tokenIndex >= len(c.tokens) {
		panic(strconv.Itoa(tokenIndex) + " not in 0.." + strconv.Itoa(len(c.tokens)-1))
	}

	var prevOnChannel = c.previousTokenOnChannel(tokenIndex-1, LexerDefaultTokenChannel)

	if prevOnChannel == tokenIndex-1 {
		return nil
	}
	// if none on channel to left, prevOnChannel=-1 then from=0

	// If there are none on channel to the left and prevOnChannel == -1, then
	// from = 0
	var from = prevOnChannel + 1
	var to = tokenIndex - 1

	return c.filterForChannel(from, to, channel)
}

func (c *CommonTokenStream) filterForChannel(left, right, channel int) []Token {
	var hidden = make([]Token, 0)

	for i := left; i < right+1; i++ {
		var t = c.tokens[i]

		if channel == -1 {
			if t.GetChannel() != LexerDefaultTokenChannel {
				hidden = append(hidden, t)

@@ -302,9 +311,11 @@ func (c *CommonTokenStream) filterForChannel(left, right, channel int) []Token {
			hidden = append(hidden, t)
		}
	}

	if len(hidden) == 0 {
		return nil
	}

	return hidden
}
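A sketch of how the channel helpers above compose (illustrative only; these methods are unexported, so this code would have to live inside the antlr package, and it assumes tokenIndex is a valid index of an on-channel token):

	// Gather the hidden tokens (comments, whitespace) around tokenIndex.
	// Per the comments above, channel -1 matches any non-default channel.
	leading := c.getHiddenTokensToLeft(tokenIndex, -1)
	trailing := c.getHiddenTokensToRight(tokenIndex, -1)
	for _, t := range append(leading, trailing...) {
		fmt.Println(t.GetText())
	}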
@@ -337,37 +348,43 @@ func (c *CommonTokenStream) GetTextFromRuleContext(interval RuleContext) string
}

func (c *CommonTokenStream) GetTextFromInterval(interval *Interval) string {

	c.lazyInit()
	c.Fill()

	if interval == nil {
		interval = NewInterval(0, len(c.tokens)-1)
	}

	var start = interval.start
	var stop = interval.stop

	if start < 0 || stop < 0 {
		return ""
	}

	if stop >= len(c.tokens) {
		stop = len(c.tokens) - 1
	}

	var s = ""

	for i := start; i < stop+1; i++ {
		var t = c.tokens[i]

		if t.GetTokenType() == TokenEOF {
			break
		}

		s += t.GetText()
	}

	return s
}

// Get all tokens from lexer until EOF///
// Fill gets all tokens from the lexer until EOF.
func (c *CommonTokenStream) Fill() {
	c.lazyInit()

	for c.fetch(1000) == 1000 {
		continue
	}

@@ -378,57 +395,71 @@ func (c *CommonTokenStream) adjustSeekIndex(i int) int {
}

func (c *CommonTokenStream) LB(k int) Token {

	if k == 0 || c.index-k < 0 {
		return nil
	}

	var i = c.index
	var n = 1
	// find k good tokens looking backwards

	// Find k good tokens looking backward
	for n <= k {
		// Skip off-channel tokens
		i = c.previousTokenOnChannel(i-1, c.channel)
		n++
	}

	if i < 0 {
		return nil
	}

	return c.tokens[i]
}

func (c *CommonTokenStream) LT(k int) Token {
	c.lazyInit()

	if k == 0 {
		return nil
	}

	if k < 0 {
		return c.LB(-k)
	}

	var i = c.index
	var n = 1 // we know tokens[pos] is a good one
	// find k good tokens
	var n = 1 // We know tokens[i] is valid

	// Find k good tokens
	for n < k {
		// Skip off-channel tokens, but make sure to not look past EOF
		if c.Sync(i + 1) {
			i = c.NextTokenOnChannel(i+1, c.channel)
		}

		n++
	}

	return c.tokens[i]
}
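Since LT and LB step through NextTokenOnChannel and previousTokenOnChannel, lookahead is measured in on-channel tokens only. A hedged sketch, assuming input like "a /*c*/ b" with comments routed to a hidden channel:

	tok := stream.LT(1)   // "a": the next default-channel token
	stream.Consume()
	next := stream.LT(1)  // "b": the hidden comment token is skipped
	prev := stream.LT(-1) // "a": negative k goes backward through LB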
// Count EOF just once.///
// getNumberOfOnChannelTokens returns the number of on-channel tokens, counting
// EOF just once.
func (c *CommonTokenStream) getNumberOfOnChannelTokens() int {
	var n = 0
	var n int

	c.Fill()

	for i := 0; i < len(c.tokens); i++ {
		var t = c.tokens[i]

		if t.GetChannel() == c.channel {
			n++
		}

		if t.GetTokenType() == TokenEOF {
			break
		}
	}

	return n
}
@@ -3,104 +3,80 @@ package antlr
import "sort"

type DFA struct {
	// atnStartState is the ATN state in which this was created
	atnStartState DecisionState
	decision int
	states map[string]*DFAState
	s0 *DFAState

	decision int

	// states is all the DFA states. Use Map to get the old state back; Set can only
	// indicate whether it is there.
	states map[string]*DFAState

	s0 *DFAState

	// precedenceDfa is the backing field for isPrecedenceDfa and setPrecedenceDfa.
	// True if the DFA is for a precedence decision and false otherwise.
	precedenceDfa bool
}

func NewDFA(atnStartState DecisionState, decision int) *DFA {

	d := new(DFA)

	// From which ATN state did we create d DFA?
	d.atnStartState = atnStartState
	d.decision = decision
	// A set of all DFA states. Use {@link Map} so we can get old state back
	// ({@link Set} only allows you to see if it's there).
	d.states = make(map[string]*DFAState)
	d.s0 = nil
	// {@code true} if d DFA is for a precedence decision otherwise,
	// {@code false}. This is the backing field for {@link //isPrecedenceDfa},
	// {@link //setPrecedenceDfa}.
	d.precedenceDfa = false

	return d
	return &DFA{
		atnStartState: atnStartState,
		decision:      decision,
		states:        make(map[string]*DFAState),
	}
}
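The states map keyed by a state's hash string is what lets the simulator recover the canonical instance, as the struct comment notes. A simplified, hypothetical stand-in for the lookup that ParserATNSimulator.addDFAState performs:

	// Return the cached equivalent state if present, else cache s. A set
	// could only answer "seen before?", not hand back the old pointer.
	func addDFAState(dfa *DFA, s *DFAState) *DFAState {
		if existing, ok := dfa.states[s.Hash()]; ok {
			return existing
		}
		dfa.states[s.Hash()] = s
		return s
	}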
// Get the start state for a specific precedence value.
//
// @param precedence The current precedence.
// @return The start state corresponding to the specified precedence, or
// {@code nil} if no start state exists for the specified precedence.
//
// @panics IllegalStateException if d is not a precedence DFA.
// @see //isPrecedenceDfa()

// getPrecedenceStartState returns the start state for the specified precedence,
// or nil if no start state exists for that precedence. d must be a precedence
// DFA. See also isPrecedenceDfa.
func (d *DFA) getPrecedenceStartState(precedence int) *DFAState {
	if !(d.precedenceDfa) {
		panic("Only precedence DFAs may contain a precedence start state.")
	if !d.precedenceDfa {
		panic("only precedence DFAs may contain a precedence start state")
	}

	// s0.edges is never nil for a precedence DFA
	if precedence < 0 || precedence >= len(d.s0.edges) {
		return nil
	}

	return d.s0.edges[precedence]
}

// Set the start state for a specific precedence value.
//
// @param precedence The current precedence.
// @param startState The start state corresponding to the specified
// precedence.
//
// @panics IllegalStateException if d is not a precedence DFA.
// @see //isPrecedenceDfa()
//
// setPrecedenceStartState sets the start state for the current precedence. d
// must be a precedence DFA. See also isPrecedenceDfa.
func (d *DFA) setPrecedenceStartState(precedence int, startState *DFAState) {
	if !(d.precedenceDfa) {
		panic("Only precedence DFAs may contain a precedence start state.")
	if !d.precedenceDfa {
		panic("only precedence DFAs may contain a precedence start state")
	}

	if precedence < 0 {
		return
	}

	// Synchronization on s0 here is ok. when the DFA is turned into a
	// precedence DFA, s0 will be initialized once and not updated again
	// s0.edges is never nil for a precedence DFA

	// s0.edges is never null for a precedence DFA
	// Synchronization on s0 here is ok. When the DFA is turned into a
	// precedence DFA, s0 will be initialized once and not updated again. s0.edges
	// is never nil for a precedence DFA.
	if precedence >= len(d.s0.edges) {
		// enlarge the slice
		d.s0.edges = append(d.s0.edges, make([]*DFAState, precedence+1-len(d.s0.edges))...)
	}

	d.s0.edges[precedence] = startState
}

//
// Sets whether d is a precedence DFA. If the specified value differs
// from the current DFA configuration, the following actions are taken
// otherwise no changes are made to the current DFA.
//
// <ul>
// <li>The {@link //states} map is cleared</li>
// <li>If {@code precedenceDfa} is {@code false}, the initial state
// {@link //s0} is set to {@code nil} otherwise, it is initialized to a new
// {@link DFAState} with an empty outgoing {@link DFAState//edges} array to
// store the start states for individual precedence values.</li>
// <li>The {@link //precedenceDfa} field is updated</li>
// </ul>
//
// @param precedenceDfa {@code true} if d is a precedence DFA otherwise,
// {@code false}

// setPrecedenceDfa sets whether d is a precedence DFA. If precedenceDfa differs
// from the current DFA configuration, then d.states is cleared, the initial
// state s0 is set to a new DFAState with an empty outgoing DFAState.edges (to
// store the start states for individual precedence values) if precedenceDfa is
// true, or to nil otherwise, and d.precedenceDfa is updated.
func (d *DFA) setPrecedenceDfa(precedenceDfa bool) {
	if d.precedenceDfa != precedenceDfa {
		d.states = make(map[string]*DFAState)

		if precedenceDfa {
			var precedenceState = NewDFAState(-1, NewBaseATNConfigSet(false))

			precedenceState.edges = make([]*DFAState, 0)
			precedenceState.isAcceptState = false
			precedenceState.requiresFullContext = false

@@ -108,6 +84,7 @@ func (d *DFA) setPrecedenceDfa(precedenceDfa bool) {
		} else {
			d.s0 = nil
		}

		d.precedenceDfa = precedenceDfa
	}
}
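Putting the three precedence methods together, a sketch (decisionState and alt3Start are assumed placeholders, not values from this commit):

	dfa := NewDFA(decisionState, 0)
	dfa.setPrecedenceDfa(true)                // clears states, installs empty s0
	dfa.setPrecedenceStartState(3, alt3Start) // grows s0.edges to length 4
	s := dfa.getPrecedenceStartState(3)       // alt3Start; out of range yields nil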
@@ -118,22 +95,20 @@ func (d *DFA) GetStates() map[string]*DFAState {

type DFAStateList []*DFAState

func (a DFAStateList) Len() int           { return len(a) }
func (a DFAStateList) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a DFAStateList) Less(i, j int) bool { return a[i].stateNumber < a[j].stateNumber }
func (d DFAStateList) Len() int           { return len(d) }
func (d DFAStateList) Less(i, j int) bool { return d[i].stateNumber < d[j].stateNumber }
func (d DFAStateList) Swap(i, j int)      { d[i], d[j] = d[j], d[i] }

// Return a list of all states in d DFA, ordered by state number.
// sortedStates returns the states in d sorted by their state number.
func (d *DFA) sortedStates() []*DFAState {
	var vs = make([]*DFAState, 0, len(d.states))

	// extract the values
	vs := make([]*DFAState, len(d.states))
	i := 0
	for _, v := range d.states {
		vs[i] = v
		i++
		vs = append(vs, v)
	}

	sort.Sort(DFAStateList(vs))

	return vs
}

@@ -141,14 +116,14 @@ func (d *DFA) String(literalNames []string, symbolicNames []string) string {
	if d.s0 == nil {
		return ""
	}

	var serializer = NewDFASerializer(d, literalNames, symbolicNames)
	return serializer.String()

	return NewDFASerializer(d, literalNames, symbolicNames).String()
}

func (d *DFA) ToLexerString() string {
	if d.s0 == nil {
		return ""
	}

	var serializer = NewLexerDFASerializer(d)
	return serializer.String()

	return NewLexerDFASerializer(d).String()
}
@@ -5,15 +5,15 @@ import (
	"strconv"
)

// A DFA walker that knows how to dump them to serialized strings.

// DFASerializer is a DFA walker that knows how to dump a DFA to a serialized
// string.
type DFASerializer struct {
	dfa *DFA
	literalNames, symbolicNames []string
	dfa           *DFA
	literalNames  []string
	symbolicNames []string
}

func NewDFASerializer(dfa *DFA, literalNames, symbolicNames []string) *DFASerializer {

	if literalNames == nil {
		literalNames = make([]string, 0)
	}

@@ -22,28 +22,28 @@ func NewDFASerializer(dfa *DFA, literalNames, symbolicNames []string) *DFASerial
		symbolicNames = make([]string, 0)
	}

	d := new(DFASerializer)

	d.dfa = dfa
	d.literalNames = literalNames
	d.symbolicNames = symbolicNames

	return d
	return &DFASerializer{
		dfa:           dfa,
		literalNames:  literalNames,
		symbolicNames: symbolicNames,
	}
}

func (d *DFASerializer) String() string {

	if d.dfa.s0 == nil {
		return ""
	}

	var buf = ""
	var states = d.dfa.sortedStates()

	for _, s := range states {
		if s.edges != nil {
			var n = len(s.edges)

			for j := 0; j < n; j++ {
				var t = s.edges[j]

				if t != nil && t.stateNumber != 0x7FFFFFFF {
					buf += d.GetStateString(s)
					buf += "-"

@@ -55,6 +55,7 @@ func (d *DFASerializer) String() string {
			}
		}
	}

	if len(buf) == 0 {
		return ""
	}

@@ -75,7 +76,6 @@ func (d *DFASerializer) getEdgeLabel(i int) string {
}

func (d *DFASerializer) GetStateString(s *DFAState) string {

	var a, b string

	if s.isAcceptState {

@@ -87,6 +87,7 @@ func (d *DFASerializer) GetStateString(s *DFAState) string {
	}

	var baseStateStr = a + "s" + strconv.Itoa(s.stateNumber) + b

	if s.isAcceptState {
		if s.predicates != nil {
			return baseStateStr + "=>" + fmt.Sprint(s.predicates)

@@ -103,12 +104,7 @@ type LexerDFASerializer struct {
}

func NewLexerDFASerializer(dfa *DFA) *LexerDFASerializer {

	l := new(LexerDFASerializer)

	l.DFASerializer = NewDFASerializer(dfa, nil, nil)

	return l
	return &LexerDFASerializer{DFASerializer: NewDFASerializer(dfa, nil, nil)}
}

func (l *LexerDFASerializer) getEdgeLabel(i int) string {

@@ -116,19 +112,22 @@ func (l *LexerDFASerializer) getEdgeLabel(i int) string {
}

func (l *LexerDFASerializer) String() string {

	if l.dfa.s0 == nil {
		return ""
	}

	var buf = ""
	var states = l.dfa.sortedStates()

	for i := 0; i < len(states); i++ {
		var s = states[i]

		if s.edges != nil {
			var n = len(s.edges)

			for j := 0; j < n; j++ {
				var t = s.edges[j]

				if t != nil && t.stateNumber != 0x7FFFFFFF {
					buf += l.GetStateString(s)
					buf += "-"

@@ -140,6 +139,7 @@ func (l *LexerDFASerializer) String() string {
			}
		}
	}

	if len(buf) == 0 {
		return ""
	}
@@ -5,114 +5,97 @@ import (
	"strconv"
)

// Map a predicate to a predicted alternative.///

// PredPrediction maps a predicate to a predicted alternative.
type PredPrediction struct {
	alt int
	pred SemanticContext
}

func NewPredPrediction(pred SemanticContext, alt int) *PredPrediction {
	p := new(PredPrediction)

	p.alt = alt
	p.pred = pred

	return p
	return &PredPrediction{alt: alt, pred: pred}
}

func (p *PredPrediction) String() string {
	return "(" + fmt.Sprint(p.pred) + ", " + fmt.Sprint(p.alt) + ")"
}

// A DFA state represents a set of possible ATN configurations.
// As Aho, Sethi, Ullman p. 117 says "The DFA uses its state
// to keep track of all possible states the ATN can be in after
// reading each input symbol. That is to say, after reading
// input a1a2..an, the DFA is in a state that represents the
// subset T of the states of the ATN that are reachable from the
// ATN's start state along some path labeled a1a2..an."
// In conventional NFA&rarrDFA conversion, therefore, the subset T
// would be a bitset representing the set of states the
// ATN could be in. We need to track the alt predicted by each
// state as well, however. More importantly, we need to maintain
// a stack of states, tracking the closure operations as they
// jump from rule to rule, emulating rule invocations (method calls).
// I have to add a stack to simulate the proper lookahead sequences for
// the underlying LL grammar from which the ATN was derived.
// DFAState represents a set of possible ATN configurations. As Aho, Sethi,
// Ullman p. 117 says: "The DFA uses its state to keep track of all possible
// states the ATN can be in after reading each input symbol. That is to say,
// after reading input a1a2..an, the DFA is in a state that represents the
// subset T of the states of the ATN that are reachable from the ATN's start
// state along some path labeled a1a2..an." In conventional NFA-to-DFA
// conversion, therefore, the subset T would be a bitset representing the set of
// states the ATN could be in. We need to track the alt predicted by each state
// as well, however. More importantly, we need to maintain a stack of states,
// tracking the closure operations as they jump from rule to rule, emulating
// rule invocations (method calls). I have to add a stack to simulate the proper
// lookahead sequences for the underlying LL grammar from which the ATN was
// derived.
//
// <p>I use a set of ATNConfig objects not simple states. An ATNConfig
// is both a state (ala normal conversion) and a RuleContext describing
// the chain of rules (if any) followed to arrive at that state.</p>
// I use a set of ATNConfig objects, not simple states. An ATNConfig is both a
// state (ala normal conversion) and a RuleContext describing the chain of rules
// (if any) followed to arrive at that state.
//
// <p>A DFA state may have multiple references to a particular state,
// but with different ATN contexts (with same or different alts)
// meaning that state was reached via a different set of rule invocations.</p>
// /

// A DFAState may have multiple references to a particular state, but with
// different ATN contexts (with same or different alts) meaning that state was
// reached via a different set of rule invocations.
type DFAState struct {
	stateNumber int
	configs ATNConfigSet
	edges []*DFAState
	isAcceptState bool
	prediction int
	stateNumber int
	configs ATNConfigSet

	// edges elements point to the target of the symbol. Shift up by 1 so (-1)
	// Token.EOF maps to the first element.
	edges []*DFAState

	isAcceptState bool

	// prediction is the ttype we match or alt we predict if the state is an
	// accept state. Set to ATN.INVALID_ALT_NUMBER when predicates != nil or
	// requiresFullContext.
	prediction int

	lexerActionExecutor *LexerActionExecutor

	// requiresFullContext indicates it was created during an SLL prediction that
	// discovered a conflict between the configurations in the state. Future
	// ParserATNSimulator.execATN invocations immediately jump to doing
	// full-context prediction if true.
	requiresFullContext bool
	predicates []*PredPrediction

	// predicates is the predicates associated with the ATN configurations of the
	// DFA state during SLL parsing. When we have predicates, requiresFullContext
	// is false, since full context prediction evaluates predicates on-the-fly.
	// If this list is not nil, then prediction is ATN.INVALID_ALT_NUMBER.
	//
	// We only use these for non-requiresFullContext but conflicting states. That
	// means we know from the context (it's $ or we don't dip into outer context)
	// that it's an ambiguity not a conflict.
	//
	// This list is computed by
	// ParserATNSimulator.predicateDFAState.
	predicates []*PredPrediction
}

func NewDFAState(stateNumber int, configs ATNConfigSet) *DFAState {

	if configs == nil {
		configs = NewBaseATNConfigSet(false)
	}

	d := new(DFAState)

	d.stateNumber = stateNumber
	d.configs = configs
	// {@code edges[symbol]} points to target of symbol. Shift up by 1 so (-1)
	// {@link Token//EOF} maps to {@code edges[0]}.
	d.edges = nil
	d.isAcceptState = false
	// if accept state, what ttype do we Match or alt do we predict?
	// This is set to {@link ATN//INVALID_ALT_NUMBER} when {@link
	// //predicates}{@code !=nil} or
	// {@link //requiresFullContext}.
	d.prediction = 0
	d.lexerActionExecutor = nil
	// Indicates that d state was created during SLL prediction that
	// discovered a conflict between the configurations in the state. Future
	// {@link ParserATNSimulator//execATN} invocations immediately jumped doing
	// full context prediction if d field is true.
	d.requiresFullContext = false
	// During SLL parsing, d is a list of predicates associated with the
	// ATN configurations of the DFA state. When we have predicates,
	// {@link //requiresFullContext} is {@code false} since full context
	// prediction evaluates predicates
	// on-the-fly. If d is not nil, then {@link //prediction} is
	// {@link ATN//INVALID_ALT_NUMBER}.
	//
	// <p>We only use these for non-{@link //requiresFullContext} but
	// conflicting states. That
	// means we know from the context (it's $ or we don't dip into outer
	// context) that it's an ambiguity not a conflict.</p>
	//
	// <p>This list is computed by {@link
	// ParserATNSimulator//predicateDFAState}.</p>
	d.predicates = nil
	return d
	return &DFAState{configs: configs, stateNumber: stateNumber}
}

// Get the set of all alts mentioned by all ATN configurations in d
// DFA state.
// GetAltSet gets the set of all alts mentioned by all ATN configurations in d.
func (d *DFAState) GetAltSet() *Set {
	var alts = NewSet(nil, nil)

	if d.configs != nil {
		for _, c := range d.configs.GetItems() {
			alts.add(c.GetAlt())
		}
	}

	if alts.length() == 0 {
		return nil
	}

@@ -124,20 +107,18 @@ func (d *DFAState) setPrediction(v int) {
	d.prediction = v
}

// Two {@link DFAState} instances are equal if their ATN configuration sets
// are the same. This method is used to see if a state already exists.
// equals returns whether d equals other. Two DFAStates are equal if their ATN
// configuration sets are the same. This method is used to see if a state
// already exists.
//
// <p>Because the number of alternatives and number of ATN configurations are
// finite, there is a finite number of DFA states that can be processed.
// This is necessary to show that the algorithm terminates.</p>
// Because the number of alternatives and number of ATN configurations are
// finite, there is a finite number of DFA states that can be processed. This is
// necessary to show that the algorithm terminates.
//
// <p>Cannot test the DFA state numbers here because in
// {@link ParserATNSimulator//addDFAState} we need to know if any other state
// exists that has d exact set of ATN configurations. The
// {@link //stateNumber} is irrelevant.</p>

// Cannot test the DFA state numbers here because in
// ParserATNSimulator.addDFAState we need to know if any other state exists that
// has this exact set of ATN configurations. The stateNumber is irrelevant.
func (d *DFAState) equals(other interface{}) bool {

	if d == other {
		return true
	} else if _, ok := other.(*DFAState); !ok {

@@ -152,8 +133,8 @@ func (d *DFAState) String() string {
}

func (d *DFAState) Hash() string {

	var s string

	if d.isAcceptState {
		if d.predicates != nil {
			s = "=>" + fmt.Sprint(d.predicates)