Clean up Go runtime char stream, common token, and dfa code

This commit is contained in:
Will Faught 2016-06-16 11:14:12 -07:00
parent c8a9d75cfe
commit 3e621970f5
6 changed files with 293 additions and 322 deletions

View File

@ -2,7 +2,6 @@ package antlr
type CharStream interface {
IntStream
GetText(int, int) string
GetTextFromTokens(start, end Token) string
GetTextFromInterval(*Interval) string

View File

@ -1,61 +1,46 @@
//
// This default implementation of {@link TokenFactory} creates
// {@link CommonToken} objects.
//
package antlr
import "fmt"
// TokenFactory creates Token objects; the default implementation creates
// CommonToken objects.
type TokenFactory interface {
Create(source *TokenSourceCharStreamPair, ttype int, text string, channel, start, stop, line, column int) Token
}
// CommonTokenFactory is the default TokenFactory implementation.
type CommonTokenFactory struct {
// copyText indicates whether CommonToken.setText should be called after
// constructing tokens to explicitly set the text. This is useful for cases
// where the input stream might not be able to provide arbitrary substrings of
// text from the input after the lexer creates a token (e.g. the
// implementation of CharStream.GetText in UnbufferedCharStream panics with an
// UnsupportedOperationException). Explicitly setting the token text allows
// Token.GetText to be called at any time regardless of the input stream
// implementation.
//
// The default value is false to avoid the performance and memory overhead of
// copying text for every token unless explicitly requested.
copyText bool
}
func NewCommonTokenFactory(copyText bool) *CommonTokenFactory {
tf := new(CommonTokenFactory)
// Indicates whether {@link CommonToken//setText} should be called after
// constructing tokens to explicitly set the text. This is useful for cases
// where the input stream might not be able to provide arbitrary substrings
// of text from the input after the lexer creates a token (e.g. the
// implementation of {@link CharStream//GetText} in
// {@link UnbufferedCharStream} panics an
// {@link UnsupportedOperationException}). Explicitly setting the token text
// allows {@link Token//GetText} to be called at any time regardless of the
// input stream implementation.
//
// <p>
// The default value is {@code false} to avoid the performance and memory
// overhead of copying text for every token unless explicitly requested.</p>
//
tf.copyText = copyText
return tf
return &CommonTokenFactory{copyText: copyText}
}
//
// The default {@link CommonTokenFactory} instance.
//
// <p>
// This token factory does not explicitly copy token text when constructing
// tokens.</p>
//
// CommonTokenFactoryDEFAULT is the default CommonTokenFactory. It does not
// explicitly copy token text when constructing tokens.
var CommonTokenFactoryDEFAULT = NewCommonTokenFactory(false)
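For illustration, a minimal usage sketch of choosing a factory: the copying variant trades memory for the ability to read token text even after the input stream can no longer serve substrings. NewInputStream, NewMyLexer, and SetTokenFactory are assumptions borrowed from other ANTLR runtimes, not shown in this diff.
// Hypothetical usage; not part of this commit.
input := NewInputStream("a b c")
lexer := NewMyLexer(input)
lexer.SetTokenFactory(NewCommonTokenFactory(true)) // eagerly copy token text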
func (c *CommonTokenFactory) Create(source *TokenSourceCharStreamPair, ttype int, text string, channel, start, stop, line, column int) Token {
if PortDebug {
fmt.Println("Token factory creating: " + text)
}
var t = NewCommonToken(source, ttype, channel, start, stop)
t.line = line
t.column = column
if text != "" {
t.SetText(text)
} else if c.copyText && source.charStream != nil {
@ -63,16 +48,16 @@ func (c *CommonTokenFactory) Create(source *TokenSourceCharStreamPair, ttype int
}
return t
}
func (c *CommonTokenFactory) createThin(ttype int, text string) Token {
if PortDebug {
fmt.Println("Token factory creating: " + text)
}
var t = NewCommonToken(nil, ttype, TokenDefaultChannel, -1, -1)
t.SetText(text)
return t
}

View File

@ -1,14 +1,3 @@
// This implementation of {@link TokenStream} loads tokens from a
// {@link TokenSource} on-demand, and places the tokens in a buffer to provide
// access to any previous token by index.
//
// <p>
// This token stream ignores the value of {@link Token//getChannel}. If your
// parser requires the token stream filter tokens to only those on a particular
// channel, such as {@link Token//DEFAULT_CHANNEL} or
// {@link Token//HIDDEN_CHANNEL}, use a filtering token stream such a
// {@link CommonTokenStream}.</p>
package antlr
import (
@ -16,58 +5,49 @@ import (
"strconv"
)
// CommonTokenStream is an implementation of TokenStream that loads tokens from
// a TokenSource on-demand and places the tokens in a buffer to provide access
// to any previous token by index. This token stream ignores the value of
// Token.getChannel. If your parser requires the token stream to filter tokens
// to only those on a particular channel, such as Token.DEFAULT_CHANNEL or
// Token.HIDDEN_CHANNEL, use a filtering token stream such as CommonTokenStream.
type CommonTokenStream struct {
channel int
// fetchedEOF indicates whether the Token.EOF token has been fetched from
// tokenSource and added to tokens. This field improves performance for the
// following cases:
//
// consume: The lookahead check in consume to prevent consuming the EOF symbol
// is optimized by checking the values of fetchedEOF and p instead of calling LA.
//
// fetch: The check to prevent adding multiple EOF symbols into tokens is
// trivial with this field.
fetchedEOF bool
// index is the index into tokens of the current token (the next token to
// consume).
// tokens[p] should be LT(1). It is set to -1 when the stream is first
// constructed or when SetTokenSource is called, indicating that the first token
// has not yet been fetched from the token source. For additional information,
// see the documentation of IntStream for a description of initializing methods.
index int
// tokenSource is the TokenSource from which tokens for this stream are
// fetched.
tokenSource TokenSource
tokens []Token
index int
fetchedEOF bool
channel int
// tokens contains all tokens fetched from the token source. The list is
// considered a complete view of the input once fetchedEOF is set to true.
tokens []Token
}
func NewCommonTokenStream(lexer Lexer, channel int) *CommonTokenStream {
ts := new(CommonTokenStream)
// The {@link TokenSource} from which tokens for bt stream are fetched.
ts.tokenSource = lexer
// A collection of all tokens fetched from the token source. The list is
// considered a complete view of the input once {@link //fetchedEOF} is set
// to {@code true}.
ts.tokens = make([]Token, 0)
// The index into {@link //tokens} of the current token (next token to
// {@link //consume}). {@link //tokens}{@code [}{@link //p}{@code ]} should
// be
// {@link //LT LT(1)}.
//
// <p>This field is set to -1 when the stream is first constructed or when
// {@link //SetTokenSource} is called, indicating that the first token has
// not yet been fetched from the token source. For additional information,
// see the documentation of {@link IntStream} for a description of
// Initializing Methods.</p>
ts.index = -1
// Indicates whether the {@link Token//EOF} token has been fetched from
// {@link //tokenSource} and added to {@link //tokens}. This field improves
// performance for the following cases:
//
// <ul>
// <li>{@link //consume}: The lookahead check in {@link //consume} to
// prevent
// consuming the EOF symbol is optimized by checking the values of
// {@link //fetchedEOF} and {@link //p} instead of calling {@link
// //LA}.</li>
// <li>{@link //fetch}: The check to prevent adding multiple EOF symbols
// into
// {@link //tokens} is trivial with bt field.</li>
// <ul>
ts.fetchedEOF = false
ts.channel = channel
return ts
return &CommonTokenStream{
channel: channel,
index: -1,
tokenSource: lexer,
tokens: make([]Token, 0),
}
}
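As a usage sketch, wiring the constructor above to a lexer and draining the buffer; lexer is an assumed generated-lexer instance, and TokenDefaultChannel is the runtime's default channel constant.
// Hypothetical usage; lexer is an assumption, not part of this commit.
tokens := NewCommonTokenStream(lexer, TokenDefaultChannel)
tokens.Fill() // fetch everything up to and including EOF
for _, t := range tokens.GetAllTokens() {
	fmt.Println(t.GetText())
}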
func (c *CommonTokenStream) GetAllTokens() []Token {
@ -78,9 +58,7 @@ func (c *CommonTokenStream) Mark() int {
return 0
}
func (c *CommonTokenStream) Release(marker int) {
// no resources to release
}
func (c *CommonTokenStream) Release(marker int) {}
func (c *CommonTokenStream) reset() {
c.Seek(0)
@ -93,61 +71,64 @@ func (c *CommonTokenStream) Seek(index int) {
func (c *CommonTokenStream) Get(index int) Token {
c.lazyInit()
return c.tokens[index]
}
func (c *CommonTokenStream) Consume() {
var SkipEOFCheck = false
if c.index >= 0 {
if c.fetchedEOF {
// the last token in tokens is EOF. Skip check if p indexes any
// fetched token except the last.
// The last token in tokens is EOF. Skip the check if p indexes any fetched
// token except the last.
SkipEOFCheck = c.index < len(c.tokens)-1
} else {
// no EOF token in tokens. Skip check if p indexes a fetched token.
// No EOF token in tokens. Skip the check if p indexes a fetched token.
SkipEOFCheck = c.index < len(c.tokens)
}
} else {
// not yet initialized
// Not yet initialized
SkipEOFCheck = false
}
if PortDebug {
fmt.Println("Consume 1")
}
if !SkipEOFCheck && c.LA(1) == TokenEOF {
panic("cannot consume EOF")
}
if c.Sync(c.index + 1) {
if PortDebug {
fmt.Println("Consume 2")
}
c.index = c.adjustSeekIndex(c.index + 1)
}
}
// Make sure index {@code i} in tokens has a token.
//
// @return {@code true} if a token is located at index {@code i}, otherwise
// {@code false}.
// @see //Get(int i)
// /
// Sync makes sure index i in tokens has a token and returns true if a token
// is located at index i, or false otherwise.
func (c *CommonTokenStream) Sync(i int) bool {
var n = i - len(c.tokens) + 1 // how many more elements we need?
var n = i - len(c.tokens) + 1 // How many more elements do we need?
if n > 0 {
var fetched = c.fetch(n)
if PortDebug {
fmt.Println("Sync done")
}
return fetched >= n
}
return true
}
// Add {@code n} elements to buffer.
//
// @return The actual number of elements added to the buffer.
// /
// fetch adds n elements to the buffer and returns the actual number of
// elements added to the buffer.
func (c *CommonTokenStream) fetch(n int) int {
if c.fetchedEOF {
return 0
@ -155,13 +136,17 @@ func (c *CommonTokenStream) fetch(n int) int {
for i := 0; i < n; i++ {
var t = c.tokenSource.NextToken()
if PortDebug {
fmt.Println("fetch loop")
}
t.SetTokenIndex(len(c.tokens))
c.tokens = append(c.tokens, t)
if t.GetTokenType() == TokenEOF {
c.fetchedEOF = true
return i + 1
}
}
@ -169,29 +154,36 @@ func (c *CommonTokenStream) fetch(n int) int {
if PortDebug {
fmt.Println("fetch done")
}
return n
}
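A small in-package sketch of the Sync/fetch contract described above: once Sync(i) returns true, tokens[i] has been fetched and is safe to index.
// Hypothetical helper; not part of this commit.
func tokenAt(c *CommonTokenStream, i int) Token {
	if c.Sync(i) { // fetches up to and including index i if needed
		return c.Get(i)
	}
	return nil // i lies beyond EOF
}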
// Get all tokens from start..stop inclusively///
// GetTokens gets all tokens from start to stop inclusive.
func (c *CommonTokenStream) GetTokens(start int, stop int, types *IntervalSet) []Token {
if start < 0 || stop < 0 {
return nil
}
c.lazyInit()
var subset = make([]Token, 0)
if stop >= len(c.tokens) {
stop = len(c.tokens) - 1
}
for i := start; i <= stop; i++ {
var t = c.tokens[i]
if t.GetTokenType() == TokenEOF {
break
}
if types == nil || types.contains(t.GetTokenType()) {
subset = append(subset, t)
}
}
return subset
}
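For example, passing nil for types matches every token type, and out-of-range stop indices are clamped to the last buffered token:
// Hypothetical usage; assumes tokens has already been filled.
firstFive := tokens.GetTokens(0, 4, nil) // tokens 0..4, stopping early at EOF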
@ -214,86 +206,103 @@ func (c *CommonTokenStream) GetTokenSource() TokenSource {
return c.tokenSource
}
// Reset c token stream by setting its token source.///
// SetTokenSource resets the token stream by setting its token source.
func (c *CommonTokenStream) SetTokenSource(tokenSource TokenSource) {
c.tokenSource = tokenSource
c.tokens = make([]Token, 0)
c.index = -1
}
// Given a starting index, return the index of the next token on channel.
// Return i if tokens[i] is on channel. Return -1 if there are no tokens
// on channel between i and EOF.
// /
// NextTokenOnChannel returns the index of the next token on channel given a
// starting index. Returns i if tokens[i] is on channel. Returns -1 if there are
// no tokens on channel between i and EOF.
func (c *CommonTokenStream) NextTokenOnChannel(i, channel int) int {
c.Sync(i)
if i >= len(c.tokens) {
return -1
}
var token = c.tokens[i]
for token.GetChannel() != c.channel {
if token.GetTokenType() == TokenEOF {
return -1
}
i++
c.Sync(i)
token = c.tokens[i]
}
return i
}
// Given a starting index, return the index of the previous token on channel.
// Return i if tokens[i] is on channel. Return -1 if there are no tokens
// on channel between i and 0.
// previousTokenOnChannel returns the index of the previous token on channel
// given a starting index. Returns i if tokens[i] is on channel. Returns -1 if
// there are no tokens on channel between i and 0.
func (c *CommonTokenStream) previousTokenOnChannel(i, channel int) int {
for i >= 0 && c.tokens[i].GetChannel() != channel {
i--
}
return i
}
// Collect all tokens on specified channel to the right of
// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or
// EOF. If channel is -1, find any non default channel token.
// getHiddenTokensToRight collects all tokens on a specified channel to the
// right of the current token up until we see a token on DEFAULT_TOKEN_CHANNEL
// or EOF. If channel is -1, it finds any non-default channel token.
func (c *CommonTokenStream) getHiddenTokensToRight(tokenIndex, channel int) []Token {
c.lazyInit()
if tokenIndex < 0 || tokenIndex >= len(c.tokens) {
panic(strconv.Itoa(tokenIndex) + " not in 0.." + strconv.Itoa(len(c.tokens)-1))
}
var nextOnChannel = c.NextTokenOnChannel(tokenIndex+1, LexerDefaultTokenChannel)
var from = tokenIndex + 1
// if none onchannel to right, nextOnChannel=-1 so set to = last token
// If there are no on-channel tokens to the right, then nextOnChannel == -1,
// so set to to the index of the last token
var to int
if nextOnChannel == -1 {
to = len(c.tokens) - 1
} else {
to = nextOnChannel
}
return c.filterForChannel(from, to, channel)
}
// Collect all tokens on specified channel to the left of
// the current token up until we see a token on DEFAULT_TOKEN_CHANNEL.
// If channel is -1, find any non default channel token.
// getHiddenTokensToLeft collects all tokens on channel to the left of the
// current token until we see a token on DEFAULT_TOKEN_CHANNEL. If channel is
// -1, it finds any non-default channel token.
func (c *CommonTokenStream) getHiddenTokensToLeft(tokenIndex, channel int) []Token {
c.lazyInit()
if tokenIndex < 0 || tokenIndex >= len(c.tokens) {
panic(strconv.Itoa(tokenIndex) + " not in 0.." + strconv.Itoa(len(c.tokens)-1))
}
var prevOnChannel = c.previousTokenOnChannel(tokenIndex-1, LexerDefaultTokenChannel)
if prevOnChannel == tokenIndex-1 {
return nil
}
// if none on channel to left, prevOnChannel=-1 then from=0
// If there are none on channel to the left and prevOnChannel == -1 then from = 0
var from = prevOnChannel + 1
var to = tokenIndex - 1
return c.filterForChannel(from, to, channel)
}
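An in-package sketch of the hidden-token helpers above, e.g. recovering comments that precede a token; the comment channel number is an assumption about how a grammar routed them.
// Hypothetical helper; channel 2 is an assumed comment channel.
func commentsBefore(c *CommonTokenStream, tokenIndex int) []Token {
	return c.getHiddenTokensToLeft(tokenIndex, 2)
}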
func (c *CommonTokenStream) filterForChannel(left, right, channel int) []Token {
var hidden = make([]Token, 0)
for i := left; i < right+1; i++ {
var t = c.tokens[i]
if channel == -1 {
if t.GetChannel() != LexerDefaultTokenChannel {
hidden = append(hidden, t)
@ -302,9 +311,11 @@ func (c *CommonTokenStream) filterForChannel(left, right, channel int) []Token {
hidden = append(hidden, t)
}
}
if len(hidden) == 0 {
return nil
}
return hidden
}
@ -337,37 +348,43 @@ func (c *CommonTokenStream) GetTextFromRuleContext(interval RuleContext) string
}
func (c *CommonTokenStream) GetTextFromInterval(interval *Interval) string {
c.lazyInit()
c.Fill()
if interval == nil {
interval = NewInterval(0, len(c.tokens)-1)
}
var start = interval.start
var stop = interval.stop
if start < 0 || stop < 0 {
return ""
}
if stop >= len(c.tokens) {
stop = len(c.tokens) - 1
}
var s = ""
for i := start; i < stop+1; i++ {
var t = c.tokens[i]
if t.GetTokenType() == TokenEOF {
break
}
s += t.GetText()
}
return s
}
// Get all tokens from lexer until EOF///
// Fill gets all tokens from the lexer until EOF.
func (c *CommonTokenStream) Fill() {
c.lazyInit()
for c.fetch(1000) == 1000 {
continue
}
@ -378,57 +395,71 @@ func (c *CommonTokenStream) adjustSeekIndex(i int) int {
}
func (c *CommonTokenStream) LB(k int) Token {
if k == 0 || c.index-k < 0 {
return nil
}
var i = c.index
var n = 1
// find k good tokens looking backwards
// Find k good tokens looking backward
for n <= k {
// Skip off-channel tokens
i = c.previousTokenOnChannel(i-1, c.channel)
n++
}
if i < 0 {
return nil
}
return c.tokens[i]
}
func (c *CommonTokenStream) LT(k int) Token {
c.lazyInit()
if k == 0 {
return nil
}
if k < 0 {
return c.LB(-k)
}
var i = c.index
var n = 1 // we know tokens[pos] is a good one
// find k good tokens
var n = 1 // We know tokens[i] is valid
// Find k good tokens
for n < k {
// Skip off-channel tokens, but make sure to not look past EOF
if c.Sync(i + 1) {
i = c.NextTokenOnChannel(i+1, c.channel)
}
n++
}
return c.tokens[i]
}
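The LT/LB pair gives symmetric lookahead and lookbehind over on-channel tokens; a usage sketch:
// Hypothetical usage after some tokens have been consumed.
next := tokens.LT(1)  // next on-channel token to be consumed
prev := tokens.LT(-1) // most recently consumed on-channel token, via LB
_, _ = next, prev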
// Count EOF just once.///
// getNumberOfOnChannelTokens counts the on-channel tokens, counting EOF just
// once.
func (c *CommonTokenStream) getNumberOfOnChannelTokens() int {
var n = 0
var n int
c.Fill()
for i := 0; i < len(c.tokens); i++ {
var t = c.tokens[i]
if t.GetChannel() == c.channel {
n++
}
if t.GetTokenType() == TokenEOF {
break
}
}
return n
}

View File

@ -3,104 +3,80 @@ package antlr
import "sort"
type DFA struct {
// atnStartState is the ATN state from which this DFA was created
atnStartState DecisionState
decision int
states map[string]*DFAState
s0 *DFAState
decision int
// states is all the DFA states. Use Map to get the old state back; Set can only
// indicate whether it is there.
states map[string]*DFAState
s0 *DFAState
// precedenceDfa is the backing field for isPrecedenceDfa and setPrecedenceDfa.
// True if the DFA is for a precedence decision and false otherwise.
precedenceDfa bool
}
func NewDFA(atnStartState DecisionState, decision int) *DFA {
d := new(DFA)
// From which ATN state did we create d DFA?
d.atnStartState = atnStartState
d.decision = decision
// A set of all DFA states. Use {@link Map} so we can get old state back
// ({@link Set} only allows you to see if it's there).
d.states = make(map[string]*DFAState)
d.s0 = nil
// {@code true} if d DFA is for a precedence decision otherwise,
// {@code false}. This is the backing field for {@link //isPrecedenceDfa},
// {@link //setPrecedenceDfa}.
d.precedenceDfa = false
return d
return &DFA{
atnStartState: atnStartState,
decision: decision,
states: make(map[string]*DFAState),
}
}
// Get the start state for a specific precedence value.
//
// @param precedence The current precedence.
// @return The start state corresponding to the specified precedence, or
// {@code nil} if no start state exists for the specified precedence.
//
// @panics IllegalStateException if d is not a precedence DFA.
// @see //isPrecedenceDfa()
// getPrecedenceStartState returns the start state corresponding to the
// specified precedence, or nil if no start state exists for that precedence.
// d must be a precedence DFA. See also isPrecedenceDfa.
func (d *DFA) getPrecedenceStartState(precedence int) *DFAState {
if !(d.precedenceDfa) {
panic("Only precedence DFAs may contain a precedence start state.")
if !d.precedenceDfa {
panic("only precedence DFAs may contain a precedence start state")
}
// s0.edges is never nil for a precedence DFA
if precedence < 0 || precedence >= len(d.s0.edges) {
return nil
}
return d.s0.edges[precedence]
}
// Set the start state for a specific precedence value.
//
// @param precedence The current precedence.
// @param startState The start state corresponding to the specified
// precedence.
//
// @panics IllegalStateException if d is not a precedence DFA.
// @see //isPrecedenceDfa()
//
// setPrecedenceStartState sets the start state for the specified precedence.
// d must be a precedence DFA. See also isPrecedenceDfa.
func (d *DFA) setPrecedenceStartState(precedence int, startState *DFAState) {
if !(d.precedenceDfa) {
panic("Only precedence DFAs may contain a precedence start state.")
if !d.precedenceDfa {
panic("only precedence DFAs may contain a precedence start state")
}
if precedence < 0 {
return
}
// Synchronization on s0 here is ok. when the DFA is turned into a
// precedence DFA, s0 will be initialized once and not updated again
// s0.edges is never nil for a precedence DFA
// s0.edges is never null for a precedence DFA
// Synchronization on s0 here is ok. When the DFA is turned into a
// precedence DFA, s0 will be initialized once and not updated again. s0.edges
// is never nil for a precedence DFA.
if precedence >= len(d.s0.edges) {
// enlarge the slice
d.s0.edges = append(d.s0.edges, make([]*DFAState, precedence+1-len(d.s0.edges))...)
}
d.s0.edges[precedence] = startState
}
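An in-package sketch of the precedence bookkeeping above: once the DFA is in precedence mode, s0.edges is indexed by precedence value. The state construction here is illustrative only.
// Hypothetical; startState is an assumed DecisionState, not part of this commit.
dfa := NewDFA(startState, 0)
dfa.setPrecedenceDfa(true) // s0 becomes the precedence-start-state table
s := NewDFAState(1, NewBaseATNConfigSet(false))
dfa.setPrecedenceStartState(3, s)
_ = dfa.getPrecedenceStartState(3) // s; unknown precedences yield nil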
//
// Sets whether d is a precedence DFA. If the specified value differs
// from the current DFA configuration, the following actions are taken
// otherwise no changes are made to the current DFA.
//
// <ul>
// <li>The {@link //states} map is cleared</li>
// <li>If {@code precedenceDfa} is {@code false}, the initial state
// {@link //s0} is set to {@code nil} otherwise, it is initialized to a new
// {@link DFAState} with an empty outgoing {@link DFAState//edges} array to
// store the start states for individual precedence values.</li>
// <li>The {@link //precedenceDfa} field is updated</li>
// </ul>
//
// @param precedenceDfa {@code true} if d is a precedence DFA otherwise,
// {@code false}
// setPrecedenceDfa sets whether d is a precedence DFA. If precedenceDfa
// differs from the current DFA configuration, then d.states is cleared, the
// initial state s0 is set to a new DFAState with an empty outgoing
// DFAState.edges (to store the start states for individual precedence values)
// if precedenceDfa is true or to nil otherwise, and d.precedenceDfa is updated.
func (d *DFA) setPrecedenceDfa(precedenceDfa bool) {
if d.precedenceDfa != precedenceDfa {
d.states = make(map[string]*DFAState)
if precedenceDfa {
var precedenceState = NewDFAState(-1, NewBaseATNConfigSet(false))
precedenceState.edges = make([]*DFAState, 0)
precedenceState.isAcceptState = false
precedenceState.requiresFullContext = false
@ -108,6 +84,7 @@ func (d *DFA) setPrecedenceDfa(precedenceDfa bool) {
} else {
d.s0 = nil
}
d.precedenceDfa = precedenceDfa
}
}
@ -118,22 +95,20 @@ func (d *DFA) GetStates() map[string]*DFAState {
type DFAStateList []*DFAState
func (a DFAStateList) Len() int { return len(a) }
func (a DFAStateList) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a DFAStateList) Less(i, j int) bool { return a[i].stateNumber < a[j].stateNumber }
func (d DFAStateList) Len() int { return len(d) }
func (d DFAStateList) Less(i, j int) bool { return d[i].stateNumber < d[j].stateNumber }
func (d DFAStateList) Swap(i, j int) { d[i], d[j] = d[j], d[i] }
// Return a list of all states in d DFA, ordered by state number.
// sortedStates returns the states in d sorted by their state number.
func (d *DFA) sortedStates() []*DFAState {
var vs = make([]*DFAState, 0, len(d.states))
// extract the values
vs := make([]*DFAState, len(d.states))
i := 0
for _, v := range d.states {
vs[i] = v
i++
vs = append(vs, v)
}
sort.Sort(DFAStateList(vs))
return vs
}
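DFAStateList simply adapts a state slice to sort.Interface, so any slice of states can be ordered by state number the same way sortedStates does; for instance, given hypothetical states a, b, and c:
// Hypothetical usage; a, b, and c are assumed *DFAState values.
states := []*DFAState{c, a, b}
sort.Sort(DFAStateList(states)) // now ordered by stateNumber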
@ -141,14 +116,14 @@ func (d *DFA) String(literalNames []string, symbolicNames []string) string {
if d.s0 == nil {
return ""
}
var serializer = NewDFASerializer(d, literalNames, symbolicNames)
return serializer.String()
return NewDFASerializer(d, literalNames, symbolicNames).String()
}
func (d *DFA) ToLexerString() string {
if d.s0 == nil {
return ""
}
var serializer = NewLexerDFASerializer(d)
return serializer.String()
return NewLexerDFASerializer(d).String()
}

View File

@ -5,15 +5,15 @@ import (
"strconv"
)
// A DFA walker that knows how to dump them to serialized strings.
// DFASerializer is a DFA walker that knows how to dump DFAs to serialized
// strings.
type DFASerializer struct {
dfa *DFA
literalNames, symbolicNames []string
dfa *DFA
literalNames []string
symbolicNames []string
}
func NewDFASerializer(dfa *DFA, literalNames, symbolicNames []string) *DFASerializer {
if literalNames == nil {
literalNames = make([]string, 0)
}
@ -22,28 +22,28 @@ func NewDFASerializer(dfa *DFA, literalNames, symbolicNames []string) *DFASerial
symbolicNames = make([]string, 0)
}
d := new(DFASerializer)
d.dfa = dfa
d.literalNames = literalNames
d.symbolicNames = symbolicNames
return d
return &DFASerializer{
dfa: dfa,
literalNames: literalNames,
symbolicNames: symbolicNames,
}
}
func (d *DFASerializer) String() string {
if d.dfa.s0 == nil {
return ""
}
var buf = ""
var states = d.dfa.sortedStates()
for _, s := range states {
if s.edges != nil {
var n = len(s.edges)
for j := 0; j < n; j++ {
var t = s.edges[j]
if t != nil && t.stateNumber != 0x7FFFFFFF {
buf += d.GetStateString(s)
buf += "-"
@ -55,6 +55,7 @@ func (d *DFASerializer) String() string {
}
}
}
if len(buf) == 0 {
return ""
}
@ -75,7 +76,6 @@ func (d *DFASerializer) getEdgeLabel(i int) string {
}
func (d *DFASerializer) GetStateString(s *DFAState) string {
var a, b string
if s.isAcceptState {
@ -87,6 +87,7 @@ func (d *DFASerializer) GetStateString(s *DFAState) string {
}
var baseStateStr = a + "s" + strconv.Itoa(s.stateNumber) + b
if s.isAcceptState {
if s.predicates != nil {
return baseStateStr + "=>" + fmt.Sprint(s.predicates)
@ -103,12 +104,7 @@ type LexerDFASerializer struct {
}
func NewLexerDFASerializer(dfa *DFA) *LexerDFASerializer {
l := new(LexerDFASerializer)
l.DFASerializer = NewDFASerializer(dfa, nil, nil)
return l
return &LexerDFASerializer{DFASerializer: NewDFASerializer(dfa, nil, nil)}
}
func (l *LexerDFASerializer) getEdgeLabel(i int) string {
@ -116,19 +112,22 @@ func (l *LexerDFASerializer) getEdgeLabel(i int) string {
}
func (l *LexerDFASerializer) String() string {
if l.dfa.s0 == nil {
return ""
}
var buf = ""
var states = l.dfa.sortedStates()
for i := 0; i < len(states); i++ {
var s = states[i]
if s.edges != nil {
var n = len(s.edges)
for j := 0; j < n; j++ {
var t = s.edges[j]
if t != nil && t.stateNumber != 0x7FFFFFFF {
buf += l.GetStateString(s)
buf += "-"
@ -140,6 +139,7 @@ func (l *LexerDFASerializer) String() string {
}
}
}
if len(buf) == 0 {
return ""
}

View File

@ -5,114 +5,97 @@ import (
"strconv"
)
// Map a predicate to a predicted alternative.///
// PredPrediction maps a predicate to a predicted alternative.
type PredPrediction struct {
alt int
pred SemanticContext
}
func NewPredPrediction(pred SemanticContext, alt int) *PredPrediction {
p := new(PredPrediction)
p.alt = alt
p.pred = pred
return p
return &PredPrediction{alt: alt, pred: pred}
}
func (p *PredPrediction) String() string {
return "(" + fmt.Sprint(p.pred) + ", " + fmt.Sprint(p.alt) + ")"
}
// A DFA state represents a set of possible ATN configurations.
// As Aho, Sethi, Ullman p. 117 says "The DFA uses its state
// to keep track of all possible states the ATN can be in after
// reading each input symbol. That is to say, after reading
// input a1a2..an, the DFA is in a state that represents the
// subset T of the states of the ATN that are reachable from the
// ATN's start state along some path labeled a1a2..an."
// In conventional NFA&rarrDFA conversion, therefore, the subset T
// would be a bitset representing the set of states the
// ATN could be in. We need to track the alt predicted by each
// state as well, however. More importantly, we need to maintain
// a stack of states, tracking the closure operations as they
// jump from rule to rule, emulating rule invocations (method calls).
// I have to add a stack to simulate the proper lookahead sequences for
// the underlying LL grammar from which the ATN was derived.
// DFAState represents a set of possible ATN configurations. As Aho, Sethi,
// Ullman p. 117 says: "The DFA uses its state to keep track of all possible
// states the ATN can be in after reading each input symbol. That is to say,
// after reading input a1a2..an, the DFA is in a state that represents the
// subset T of the states of the ATN that are reachable from the ATN's start
// state along some path labeled a1a2..an." In conventional NFA-to-DFA
// conversion, therefore, the subset T would be a bitset representing the set of
// states the ATN could be in. We need to track the alt predicted by each state
// as well, however. More importantly, we need to maintain a stack of states,
// tracking the closure operations as they jump from rule to rule, emulating
// rule invocations (method calls). I have to add a stack to simulate the proper
// lookahead sequences for the underlying LL grammar from which the ATN was
// derived.
//
// <p>I use a set of ATNConfig objects not simple states. An ATNConfig
// is both a state (ala normal conversion) and a RuleContext describing
// the chain of rules (if any) followed to arrive at that state.</p>
// I use a set of ATNConfig objects, not simple states. An ATNConfig is both a
// state (ala normal conversion) and a RuleContext describing the chain of rules
// (if any) followed to arrive at that state.
//
// <p>A DFA state may have multiple references to a particular state,
// but with different ATN contexts (with same or different alts)
// meaning that state was reached via a different set of rule invocations.</p>
// /
// A DFAState may have multiple references to a particular state, but with
// different ATN contexts (with same or different alts) meaning that state was
// reached via a different set of rule invocations.
type DFAState struct {
stateNumber int
configs ATNConfigSet
edges []*DFAState
isAcceptState bool
prediction int
stateNumber int
configs ATNConfigSet
// edges[symbol] points to the target of the symbol. Shift up by 1 so (-1)
// Token.EOF maps to the first element, edges[0].
edges []*DFAState
isAcceptState bool
// prediction is the ttype we match or alt we predict if the state is an
// accept state. Set to ATN.INVALID_ALT_NUMBER when predicates != nil or
// requiresFullContext.
prediction int
lexerActionExecutor *LexerActionExecutor
// requiresFullContext indicates that this state was created during an SLL
// prediction that discovered a conflict between the configurations in the
// state. Future ParserATNSimulator.execATN invocations immediately jump to
// full-context prediction if true.
requiresFullContext bool
predicates []*PredPrediction
// predicates is the predicates associated with the ATN configurations of the
// DFA state during SLL parsing. When we have predicates, requiresFullContext
// is false, since full context prediction evaluates predicates on the fly. If
// predicates is not nil, then prediction is ATN.INVALID_ALT_NUMBER.
//
// We only use these for non-requiresFullContext but conflicting states. That
// means we know from the context (it's $ or we don't dip into outer context)
// that it's an ambiguity, not a conflict.
//
// This list is computed by ParserATNSimulator.predicateDFAState.
predicates []*PredPrediction
}
func NewDFAState(stateNumber int, configs ATNConfigSet) *DFAState {
if configs == nil {
configs = NewBaseATNConfigSet(false)
}
d := new(DFAState)
d.stateNumber = stateNumber
d.configs = configs
// {@code edges[symbol]} points to target of symbol. Shift up by 1 so (-1)
// {@link Token//EOF} maps to {@code edges[0]}.
d.edges = nil
d.isAcceptState = false
// if accept state, what ttype do we Match or alt do we predict?
// This is set to {@link ATN//INVALID_ALT_NUMBER} when {@link
// //predicates}{@code !=nil} or
// {@link //requiresFullContext}.
d.prediction = 0
d.lexerActionExecutor = nil
// Indicates that d state was created during SLL prediction that
// discovered a conflict between the configurations in the state. Future
// {@link ParserATNSimulator//execATN} invocations immediately jumped doing
// full context prediction if d field is true.
d.requiresFullContext = false
// During SLL parsing, d is a list of predicates associated with the
// ATN configurations of the DFA state. When we have predicates,
// {@link //requiresFullContext} is {@code false} since full context
// prediction evaluates predicates
// on-the-fly. If d is not nil, then {@link //prediction} is
// {@link ATN//INVALID_ALT_NUMBER}.
//
// <p>We only use these for non-{@link //requiresFullContext} but
// conflicting states. That
// means we know from the context (it's $ or we don't dip into outer
// context) that it's an ambiguity not a conflict.</p>
//
// <p>This list is computed by {@link
// ParserATNSimulator//predicateDFAState}.</p>
d.predicates = nil
return d
return &DFAState{configs: configs, stateNumber: stateNumber}
}
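The shift-by-one edge convention noted in the struct comment means Token.EOF (-1) lands at index 0; a hypothetical in-package accessor makes that concrete:
// Hypothetical helper; not part of this commit.
func edgeTarget(d *DFAState, symbol int) *DFAState {
	i := symbol + 1 // Token.EOF is -1, so it maps to edges[0]
	if d.edges == nil || i < 0 || i >= len(d.edges) {
		return nil
	}
	return d.edges[i]
}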
// Get the set of all alts mentioned by all ATN configurations in d
// DFA state.
// GetAltSet gets the set of all alts mentioned by all ATN configurations in d.
func (d *DFAState) GetAltSet() *Set {
var alts = NewSet(nil, nil)
if d.configs != nil {
for _, c := range d.configs.GetItems() {
alts.add(c.GetAlt())
}
}
if alts.length() == 0 {
return nil
}
@ -124,20 +107,18 @@ func (d *DFAState) setPrediction(v int) {
d.prediction = v
}
// Two {@link DFAState} instances are equal if their ATN configuration sets
// are the same. This method is used to see if a state already exists.
// equals returns whether d equals other. Two DFAStates are equal if their ATN
// configuration sets are the same. This method is used to see if a state
// already exists.
//
// <p>Because the number of alternatives and number of ATN configurations are
// finite, there is a finite number of DFA states that can be processed.
// This is necessary to show that the algorithm terminates.</p>
// Because the number of alternatives and number of ATN configurations are
// finite, there is a finite number of DFA states that can be processed. This is
// necessary to show that the algorithm terminates.
//
// <p>Cannot test the DFA state numbers here because in
// {@link ParserATNSimulator//addDFAState} we need to know if any other state
// exists that has d exact set of ATN configurations. The
// {@link //stateNumber} is irrelevant.</p>
// Cannot test the DFA state numbers here because in
// ParserATNSimulator.addDFAState we need to know if any other state exists that
// has this exact set of ATN configurations. The stateNumber is irrelevant.
func (d *DFAState) equals(other interface{}) bool {
if d == other {
return true
} else if _, ok := other.(*DFAState); !ok {
@ -152,8 +133,8 @@ func (d *DFAState) String() string {
}
func (d *DFAState) Hash() string {
var s string
if d.isAcceptState {
if d.predicates != nil {
s = "=>" + fmt.Sprint(d.predicates)