372 lines
16 KiB
C
372 lines
16 KiB
C
/** \file
 * Defines the basic structure that supports recognition by a lexer,
 * a parser, or a tree parser.
 * \addtogroup ANTLR3_BASE_RECOGNIZER
 * @{
 */
|
|
#ifndef _ANTLR3_BASERECOGNIZER_H
|
|
#define _ANTLR3_BASERECOGNIZER_H
|
|
|
|
// [The "BSD licence"]
|
|
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
|
|
// http://www.temporal-wave.com
|
|
// http://www.linkedin.com/in/jimidle
|
|
//
|
|
// All rights reserved.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions
|
|
// are met:
|
|
// 1. Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// 2. Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimer in the
|
|
// documentation and/or other materials provided with the distribution.
|
|
// 3. The name of the author may not be used to endorse or promote products
|
|
// derived from this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
#include <antlr3defs.h>
|
|
#include <antlr3exception.h>
|
|
#include <antlr3input.h>
|
|
#include <antlr3tokenstream.h>
|
|
#include <antlr3commontoken.h>
|
|
#include <antlr3commontreenodestream.h>
|
|
#include <antlr3debugeventlistener.h>
|
|
#include <antlr3recognizersharedstate.h>
|
|
|
|
/** Type indicator for a lexer recognizer.
 *  One of the values documented as valid for ANTLR3_BASE_RECOGNIZER::type.
 */
#define ANTLR3_TYPE_LEXER 0x0001

/** Type indicator for a parser recognizer.
 *  One of the values documented as valid for ANTLR3_BASE_RECOGNIZER::type.
 */
#define ANTLR3_TYPE_PARSER 0x0002

/** Type indicator for a tree parser recognizer.
 *  One of the values documented as valid for ANTLR3_BASE_RECOGNIZER::type.
 */
#define ANTLR3_TYPE_TREE_PARSER 0x0004
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/** \brief Base tracking context structure for all types of
|
|
* recognizers.
|
|
*/
|
|
typedef struct ANTLR3_BASE_RECOGNIZER_struct
|
|
{
|
|
/// Whatever super structure is providing this interface needs a pointer to itself
|
|
/// so that this can be passed back to it whenever the api functions
|
|
/// are called back from here.
|
|
///
|
|
void * super;
|
|
|
|
/// Indicates the type of recognizer that we are an instance of.
|
|
/// The programmer may set this to anything of course, but the default
|
|
/// implementations of the interface only really understand the built in
|
|
/// types, so new error handlers etc would probably be required to as well.
|
|
///
|
|
/// Valid types are:
|
|
///
|
|
/// - #ANTLR3_TYPE_LEXER
|
|
/// - #ANTLR3_TYPE_PARSER
|
|
/// - #ANTLR3_TYPE_TREE_PARSER
|
|
///
|
|
ANTLR3_UINT32 type;
|
|
|
|
/// A pointer to the shared recognizer state, such that multiple
|
|
/// recognizers can use the same inputs streams and so on (in
|
|
/// the case of grammar inheritance for instance.
|
|
///
|
|
pANTLR3_RECOGNIZER_SHARED_STATE state;
|
|
|
|
/// If set to something other than NULL, then this structure is
|
|
/// points to an instance of the debugger interface. In general, the
|
|
/// debugger is only referenced internally in recovery/error operations
|
|
/// so that it does not cause overhead by having to check this pointer
|
|
/// in every function/method
|
|
///
|
|
pANTLR3_DEBUG_EVENT_LISTENER debugger;
|
|
|
|
|
|
/// Pointer to a function that matches the current input symbol
|
|
/// against the supplied type. the function causes an error if a
|
|
/// match is not found and the default implementation will also
|
|
/// attempt to perform one token insertion or deletion if that is
|
|
/// possible with the input stream. You can override the default
|
|
/// implementation by installing a pointer to your own function
|
|
/// in this interface after the recognizer has initialized. This can
|
|
/// perform different recovery options or not recover at all and so on.
|
|
/// To ignore recovery altogether, see the comments in the default
|
|
/// implementation of this function in antlr3baserecognizer.c
|
|
///
|
|
/// Note that errors are signalled by setting the error flag below
|
|
/// and creating a new exception structure and installing it in the
|
|
/// exception pointer below (you can chain these if you like and handle them
|
|
/// in some customized way).
|
|
///
|
|
void * (*match) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
|
|
ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
|
|
|
|
/// Pointer to a function that matches the next token/char in the input stream
|
|
/// regardless of what it actually is.
|
|
///
|
|
void (*matchAny) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
|
|
|
|
/// Pointer to a function that decides if the token ahead of the current one is the
|
|
/// one we were loking for, in which case the curernt one is very likely extraneous
|
|
/// and can be reported that way.
|
|
///
|
|
ANTLR3_BOOLEAN
|
|
(*mismatchIsUnwantedToken) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, pANTLR3_INT_STREAM input, ANTLR3_UINT32 ttype);
|
|
|
|
/// Pointer to a function that decides if the current token is one that can logically
|
|
/// follow the one we were looking for, in which case the one we were looking for is
|
|
/// probably missing from the input.
|
|
///
|
|
ANTLR3_BOOLEAN
|
|
(*mismatchIsMissingToken) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, pANTLR3_INT_STREAM input, pANTLR3_BITSET_LIST follow);
|
|
|
|
/** Pointer to a function that works out what to do when a token mismatch
|
|
* occurs, so that Tree parsers can behave differently to other recognizers.
|
|
*/
|
|
void (*mismatch) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
|
|
ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow);
|
|
|
|
/** Pointer to a function to call to report a recognition problem. You may override
|
|
* this function with your own function, but refer to the standard implementation
|
|
* in antlr3baserecognizer.c for guidance. The function should recognize whether
|
|
* error recovery is in force, so that it does not print out more than one error messages
|
|
* for the same error. From the java comments in BaseRecognizer.java:
|
|
*
|
|
* This method sets errorRecovery to indicate the parser is recovering
|
|
* not parsing. Once in recovery mode, no errors are generated.
|
|
* To get out of recovery mode, the parser must successfully match
|
|
* a token (after a resync). So it will go:
|
|
*
|
|
* 1. error occurs
|
|
* 2. enter recovery mode, report error
|
|
* 3. consume until token found in resynch set
|
|
* 4. try to resume parsing
|
|
* 5. next match() will reset errorRecovery mode
|
|
*/
|
|
void (*reportError) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
|
|
|
|
/** Pointer to a function that is called to display a recognition error message. You may
|
|
* override this function independently of (*reportError)() above as that function calls
|
|
* this one to do the actual exception printing.
|
|
*/
|
|
void (*displayRecognitionError) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, pANTLR3_UINT8 * tokenNames);
|
|
|
|
/// Get number of recognition errors (lexer, parser, tree parser). Each
|
|
/// recognizer tracks its own number. So parser and lexer each have
|
|
/// separate count. Does not count the spurious errors found between
|
|
/// an error and next valid token match
|
|
///
|
|
/// \see reportError()
|
|
///
|
|
ANTLR3_UINT32
|
|
(*getNumberOfSyntaxErrors) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
|
|
|
|
/** Pointer to a function that recovers from an error found in the input stream.
|
|
* Generally, this will be a #ANTLR3_EXCEPTION_NOVIABLE_ALT but it could also
|
|
* be from a mismatched token that the (*match)() could not recover from.
|
|
*/
|
|
void (*recover) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
|
|
|
|
/** Pointer to a function that is a hook to listen to token consumption during error recovery.
|
|
* This is mainly used by the debug parser to send events to the listener.
|
|
*/
|
|
void (*beginResync) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
|
|
|
|
/** Pointer to a function that is a hook to listen to token consumption during error recovery.
|
|
* This is mainly used by the debug parser to send events to the listener.
|
|
*/
|
|
void (*endResync) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
|
|
|
|
/** Pointer to a function that is a hook to listen to token consumption during error recovery.
|
|
* This is mainly used by the debug parser to send events to the listener.
|
|
*/
|
|
void (*beginBacktrack) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, ANTLR3_UINT32 level);
|
|
|
|
/** Pointer to a function that is a hook to listen to token consumption during error recovery.
|
|
* This is mainly used by the debug parser to send events to the listener.
|
|
*/
|
|
void (*endBacktrack) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, ANTLR3_UINT32 level, ANTLR3_BOOLEAN successful);
|
|
|
|
/** Pointer to a function to computer the error recovery set for the current rule.
|
|
* \see antlr3ComputeErrorRecoverySet() for details.
|
|
*/
|
|
pANTLR3_BITSET (*computeErrorRecoverySet) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
|
|
|
|
/** Pointer to a function that computes the context-sensitive FOLLOW set for the
|
|
* current rule.
|
|
* \see antlr3ComputeCSRuleFollow() for details.
|
|
*/
|
|
pANTLR3_BITSET (*computeCSRuleFollow) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
|
|
|
|
/** Pointer to a function to combine follow bitsets.
|
|
* \see antlr3CombineFollows() for details.
|
|
*/
|
|
pANTLR3_BITSET (*combineFollows) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
|
|
ANTLR3_BOOLEAN exact);
|
|
|
|
/** Pointer to a function that recovers from a mismatched token in the input stream.
|
|
* \see antlr3RecoverMismatch() for details.
|
|
*/
|
|
void * (*recoverFromMismatchedToken)
|
|
(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
|
|
ANTLR3_UINT32 ttype,
|
|
pANTLR3_BITSET_LIST follow);
|
|
|
|
/** Pointer to a function that recovers from a mismatched set in the token stream, in a similar manner
|
|
* to (*recoverFromMismatchedToken)
|
|
*/
|
|
void * (*recoverFromMismatchedSet) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
|
|
pANTLR3_BITSET_LIST follow);
|
|
|
|
/** Pointer to common routine to handle single token insertion for recovery functions.
|
|
*/
|
|
ANTLR3_BOOLEAN (*recoverFromMismatchedElement)
|
|
(struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
|
|
pANTLR3_BITSET_LIST follow);
|
|
|
|
/** Pointer to function that consumes input until the next token matches
|
|
* the given token.
|
|
*/
|
|
void (*consumeUntil) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
|
|
ANTLR3_UINT32 tokenType);
|
|
|
|
/** Pointer to function that consumes input until the next token matches
|
|
* one in the given set.
|
|
*/
|
|
void (*consumeUntilSet) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
|
|
pANTLR3_BITSET set);
|
|
|
|
/** Pointer to function that returns an ANTLR3_LIST of the strings that identify
|
|
* the rules in the parser that got you to this point. Can be overridden by installing your
|
|
* own function set.
|
|
*
|
|
* \todo Document how to override invocation stack functions.
|
|
*/
|
|
pANTLR3_STACK (*getRuleInvocationStack) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
|
|
pANTLR3_STACK (*getRuleInvocationStackNamed) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
|
|
pANTLR3_UINT8 name);
|
|
|
|
/** Pointer to a function that converts an ANLR3_LIST of tokens to an ANTLR3_LIST of
|
|
* string token names. As this is mostly used in string template processing it may not be useful
|
|
* in the C runtime.
|
|
*/
|
|
pANTLR3_HASH_TABLE (*toStrings) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
|
|
pANTLR3_HASH_TABLE);
|
|
|
|
/** Pointer to a function to return whether the rule has parsed input starting at the supplied
|
|
* start index before. If the rule has not parsed input starting from the supplied start index,
|
|
* then it will return ANTLR3_MEMO_RULE_UNKNOWN. If it has parsed from the suppled start point
|
|
* then it will return the point where it last stopped parsing after that start point.
|
|
*/
|
|
ANTLR3_MARKER (*getRuleMemoization) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
|
|
ANTLR3_INTKEY ruleIndex,
|
|
ANTLR3_MARKER ruleParseStart);
|
|
|
|
/** Pointer to function that determines whether the rule has parsed input at the current index
|
|
* in the input stream
|
|
*/
|
|
ANTLR3_BOOLEAN (*alreadyParsedRule) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
|
|
ANTLR3_MARKER ruleIndex);
|
|
|
|
/** Pointer to function that records whether the rule has parsed the input at a
|
|
* current position successfully or not.
|
|
*/
|
|
void (*memoize) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
|
|
ANTLR3_MARKER ruleIndex,
|
|
ANTLR3_MARKER ruleParseStart);
|
|
|
|
/// Pointer to a function that returns the current input symbol.
|
|
/// The is placed into any label for the associated token ref; e.g., x=ID. Token
|
|
/// and tree parsers need to return different objects. Rather than test
|
|
/// for input stream type or change the IntStream interface, I use
|
|
/// a simple method to ask the recognizer to tell me what the current
|
|
/// input symbol is.
|
|
///
|
|
/// This is ignored for lexers and the lexer implementation of this
|
|
/// function should return NULL.
|
|
///
|
|
void * (*getCurrentInputSymbol) ( struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
|
|
pANTLR3_INT_STREAM istream);
|
|
|
|
/// Conjure up a missing token during error recovery.
|
|
///
|
|
/// The recognizer attempts to recover from single missing
|
|
/// symbols. But, actions might refer to that missing symbol.
|
|
/// For example, x=ID {f($x);}. The action clearly assumes
|
|
/// that there has been an identifier matched previously and that
|
|
/// $x points at that token. If that token is missing, but
|
|
/// the next token in the stream is what we want we assume that
|
|
/// this token is missing and we keep going. Because we
|
|
/// have to return some token to replace the missing token,
|
|
/// we have to conjure one up. This method gives the user control
|
|
/// over the tokens returned for missing tokens. Mostly,
|
|
/// you will want to create something special for identifier
|
|
/// tokens. For literals such as '{' and ',', the default
|
|
/// action in the parser or tree parser works. It simply creates
|
|
/// a CommonToken of the appropriate type. The text will be the token.
|
|
/// If you change what tokens must be created by the lexer,
|
|
/// override this method to create the appropriate tokens.
|
|
///
|
|
void * (*getMissingSymbol) ( struct ANTLR3_BASE_RECOGNIZER_struct * recognizer,
|
|
pANTLR3_INT_STREAM istream,
|
|
pANTLR3_EXCEPTION e,
|
|
ANTLR3_UINT32 expectedTokenType,
|
|
pANTLR3_BITSET_LIST follow);
|
|
|
|
/** Pointer to a function that returns whether the supplied grammar function
|
|
* will parse the current input stream or not. This is the way that syntactic
|
|
* predicates are evaluated. Unlike java, C is perfectly happy to invoke code
|
|
* via a pointer to a function (hence that's what all the ANTLR3 C interfaces
|
|
* do.
|
|
*/
|
|
ANTLR3_BOOLEAN (*synpred) ( struct ANTLR3_BASE_RECOGNIZER_struct * recognizer, void * ctx,
|
|
void (*predicate)(void * ctx));
|
|
|
|
/** Pointer to a function that can construct a generic exception structure
|
|
* with such information as the input stream can provide.
|
|
*/
|
|
void (*exConstruct) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
|
|
|
|
/** Reset the recognizer
|
|
*/
|
|
void (*reset) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
|
|
|
|
/** Pointer to a function that knows how to free the resources of a base recognizer.
|
|
*/
|
|
void (*free) (struct ANTLR3_BASE_RECOGNIZER_struct * recognizer);
|
|
|
|
}
|
|
ANTLR3_BASE_RECOGNIZER;
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#include <antlr3lexer.h>
|
|
#include <antlr3parser.h>
|
|
#include <antlr3treeparser.h>
|
|
|
|
/// @}
|
|
///
|
|
|
|
#endif /* _ANTLR3_BASERECOGNIZER_H */
|
|
|