From da0864a2f81116a3e5e32ef8b833bdb8c031b4b1 Mon Sep 17 00:00:00 2001
From: Sam Harwell <sam@tunnelvisionlabs.com>
Date: Fri, 17 Jan 2014 08:30:04 -0600
Subject: [PATCH] Support grammar files encoded with UTF-8 with a byte order
 mark (fixes #175)

---
 tool/src/org/antlr/v4/parse/ANTLRLexer.g | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tool/src/org/antlr/v4/parse/ANTLRLexer.g b/tool/src/org/antlr/v4/parse/ANTLRLexer.g
index dd190b5eb..975c8d21b 100644
--- a/tool/src/org/antlr/v4/parse/ANTLRLexer.g
+++ b/tool/src/org/antlr/v4/parse/ANTLRLexer.g
@@ -541,7 +541,8 @@ NameStartChar
             |   '\u2C00'..'\u2FEF'
             |   '\u3001'..'\uD7FF'
             |   '\uF900'..'\uFDCF'
-            |   '\uFDF0'..'\uFFFD'
+            |   '\uFDF0'..'\uFEFE'
+            |   '\uFF00'..'\uFFFD'
             ; // ignores | ['\u10000-'\uEFFFF] ;
 
 // ----------------------------
@@ -757,6 +758,15 @@ WSNLCHARS
     : ' ' | '\t' | '\f' | '\n' | '\r'
     ;
 
+// This rule allows ANTLR 4 to parse grammars using the UTF-8 encoding with a
+// byte order mark. Since this Unicode character doesn't appear as a token
+// anywhere else in the grammar, we can simply skip all instances of it without
+// problem. This rule will not break usage of \uFEFF inside a LEXER_CHAR_SET or
+// STRING_LITERAL.
+UnicodeBOM
+    :   '\uFEFF' {skip();} // match U+FEFF and skip it so it never becomes a token
+    ;
+
 // -----------------
 // Illegal Character
 //