New unit tests for Interval + MurmurHash.

While testing Interval() and Interval::of() I found that the latter is twice as slow as normal object creation. It seems that caching single-element intervals doesn't have the same impact as in Java (quite the opposite), so I removed Interval::of and the interval cache.

The MurmurHash implementation was actually written for a 32-bit platform, so I added a 64-bit version too (stripped down from the 128-bit MurmurHash3). The tests cannot directly check the correctness of the algorithm, but duplicate checks over 300K hashes (for short input, which is more prone to duplicates than longer input) showed that there are no duplicates. So I take it that the code is good.

Fixed a hash creation bug in PredictionContext.cpp.
This commit is contained in:
Mike Lischke 2016-03-28 18:15:50 +02:00
parent 3f78367457
commit baef9b0b32
19 changed files with 365 additions and 124 deletions

View File

@ -34,6 +34,7 @@
#include "Interval.h"
using namespace org::antlr::v4::runtime;
using namespace org::antlr::v4::runtime::misc;
@interface InputHandlingTests : XCTestCase
@ -156,10 +157,4 @@ using namespace org::antlr::v4::runtime;
XCTAssertEqual(stream.getSourceName(), "unit tests");
}
- (void)testPerformanceExample {
[self measureBlock: ^{
// Put the code you want to measure the time of here.
}];
}
@end

View File

@ -0,0 +1,233 @@
/*
* [The "BSD license"]
* Copyright (c) 2016 Mike Lischke
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#import <XCTest/XCTest.h>
#include "MurmurHash.h"
#include "Interval.h"
#include "IntervalSet.h"
using namespace org::antlr::v4::runtime;
using namespace org::antlr::v4::runtime::misc;
@interface MiscClassTests : XCTestCase
@end
@implementation MiscClassTests
/// Per-test fixture hook. This suite keeps no shared state, so the override only forwards to the superclass.
- (void)setUp {
[super setUp];
// Intentionally empty: no fixture is created here. XCTest runs this method before each test method.
}
/// Per-test cleanup hook. Nothing is allocated in setUp, so the override only forwards to the superclass.
- (void)tearDown {
// Intentionally empty: there is nothing to release. Suite-specific cleanup would go here, before the call to super.
[super tearDown];
}
/// Checks MurmurHash seed handling and looks for hash collisions in three series of 100K short inputs.
- (void)testMurmurHash {
  // Seed handling: initialize() hands back the seed, and hashing zero words with seed 0 yields 0.
  XCTAssertEqual(MurmurHash::initialize(), (size_t)0);
  XCTAssertEqual(MurmurHash::initialize(31), (size_t)31);
  XCTAssertEqual(MurmurHash::hashCode<size_t>({}, 0, 0), (size_t)0);

  // There are no real 64-bit test vectors for this MurmurHash variant, so instead each series below
  // hashes 100K short inputs and expects every resulting hash value to be unique.
  static const size_t kSampleCount = 100000;

  // Series 1: three words per input, the last one random.
  std::set<size_t> randomizedHashes;
  for (size_t index = 0; index < kSampleCount; ++index) {
    size_t words[] = { index, (size_t)(index * M_PI), arc4random() };
    randomizedHashes.insert(MurmurHash::hashCode(words, 3, 0));
  }
  XCTAssertEqual(randomizedHashes.size(), (size_t)kSampleCount, @"At least one duplicat hash found.");

  // Series 2: two words per input, fully deterministic.
  std::set<size_t> deterministicHashes;
  for (size_t index = 0; index < kSampleCount; ++index) {
    size_t words[] = { index, (size_t)(index * M_PI) };
    deterministicHashes.insert(MurmurHash::hashCode(words, 2, 0));
  }
  XCTAssertEqual(deterministicHashes.size(), (size_t)kSampleCount, @"At least one duplicat hash found.");

  // Series 3: fixed input, varying seed.
  // Original author's note: the higher the seed, the fewer LSDs are in the result (for small input data).
  std::set<size_t> seededHashes;
  for (size_t seed = 0; seed < kSampleCount; ++seed) {
    size_t words[] = { L'µ', 'a', '@', '1' };
    seededHashes.insert(MurmurHash::hashCode(words, 4, seed));
  }
  XCTAssertEqual(seededHashes.size(), (size_t)kSampleCount, @"At least one duplicat hash found.");
}
/// Exercises misc::Interval: length, equality, hashCode, the eight relational predicates,
/// Union, intersection and toString.
- (void)testInterval {
// The Interval class contains no error handling (no checks for invalid intervals), hence some of the
// results look strange, as such intervals are deliberately tested as well.
// length(): number of elements in the inclusive range; 0 when the end lies before the start.
XCTAssertEqual(Interval().length(), 0);
XCTAssertEqual(Interval(0, 0).length(), 1); // Remember: it's an inclusive interval.
XCTAssertEqual(Interval(100, 100).length(), 1);
XCTAssertEqual(Interval(-1, -1).length(), 1); // Unwanted behavior: negative ranges.
XCTAssertEqual(Interval(-1, -2).length(), 0);
XCTAssertEqual(Interval(100, 50).length(), 0);
// Equality: a default-constructed interval is expected to equal -1..-2.
XCTAssert(Interval() == Interval(-1, -2));
XCTAssert(Interval(0, 0) == Interval(0, 0));
XCTAssertFalse(Interval(0, 1) == Interval(1, 2));
// Pinned hash values. NOTE(review): presumably recorded from the current hashCode() implementation;
// they will need updating if that implementation changes.
XCTAssertEqual(Interval().hashCode(), (size_t)22070);
XCTAssertEqual(Interval(0, 0).hashCode(), (size_t)22103);
XCTAssertEqual(Interval(10, 2000).hashCode(), (size_t)24413);
// Each table entry lists the expected results of the interval test functions in this order:
// startsBeforeDisjoint
// startsBeforeNonDisjoint
// startsAfter
// startsAfterDisjoint
// startsAfterNonDisjoint
// disjoint
// adjacent
// properlyContains
typedef std::vector<bool> TestResults;
// runningNumber only identifies the entry in failure messages; each predicate is called on interval1
// with interval2 as the argument.
struct TestEntry { size_t runningNumber; Interval interval1, interval2; TestResults results; };
std::vector<TestEntry> testData = {
// Extreme cases + invalid intervals.
{ 0, Interval(), Interval(10, 20), { true, false, false, false, false, true, false, false } },
{ 1, Interval(1, 1), Interval(1, 1), { false, true, false, false, false, false, false, true } },
{ 2, Interval(10000, 10000), Interval(10000, 10000), { false, true, false, false, false, false, false, true } },
{ 3, Interval(100, 10), Interval(100, 10), { false, false, false, true, false, true, false, true } },
{ 4, Interval(100, 10), Interval(10, 100), { false, false, true, false, true, false, false, false } },
{ 5, Interval(10, 100), Interval(100, 10), { false, true, false, false, false, false, false, true } },
// First starts before second. End varies.
{ 20, Interval(10, 12), Interval(12, 100), { false, true, false, false, false, false, false, false } },
{ 21, Interval(10, 12), Interval(13, 100), { true, false, false, false, false, true, true, false } },
{ 22, Interval(10, 12), Interval(14, 100), { true, false, false, false, false, true, false, false } },
{ 23, Interval(10, 13), Interval(12, 100), { false, true, false, false, false, false, false, false } },
{ 24, Interval(10, 14), Interval(12, 100), { false, true, false, false, false, false, false, false } },
{ 25, Interval(10, 99), Interval(12, 100), { false, true, false, false, false, false, false, false } },
{ 26, Interval(10, 100), Interval(12, 100), { false, true, false, false, false, false, false, true } },
{ 27, Interval(10, 101), Interval(12, 100), { false, true, false, false, false, false, false, true } },
{ 28, Interval(10, 1000), Interval(12, 100), { false, true, false, false, false, false, false, true } },
// First and second start equal. End varies.
{ 30, Interval(12, 12), Interval(12, 100), { false, true, false, false, false, false, false, false } },
{ 31, Interval(12, 12), Interval(13, 100), { true, false, false, false, false, true, true, false } },
{ 32, Interval(12, 12), Interval(14, 100), { true, false, false, false, false, true, false, false } },
{ 33, Interval(12, 13), Interval(12, 100), { false, true, false, false, false, false, false, false } },
{ 34, Interval(12, 14), Interval(12, 100), { false, true, false, false, false, false, false, false } },
{ 35, Interval(12, 99), Interval(12, 100), { false, true, false, false, false, false, false, false } },
{ 36, Interval(12, 100), Interval(12, 100), { false, true, false, false, false, false, false, true } },
{ 37, Interval(12, 101), Interval(12, 100), { false, true, false, false, false, false, false, true } },
{ 38, Interval(12, 1000), Interval(12, 100), { false, true, false, false, false, false, false, true } },
// First starts after second. End varies.
{ 40, Interval(15, 12), Interval(12, 100), { false, false, true, false, true, false, false, false } },
{ 41, Interval(15, 12), Interval(13, 100), { false, false, true, false, true, false, true, false } },
{ 42, Interval(15, 12), Interval(14, 100), { false, false, true, false, true, false, false, false } },
{ 43, Interval(15, 13), Interval(12, 100), { false, false, true, false, true, false, false, false } },
{ 44, Interval(15, 14), Interval(12, 100), { false, false, true, false, true, false, false, false } },
{ 45, Interval(15, 99), Interval(12, 100), { false, false, true, false, true, false, false, false } },
{ 46, Interval(15, 100), Interval(12, 100), { false, false, true, false, true, false, false, false } },
{ 47, Interval(15, 101), Interval(12, 100), { false, false, true, false, true, false, false, false } },
{ 48, Interval(15, 1000), Interval(12, 100), { false, false, true, false, true, false, false, false } },
// First ends before second. Start varies.
{ 50, Interval(10, 90), Interval(20, 100), { false, true, false, false, false, false, false, false } },
{ 51, Interval(19, 90), Interval(20, 100), { false, true, false, false, false, false, false, false } },
{ 52, Interval(20, 90), Interval(20, 100), { false, true, false, false, false, false, false, false } },
{ 53, Interval(21, 90), Interval(20, 100), { false, false, true, false, true, false, false, false } },
{ 54, Interval(98, 90), Interval(20, 100), { false, false, true, false, true, false, false, false } },
{ 55, Interval(99, 90), Interval(20, 100), { false, false, true, false, true, false, false, false } },
{ 56, Interval(100, 90), Interval(20, 100), { false, false, true, false, true, false, false, false } },
{ 57, Interval(101, 90), Interval(20, 100), { false, false, true, true, false, true, true, false } },
{ 58, Interval(1000, 90), Interval(20, 100), { false, false, true, true, false, true, false, false } },
// First and second end equal. Start varies.
{ 60, Interval(10, 100), Interval(20, 100), { false, true, false, false, false, false, false, true } },
{ 61, Interval(19, 100), Interval(20, 100), { false, true, false, false, false, false, false, true } },
{ 62, Interval(20, 100), Interval(20, 100), { false, true, false, false, false, false, false, true } },
{ 63, Interval(21, 100), Interval(20, 100), { false, false, true, false, true, false, false, false } },
{ 64, Interval(98, 100), Interval(20, 100), { false, false, true, false, true, false, false, false } },
{ 65, Interval(99, 100), Interval(20, 100), { false, false, true, false, true, false, false, false } },
{ 66, Interval(100, 100), Interval(20, 100), { false, false, true, false, true, false, false, false } },
{ 67, Interval(101, 100), Interval(20, 100), { false, false, true, true, false, true, true, false } },
{ 68, Interval(1000, 100), Interval(20, 100), { false, false, true, true, false, true, false, false } },
// First ends after second. Start varies.
{ 70, Interval(10, 1000), Interval(20, 100), { false, true, false, false, false, false, false, true } },
{ 71, Interval(19, 1000), Interval(20, 100), { false, true, false, false, false, false, false, true } },
{ 72, Interval(20, 1000), Interval(20, 100), { false, true, false, false, false, false, false, true } },
{ 73, Interval(21, 1000), Interval(20, 100), { false, false, true, false, true, false, false, false } },
{ 74, Interval(98, 1000), Interval(20, 100), { false, false, true, false, true, false, false, false } },
{ 75, Interval(99, 1000), Interval(20, 100), { false, false, true, false, true, false, false, false } },
{ 76, Interval(100, 1000), Interval(20, 100), { false, false, true, false, true, false, false, false } },
{ 77, Interval(101, 1000), Interval(20, 100), { false, false, true, true, false, true, true, false } },
{ 78, Interval(1000, 1000), Interval(20, 100), { false, false, true, true, false, true, false, false } },
// It's possible to add more tests with borders that touch each other (e.g. first starts before/on/after second
// and first ends directly before/after second). However, such cases are not handled differently in the Interval
// class (only adjacent intervals, where first ends directly before second starts and vice versa), so I omitted
// them here.
};
// Check every predicate of every entry; results[i] follows the predicate order listed above the table,
// and the running number in the failure message identifies the offending entry.
for (auto &entry : testData) {
XCTAssert(entry.interval1.startsBeforeDisjoint(entry.interval2) == entry.results[0], @"entry: %zu", entry.runningNumber);
XCTAssert(entry.interval1.startsBeforeNonDisjoint(entry.interval2) == entry.results[1], @"entry: %zu", entry.runningNumber);
XCTAssert(entry.interval1.startsAfter(entry.interval2) == entry.results[2], @"entry: %zu", entry.runningNumber);
XCTAssert(entry.interval1.startsAfterDisjoint(entry.interval2) == entry.results[3], @"entry: %zu", entry.runningNumber);
XCTAssert(entry.interval1.startsAfterNonDisjoint(entry.interval2) == entry.results[4], @"entry: %zu", entry.runningNumber);
XCTAssert(entry.interval1.disjoint(entry.interval2) == entry.results[5], @"entry: %zu", entry.runningNumber);
XCTAssert(entry.interval1.adjacent(entry.interval2) == entry.results[6], @"entry: %zu", entry.runningNumber);
XCTAssert(entry.interval1.properlyContains(entry.interval2) == entry.results[7], @"entry: %zu", entry.runningNumber);
}
// Union() combines the smaller start with the larger end, so an invalid operand (e.g. the default -1..-2)
// still contributes its bounds to the result.
XCTAssert(Interval().Union(Interval(10, 100)) == Interval(-1, 100));
XCTAssert(Interval(10, 10).Union(Interval(10, 100)) == Interval(10, 100));
XCTAssert(Interval(10, 11).Union(Interval(10, 100)) == Interval(10, 100));
XCTAssert(Interval(10, 1000).Union(Interval(10, 100)) == Interval(10, 1000));
XCTAssert(Interval(1000, 30).Union(Interval(10, 100)) == Interval(10, 100));
XCTAssert(Interval(1000, 2000).Union(Interval(10, 100)) == Interval(10, 2000));
XCTAssert(Interval(500, 2000).Union(Interval(10, 1000)) == Interval(10, 2000));
// intersection() combines the larger start with the smaller end; for disjoint or invalid operands the
// result is itself an invalid (end before start) interval.
XCTAssert(Interval().intersection(Interval(10, 100)) == Interval(10, -2));
XCTAssert(Interval(10, 10).intersection(Interval(10, 100)) == Interval(10, 10));
XCTAssert(Interval(10, 11).intersection(Interval(10, 100)) == Interval(10, 11));
XCTAssert(Interval(10, 1000).intersection(Interval(10, 100)) == Interval(10, 100));
XCTAssert(Interval(1000, 30).intersection(Interval(10, 100)) == Interval(1000, 30));
XCTAssert(Interval(1000, 2000).intersection(Interval(10, 100)) == Interval(1000, 100));
XCTAssert(Interval(500, 2000).intersection(Interval(10, 1000)) == Interval(500, 1000));
// toString() renders the interval as "<start>..<end>".
XCTAssert(Interval().toString() == L"-1..-2");
XCTAssert(Interval(10, 10).toString() == L"10..10");
XCTAssert(Interval(1000, 2000).toString() == L"1000..2000");
XCTAssert(Interval(500, INT_MAX).toString() == L"500.." + std::to_wstring(INT_MAX));
}
@end

View File

@ -356,7 +356,7 @@ std::string BufferedTokenStream::getSourceName()
std::wstring BufferedTokenStream::getText() {
lazyInit();
fill();
return getText(misc::Interval::of(0, (int)size() - 1));
return getText(misc::Interval(0, (int)size() - 1));
}
std::wstring BufferedTokenStream::getText(const misc::Interval &interval) {
@ -387,7 +387,7 @@ std::wstring BufferedTokenStream::getText(RuleContext *ctx) {
std::wstring BufferedTokenStream::getText(Token *start, Token *stop) {
if (start != nullptr && stop != nullptr) {
return getText(misc::Interval::of(start->getTokenIndex(), stop->getTokenIndex()));
return getText(misc::Interval(start->getTokenIndex(), stop->getTokenIndex()));
}
return L"";

View File

@ -103,7 +103,7 @@ std::wstring CommonToken::getText() {
}
size_t n = input->size();
if ((size_t)start < n && (size_t)stop < n) {
return input->getText(misc::Interval::of(start,stop));
return input->getText(misc::Interval(start,stop));
} else {
return L"<EOF>";
}

View File

@ -52,7 +52,7 @@ CommonToken *CommonTokenFactory::create(std::pair<TokenSource*, CharStream*> *so
if (text != L"") {
t->setText(text);
} else if (copyText && source->second != nullptr) {
t->setText(source->second->getText(misc::Interval::of(start,stop)));
t->setText(source->second->getText(misc::Interval(start, stop)));
}
return t;

View File

@ -56,7 +56,7 @@ void DiagnosticErrorListener::reportAmbiguity(Parser *recognizer, dfa::DFA *dfa,
wchar_t buf[16];
std::wstring decision = getDecisionDescription(recognizer, dfa);
antlrcpp::BitSet *conflictingAlts = getConflictingAlts(ambigAlts, configs);
std::wstring text = recognizer->getTokenStream()->getText(misc::Interval::of((int)startIndex, (int)stopIndex));
std::wstring text = recognizer->getTokenStream()->getText(misc::Interval((int)startIndex, (int)stopIndex));
std::wstring message = L"reportAmbiguity d=" + decision + L": ambigAlts=" + conflictingAlts->toString() + L", input='" + text + L"'";
swprintf(buf, sizeof(buf) / sizeof(*buf), L"%d", 5);
recognizer->notifyErrorListeners(message);
@ -65,7 +65,7 @@ void DiagnosticErrorListener::reportAmbiguity(Parser *recognizer, dfa::DFA *dfa,
void DiagnosticErrorListener::reportAttemptingFullContext(Parser *recognizer, dfa::DFA *dfa, size_t startIndex,
size_t stopIndex, antlrcpp::BitSet *conflictingAlts, atn::ATNConfigSet *configs) {
std::wstring decision = getDecisionDescription(recognizer, dfa);
std::wstring text = recognizer->getTokenStream()->getText(misc::Interval::of((int)startIndex, (int)stopIndex));
std::wstring text = recognizer->getTokenStream()->getText(misc::Interval((int)startIndex, (int)stopIndex));
std::wstring message = L"reportAttemptingFullContext d=" + decision + L", input='" + text + L"'";
recognizer->notifyErrorListeners(message);
}
@ -73,7 +73,7 @@ void DiagnosticErrorListener::reportAttemptingFullContext(Parser *recognizer, df
void DiagnosticErrorListener::reportContextSensitivity(Parser *recognizer, dfa::DFA *dfa, size_t startIndex,
size_t stopIndex, int prediction, atn::ATNConfigSet *configs) {
std::wstring decision = getDecisionDescription(recognizer, dfa);
std::wstring text = recognizer->getTokenStream()->getText(misc::Interval::of((int)startIndex, (int)stopIndex));
std::wstring text = recognizer->getTokenStream()->getText(misc::Interval((int)startIndex, (int)stopIndex));
std::wstring message = L"reportContextSensitivity d=" + decision + L", input='" + text + L"'";
recognizer->notifyErrorListeners(message);
}

View File

@ -286,7 +286,7 @@ void Lexer::recover(LexerNoViableAltException *e) {
}
void Lexer::notifyListeners(LexerNoViableAltException *e) {
std::wstring text = _input->getText(misc::Interval::of(_tokenStartCharIndex, (int)_input->index()));
std::wstring text = _input->getText(misc::Interval(_tokenStartCharIndex, (int)_input->index()));
std::wstring msg = std::wstring(L"token recognition error at: '") + getErrorDisplay(text) + std::wstring(L"'");
ANTLRErrorListener *listener = getErrorListenerDispatch();

View File

@ -55,7 +55,7 @@ CharStream *LexerNoViableAltException::getInputStream() {
std::wstring LexerNoViableAltException::toString() {
std::wstring symbol = L"";
if (startIndex < getInputStream()->size()) {
symbol = getInputStream()->getText(misc::Interval::of((int)startIndex, (int)startIndex));
symbol = getInputStream()->getText(misc::Interval((int)startIndex, (int)startIndex));
symbol = antlrcpp::escapeWhitespace(symbol, false);
}
std::wstring format = L"LexerNoViableAltException('" + symbol + L"')";

View File

@ -165,7 +165,7 @@ misc::Interval ParserRuleContext::getSourceInterval() {
if (start == nullptr || stop == nullptr) {
return misc::Interval::INVALID;
}
return misc::Interval::of(start->getTokenIndex(), stop->getTokenIndex());
return misc::Interval(start->getTokenIndex(), stop->getTokenIndex());
}
Token *ParserRuleContext::getStart() {

View File

@ -253,7 +253,7 @@ std::vector<TokenStreamRewriter::RewriteOperation*> TokenStreamRewriter::initial
}
std::wstring TokenStreamRewriter::getText() {
return getText(DEFAULT_PROGRAM_NAME, Interval::of(0, (int)tokens->size() - 1));
return getText(DEFAULT_PROGRAM_NAME, Interval(0, (int)tokens->size() - 1));
}
std::wstring TokenStreamRewriter::getText(const Interval &interval) {

View File

@ -108,7 +108,7 @@ namespace runtime {
std::wstring UnbufferedTokenStream::getText(Token* start, Token* stop)
{
return getText(misc::Interval::of(start->getTokenIndex(), stop->getTokenIndex()));
return getText(misc::Interval(start->getTokenIndex(), stop->getTokenIndex()));
}
void UnbufferedTokenStream::consume()

View File

@ -559,7 +559,7 @@ dfa::DFA *LexerATNSimulator::getDFA(size_t mode) {
std::wstring LexerATNSimulator::getText(CharStream *input) {
// index is first lookahead char, don't include.
return input->getText(misc::Interval::of((int)_startIndex, (int)input->index() - 1));
return input->getText(misc::Interval((int)_startIndex, (int)input->index() - 1));
}
size_t LexerATNSimulator::getLine() const {

View File

@ -1105,7 +1105,7 @@ dfa::DFAState *ParserATNSimulator::addDFAState(dfa::DFA *dfa, dfa::DFAState *D)
void ParserATNSimulator::reportAttemptingFullContext(dfa::DFA *dfa, BitSet *conflictingAlts, ATNConfigSet *configs,
size_t startIndex, size_t stopIndex) {
if (debug || retry_debug) {
misc::Interval interval = misc::Interval::of((int)startIndex, (int)stopIndex);
misc::Interval interval = misc::Interval((int)startIndex, (int)stopIndex);
std::wcout << L"reportAttemptingFullContext decision=" << dfa->decision << L":" << configs << L", input=" << parser->getTokenStream()->getText(interval) << std::endl;
}
if (parser != nullptr) {
@ -1116,7 +1116,7 @@ void ParserATNSimulator::reportAttemptingFullContext(dfa::DFA *dfa, BitSet *conf
void ParserATNSimulator::reportContextSensitivity(dfa::DFA *dfa, int prediction, ATNConfigSet *configs, size_t startIndex,
size_t stopIndex) {
if (debug || retry_debug) {
misc::Interval interval = misc::Interval::of((int)startIndex, (int)stopIndex);
misc::Interval interval = misc::Interval((int)startIndex, (int)stopIndex);
std::wcout << L"reportContextSensitivity decision=" << dfa->decision << L":" << configs << L", input=" << parser->getTokenStream()->getText(interval) << std::endl;
}
if (parser != nullptr) {
@ -1143,7 +1143,7 @@ void ParserATNSimulator::reportAmbiguity(dfa::DFA *dfa, dfa::DFAState *D, size_t
// }
// i++;
// }
misc::Interval interval = misc::Interval::of(startIndex, stopIndex);
misc::Interval interval = misc::Interval(startIndex, stopIndex);
std::wcout << L"reportAmbiguity " << ambigAlts << L":" << configs << L", input=" << parser->getTokenStream()->getText(interval) << std::endl;
}
if (parser != nullptr) {

View File

@ -103,17 +103,12 @@ size_t PredictionContext::calculateHashCode(const std::vector<PredictionContext*
for (auto parent : parents) {
hash = MurmurHash::update(hash, (size_t)parent);
}
for (std::vector<PredictionContext*>::size_type i = 0; i < parents.size() ; i++) {
PredictionContext * parent = parents[i];
hash = MurmurHash::update(hash, (size_t)parent);
}
for (auto returnState : returnStates) {
hash = MurmurHash::update(hash, (size_t)returnState);
}
hash = MurmurHash::finish(hash, 2 * sizeof(parents) / sizeof(parents[0]));
return hash;
return MurmurHash::finish(hash, parents.size() + returnStates.size());
}
PredictionContext *PredictionContext::merge(PredictionContext *a, PredictionContext *b,

View File

@ -34,7 +34,6 @@
using namespace org::antlr::v4::runtime::misc;
Interval const Interval::INVALID;
std::map<int, Interval> Interval::cache;
int Interval::creates = 0;
int Interval::misses = 0;
@ -45,23 +44,10 @@ Interval::Interval() : Interval(-1, -2) {
}
Interval::Interval(int a_, int b_) {
InitializeInstanceFields();
a = a_;
b = b_;
}
Interval Interval::of(int a_, int b_) {
// cache just a..a
if (a_ != b_ || a_ < 0) {
return Interval(a_, b_);
}
if (cache.find(a_) == cache.end()) {
cache[a_] = Interval(a_, a_);
}
return cache[a_];
}
int Interval::length() const {
if (b < a) {
return 0;
@ -97,7 +83,7 @@ bool Interval::startsAfterDisjoint(const Interval &other) const {
}
bool Interval::startsAfterNonDisjoint(const Interval &other) const {
return a > other.a && a <= other.b; // this.b>=other.b implied
return a > other.a && a <= other.b; // b >= other.b implied
}
bool Interval::disjoint(const Interval &other) const {
@ -113,34 +99,13 @@ bool Interval::properlyContains(const Interval &other) const {
}
Interval Interval::Union(const Interval &other) const {
return Interval::of(std::min(a, other.a), std::max(b, other.b));
return Interval(std::min(a, other.a), std::max(b, other.b));
}
Interval Interval::intersection(const Interval &other) const {
return Interval::of(std::max(a, other.a), std::min(b, other.b));
}
Interval Interval::differenceNotProperlyContained(const Interval &other) const {
Interval diff;
// other.a to left of this.a (or same)
if (other.startsBeforeNonDisjoint(*this)) {
diff = Interval::of(std::max(a, other.b + 1), b);
}
// other.a to right of this.a
else if (other.startsAfterNonDisjoint(*this)) {
diff = Interval::of(a, other.a - 1);
}
return diff;
return Interval(std::max(a, other.a), std::min(b, other.b));
}
std::wstring Interval::toString() const {
return std::to_wstring(a) + L".." + std::to_wstring(b);
}
void Interval::InitializeInstanceFields() {
a = 0;
b = 0;
}

View File

@ -53,15 +53,6 @@ namespace misc {
Interval();
Interval(int a_, int b_);
/// <summary>
/// Interval objects are used readonly so share all with the
/// same single value a==b up to some max size. Use an array as a perfect hash.
/// Return shared object for 0..INTERVAL_POOL_MAX_VALUE or a new
/// Interval object with a..a in it. On Java.g4, 218623 IntervalSets
/// have a..a (set with 1 element).
/// </summary>
static Interval of(int a_, int b_);
/// <summary>
/// return number of elements between a and b inclusively. x..x is length 1.
/// if b < a, then length is 0. 9..10 has length 2.
@ -110,19 +101,9 @@ namespace misc {
/// Return the interval in common between this and o </summary>
virtual Interval intersection(const Interval &other) const;
/// <summary>
/// Return the interval with elements from this not in other;
/// other must not be totally enclosed (properly contained)
/// within this, which would result in two disjoint intervals
/// instead of the single one returned by this method.
/// </summary>
virtual Interval differenceNotProperlyContained(const Interval &other) const;
virtual std::wstring toString() const;
private:
void InitializeInstanceFields();
static std::map<int, Interval> cache;
};
} // namespace misc

View File

@ -84,7 +84,7 @@ void IntervalSet::add(int el) {
}
void IntervalSet::add(int a, int b) {
add(Interval::of(a, b));
add(Interval(a, b));
}
void IntervalSet::add(const Interval &addition) {

View File

@ -33,6 +33,48 @@
using namespace org::antlr::v4::runtime::misc;
// A variation of the MurmurHash3 implementation (https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp)
// Here we unrolled the loop used there into individual calls to update(), as we usually hash object fields
// instead of entire buffers.
// Platform-specific functions and macros.
// ROTL32/ROTL64 rotate an unsigned 32/64-bit value left by the given bit count;
// BIG_CONSTANT spells a 64-bit integer literal in the form the compiler expects.

// Microsoft Visual Studio
#if defined(_MSC_VER)

#define FORCE_INLINE __forceinline

#include <stdlib.h>

#define ROTL32(x,y) _rotl(x,y)
#define ROTL64(x,y) _rotl64(x,y)

#define BIG_CONSTANT(x) (x)

#else // defined(_MSC_VER)

// Other compilers

#define FORCE_INLINE inline __attribute__((always_inline))

// Rotates the 32-bit value x left by r bits.
// The count is reduced modulo 32 so that r == 0 (or any multiple of 32) never produces a shift by the
// full type width, which is undefined behaviour in C++. Counts 1..31 give the same result as before.
inline uint32_t rotl32 (uint32_t x, int8_t r)
{
  const unsigned count = static_cast<unsigned>(r) & 31u;
  return (x << count) | (x >> ((32u - count) & 31u));
}

// Rotates the 64-bit value x left by r bits.
// The count is reduced modulo 64 for the same reason as in rotl32(). Counts 1..63 are unchanged.
inline uint64_t rotl64 (uint64_t x, int8_t r)
{
  const unsigned count = static_cast<unsigned>(r) & 63u;
  return (x << count) | (x >> ((64u - count) & 63u));
}

#define ROTL32(x,y) rotl32(x,y)
#define ROTL64(x,y) rotl64(x,y)

#define BIG_CONSTANT(x) (x##LLU)

#endif // !defined(_MSC_VER)
MurmurHash::MurmurHash() {
}
@ -44,34 +86,78 @@ size_t MurmurHash::initialize(size_t seed) {
return seed;
}
size_t MurmurHash::update(size_t hash, size_t seed) {
// Decide whether the 32-bit or the 64-bit MurmurHash variant is compiled.
// Exactly one of ENVIRONMENT64 / ENVIRONMENT32 is defined for the compilers handled here.

// Windows targets: _WIN64 is only set for 64-bit builds.
#if _WIN32 || _WIN64
#if _WIN64
#define ENVIRONMENT64
#else
#define ENVIRONMENT32
#endif
#endif

// GCC-compatible compilers (skipped when the Windows branch above already decided).
// __LP64__ covers every LP64 target; the explicit architecture macros are kept for toolchains that do
// not define it. Without __LP64__/__aarch64__, arm64 builds were classified as 32-bit and used the
// 32-bit hash on a 64-bit size_t.
#if __GNUC__ && !defined(ENVIRONMENT64) && !defined(ENVIRONMENT32)
#if __LP64__ || __x86_64__ || __ppc64__ || __aarch64__
#define ENVIRONMENT64
#else
#define ENVIRONMENT32
#endif
#endif
#if defined(ENVIRONMENT32)
size_t MurmurHash::update(size_t hash, size_t value) {
static const size_t c1 = 0xCC9E2D51;
static const size_t c2 = 0x1B873593;
static const size_t r1 = 15;
static const size_t r2 = 13;
static const size_t m = 5;
static const size_t n = 0xE6546B64;
size_t k = seed;
k = k * c1;
k = (k << r1) | k >> (32 - r1);
k = k * c2;
size_t k1 = value;
k1 *= c1;
k1 = ROTL32(k1, 15);
k1 *= c2;
hash = hash ^ k;
hash = (hash << r2) | hash >> (32 - r2);
hash = hash * m + n;
hash ^= k1;
hash = ROTL32(hash, 13);
hash = hash * 5 + 0xE6546B64;
return hash;
}
size_t MurmurHash::finish(size_t hash, size_t numberOfWords) {
hash = hash ^ (numberOfWords * 4);
hash = hash ^ hash >> 16;
hash = hash * 0x85EBCA6B;
hash = hash ^ hash >> 13;
hash = hash * 0xC2B2AE35;
hash = hash ^ hash >> 16;
size_t MurmurHash::finish(size_t hash, size_t entryCount) {
hash ^= entryCount * 4;
hash ^= hash >> 16;
hash *= 0x85EBCA6B;
hash ^= hash >> 13;
hash *= 0xC2B2AE35;
hash ^= hash >> 16;
return hash;
}
#else
// 64-bit variant: folds one input word into the running hash.
// `hash` is the intermediate hash value, `value` the next word to add; returns the updated intermediate hash.
size_t MurmurHash::update(size_t hash, size_t value) {
  static const size_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
  static const size_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

  // Scramble the incoming word: multiply, rotate, multiply.
  const size_t scrambled = ROTL64(value * c1, 31) * c2;

  // Merge it into the state, rotate, then apply the affine step.
  const size_t rotated = ROTL64(hash ^ scrambled, 27);
  return rotated * 5 + 0x52dce729;
}
// 64-bit variant: applies the final mixing steps to the intermediate value `hash`.
// `entryCount` is the number of update() calls made before finishing; it is folded in as a byte length
// (8 bytes per entry). Returns the final hash value.
size_t MurmurHash::finish(size_t hash, size_t entryCount) {
  size_t mixed = hash ^ (entryCount * 8);

  // Three xor-shift rounds, the first two each followed by a multiplication.
  mixed = (mixed ^ (mixed >> 33)) * 0xff51afd7ed558ccd;
  mixed = (mixed ^ (mixed >> 33)) * 0xc4ceb9fe1a85ec53;
  return mixed ^ (mixed >> 33);
}
#endif

View File

@ -56,39 +56,25 @@ namespace misc {
/// Returns the updated intermediate hash value.
static size_t update(size_t hash, size_t value);
/// Update the intermediate hash value for the next input {@code value}.
/// <param name="hash"> the intermediate hash value </param>
/// <param name="value"> the value to add to the current hash </param>
/// Returns the updated intermediate hash value.
template<typename T>
static int update(size_t hash, T *value) {
std::hash<T> hashFunction;
return update(hash, value != nullptr ? hashFunction(*value) : 0);
}
/// <summary>
/// Apply the final computation steps to the intermediate value {@code hash}
/// to form the final result of the MurmurHash 3 hash function.
/// </summary>
/// <param name="hash"> the intermediate hash value </param>
/// <param name="numberOfWords"> the number of integer values added to the hash </param>
/// <param name="entryCount"> the number of calls to update() before calling finish() </param>
/// <returns> the final hash result </returns>
static size_t finish(size_t hash, size_t numberOfWords);
static size_t finish(size_t hash, size_t entryCount);
/// <summary>
/// Utility function to compute the hash code of an array using the
/// MurmurHash algorithm.
/// </summary>
/// Utility function to compute the hash code of an array using the MurmurHash3 algorithm.
///
/// @param <T> the array element type </param>
/// <param name="data"> the array data </param>
/// <param name="seed"> the seed for the MurmurHash algorithm </param>
/// <returns> the hash code of the data </returns>
template<typename T> // where T is C array type
static size_t hashCode(const T *data, std::size_t size, size_t seed) {
size_t hash = initialize(seed);
for(size_t i = 0; i < size; i++) {
for (size_t i = 0; i < size; i++) {
hash = update(hash, (size_t)data[i]);
}