Update PredictionContext to use the MurmurHash 3 hash algorithm

This commit is contained in:
Sam Harwell 2013-05-05 13:24:19 -05:00
parent a7d2838838
commit fc21b41afb
4 changed files with 175 additions and 48 deletions

View File

@ -58,49 +58,6 @@ public class ArrayPredictionContext extends PredictionContext {
this.returnStates = returnStates;
}
//ArrayPredictionContext(@NotNull PredictionContext[] parents, int[] returnStates, int parentHashCode, int returnStateHashCode) {
// super(calculateHashCode(parentHashCode, returnStateHashCode));
// assert parents.length == returnStates.length;
// assert returnStates.length > 1 || returnStates[0] != EMPTY_FULL_STATE_KEY : "Should be using PredictionContext.EMPTY instead.";
//
// this.parents = parents;
// this.returnStates = returnStates;
// }
//
//ArrayPredictionContext(@NotNull PredictionContext[] parents, int[] returnStates, int hashCode) {
// super(hashCode);
// assert parents.length == returnStates.length;
// assert returnStates.length > 1 || returnStates[0] != EMPTY_FULL_STATE_KEY : "Should be using PredictionContext.EMPTY instead.";
//
// this.parents = parents;
// this.returnStates = returnStates;
// }
/**
 * Computes the hash code for a pair of parallel parent / return-state arrays.
 * The two arrays are hashed independently and the partial results are then
 * merged by the two-int {@code calculateHashCode} overload.
 *
 * @param parents the parent contexts
 * @param returnStates the return states
 * @return the combined hash code
 */
protected static int calculateHashCode(PredictionContext[] parents, int[] returnStates) {
    int parentHash = calculateParentHashCode(parents);
    int returnStateHash = calculateReturnStatesHashCode(returnStates);
    return calculateHashCode(parentHash, returnStateHash);
}
/**
 * Folds the hash codes of the given parent contexts into a single value,
 * starting from 1 and applying {@code (31 * hash) ^ parent.hashCode()} for
 * each non-null entry in array order.
 *
 * @param parents the parent contexts; {@code null} entries are skipped
 * @return the accumulated hash code
 */
protected static int calculateParentHashCode(PredictionContext[] parents) {
    int result = 1;
    for (int i = 0; i < parents.length; i++) {
        PredictionContext parent = parents[i];
        if (parent == null) {
            // can be null for full ctx stack in ArrayPredictionContext
            continue;
        }
        result = (31 * result) ^ parent.hashCode();
    }
    return result;
}
/**
 * Folds the given return states into a single hash value, starting from 1
 * and applying {@code (31 * hash) ^ state} for each entry in array order.
 *
 * @param returnStates the return states to hash
 * @return the accumulated hash code; 1 for an empty array
 */
protected static int calculateReturnStatesHashCode(int[] returnStates) {
    int result = 1;
    for (int i = 0; i < returnStates.length; i++) {
        result = (31 * result) ^ returnStates[i];
    }
    return result;
}
@Override
public Iterator<SingletonPredictionContext> iterator() {
return new Iterator<SingletonPredictionContext>() {

View File

@ -33,6 +33,7 @@ package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.misc.DoubleKeyMap;
import org.antlr.v4.runtime.misc.MurmurHash;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
@ -61,9 +62,32 @@ public abstract class PredictionContext implements Iterable<SingletonPredictionC
*/
public static final int EMPTY_RETURN_STATE = Integer.MAX_VALUE;
private static final int INITIAL_HASH = 1;
public static int globalNodeCount = 0;
public final int id = globalNodeCount++;
/**
* Stores the computed hash code of this {@link PredictionContext}. The hash
* code is computed in parts to match the following reference algorithm.
*
* <pre>
* private int referenceHashCode() {
* int hash = {@link MurmurHash#initialize}({@link #INITIAL_HASH});
*
* for (int i = 0; i &lt; {@link #size()}; i++) {
* hash = {@link MurmurHash#update}(hash, {@link #getParent}(i));
* }
*
* for (int i = 0; i &lt; {@link #size()}; i++) {
* hash = {@link MurmurHash#update}(hash, {@link #getReturnState}(i));
* }
*
* hash = {@link MurmurHash#finish}(hash, 2 * {@link #size()});
* return hash;
* }
* </pre>
*/
public final int cachedHashCode;
protected PredictionContext(int cachedHashCode) {
@ -117,12 +141,40 @@ public abstract class PredictionContext implements Iterable<SingletonPredictionC
}
@Override
public int hashCode() {
public final int hashCode() {
return cachedHashCode;
}
protected static int calculateHashCode(int parentHashCode, int returnStateHashCode) {
return 5 * 5 * 7 + 5 * parentHashCode + returnStateHashCode;
@Override
public abstract boolean equals(Object obj);
/**
 * Computes the hash code produced when no words are fed to the hash: the
 * hash is seeded with {@code INITIAL_HASH} and finished immediately with a
 * word count of zero.
 *
 * @return the hash code for a context with no parent / return-state data
 */
protected static int calculateEmptyHashCode() {
    int seeded = MurmurHash.initialize(INITIAL_HASH);
    return MurmurHash.finish(seeded, 0);
}
/**
 * Computes the hash code for a single parent / return-state pair. The hash
 * is seeded with {@code INITIAL_HASH}, updated with the parent and then the
 * return state, and finished with a word count of two.
 *
 * @param parent the parent context
 * @param returnState the return state
 * @return the hash code for the pair
 */
protected static int calculateHashCode(PredictionContext parent, int returnState) {
    int seeded = MurmurHash.initialize(INITIAL_HASH);
    int withParent = MurmurHash.update(seeded, parent);
    int withReturnState = MurmurHash.update(withParent, returnState);
    return MurmurHash.finish(withReturnState, 2);
}
/**
 * Computes the hash code for parallel arrays of parents and return states.
 * The hash is seeded with {@code INITIAL_HASH}, updated with every parent in
 * order, then with every return state in order, and finally finished.
 *
 * @param parents the parent contexts
 * @param returnStates the return states
 * @return the hash code for the arrays
 */
protected static int calculateHashCode(PredictionContext[] parents, int[] returnStates) {
    int result = MurmurHash.initialize(INITIAL_HASH);

    for (int i = 0; i < parents.length; i++) {
        result = MurmurHash.update(result, parents[i]);
    }

    for (int i = 0; i < returnStates.length; i++) {
        result = MurmurHash.update(result, returnStates[i]);
    }

    // The word count is taken from parents only; presumably returnStates
    // always has the same length -- confirm against the callers.
    int wordCount = 2 * parents.length;
    return MurmurHash.finish(result, wordCount);
}
// dispatch

View File

@ -37,8 +37,7 @@ public class SingletonPredictionContext extends PredictionContext {
public final int returnState;
SingletonPredictionContext(PredictionContext parent, int returnState) {
super(calculateHashCode(parent!=null ? 31 ^ parent.hashCode() : 1,
31 ^ returnState));
super(parent != null ? calculateHashCode(parent, returnState) : calculateEmptyHashCode());
assert returnState!=ATNState.INVALID_STATE_NUMBER;
this.parent = parent;
this.returnState = returnState;

View File

@ -0,0 +1,119 @@
/*
* [The "BSD license"]
* Copyright (c) 2012 Terence Parr
* Copyright (c) 2012 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.runtime.misc;
/**
 * Incremental helpers for computing a MurmurHash 3 hash over a sequence of
 * {@code int} words. A hash is built by calling {@link #initialize} once,
 * {@link #update} once per word, and {@link #finish} with the number of
 * words that were added.
 *
 * @author Sam Harwell
 */
public final class MurmurHash {

    /** Seed used by {@link #initialize()} when the caller supplies none. */
    private static final int DEFAULT_SEED = 0;

    /** Multiplier applied to an incoming word before it is rotated. */
    private static final int C1 = 0xCC9E2D51;
    /** Multiplier applied to an incoming word after it is rotated. */
    private static final int C2 = 0x1B873593;
    /** Left-rotation distance applied to an incoming word. */
    private static final int R1 = 15;
    /** Left-rotation distance applied to the running hash. */
    private static final int R2 = 13;
    /** Multiplier applied to the running hash after rotation. */
    private static final int M = 5;
    /** Constant added to the running hash after multiplication. */
    private static final int N = 0xE6546B64;

    /** Not instantiable; this class only hosts static helpers. */
    private MurmurHash() {
    }

    /**
     * Starts a new hash computation with the default seed.
     *
     * @return the intermediate hash value
     */
    public static int initialize() {
        return initialize(DEFAULT_SEED);
    }

    /**
     * Starts a new hash computation with the given {@code seed}.
     *
     * @param seed the seed
     * @return the intermediate hash value, which is the seed itself
     */
    public static int initialize(int seed) {
        return seed;
    }

    /**
     * Mixes the next input word {@code value} into the intermediate hash.
     *
     * @param hash the intermediate hash value
     * @param value the word to add to the current hash
     * @return the updated intermediate hash value
     */
    public static int update(int hash, int value) {
        // Scramble the incoming word on its own first.
        int word = value * C1;
        word = Integer.rotateLeft(word, R1);
        word *= C2;

        // Then fold it into the running state.
        int state = Integer.rotateLeft(hash ^ word, R2);
        return state * M + N;
    }

    /**
     * Mixes the hash code of {@code value} into the intermediate hash. A
     * {@code null} value contributes the word {@code 0}.
     *
     * @param hash the intermediate hash value
     * @param value the object whose {@code hashCode()} is added, or {@code null}
     * @return the updated intermediate hash value
     */
    public static int update(int hash, Object value) {
        int word = (value == null) ? 0 : value.hashCode();
        return update(hash, word);
    }

    /**
     * Applies the final mixing steps to the intermediate value {@code hash}
     * to produce the result of the MurmurHash 3 hash function.
     *
     * @param hash the intermediate hash value
     * @param numberOfWords the number of integer values added to the hash
     * @return the final hash result
     */
    public static int finish(int hash, int numberOfWords) {
        // The word count is scaled by four (one int word is four bytes)
        // before it is mixed in.
        int result = hash ^ (numberOfWords * 4);
        result ^= result >>> 16;
        result *= 0x85EBCA6B;
        result ^= result >>> 13;
        result *= 0xC2B2AE35;
        result ^= result >>> 16;
        return result;
    }
}