Merge pull request #246 from sharwell/murmurhash

MurmurHash
This commit is contained in:
Terence Parr 2013-05-05 12:32:44 -07:00
commit e1dfa24b8d
13 changed files with 252 additions and 84 deletions

View File

@ -31,6 +31,7 @@
package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.misc.MurmurHash;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
@ -159,11 +160,12 @@ public class ATNConfig {
@Override
public int hashCode() {
int hashCode = 7;
hashCode = 5 * hashCode + state.stateNumber;
hashCode = 5 * hashCode + alt;
hashCode = 5 * hashCode + (context != null ? context.hashCode() : 0);
hashCode = 5 * hashCode + semanticContext.hashCode();
int hashCode = MurmurHash.initialize(7);
hashCode = MurmurHash.update(hashCode, state.stateNumber);
hashCode = MurmurHash.update(hashCode, alt);
hashCode = MurmurHash.update(hashCode, context);
hashCode = MurmurHash.update(hashCode, semanticContext);
hashCode = MurmurHash.finish(hashCode, 4);
return hashCode;
}

View File

@ -58,49 +58,6 @@ public class ArrayPredictionContext extends PredictionContext {
this.returnStates = returnStates;
}
//ArrayPredictionContext(@NotNull PredictionContext[] parents, int[] returnStates, int parentHashCode, int returnStateHashCode) {
// super(calculateHashCode(parentHashCode, returnStateHashCode));
// assert parents.length == returnStates.length;
// assert returnStates.length > 1 || returnStates[0] != EMPTY_FULL_STATE_KEY : "Should be using PredictionContext.EMPTY instead.";
//
// this.parents = parents;
// this.returnStates = returnStates;
// }
//
//ArrayPredictionContext(@NotNull PredictionContext[] parents, int[] returnStates, int hashCode) {
// super(hashCode);
// assert parents.length == returnStates.length;
// assert returnStates.length > 1 || returnStates[0] != EMPTY_FULL_STATE_KEY : "Should be using PredictionContext.EMPTY instead.";
//
// this.parents = parents;
// this.returnStates = returnStates;
// }
protected static int calculateHashCode(PredictionContext[] parents, int[] returnStates) {
return calculateHashCode(calculateParentHashCode(parents),
calculateReturnStatesHashCode(returnStates));
}
protected static int calculateParentHashCode(PredictionContext[] parents) {
int hashCode = 1;
for (PredictionContext p : parents) {
if ( p!=null ) { // can be null for full ctx stack in ArrayPredictionContext
hashCode = hashCode * 31 ^ p.hashCode();
}
}
return hashCode;
}
protected static int calculateReturnStatesHashCode(int[] returnStates) {
int hashCode = 1;
for (int state : returnStates) {
hashCode = hashCode * 31 ^ state;
}
return hashCode;
}
@Override
public Iterator<SingletonPredictionContext> iterator() {
return new Iterator<SingletonPredictionContext>() {

View File

@ -30,6 +30,7 @@
package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.misc.MurmurHash;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
@ -84,8 +85,13 @@ public class LexerATNConfig extends ATNConfig {
@Override
public int hashCode() {
int hashCode = super.hashCode();
hashCode = 35 * hashCode ^ (passedThroughNonGreedyDecision ? 1 : 0);
int hashCode = MurmurHash.initialize(7);
hashCode = MurmurHash.update(hashCode, state.stateNumber);
hashCode = MurmurHash.update(hashCode, alt);
hashCode = MurmurHash.update(hashCode, context);
hashCode = MurmurHash.update(hashCode, semanticContext);
hashCode = MurmurHash.update(hashCode, passedThroughNonGreedyDecision ? 1 : 0);
hashCode = MurmurHash.finish(hashCode, 5);
return hashCode;
}

View File

@ -33,6 +33,7 @@ package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.misc.DoubleKeyMap;
import org.antlr.v4.runtime.misc.MurmurHash;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
@ -61,9 +62,32 @@ public abstract class PredictionContext implements Iterable<SingletonPredictionC
*/
public static final int EMPTY_RETURN_STATE = Integer.MAX_VALUE;
private static final int INITIAL_HASH = 1;
public static int globalNodeCount = 0;
public final int id = globalNodeCount++;
/**
* Stores the computed hash code of this {@link PredictionContext}. The hash
* code is computed in parts to match the following reference algorithm.
*
* <pre>
* private int referenceHashCode() {
* int hash = {@link MurmurHash#initialize}({@link #INITIAL_HASH});
*
* for (int i = 0; i < {@link #size()}; i++) {
* hash = {@link MurmurHash#update}(hash, {@link #getParent}(i));
* }
*
* for (int i = 0; i < {@link #size()}; i++) {
* hash = {@link MurmurHash#update}(hash, {@link #getReturnState}(i));
* }
*
* hash = {@link MurmurHash#finish}(hash, 2 * {@link #size()});
* return hash;
* }
* </pre>
*/
public final int cachedHashCode;
protected PredictionContext(int cachedHashCode) {
@ -117,12 +141,40 @@ public abstract class PredictionContext implements Iterable<SingletonPredictionC
}
@Override
public int hashCode() {
public final int hashCode() {
return cachedHashCode;
}
protected static int calculateHashCode(int parentHashCode, int returnStateHashCode) {
return 5 * 5 * 7 + 5 * parentHashCode + returnStateHashCode;
@Override
public abstract boolean equals(Object obj);
/**
 * Computes the hash code used for a context that contributes no
 * parent/return-state words: the hash is seeded with {@code INITIAL_HASH}
 * and finished immediately with a word count of zero.
 *
 * @return the finished hash of an empty word sequence
 */
protected static int calculateEmptyHashCode() {
    return MurmurHash.finish(MurmurHash.initialize(INITIAL_HASH), 0);
}
/**
 * Computes the hash code for a single (parent, return state) pair.
 *
 * <p>The hash is seeded with {@code INITIAL_HASH}, then the parent is mixed
 * in first and the return state second, and the result is finished with a
 * word count of two.</p>
 *
 * @param parent the parent context to mix into the hash
 * @param returnState the return state to mix into the hash
 * @return the finished hash of the pair
 */
protected static int calculateHashCode(PredictionContext parent, int returnState) {
    int seeded = MurmurHash.initialize(INITIAL_HASH);
    int withParent = MurmurHash.update(seeded, parent);
    int withReturnState = MurmurHash.update(withParent, returnState);
    return MurmurHash.finish(withReturnState, 2);
}
/**
 * Computes the hash code for parallel arrays of parents and return states.
 *
 * <p>All parents are mixed in first, in array order, followed by all return
 * states, in array order. The hash is then finished with a word count of
 * {@code 2 * parents.length}; callers are presumably expected to pass two
 * arrays of equal length.</p>
 *
 * @param parents the parent contexts to mix into the hash
 * @param returnStates the return states to mix into the hash
 * @return the finished hash of both arrays
 */
protected static int calculateHashCode(PredictionContext[] parents, int[] returnStates) {
    int running = MurmurHash.initialize(INITIAL_HASH);

    for (int i = 0; i < parents.length; i++) {
        running = MurmurHash.update(running, parents[i]);
    }

    for (int i = 0; i < returnStates.length; i++) {
        running = MurmurHash.update(running, returnStates[i]);
    }

    return MurmurHash.finish(running, 2 * parents.length);
}
// dispatch

View File

@ -32,6 +32,7 @@ package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.misc.AbstractEqualityComparator;
import org.antlr.v4.runtime.misc.FlexibleHashMap;
import org.antlr.v4.runtime.misc.MurmurHash;
import org.antlr.v4.runtime.misc.NotNull;
import java.util.BitSet;
@ -79,9 +80,10 @@ public enum PredictionMode {
/** Code is function of (s, _, ctx, _) */
@Override
public int hashCode(ATNConfig o) {
int hashCode = 7;
hashCode = 31 * hashCode + o.state.stateNumber;
hashCode = 31 * hashCode + o.context.hashCode();
int hashCode = MurmurHash.initialize(7);
hashCode = MurmurHash.update(hashCode, o.state.stateNumber);
hashCode = MurmurHash.update(hashCode, o.context);
hashCode = MurmurHash.finish(hashCode, 2);
return hashCode;
}
@ -89,7 +91,6 @@ public enum PredictionMode {
public boolean equals(ATNConfig a, ATNConfig b) {
if ( a==b ) return true;
if ( a==null || b==null ) return false;
if ( hashCode(a) != hashCode(b) ) return false;
return a.state.stateNumber==b.state.stateNumber
&& a.context.equals(b.context);
}

View File

@ -32,6 +32,7 @@ package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.misc.MurmurHash;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Utils;
@ -91,10 +92,11 @@ public abstract class SemanticContext {
@Override
public int hashCode() {
int hashCode = 1;
hashCode = 31 * hashCode + ruleIndex;
hashCode = 31 * hashCode + predIndex;
hashCode = 31 * hashCode + (isCtxDependent ? 1 : 0);
int hashCode = MurmurHash.initialize();
hashCode = MurmurHash.update(hashCode, ruleIndex);
hashCode = MurmurHash.update(hashCode, predIndex);
hashCode = MurmurHash.update(hashCode, isCtxDependent ? 1 : 0);
hashCode = MurmurHash.finish(hashCode, 3);
return hashCode;
}
@ -137,7 +139,7 @@ public abstract class SemanticContext {
@Override
public int hashCode() {
return Arrays.hashCode(opnds);
return MurmurHash.hashCode(opnds, AND.class.hashCode());
}
@Override
@ -177,7 +179,7 @@ public abstract class SemanticContext {
@Override
public int hashCode() {
return Arrays.hashCode(opnds) + 1; // differ from AND slightly
return MurmurHash.hashCode(opnds, OR.class.hashCode());
}
@Override

View File

@ -37,8 +37,7 @@ public class SingletonPredictionContext extends PredictionContext {
public final int returnState;
SingletonPredictionContext(PredictionContext parent, int returnState) {
super(calculateHashCode(parent!=null ? 31 ^ parent.hashCode() : 1,
31 ^ returnState));
super(parent != null ? calculateHashCode(parent, returnState) : calculateEmptyHashCode());
assert returnState!=ATNState.INVALID_STATE_NUMBER;
this.parent = parent;
this.returnState = returnState;

View File

@ -36,6 +36,7 @@ import org.antlr.v4.runtime.atn.ATNConfig;
import org.antlr.v4.runtime.atn.ATNConfigSet;
import org.antlr.v4.runtime.atn.ParserATNSimulator;
import org.antlr.v4.runtime.atn.SemanticContext;
import org.antlr.v4.runtime.misc.MurmurHash;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
@ -150,11 +151,10 @@ public class DFAState {
@Override
public int hashCode() {
int h = 7;
if ( configs!=null ) {
h = h * 31 ^ configs.hashCode();
}
return h;
int hash = MurmurHash.initialize(7);
hash = MurmurHash.update(hash, configs.hashCode());
hash = MurmurHash.finish(hash, 1);
return hash;
}
/**

View File

@ -136,15 +136,17 @@ public class Array2DHashSet<T> implements Set<T> {
@Override
public int hashCode() {
int h = 0;
int hash = MurmurHash.initialize();
for (T[] bucket : buckets) {
if ( bucket==null ) continue;
for (T o : bucket) {
if ( o==null ) break;
h += comparator.hashCode(o);
hash = MurmurHash.update(hash, comparator.hashCode(o));
}
}
return h;
hash = MurmurHash.finish(hash, size());
return hash;
}
@Override

View File

@ -183,15 +183,17 @@ public class FlexibleHashMap<K,V> implements Map<K, V> {
@Override
public int hashCode() {
int h = 0;
int hash = MurmurHash.initialize();
for (LinkedList<Entry<K, V>> bucket : buckets) {
if ( bucket==null ) continue;
for (Entry<K, V> e : bucket) {
if ( e==null ) break;
h += comparator.hashCode(e.key);
hash = MurmurHash.update(hash, comparator.hashCode(e.key));
}
}
return h;
hash = MurmurHash.finish(hash, size());
return hash;
}
@Override

View File

@ -424,11 +424,14 @@ public class IntervalSet implements IntSet {
@Override
public int hashCode() {
if ( isNil() ) return 0;
int n = 0;
// just add left edge of intervals
for (Interval I : intervals) n += I.a;
return n;
int hash = MurmurHash.initialize();
for (Interval I : intervals) {
hash = MurmurHash.update(hash, I.a);
hash = MurmurHash.update(hash, I.b);
}
hash = MurmurHash.finish(hash, intervals.size() * 2);
return hash;
}
/** Are two IntervalSets equal? Because all intervals are sorted

View File

@ -0,0 +1,137 @@
/*
* [The "BSD license"]
* Copyright (c) 2012 Terence Parr
* Copyright (c) 2012 Sam Harwell
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.runtime.misc;
/**
 * Static helpers that expose the 32-bit MurmurHash 3 mixing steps so a hash
 * can be built incrementally from a sequence of {@code int} words.
 *
 * <p>Usage: obtain a starting value from {@link #initialize()} or
 * {@link #initialize(int)}, feed every word through one of the
 * {@code update} overloads, and pass the result to {@link #finish} together
 * with the number of words that were added.</p>
 *
 * @author Sam Harwell
 */
public final class MurmurHash {

    /** Seed used by {@link #initialize()} when the caller supplies none. */
    private static final int DEFAULT_SEED = 0;

    /** First multiplier applied to each input word. */
    private static final int WORD_MULTIPLIER_1 = 0xCC9E2D51;

    /** Second multiplier applied to each input word, after rotation. */
    private static final int WORD_MULTIPLIER_2 = 0x1B873593;

    /** Left-rotation distance applied to each input word. */
    private static final int WORD_ROTATION = 15;

    /** Left-rotation distance applied to the running hash. */
    private static final int HASH_ROTATION = 13;

    /** Multiplier applied to the running hash after rotation. */
    private static final int HASH_MULTIPLIER = 5;

    /** Constant added to the running hash after multiplication. */
    private static final int HASH_INCREMENT = 0xE6546B64;

    /** Not instantiable; this class only hosts static helpers. */
    private MurmurHash() {
    }

    /**
     * Starts a new hash computation with the default seed.
     *
     * @return the intermediate hash value
     */
    public static int initialize() {
        return initialize(DEFAULT_SEED);
    }

    /**
     * Starts a new hash computation with the given {@code seed}.
     *
     * @param seed the seed
     * @return the intermediate hash value, which is the seed itself
     */
    public static int initialize(int seed) {
        return seed;
    }

    /**
     * Mixes the next input word {@code value} into the intermediate hash.
     *
     * @param hash the intermediate hash value
     * @param value the value to add to the current hash
     * @return the updated intermediate hash value
     */
    public static int update(int hash, int value) {
        // Scramble the incoming word: multiply, rotate, multiply.
        int scrambled = Integer.rotateLeft(value * WORD_MULTIPLIER_1, WORD_ROTATION) * WORD_MULTIPLIER_2;

        // Fold the word into the running hash: xor, rotate, then h * 5 + n.
        int folded = Integer.rotateLeft(hash ^ scrambled, HASH_ROTATION);
        return folded * HASH_MULTIPLIER + HASH_INCREMENT;
    }

    /**
     * Mixes the hash code of {@code value} into the intermediate hash. A
     * {@code null} value contributes the word {@code 0}.
     *
     * @param hash the intermediate hash value
     * @param value the object whose hash code is added to the current hash
     * @return the updated intermediate hash value
     */
    public static int update(int hash, Object value) {
        int word = 0;
        if (value != null) {
            word = value.hashCode();
        }
        return update(hash, word);
    }

    /**
     * Applies the final avalanche steps to the intermediate value
     * {@code hash} to form the final result of the MurmurHash 3 hash
     * function.
     *
     * @param hash the intermediate hash value
     * @param numberOfWords the number of integer values added to the hash
     * @return the final hash result
     */
    public static int finish(int hash, int numberOfWords) {
        // Each word is four bytes; the byte length is folded in first.
        int result = hash ^ (numberOfWords * 4);
        result ^= result >>> 16;
        result *= 0x85EBCA6B;
        result ^= result >>> 13;
        result *= 0xC2B2AE35;
        result ^= result >>> 16;
        return result;
    }

    /**
     * Computes the hash code of an array: every element is mixed in, in
     * array order, and the hash is finished with the array length as the
     * word count.
     *
     * @param <T> the array element type
     * @param data the array data
     * @param seed the seed for the MurmurHash algorithm
     * @return the hash code of the data
     */
    public static <T> int hashCode(T[] data, int seed) {
        int running = initialize(seed);
        for (int i = 0; i < data.length; i++) {
            running = update(running, data[i]);
        }
        return finish(running, data.length);
    }
}

View File

@ -31,6 +31,7 @@
package org.antlr.v4.codegen.model.decl;
import org.antlr.v4.codegen.OutputModelFactory;
import org.antlr.v4.runtime.misc.MurmurHash;
public abstract class ContextGetterDecl extends Decl {
public ContextGetterDecl(OutputModelFactory factory, String name) {
@ -44,7 +45,11 @@ public abstract class ContextGetterDecl extends Decl {
@Override
public int hashCode() {
return name.hashCode() + getArgType().hashCode();
int hash = MurmurHash.initialize();
hash = MurmurHash.update(hash, name);
hash = MurmurHash.update(hash, getArgType());
hash = MurmurHash.finish(hash, 2);
return hash;
}
/** Make sure that a getter does not equal a label. X() and X are ok.