From fc21b41afb93c197b8c371490c12bb1e87b72c87 Mon Sep 17 00:00:00 2001 From: Sam Harwell Date: Sun, 5 May 2013 13:24:19 -0500 Subject: [PATCH 1/2] Update PredictionContext to use the MurmurHash 3 hash algorithm --- .../runtime/atn/ArrayPredictionContext.java | 43 ------- .../v4/runtime/atn/PredictionContext.java | 58 ++++++++- .../atn/SingletonPredictionContext.java | 3 +- .../org/antlr/v4/runtime/misc/MurmurHash.java | 119 ++++++++++++++++++ 4 files changed, 175 insertions(+), 48 deletions(-) create mode 100644 runtime/Java/src/org/antlr/v4/runtime/misc/MurmurHash.java diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ArrayPredictionContext.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ArrayPredictionContext.java index 0d1835649..60dd4303d 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/ArrayPredictionContext.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ArrayPredictionContext.java @@ -58,49 +58,6 @@ public class ArrayPredictionContext extends PredictionContext { this.returnStates = returnStates; } -//ArrayPredictionContext(@NotNull PredictionContext[] parents, int[] returnStates, int parentHashCode, int returnStateHashCode) { -// super(calculateHashCode(parentHashCode, returnStateHashCode)); -// assert parents.length == returnStates.length; -// assert returnStates.length > 1 || returnStates[0] != EMPTY_FULL_STATE_KEY : "Should be using PredictionContext.EMPTY instead."; -// -// this.parents = parents; -// this.returnStates = returnStates; -// } -// -//ArrayPredictionContext(@NotNull PredictionContext[] parents, int[] returnStates, int hashCode) { -// super(hashCode); -// assert parents.length == returnStates.length; -// assert returnStates.length > 1 || returnStates[0] != EMPTY_FULL_STATE_KEY : "Should be using PredictionContext.EMPTY instead."; -// -// this.parents = parents; -// this.returnStates = returnStates; -// } - - protected static int calculateHashCode(PredictionContext[] parents, int[] returnStates) { - return calculateHashCode(calculateParentHashCode(parents), - calculateReturnStatesHashCode(returnStates)); - } - - protected static int calculateParentHashCode(PredictionContext[] parents) { - int hashCode = 1; - for (PredictionContext p : parents) { - if ( p!=null ) { // can be null for full ctx stack in ArrayPredictionContext - hashCode = hashCode * 31 ^ p.hashCode(); - } - } - - return hashCode; - } - - protected static int calculateReturnStatesHashCode(int[] returnStates) { - int hashCode = 1; - for (int state : returnStates) { - hashCode = hashCode * 31 ^ state; - } - - return hashCode; - } - @Override public Iterator iterator() { return new Iterator() { diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionContext.java b/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionContext.java index b8b7a7d3e..c93088b5f 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionContext.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionContext.java @@ -33,6 +33,7 @@ package org.antlr.v4.runtime.atn; import org.antlr.v4.runtime.Recognizer; import org.antlr.v4.runtime.RuleContext; import org.antlr.v4.runtime.misc.DoubleKeyMap; +import org.antlr.v4.runtime.misc.MurmurHash; import org.antlr.v4.runtime.misc.NotNull; import org.antlr.v4.runtime.misc.Nullable; @@ -61,9 +62,32 @@ public abstract class PredictionContext implements Iterable + * private int referenceHashCode() { + * int hash = {@link MurmurHash#initialize}({@link #INITIAL_HASH}); + * + * for (int i = 0; i < {@link #size()}; i++) { + * hash = {@link MurmurHash#update}(hash, {@link #getParent}(i)); + * } + * + * for (int i = 0; i < {@link #size()}; i++) { + * hash = {@link MurmurHash#update}(hash, {@link #getReturnState}(i)); + * } + * + * hash = {@link MurmurHash#finish}(hash, 2 * {@link #size()}); + * return hash; + * } + * + */ public final int cachedHashCode; protected PredictionContext(int cachedHashCode) { @@ -117,12 +141,40 @@ public abstract class PredictionContext implements Iterable>> (32 - r1)); + k = k * c2; + + hash = hash ^ k; + hash = (hash << r2) | (hash >>> (32 - r2)); + hash = hash * m + n; + + return hash; + } + + /** + * Update the intermediate hash value for the next input {@code value}. + * + * @param hash the intermediate hash value + * @param value the value to add to the current hash + * @return the updated intermediate hash value + */ + public static int update(int hash, Object value) { + return update(hash, value != null ? value.hashCode() : 0); + } + + /** + * Apply the final computation steps to the intermediate value {@code hash} + * to form the final result of the MurmurHash 3 hash function. + * + * @param hash the intermediate hash value + * @param numberOfWords the number of integer values added to the hash + * @return the final hash result + */ + public static int finish(int hash, int numberOfWords) { + hash = hash ^ (numberOfWords * 4); + hash = hash ^ (hash >>> 16); + hash = hash * 0x85EBCA6B; + hash = hash ^ (hash >>> 13); + hash = hash * 0xC2B2AE35; + hash = hash ^ (hash >>> 16); + return hash; + } + + private MurmurHash() { + } + +} From d67d924b0d36a4f84494b79abbb000833749f130 Mon Sep 17 00:00:00 2001 From: Sam Harwell Date: Sun, 5 May 2013 14:03:48 -0500 Subject: [PATCH 2/2] Update additional classes to use MurmurHash hashing --- .../org/antlr/v4/runtime/atn/ATNConfig.java | 16 ++++++++------- .../antlr/v4/runtime/atn/LexerATNConfig.java | 10 ++++++++-- .../antlr/v4/runtime/atn/PredictionMode.java | 9 +++++---- .../antlr/v4/runtime/atn/SemanticContext.java | 14 +++++++------ .../org/antlr/v4/runtime/dfa/DFAState.java | 10 +++++----- .../antlr/v4/runtime/misc/Array2DHashSet.java | 8 +++++--- .../v4/runtime/misc/FlexibleHashMap.java | 8 +++++--- .../antlr/v4/runtime/misc/IntervalSet.java | 13 +++++++----- .../org/antlr/v4/runtime/misc/MurmurHash.java | 20 ++++++++++++++++++- .../codegen/model/decl/ContextGetterDecl.java | 7 ++++++- 10 files changed, 78 insertions(+), 37 deletions(-) diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfig.java b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfig.java index 0e5b3ddfa..95af660e5 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfig.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/ATNConfig.java @@ -31,6 +31,7 @@ package org.antlr.v4.runtime.atn; import org.antlr.v4.runtime.Recognizer; +import org.antlr.v4.runtime.misc.MurmurHash; import org.antlr.v4.runtime.misc.NotNull; import org.antlr.v4.runtime.misc.Nullable; @@ -159,13 +160,14 @@ public class ATNConfig { @Override public int hashCode() { - int hashCode = 7; - hashCode = 5 * hashCode + state.stateNumber; - hashCode = 5 * hashCode + alt; - hashCode = 5 * hashCode + (context != null ? context.hashCode() : 0); - hashCode = 5 * hashCode + semanticContext.hashCode(); - return hashCode; - } + int hashCode = MurmurHash.initialize(7); + hashCode = MurmurHash.update(hashCode, state.stateNumber); + hashCode = MurmurHash.update(hashCode, alt); + hashCode = MurmurHash.update(hashCode, context); + hashCode = MurmurHash.update(hashCode, semanticContext); + hashCode = MurmurHash.finish(hashCode, 4); + return hashCode; + } @Override public String toString() { diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNConfig.java b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNConfig.java index 368eebc7c..8cd79cabf 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNConfig.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNConfig.java @@ -30,6 +30,7 @@ package org.antlr.v4.runtime.atn; +import org.antlr.v4.runtime.misc.MurmurHash; import org.antlr.v4.runtime.misc.NotNull; import org.antlr.v4.runtime.misc.Nullable; @@ -84,8 +85,13 @@ public class LexerATNConfig extends ATNConfig { @Override public int hashCode() { - int hashCode = super.hashCode(); - hashCode = 35 * hashCode ^ (passedThroughNonGreedyDecision ? 1 : 0); + int hashCode = MurmurHash.initialize(7); + hashCode = MurmurHash.update(hashCode, state.stateNumber); + hashCode = MurmurHash.update(hashCode, alt); + hashCode = MurmurHash.update(hashCode, context); + hashCode = MurmurHash.update(hashCode, semanticContext); + hashCode = MurmurHash.update(hashCode, passedThroughNonGreedyDecision ? 1 : 0); + hashCode = MurmurHash.finish(hashCode, 5); return hashCode; } diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java b/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java index dbcecd6f0..246f55d11 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/PredictionMode.java @@ -32,6 +32,7 @@ package org.antlr.v4.runtime.atn; import org.antlr.v4.runtime.misc.AbstractEqualityComparator; import org.antlr.v4.runtime.misc.FlexibleHashMap; +import org.antlr.v4.runtime.misc.MurmurHash; import org.antlr.v4.runtime.misc.NotNull; import java.util.BitSet; @@ -79,9 +80,10 @@ public enum PredictionMode { /** Code is function of (s, _, ctx, _) */ @Override public int hashCode(ATNConfig o) { - int hashCode = 7; - hashCode = 31 * hashCode + o.state.stateNumber; - hashCode = 31 * hashCode + o.context.hashCode(); + int hashCode = MurmurHash.initialize(7); + hashCode = MurmurHash.update(hashCode, o.state.stateNumber); + hashCode = MurmurHash.update(hashCode, o.context); + hashCode = MurmurHash.finish(hashCode, 2); return hashCode; } @@ -89,7 +91,6 @@ public enum PredictionMode { public boolean equals(ATNConfig a, ATNConfig b) { if ( a==b ) return true; if ( a==null || b==null ) return false; - if ( hashCode(a) != hashCode(b) ) return false; return a.state.stateNumber==b.state.stateNumber && a.context.equals(b.context); } diff --git a/runtime/Java/src/org/antlr/v4/runtime/atn/SemanticContext.java b/runtime/Java/src/org/antlr/v4/runtime/atn/SemanticContext.java index 3bc10ab48..40f4c898f 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/atn/SemanticContext.java +++ b/runtime/Java/src/org/antlr/v4/runtime/atn/SemanticContext.java @@ -32,6 +32,7 @@ package org.antlr.v4.runtime.atn; import org.antlr.v4.runtime.Recognizer; import org.antlr.v4.runtime.RuleContext; +import org.antlr.v4.runtime.misc.MurmurHash; import org.antlr.v4.runtime.misc.NotNull; import org.antlr.v4.runtime.misc.Utils; @@ -91,10 +92,11 @@ public abstract class SemanticContext { @Override public int hashCode() { - int hashCode = 1; - hashCode = 31 * hashCode + ruleIndex; - hashCode = 31 * hashCode + predIndex; - hashCode = 31 * hashCode + (isCtxDependent ? 1 : 0); + int hashCode = MurmurHash.initialize(); + hashCode = MurmurHash.update(hashCode, ruleIndex); + hashCode = MurmurHash.update(hashCode, predIndex); + hashCode = MurmurHash.update(hashCode, isCtxDependent ? 1 : 0); + hashCode = MurmurHash.finish(hashCode, 3); return hashCode; } @@ -137,7 +139,7 @@ public abstract class SemanticContext { @Override public int hashCode() { - return Arrays.hashCode(opnds); + return MurmurHash.hashCode(opnds, AND.class.hashCode()); } @Override @@ -177,7 +179,7 @@ public abstract class SemanticContext { @Override public int hashCode() { - return Arrays.hashCode(opnds) + 1; // differ from AND slightly + return MurmurHash.hashCode(opnds, OR.class.hashCode()); } @Override diff --git a/runtime/Java/src/org/antlr/v4/runtime/dfa/DFAState.java b/runtime/Java/src/org/antlr/v4/runtime/dfa/DFAState.java index 5cd35e58b..4cf7579d1 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/dfa/DFAState.java +++ b/runtime/Java/src/org/antlr/v4/runtime/dfa/DFAState.java @@ -36,6 +36,7 @@ import org.antlr.v4.runtime.atn.ATNConfig; import org.antlr.v4.runtime.atn.ATNConfigSet; import org.antlr.v4.runtime.atn.ParserATNSimulator; import org.antlr.v4.runtime.atn.SemanticContext; +import org.antlr.v4.runtime.misc.MurmurHash; import org.antlr.v4.runtime.misc.NotNull; import org.antlr.v4.runtime.misc.Nullable; @@ -150,11 +151,10 @@ public class DFAState { @Override public int hashCode() { - int h = 7; - if ( configs!=null ) { - h = h * 31 ^ configs.hashCode(); - } - return h; + int hash = MurmurHash.initialize(7); + hash = MurmurHash.update(hash, configs.hashCode()); + hash = MurmurHash.finish(hash, 1); + return hash; } /** diff --git a/runtime/Java/src/org/antlr/v4/runtime/misc/Array2DHashSet.java b/runtime/Java/src/org/antlr/v4/runtime/misc/Array2DHashSet.java index 3c3cc4f6c..49ebd1ced 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/misc/Array2DHashSet.java +++ b/runtime/Java/src/org/antlr/v4/runtime/misc/Array2DHashSet.java @@ -136,15 +136,17 @@ public class Array2DHashSet implements Set { @Override public int hashCode() { - int h = 0; + int hash = MurmurHash.initialize(); for (T[] bucket : buckets) { if ( bucket==null ) continue; for (T o : bucket) { if ( o==null ) break; - h += comparator.hashCode(o); + hash = MurmurHash.update(hash, comparator.hashCode(o)); } } - return h; + + hash = MurmurHash.finish(hash, size()); + return hash; } @Override diff --git a/runtime/Java/src/org/antlr/v4/runtime/misc/FlexibleHashMap.java b/runtime/Java/src/org/antlr/v4/runtime/misc/FlexibleHashMap.java index ae24920ec..7f9e86898 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/misc/FlexibleHashMap.java +++ b/runtime/Java/src/org/antlr/v4/runtime/misc/FlexibleHashMap.java @@ -183,15 +183,17 @@ public class FlexibleHashMap implements Map { @Override public int hashCode() { - int h = 0; + int hash = MurmurHash.initialize(); for (LinkedList> bucket : buckets) { if ( bucket==null ) continue; for (Entry e : bucket) { if ( e==null ) break; - h += comparator.hashCode(e.key); + hash = MurmurHash.update(hash, comparator.hashCode(e.key)); } } - return h; + + hash = MurmurHash.finish(hash, size()); + return hash; } @Override diff --git a/runtime/Java/src/org/antlr/v4/runtime/misc/IntervalSet.java b/runtime/Java/src/org/antlr/v4/runtime/misc/IntervalSet.java index 58a2144b0..007680fe1 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/misc/IntervalSet.java +++ b/runtime/Java/src/org/antlr/v4/runtime/misc/IntervalSet.java @@ -424,11 +424,14 @@ public class IntervalSet implements IntSet { @Override public int hashCode() { - if ( isNil() ) return 0; - int n = 0; - // just add left edge of intervals - for (Interval I : intervals) n += I.a; - return n; + int hash = MurmurHash.initialize(); + for (Interval I : intervals) { + hash = MurmurHash.update(hash, I.a); + hash = MurmurHash.update(hash, I.b); + } + + hash = MurmurHash.finish(hash, intervals.size() * 2); + return hash; } /** Are two IntervalSets equal? Because all intervals are sorted diff --git a/runtime/Java/src/org/antlr/v4/runtime/misc/MurmurHash.java b/runtime/Java/src/org/antlr/v4/runtime/misc/MurmurHash.java index c429092b7..4d5d0ea66 100644 --- a/runtime/Java/src/org/antlr/v4/runtime/misc/MurmurHash.java +++ b/runtime/Java/src/org/antlr/v4/runtime/misc/MurmurHash.java @@ -113,7 +113,25 @@ public final class MurmurHash { return hash; } - private MurmurHash() { + /** + * Utility function to compute the hash code of an array using the + * MurmurHash algorithm. + * + * @param the array element type + * @param data the array data + * @param seed the seed for the MurmurHash algorithm + * @return the hash code of the data + */ + public static int hashCode(T[] data, int seed) { + int hash = initialize(seed); + for (T value : data) { + hash = update(hash, value); + } + + hash = finish(hash, data.length); + return hash; } + private MurmurHash() { + } } diff --git a/tool/src/org/antlr/v4/codegen/model/decl/ContextGetterDecl.java b/tool/src/org/antlr/v4/codegen/model/decl/ContextGetterDecl.java index 3977e0c16..99ceef298 100644 --- a/tool/src/org/antlr/v4/codegen/model/decl/ContextGetterDecl.java +++ b/tool/src/org/antlr/v4/codegen/model/decl/ContextGetterDecl.java @@ -31,6 +31,7 @@ package org.antlr.v4.codegen.model.decl; import org.antlr.v4.codegen.OutputModelFactory; +import org.antlr.v4.runtime.misc.MurmurHash; public abstract class ContextGetterDecl extends Decl { public ContextGetterDecl(OutputModelFactory factory, String name) { @@ -44,7 +45,11 @@ public abstract class ContextGetterDecl extends Decl { @Override public int hashCode() { - return name.hashCode() + getArgType().hashCode(); + int hash = MurmurHash.initialize(); + hash = MurmurHash.update(hash, name); + hash = MurmurHash.update(hash, getArgType()); + hash = MurmurHash.finish(hash, 2); + return hash; } /** Make sure that a getter does not equal a label. X() and X are ok.