From 70db334a58b1391b3395bd3ddbc4c16a91b11e0d Mon Sep 17 00:00:00 2001 From: Ewan Mellor Date: Wed, 27 Sep 2017 12:47:17 -0700 Subject: [PATCH] Improve MurmurHash in a number of ways: Use Swift's overflowing operators rather than multipliedReportingOverflow etc. Use UInt32 for the hash values. This matches how MurmurHash3 is generally defined (e.g. on Wikipedia). Add support for decoding Strings (UTF-8, then little-endian) and hashing the resultant UInt32 values. Add a test set, using test patterns from Ian Boyd (public domain). --- .../Swift/Sources/Antlr4/atn/ATNConfig.swift | 5 +- .../Sources/Antlr4/atn/ATNConfigSet.swift | 9 +- .../Sources/Antlr4/atn/LexerATNConfig.swift | 5 +- .../Antlr4/atn/LexerActionExecutor.swift | 2 +- .../Antlr4/atn/LexerChannelAction.swift | 2 +- .../Antlr4/atn/LexerCustomAction.swift | 2 +- .../Antlr4/atn/LexerIndexedCustomAction.swift | 2 +- .../Sources/Antlr4/atn/LexerModeAction.swift | 2 +- .../Sources/Antlr4/atn/LexerMoreAction.swift | 2 +- .../Antlr4/atn/LexerPopModeAction.swift | 2 +- .../Antlr4/atn/LexerPushModeAction.swift | 2 +- .../Sources/Antlr4/atn/LexerSkipAction.swift | 2 +- .../Sources/Antlr4/atn/LexerTypeAction.swift | 2 +- .../Antlr4/atn/PredictionContext.swift | 17 +-- .../Sources/Antlr4/atn/SemanticContext.swift | 5 +- .../Swift/Sources/Antlr4/dfa/DFAState.swift | 5 +- .../Sources/Antlr4/misc/IntervalSet.swift | 10 +- .../Sources/Antlr4/misc/MurmurHash.swift | 134 ++++++++++++------ .../Swift/Sources/Antlr4/misc/Triple.swift | 2 +- .../Tests/Antlr4Tests/MurmurHashTests.swift | 47 ++++++ 20 files changed, 168 insertions(+), 91 deletions(-) create mode 100644 runtime/Swift/Tests/Antlr4Tests/MurmurHashTests.swift diff --git a/runtime/Swift/Sources/Antlr4/atn/ATNConfig.swift b/runtime/Swift/Sources/Antlr4/atn/ATNConfig.swift index 75bff30e4..f91854531 100644 --- a/runtime/Swift/Sources/Antlr4/atn/ATNConfig.swift +++ b/runtime/Swift/Sources/Antlr4/atn/ATNConfig.swift @@ -150,13 +150,12 @@ public class ATNConfig: Hashable, CustomStringConvertible { /// public var hashValue: Int { - var hashCode: Int = MurmurHash.initialize(7) + var hashCode = MurmurHash.initialize(7) hashCode = MurmurHash.update(hashCode, state.stateNumber) hashCode = MurmurHash.update(hashCode, alt) hashCode = MurmurHash.update(hashCode, context) hashCode = MurmurHash.update(hashCode, semanticContext) - hashCode = MurmurHash.finish(hashCode, 4) - return hashCode + return MurmurHash.finish(hashCode, 4) } diff --git a/runtime/Swift/Sources/Antlr4/atn/ATNConfigSet.swift b/runtime/Swift/Sources/Antlr4/atn/ATNConfigSet.swift index 803d9c369..59c44367b 100644 --- a/runtime/Swift/Sources/Antlr4/atn/ATNConfigSet.swift +++ b/runtime/Swift/Sources/Antlr4/atn/ATNConfigSet.swift @@ -313,15 +313,12 @@ public class ATNConfigSet: Hashable, CustomStringConvertible { /// return configLookup.toArray(a); /// private final func configHash(_ stateNumber: Int,_ context: PredictionContext?) -> Int{ - - var hashCode: Int = MurmurHash.initialize(7) + var hashCode = MurmurHash.initialize(7) hashCode = MurmurHash.update(hashCode, stateNumber) hashCode = MurmurHash.update(hashCode, context) - hashCode = MurmurHash.finish(hashCode, 2) - - return hashCode - + return MurmurHash.finish(hashCode, 2) } + public final func getConflictingAltSubsets() throws -> Array { let length = configs.count let configToAlts: HashMap = HashMap(count: length) diff --git a/runtime/Swift/Sources/Antlr4/atn/LexerATNConfig.swift b/runtime/Swift/Sources/Antlr4/atn/LexerATNConfig.swift index c39a25249..3cacb48cb 100644 --- a/runtime/Swift/Sources/Antlr4/atn/LexerATNConfig.swift +++ b/runtime/Swift/Sources/Antlr4/atn/LexerATNConfig.swift @@ -77,15 +77,14 @@ public class LexerATNConfig: ATNConfig { }*/ public var hashValue: Int { - var hashCode: Int = MurmurHash.initialize(7) + var hashCode = MurmurHash.initialize(7) hashCode = MurmurHash.update(hashCode, state.stateNumber) hashCode = MurmurHash.update(hashCode, alt) hashCode = MurmurHash.update(hashCode, context) hashCode = MurmurHash.update(hashCode, semanticContext) hashCode = MurmurHash.update(hashCode, passedThroughNonGreedyDecision ? 1 : 0) hashCode = MurmurHash.update(hashCode, lexerActionExecutor) - hashCode = MurmurHash.finish(hashCode, 6) - return hashCode + return MurmurHash.finish(hashCode, 6) } diff --git a/runtime/Swift/Sources/Antlr4/atn/LexerActionExecutor.swift b/runtime/Swift/Sources/Antlr4/atn/LexerActionExecutor.swift index 5079df2d7..e33e92e9b 100644 --- a/runtime/Swift/Sources/Antlr4/atn/LexerActionExecutor.swift +++ b/runtime/Swift/Sources/Antlr4/atn/LexerActionExecutor.swift @@ -36,7 +36,7 @@ public class LexerActionExecutor: Hashable { public init(_ lexerActions: [LexerAction]) { self.lexerActions = lexerActions - var hash: Int = MurmurHash.initialize() + var hash = MurmurHash.initialize() for lexerAction: LexerAction in lexerActions { hash = MurmurHash.update(hash, lexerAction) } diff --git a/runtime/Swift/Sources/Antlr4/atn/LexerChannelAction.swift b/runtime/Swift/Sources/Antlr4/atn/LexerChannelAction.swift index 22cde7cf7..4d099f28c 100644 --- a/runtime/Swift/Sources/Antlr4/atn/LexerChannelAction.swift +++ b/runtime/Swift/Sources/Antlr4/atn/LexerChannelAction.swift @@ -65,7 +65,7 @@ public final class LexerChannelAction: LexerAction, CustomStringConvertible { override public var hashValue: Int { - var hash: Int = MurmurHash.initialize() + var hash = MurmurHash.initialize() hash = MurmurHash.update(hash, getActionType().rawValue) hash = MurmurHash.update(hash, channel) return MurmurHash.finish(hash, 2) diff --git a/runtime/Swift/Sources/Antlr4/atn/LexerCustomAction.swift b/runtime/Swift/Sources/Antlr4/atn/LexerCustomAction.swift index 35e6afed0..3fe954236 100644 --- a/runtime/Swift/Sources/Antlr4/atn/LexerCustomAction.swift +++ b/runtime/Swift/Sources/Antlr4/atn/LexerCustomAction.swift @@ -94,7 +94,7 @@ public final class LexerCustomAction: LexerAction { override public var hashValue: Int { - var hash: Int = MurmurHash.initialize() + var hash = MurmurHash.initialize() hash = MurmurHash.update(hash, getActionType().rawValue) hash = MurmurHash.update(hash, ruleIndex) hash = MurmurHash.update(hash, actionIndex) diff --git a/runtime/Swift/Sources/Antlr4/atn/LexerIndexedCustomAction.swift b/runtime/Swift/Sources/Antlr4/atn/LexerIndexedCustomAction.swift index 91bb6a50c..501b2e637 100644 --- a/runtime/Swift/Sources/Antlr4/atn/LexerIndexedCustomAction.swift +++ b/runtime/Swift/Sources/Antlr4/atn/LexerIndexedCustomAction.swift @@ -97,7 +97,7 @@ public final class LexerIndexedCustomAction: LexerAction { public override var hashValue: Int { - var hash: Int = MurmurHash.initialize() + var hash = MurmurHash.initialize() hash = MurmurHash.update(hash, offset) hash = MurmurHash.update(hash, action) return MurmurHash.finish(hash, 2) diff --git a/runtime/Swift/Sources/Antlr4/atn/LexerModeAction.swift b/runtime/Swift/Sources/Antlr4/atn/LexerModeAction.swift index 81d89c285..7bab1e713 100644 --- a/runtime/Swift/Sources/Antlr4/atn/LexerModeAction.swift +++ b/runtime/Swift/Sources/Antlr4/atn/LexerModeAction.swift @@ -64,7 +64,7 @@ public final class LexerModeAction: LexerAction, CustomStringConvertible { } override public var hashValue: Int { - var hash: Int = MurmurHash.initialize() + var hash = MurmurHash.initialize() hash = MurmurHash.update(hash, getActionType().rawValue) hash = MurmurHash.update(hash, mode) return MurmurHash.finish(hash, 2) diff --git a/runtime/Swift/Sources/Antlr4/atn/LexerMoreAction.swift b/runtime/Swift/Sources/Antlr4/atn/LexerMoreAction.swift index f393d224f..bb9f197f3 100644 --- a/runtime/Swift/Sources/Antlr4/atn/LexerMoreAction.swift +++ b/runtime/Swift/Sources/Antlr4/atn/LexerMoreAction.swift @@ -58,7 +58,7 @@ public final class LexerMoreAction: LexerAction, CustomStringConvertible { override public var hashValue: Int { - var hash: Int = MurmurHash.initialize() + var hash = MurmurHash.initialize() hash = MurmurHash.update(hash, getActionType().rawValue) return MurmurHash.finish(hash, 1) diff --git a/runtime/Swift/Sources/Antlr4/atn/LexerPopModeAction.swift b/runtime/Swift/Sources/Antlr4/atn/LexerPopModeAction.swift index 33fa445b3..f35e78304 100644 --- a/runtime/Swift/Sources/Antlr4/atn/LexerPopModeAction.swift +++ b/runtime/Swift/Sources/Antlr4/atn/LexerPopModeAction.swift @@ -59,7 +59,7 @@ public final class LexerPopModeAction: LexerAction, CustomStringConvertible { override public var hashValue: Int { - var hash: Int = MurmurHash.initialize() + var hash = MurmurHash.initialize() hash = MurmurHash.update(hash, getActionType().rawValue) return MurmurHash.finish(hash, 1) diff --git a/runtime/Swift/Sources/Antlr4/atn/LexerPushModeAction.swift b/runtime/Swift/Sources/Antlr4/atn/LexerPushModeAction.swift index adfd40994..33d5bf79b 100644 --- a/runtime/Swift/Sources/Antlr4/atn/LexerPushModeAction.swift +++ b/runtime/Swift/Sources/Antlr4/atn/LexerPushModeAction.swift @@ -66,7 +66,7 @@ public final class LexerPushModeAction: LexerAction, CustomStringConvertible { override public var hashValue: Int { - var hash: Int = MurmurHash.initialize() + var hash = MurmurHash.initialize() hash = MurmurHash.update(hash, getActionType().rawValue) hash = MurmurHash.update(hash, mode) return MurmurHash.finish(hash, 2) diff --git a/runtime/Swift/Sources/Antlr4/atn/LexerSkipAction.swift b/runtime/Swift/Sources/Antlr4/atn/LexerSkipAction.swift index 5c6907dc7..bbdd06d2f 100644 --- a/runtime/Swift/Sources/Antlr4/atn/LexerSkipAction.swift +++ b/runtime/Swift/Sources/Antlr4/atn/LexerSkipAction.swift @@ -58,7 +58,7 @@ public final class LexerSkipAction: LexerAction, CustomStringConvertible { override public var hashValue: Int { - var hash: Int = MurmurHash.initialize() + var hash = MurmurHash.initialize() hash = MurmurHash.update(hash, getActionType().rawValue) return MurmurHash.finish(hash, 1) } diff --git a/runtime/Swift/Sources/Antlr4/atn/LexerTypeAction.swift b/runtime/Swift/Sources/Antlr4/atn/LexerTypeAction.swift index 33a116fc1..10b41698f 100644 --- a/runtime/Swift/Sources/Antlr4/atn/LexerTypeAction.swift +++ b/runtime/Swift/Sources/Antlr4/atn/LexerTypeAction.swift @@ -64,7 +64,7 @@ public class LexerTypeAction: LexerAction, CustomStringConvertible { override public var hashValue: Int { - var hash: Int = MurmurHash.initialize() + var hash = MurmurHash.initialize() hash = MurmurHash.update(hash, getActionType().rawValue) hash = MurmurHash.update(hash, type) return MurmurHash.finish(hash, 2) diff --git a/runtime/Swift/Sources/Antlr4/atn/PredictionContext.swift b/runtime/Swift/Sources/Antlr4/atn/PredictionContext.swift index 783a1ab5b..9b1096ca9 100644 --- a/runtime/Swift/Sources/Antlr4/atn/PredictionContext.swift +++ b/runtime/Swift/Sources/Antlr4/atn/PredictionContext.swift @@ -21,7 +21,7 @@ public class PredictionContext: Hashable, CustomStringConvertible { /// public static let EMPTY_RETURN_STATE: Int = Int(Int32.max) - private static let INITIAL_HASH: Int = 1 + private static let INITIAL_HASH = UInt32(1) public static var globalNodeCount: Int = 0 public final let id: Int = { @@ -118,21 +118,19 @@ public class PredictionContext: Hashable, CustomStringConvertible { } static func calculateEmptyHashCode() -> Int { - var hash: Int = MurmurHash.initialize(INITIAL_HASH) - hash = MurmurHash.finish(hash, 0) - return hash + let hash = MurmurHash.initialize(INITIAL_HASH) + return MurmurHash.finish(hash, 0) } static func calculateHashCode(_ parent: PredictionContext?, _ returnState: Int) -> Int { - var hash: Int = MurmurHash.initialize(INITIAL_HASH) + var hash = MurmurHash.initialize(INITIAL_HASH) hash = MurmurHash.update(hash, parent) hash = MurmurHash.update(hash, returnState) - hash = MurmurHash.finish(hash, 2) - return hash + return MurmurHash.finish(hash, 2) } static func calculateHashCode(_ parents: [PredictionContext?], _ returnStates: [Int]) -> Int { - var hash: Int = MurmurHash.initialize(INITIAL_HASH) + var hash = MurmurHash.initialize(INITIAL_HASH) var length = parents.count for i in 0.. Int { - var hash: Int = MurmurHash.initialize() + var hash = MurmurHash.initialize() for I: Interval in intervals { hash = MurmurHash.update(hash, I.a) hash = MurmurHash.update(hash, I.b) } - hash = MurmurHash.finish(hash, intervals.count * 2) - return hash + return MurmurHash.finish(hash, intervals.count * 2) } public var hashValue: Int { - var hash: Int = MurmurHash.initialize() + var hash = MurmurHash.initialize() for I: Interval in intervals { hash = MurmurHash.update(hash, I.a) hash = MurmurHash.update(hash, I.b) } - hash = MurmurHash.finish(hash, intervals.count * 2) - return hash + return MurmurHash.finish(hash, intervals.count * 2) } /// /// Are two IntervalSets equal? Because all intervals are sorted diff --git a/runtime/Swift/Sources/Antlr4/misc/MurmurHash.swift b/runtime/Swift/Sources/Antlr4/misc/MurmurHash.swift index 1bdbcd426..1386e8f09 100644 --- a/runtime/Swift/Sources/Antlr4/misc/MurmurHash.swift +++ b/runtime/Swift/Sources/Antlr4/misc/MurmurHash.swift @@ -6,20 +6,28 @@ /// +/// https://en.wikipedia.org/wiki/MurmurHash /// /// - Author: Sam Harwell /// public final class MurmurHash { - private static let DEFAULT_SEED: Int = 0 + private static let DEFAULT_SEED: UInt32 = 0 + + private static let c1 = UInt32(0xCC9E2D51) + private static let c2 = UInt32(0x1B873593) + private static let r1 = UInt32(15) + private static let r2 = UInt32(13) + private static let m = UInt32(5) + private static let n = UInt32(0xE6546B64) /// /// Initialize the hash using the default seed value. /// /// - Returns: the intermediate hash value /// - public static func initialize() -> Int { + public static func initialize() -> UInt32 { return initialize(DEFAULT_SEED) } @@ -29,10 +37,18 @@ public final class MurmurHash { /// - Parameter seed: the seed /// - Returns: the intermediate hash value /// - public static func initialize(_ seed: Int) -> Int { + public static func initialize(_ seed: UInt32) -> UInt32 { return seed } + private static func calcK(_ value: UInt32) -> UInt32 { + var k = value + k = k &* c1 + k = (k << r1) | (k >> (32 - r1)) + k = k &* c2 + return k + } + /// /// Update the intermediate hash value for the next input `value`. /// @@ -40,31 +56,14 @@ public final class MurmurHash { /// - Parameter value: the value to add to the current hash /// - Returns: the updated intermediate hash value /// - public static func update2(_ hashIn: Int, _ value: Int) -> Int { - - let c1: Int32 = -862048943//0xCC9E2D51; - let c2: Int32 = 0x1B873593 - let r1: Int32 = 15 - let r2: Int32 = 13 - let m: Int32 = 5 - let n: Int32 = -430675100//0xE6546B64; - - var k: Int32 = Int32(truncatingIfNeeded: value) - k = k.multipliedReportingOverflow(by: c1).partialValue - // (k,_) = UInt32.multiplyWithOverflow(k, c1) ;//( k * c1); - //TODO: CHECKE >>> - k = (k << r1) | (k >>> (Int32(32) - r1)) //k = (k << r1) | (k >>> (32 - r1)); - //k = UInt32 (truncatingBitPattern:Int64(Int64(k) * Int64(c2)));//( k * c2); - //(k,_) = UInt32.multiplyWithOverflow(k, c2) - k = k.multipliedReportingOverflow(by: c2).partialValue - var hash = Int32(hashIn) + public static func update2(_ hashIn: UInt32, _ value: Int) -> UInt32 { + let k = calcK(UInt32(truncatingIfNeeded: value)) + var hash = hashIn hash = hash ^ k - hash = (hash << r2) | (hash >>> (Int32(32) - r2))//hash = (hash << r2) | (hash >>> (32 - r2)); - hash = hash.multipliedReportingOverflow(by: m).partialValue - hash = hash.addingReportingOverflow(n).partialValue - //hash = hash * m + n; + hash = (hash << r2) | (hash >> (32 - r2)) + hash = hash &* m &+ n // print("murmur update2 : \(hash)") - return Int(hash) + return hash } /// @@ -74,9 +73,8 @@ public final class MurmurHash { /// - Parameter value: the value to add to the current hash /// - Returns: the updated intermediate hash value /// - public static func update(_ hash: Int, _ value: T?) -> Int { + public static func update(_ hash: UInt32, _ value: T?) -> UInt32 { return update2(hash, value != nil ? value!.hashValue : 0) - // return update2(hash, value); } /// @@ -84,21 +82,24 @@ public final class MurmurHash { /// to form the final result of the MurmurHash 3 hash function. /// /// - Parameter hash: the intermediate hash value - /// - Parameter numberOfWords: the number of integer values added to the hash + /// - Parameter numberOfWords: the number of UInt32 values added to the hash /// - Returns: the final hash result /// - public static func finish(_ hashin: Int, _ numberOfWordsIn: Int) -> Int { - var hash = Int32(hashin) - let numberOfWords = Int32(numberOfWordsIn) - hash = hash ^ numberOfWords.multipliedReportingOverflow(by: 4).partialValue //(numberOfWords * UInt32(4)); - hash = hash ^ (hash >>> Int32(16)) //hash = hash ^ (hash >>> 16); - hash = hash.multipliedReportingOverflow(by: -2048144789).partialValue //hash * UInt32(0x85EBCA6B); - hash = hash ^ (hash >>> Int32(13))//hash = hash ^ (hash >>> 13); - //hash = UInt32(truncatingBitPattern: UInt64(hash) * UInt64(0xC2B2AE35)) ; - hash = hash.multipliedReportingOverflow(by: -1028477387).partialValue - hash = hash ^ (hash >>> Int32(16))// hash = hash ^ (hash >>> 16); + public static func finish(_ hashin: UInt32, _ numberOfWords: Int) -> Int { + return Int(finish(hashin, byteCount: (numberOfWords &* 4))) + } + + private static func finish(_ hashin: UInt32, byteCount byteCountInt: Int) -> UInt32 { + let byteCount = UInt32(truncatingIfNeeded: byteCountInt) + var hash = hashin + hash ^= byteCount + hash ^= (hash >> 16) + hash = hash &* 0x85EBCA6B + hash ^= (hash >> 13) + hash = hash &* 0xC2B2AE35 + hash ^= (hash >> 16) //print("murmur finish : \(hash)") - return Int(hash) + return hash } /// @@ -111,14 +112,55 @@ public final class MurmurHash { /// - Returns: the hash code of the data /// public static func hashCode(_ data: [T], _ seed: Int) -> Int { - var hash: Int = initialize(seed) - for value: T in data { - //var hashValue = value != nil ? value.hashValue : 0 - hash = update(hash, value.hashValue) + var hash = initialize(UInt32(truncatingIfNeeded: seed)) + for value in data { + hash = update(hash, value) } - hash = finish(hash, data.count) - return hash + return finish(hash, data.count) + } + + /// + /// Compute a hash for the given String and seed. The String is encoded + /// using UTF-8, then the bytes are interpreted as unsigned 32-bit + /// little-endian values, giving UInt32 values for the update call. + /// + /// If the bytes do not evenly divide by 4, the final bytes are treated + /// slightly differently (not doing the final rotate / multiply / add). + /// + /// This matches the treatment of byte sequences in publicly available + /// test patterns (see MurmurHashTests.swift) and the example code on + /// Wikipedia. + /// + public static func hashString(_ s: String, _ seed: UInt32) -> UInt32 { + let bytes = Array(s.utf8) + return hashBytesLittleEndian(bytes, seed) + } + + private static func hashBytesLittleEndian(_ bytes: [UInt8], _ seed: UInt32) -> UInt32 { + let byteCount = bytes.count + + var hash = seed + for i in stride(from: 0, to: byteCount - 3, by: 4) { + var word = UInt32(bytes[i]) + word |= UInt32(bytes[i + 1]) << 8 + word |= UInt32(bytes[i + 2]) << 16 + word |= UInt32(bytes[i + 3]) << 24 + + hash = update(hash, word) + } + let remaining = byteCount & 3 + if remaining != 0 { + var lastWord = UInt32(0) + for r in 0 ..< remaining { + lastWord |= UInt32(bytes[byteCount - 1 - r]) << (8 * (remaining - 1 - r)) + } + + let k = calcK(lastWord) + hash ^= k + } + + return finish(hash, byteCount: byteCount) } private init() { diff --git a/runtime/Swift/Sources/Antlr4/misc/Triple.swift b/runtime/Swift/Sources/Antlr4/misc/Triple.swift index 871a6a50d..a05366038 100644 --- a/runtime/Swift/Sources/Antlr4/misc/Triple.swift +++ b/runtime/Swift/Sources/Antlr4/misc/Triple.swift @@ -16,7 +16,7 @@ public class Triple: Hashable, CustomStringC self.c = c } public var hashValue: Int { - var hash: Int = MurmurHash.initialize() + var hash = MurmurHash.initialize() hash = MurmurHash.update(hash, a) hash = MurmurHash.update(hash, b) hash = MurmurHash.update(hash, c) diff --git a/runtime/Swift/Tests/Antlr4Tests/MurmurHashTests.swift b/runtime/Swift/Tests/Antlr4Tests/MurmurHashTests.swift new file mode 100644 index 000000000..dad5853de --- /dev/null +++ b/runtime/Swift/Tests/Antlr4Tests/MurmurHashTests.swift @@ -0,0 +1,47 @@ +/// Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. +/// Use of this file is governed by the BSD 3-clause license that +/// can be found in the LICENSE.txt file in the project root. + +/// The test patterns below are by Ian Boyd and have been released into the +/// public domain. +/// https://stackoverflow.com/questions/14747343/murmurhash3-test-vectors + +import XCTest +import Antlr4 + +class MurmurHashTests: XCTestCase { + + func testMurmurHash() { + doMurmurHashTest("", 0, 0) //empty string with zero seed should give zero + doMurmurHashTest("", 1, 0x514E28B7) + doMurmurHashTest("", 0xffffffff, 0x81F16F39) //make sure seed value is handled unsigned + doMurmurHashTest("\0\0\0\0", 0, 0x2362F9DE) //make sure we handle embedded nulls + + doMurmurHashTest("aaaa", 0x9747b28c, 0x5A97808A) //one full chunk + doMurmurHashTest("aaa", 0x9747b28c, 0x283E0130) //three characters + doMurmurHashTest("aa", 0x9747b28c, 0x5D211726) //two characters + doMurmurHashTest("a", 0x9747b28c, 0x7FA09EA6) //one character + + //Endian order within the chunks + doMurmurHashTest("abcd", 0x9747b28c, 0xF0478627) //one full chunk + doMurmurHashTest("abc", 0x9747b28c, 0xC84A62DD) + doMurmurHashTest("ab", 0x9747b28c, 0x74875592) + doMurmurHashTest("a", 0x9747b28c, 0x7FA09EA6) + + doMurmurHashTest("Hello, world!", 0x9747b28c, 0x24884CBA) + + //Make sure you handle UTF-8 high characters. A bcrypt implementation messed this up + doMurmurHashTest("ππππππππ", 0x9747b28c, 0xD58063C1) //U+03C0: Greek Small Letter Pi + + //String of 256 characters. + doMurmurHashTest(String(repeating: "a", count: 256), 0x9747b28c, 0x37405BDC) + + doMurmurHashTest("abc", 0, 0xB3DD93FA) + doMurmurHashTest("abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 0, 0xEE925B90) + doMurmurHashTest("The quick brown fox jumps over the lazy dog", 0x9747b28c, 0x2FA826CD) + } +} + +private func doMurmurHashTest(_ input: String, _ seed: UInt32, _ expected: UInt32) { + XCTAssertEqual(MurmurHash.hashString(input, seed), expected) +}