247 lines
3.5 KiB
ArmAsm
247 lines
3.5 KiB
ArmAsm
// +build go1.5,amd64
|
|
|
|
// SeedSum128(seed1, seed2 uint64, data []byte) (h1 uint64, h2 uint64)
|
|
TEXT ·SeedSum128(SB), $0-56
|
|
MOVQ seed1+0(FP), R12
|
|
MOVQ seed2+8(FP), R13
|
|
MOVQ data_base+16(FP), SI
|
|
MOVQ data_len+24(FP), R9
|
|
LEAQ h1+40(FP), BX
|
|
JMP sum128internal<>(SB)
|
|
|
|
// Sum128(data []byte) (h1 uint64, h2 uint64)
|
|
TEXT ·Sum128(SB), $0-40
|
|
XORQ R12, R12
|
|
XORQ R13, R13
|
|
MOVQ data_base+0(FP), SI
|
|
MOVQ data_len+8(FP), R9
|
|
LEAQ h1+24(FP), BX
|
|
JMP sum128internal<>(SB)
|
|
|
|
// SeedStringSum128(seed1, seed2 uint64, data string) (h1 uint64, h2 uint64)
|
|
TEXT ·SeedStringSum128(SB), $0-48
|
|
MOVQ seed1+0(FP), R12
|
|
MOVQ seed2+8(FP), R13
|
|
MOVQ data_base+16(FP), SI
|
|
MOVQ data_len+24(FP), R9
|
|
LEAQ h1+32(FP), BX
|
|
JMP sum128internal<>(SB)
|
|
|
|
// StringSum128(data string) (h1 uint64, h2 uint64)
|
|
TEXT ·StringSum128(SB), $0-32
|
|
XORQ R12, R12
|
|
XORQ R13, R13
|
|
MOVQ data_base+0(FP), SI
|
|
MOVQ data_len+8(FP), R9
|
|
LEAQ h1+16(FP), BX
|
|
JMP sum128internal<>(SB)
|
|
|
|
// Expects:
|
|
// R12 == h1 uint64 seed
|
|
// R13 == h2 uint64 seed
|
|
// SI == &data
|
|
// R9 == len(data)
|
|
// BX == &[2]uint64 return
|
|
TEXT sum128internal<>(SB), $0
|
|
MOVQ $0x87c37b91114253d5, R14 // c1
|
|
MOVQ $0x4cf5ad432745937f, R15 // c2
|
|
|
|
MOVQ R9, CX
|
|
ANDQ $-16, CX // cx == data_len - (data_len % 16)
|
|
|
|
// for r10 = 0; r10 < cx; r10 += 16 {...
|
|
XORQ R10, R10
|
|
|
|
loop:
|
|
CMPQ R10, CX
|
|
JE tail
|
|
MOVQ (SI)(R10*1), AX
|
|
MOVQ 8(SI)(R10*1), DX
|
|
ADDQ $16, R10
|
|
|
|
IMULQ R14, AX
|
|
IMULQ R15, DX
|
|
|
|
ROLQ $31, AX
|
|
ROLQ $33, DX
|
|
|
|
IMULQ R15, AX
|
|
IMULQ R14, DX
|
|
|
|
XORQ AX, R12
|
|
ROLQ $27, R12
|
|
ADDQ R13, R12
|
|
XORQ DX, R13
|
|
ROLQ $31, R13
|
|
LEAQ 0x52dce729(R12)(R12*4), R12
|
|
|
|
ADDQ R12, R13
|
|
LEAQ 0x38495ab5(R13)(R13*4), R13
|
|
|
|
JMP loop
|
|
|
|
tail:
|
|
MOVQ R9, CX
|
|
ANDQ $0xf, CX
|
|
JZ finalize // if len % 16 == 0
|
|
|
|
XORQ AX, AX
|
|
|
|
// poor man's binary tree jump table
|
|
SUBQ $8, CX
|
|
JZ tail8
|
|
JG over8
|
|
ADDQ $4, CX
|
|
JZ tail4
|
|
JG over4
|
|
ADDQ $2, CX
|
|
JL tail1
|
|
JZ tail2
|
|
JMP tail3
|
|
|
|
over4:
|
|
SUBQ $2, CX
|
|
JL tail5
|
|
JZ tail6
|
|
JMP tail7
|
|
|
|
over8:
|
|
SUBQ $4, CX
|
|
JZ tail12
|
|
JG over12
|
|
ADDQ $2, CX
|
|
JL tail9
|
|
JZ tail10
|
|
JMP tail11
|
|
|
|
over12:
|
|
SUBQ $2, CX
|
|
JL tail13
|
|
JZ tail14
|
|
|
|
tail15:
|
|
MOVBQZX 14(SI)(R10*1), AX
|
|
SALQ $16, AX
|
|
|
|
tail14:
|
|
MOVW 12(SI)(R10*1), AX
|
|
SALQ $32, AX
|
|
JMP tail12
|
|
|
|
tail13:
|
|
MOVBQZX 12(SI)(R10*1), AX
|
|
SALQ $32, AX
|
|
|
|
tail12:
|
|
MOVL 8(SI)(R10*1), DX
|
|
ORQ DX, AX
|
|
JMP fintailhigh
|
|
|
|
tail11:
|
|
MOVBQZX 10(SI)(R10*1), AX
|
|
SALQ $16, AX
|
|
|
|
tail10:
|
|
MOVW 8(SI)(R10*1), AX
|
|
JMP fintailhigh
|
|
|
|
tail9:
|
|
MOVB 8(SI)(R10*1), AL
|
|
|
|
fintailhigh:
|
|
IMULQ R15, AX
|
|
ROLQ $33, AX
|
|
IMULQ R14, AX
|
|
XORQ AX, R13
|
|
|
|
tail8:
|
|
MOVQ (SI)(R10*1), AX
|
|
JMP fintaillow
|
|
|
|
tail7:
|
|
MOVBQZX 6(SI)(R10*1), AX
|
|
SALQ $16, AX
|
|
|
|
tail6:
|
|
MOVW 4(SI)(R10*1), AX
|
|
SALQ $32, AX
|
|
JMP tail4
|
|
|
|
tail5:
|
|
MOVBQZX 4(SI)(R10*1), AX
|
|
SALQ $32, AX
|
|
|
|
tail4:
|
|
MOVL (SI)(R10*1), DX
|
|
ORQ DX, AX
|
|
JMP fintaillow
|
|
|
|
tail3:
|
|
MOVBQZX 2(SI)(R10*1), AX
|
|
SALQ $16, AX
|
|
|
|
tail2:
|
|
MOVW (SI)(R10*1), AX
|
|
JMP fintaillow
|
|
|
|
tail1:
|
|
MOVB (SI)(R10*1), AL
|
|
|
|
fintaillow:
|
|
IMULQ R14, AX
|
|
ROLQ $31, AX
|
|
IMULQ R15, AX
|
|
XORQ AX, R12
|
|
|
|
finalize:
|
|
XORQ R9, R12
|
|
XORQ R9, R13
|
|
|
|
ADDQ R13, R12
|
|
ADDQ R12, R13
|
|
|
|
// fmix128 (both interleaved)
|
|
MOVQ R12, DX
|
|
MOVQ R13, AX
|
|
|
|
SHRQ $33, DX
|
|
SHRQ $33, AX
|
|
|
|
XORQ DX, R12
|
|
XORQ AX, R13
|
|
|
|
MOVQ $0xff51afd7ed558ccd, CX
|
|
|
|
IMULQ CX, R12
|
|
IMULQ CX, R13
|
|
|
|
MOVQ R12, DX
|
|
MOVQ R13, AX
|
|
|
|
SHRQ $33, DX
|
|
SHRQ $33, AX
|
|
|
|
XORQ DX, R12
|
|
XORQ AX, R13
|
|
|
|
MOVQ $0xc4ceb9fe1a85ec53, CX
|
|
|
|
IMULQ CX, R12
|
|
IMULQ CX, R13
|
|
|
|
MOVQ R12, DX
|
|
MOVQ R13, AX
|
|
|
|
SHRQ $33, DX
|
|
SHRQ $33, AX
|
|
|
|
XORQ DX, R12
|
|
XORQ AX, R13
|
|
|
|
ADDQ R13, R12
|
|
ADDQ R12, R13
|
|
|
|
MOVQ R12, (BX)
|
|
MOVQ R13, 8(BX)
|
|
RET
|