// Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.

// +build !appengine
// +build !noasm
// +build gc

#include "textflag.h"

// func encodeBlockAsm(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst and returns the number of bytes written (the final
// write pointer minus dst_base). Returns 0 when one of the output-size checks
// against the limit stored at 0(SP) fails.
//
// Stack frame (65560 bytes):
//   0(SP)   8 bytes      dst limit: dst_base + src_len - 5 - (src_len >> 5)
//   8(SP)   4 bytes      src_len - 8, the upper limit for the search index
//   12(SP)  4 bytes      source index up to which input has been emitted
//   16(SP)  4 bytes      current repeat offset (starts at 1)
//   20(SP)  4 bytes      index at which the search continues after a miss
//   24(SP)  65536 bytes  table of 16384 32-bit source indices, zeroed on entry
//
// Registers in the main loop:
//   AX = write pointer into dst, CX = current source index, DX = src_base.
//
// NOTE(review): this file is generated; the long literal copy below differs
// from the generator output (128-byte block path removed), so the same change
// presumably needs to be made in gen.go - confirm before regenerating.
TEXT ·encodeBlockAsm(SB), $65560-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000200, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBlockAsm:
	// Clear the table at 24(SP): 0x200 iterations of 128 bytes each.
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ  $0x80, DX
	DECQ  CX
	JNZ   zero_loop_encodeBlockAsm

	// Initialise the frame slots described in the header, then start the
	// search at source index 1 with repeat offset 1.
	MOVL  $0x00000000, 12(SP)
	MOVQ  src_len+32(FP), CX
	LEAQ  -5(CX), DX
	LEAQ  -8(CX), BP
	MOVL  BP, 8(SP)
	SHRQ  $0x05, CX
	SUBL  CX, DX
	LEAQ  (AX)(DX*1), DX
	MOVQ  DX, (SP)
	MOVL  $0x00000001, CX
	MOVL  CX, 16(SP)
	MOVQ  src_base+24(FP), DX

search_loop_encodeBlockAsm:
	// SI = 8 source bytes at CX. The next search index is
	// CX + 4 + ((CX - 12(SP)) >> 6); if it exceeds 8(SP) the rest of the
	// input is emitted as a literal run, otherwise it is saved in 20(SP).
	MOVQ  (DX)(CX*1), SI
	MOVL  CX, BP
	SUBL  12(SP), BP
	SHRL  $0x06, BP
	LEAL  4(CX)(BP*1), BP
	MOVL  8(SP), DI
	CMPL  BP, DI
	JGT   emit_remainder_encodeBlockAsm
	MOVL  BP, 20(SP)

	// Table slots are 14-bit values derived from 6 source bytes: shift out
	// the top 2 bytes, multiply by the constant in R8, keep the top 14 bits.
	// R9 = slot for the bytes at CX, R10 = slot for the bytes at CX+1.
	MOVQ  $0x0000cf1bbcdcbf9b, R8
	MOVQ  SI, R9
	MOVQ  SI, R10
	SHRQ  $0x08, R10
	SHLQ  $0x10, R9
	IMULQ R8, R9
	SHRQ  $0x32, R9
	SHLQ  $0x10, R10
	IMULQ R8, R10
	SHRQ  $0x32, R10

	// BP and DI receive the previous entries (candidate indices); the slots
	// are then overwritten with CX and CX+1.
	MOVL  24(SP)(R9*4), BP
	MOVL  24(SP)(R10*4), DI
	MOVL  CX, 24(SP)(R9*4)
	LEAL  1(CX), R9
	MOVL  R9, 24(SP)(R10*4)

	// R9 = slot for the bytes at CX+2 (used only if no repeat is found).
	MOVQ  SI, R9
	SHRQ  $0x10, R9
	SHLQ  $0x10, R9
	IMULQ R8, R9
	SHRQ  $0x32, R9

	// Repeat check: compare the 4 bytes at CX+1 with the 4 bytes one repeat
	// offset (16(SP)) before them.
	MOVL  CX, R8
	SUBL  16(SP), R8
	MOVL  1(DX)(R8*1), R10
	MOVQ  SI, R8
	SHRQ  $0x08, R8
	CMPL  R8, R10
	JNE   no_repeat_found_encodeBlockAsm

	// Repeat found at SI = CX+1; BP = SI - offset is the matching position.
	// DI keeps the value of 12(SP) for the test after the forward extension.
	LEAL  1(CX), SI
	MOVL  12(SP), DI
	MOVL  SI, BP
	SUBL  16(SP), BP
	JZ    repeat_extend_back_end_encodeBlockAsm

repeat_extend_back_loop_encodeBlockAsm:
	// Move SI and BP backwards while the preceding bytes are equal,
	// SI > DI and BP has not reached 0.
	CMPL SI, DI
	JLE  repeat_extend_back_end_encodeBlockAsm
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(SI*1), R8
	CMPB BL, R8
	JNE  repeat_extend_back_end_encodeBlockAsm
	LEAL -1(SI), SI
	DECL BP
	JNZ  repeat_extend_back_loop_encodeBlockAsm

repeat_extend_back_end_encodeBlockAsm:
	// Emit source bytes [12(SP), SI) as a literal run unless it is empty.
	// R9 = source pointer, R8 = length, BP = length-1 selects the header.
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R9
	SUBL BP, R8
	LEAL -1(R8), BP
	CMPL BP, $0x3c
	JLT  one_byte_repeat_emit_encodeBlockAsm
	CMPL BP, $0x00000100
	JLT  two_bytes_repeat_emit_encodeBlockAsm
	CMPL BP, $0x00010000
	JLT  three_bytes_repeat_emit_encodeBlockAsm
	CMPL BP, $0x01000000
	JLT  four_bytes_repeat_emit_encodeBlockAsm

	// Header: 0xfc followed by length-1 as 4 bytes.
	MOVB $0xfc, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_repeat_emit_encodeBlockAsm

four_bytes_repeat_emit_encodeBlockAsm:
	// Header: 0xf8 followed by length-1 as 3 bytes.
	MOVL BP, R10
	SHRL $0x10, R10
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB R10, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_repeat_emit_encodeBlockAsm

three_bytes_repeat_emit_encodeBlockAsm:
	// Header: 0xf4 followed by length-1 as 2 bytes.
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_repeat_emit_encodeBlockAsm

two_bytes_repeat_emit_encodeBlockAsm:
	// Header: 0xf0 followed by length-1 as 1 byte. Runs with length-1 < 64
	// use the short copy, longer ones the long copy.
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	CMPL BP, $0x40
	JL   memmove_repeat_emit_encodeBlockAsm
	JMP  memmove_long_repeat_emit_encodeBlockAsm

one_byte_repeat_emit_encodeBlockAsm:
	// Header: single byte (length-1) << 2.
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeBlockAsm:
	// Short copy of R8 (at most 64) bytes from (R9) to (AX), dispatched by
	// size; each variant uses head and tail moves that may overlap.
	// BP = AX + R8 becomes the new write pointer.
	LEAQ (AX)(R8*1), BP
	CMPQ R8, $0x03
	JB   emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2
	JE   emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3
	CMPQ R8, $0x08
	JB   emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4through7
	CMPQ R8, $0x10
	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16
	CMPQ R8, $0x20
	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_1or2:
	MOVB (R9), R10
	MOVB -1(R9)(R8*1), R9
	MOVB R10, (AX)
	MOVB R9, -1(AX)(R8*1)
	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_3:
	MOVW (R9), R10
	MOVB 2(R9), R9
	MOVW R10, (AX)
	MOVB R9, 2(AX)
	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_4through7:
	MOVL (R9), R10
	MOVL -4(R9)(R8*1), R9
	MOVL R10, (AX)
	MOVL R9, -4(AX)(R8*1)
	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm

emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_repeat_emit_encodeBlockAsm:
	MOVQ BP, AX
	JMP  emit_literal_done_repeat_emit_encodeBlockAsm

memmove_long_repeat_emit_encodeBlockAsm:
	// Long copy of R8 (more than 64) bytes from (R9) to (AX). The first and
	// last 32 bytes are loaded up front and stored last with unaligned
	// moves; the middle is copied in 32-byte steps whose destinations are
	// 32-byte aligned. R12 = 64 - (AX & 31) is the end offset of the first
	// step and never exceeds R8 when a step executes.
	// A 128-byte block path that used to run for lengths 128..255 copied up
	// to 64 bytes past the end of the run and has been removed.
	LEAQ  (AX)(R8*1), BP
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ  AX, R10
	ANDL  $0x0000001f, R10
	MOVQ  $0x00000040, R12
	SUBQ  R10, R12

emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(R9)(R12*1), X4
	MOVOU -16(R9)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ  $0x20, R12
	CMPQ  R8, R12
	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ  BP, AX

emit_literal_done_repeat_emit_encodeBlockAsm:
	// Advance CX by 5 and measure how far src[CX:] keeps matching the data
	// one repeat offset earlier. R8 = bytes left in src, R11 = match length.
	ADDL $0x05, CX
	MOVL CX, BP
	SUBL 16(SP), BP
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(BP*1), BP
	XORL R11, R11
	CMPL R8, $0x08
	JL   matchlen_single_repeat_extend

matchlen_loopback_repeat_extend:
	// Compare 8 bytes at a time; on a difference the index of the lowest
	// set bit of the XOR, divided by 8, is the number of equal bytes.
	MOVQ  (R9)(R11*1), R10
	XORQ  (BP)(R11*1), R10
	TESTQ R10, R10
	JZ    matchlen_loop_repeat_extend
	BSFQ  R10, R10
	SARQ  $0x03, R10
	LEAL  (R11)(R10*1), R11
	JMP   repeat_extend_forward_end_encodeBlockAsm

matchlen_loop_repeat_extend:
	LEAL -8(R8), R8
	LEAL 8(R11), R11
	CMPL R8, $0x08
	JGE  matchlen_loopback_repeat_extend

matchlen_single_repeat_extend:
	TESTL R8, R8
	JZ    repeat_extend_forward_end_encodeBlockAsm

matchlen_single_loopback_repeat_extend:
	// Fewer than 8 bytes left: compare byte by byte.
	MOVB (R9)(R11*1), R10
	CMPB (BP)(R11*1), R10
	JNE  repeat_extend_forward_end_encodeBlockAsm
	LEAL 1(R11), R11
	DECL R8
	JNZ  matchlen_single_loopback_repeat_extend

repeat_extend_forward_end_encodeBlockAsm:
	// CX = end of the match, BP = CX - SI = match length, SI = offset.
	// DI still holds the 12(SP) value read before the literal was emitted;
	// when it is zero the match is written in the copy forms instead.
	ADDL  R11, CX
	MOVL  CX, BP
	SUBL  SI, BP
	MOVL  16(SP), SI
	TESTL DI, DI
	JZ    repeat_as_copy_encodeBlockAsm

emit_repeat_again_match_repeat_encodeBlockAsm:
	// Repeat encoding of length DI (BP = DI - 4), 2 to 5 bytes depending on
	// the length and, for the 2-byte form with offset bits, on SI < 2048.
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE  repeat_two_match_repeat_encodeBlockAsm
	CMPL DI, $0x0c
	JGE  cant_repeat_two_offset_match_repeat_encodeBlockAsm
	CMPL SI, $0x00000800
	JLT  repeat_two_offset_match_repeat_encodeBlockAsm

cant_repeat_two_offset_match_repeat_encodeBlockAsm:
	CMPL BP, $0x00000104
	JLT  repeat_three_match_repeat_encodeBlockAsm
	CMPL BP, $0x00010100
	JLT  repeat_four_match_repeat_encodeBlockAsm
	CMPL BP, $0x0100ffff
	JLT  repeat_five_match_repeat_encodeBlockAsm

	// Too long for one 5-byte form: write the fixed chunk 1d 00 fb ff ff,
	// subtract 16842747 from the length and encode the rest.
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_match_repeat_encodeBlockAsm

repeat_five_match_repeat_encodeBlockAsm:
	// 5 bytes: 1d 00, then BP-65536 as 3 bytes.
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP  repeat_end_emit_encodeBlockAsm

repeat_four_match_repeat_encodeBlockAsm:
	// 4 bytes: 19 00, then BP-256 as 2 bytes.
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP  repeat_end_emit_encodeBlockAsm

repeat_three_match_repeat_encodeBlockAsm:
	// 3 bytes: 15 00, then BP-4 as 1 byte.
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP  repeat_end_emit_encodeBlockAsm

repeat_two_match_repeat_encodeBlockAsm:
	// 2 bytes: the 16-bit value (BP << 2) | 1.
	SHLL $0x02, BP
	ORL  $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm

repeat_two_offset_match_repeat_encodeBlockAsm:
	// 2 bytes: tag = 1 + BP*4 | (SI >> 8) << 5, then the low byte of SI.
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm

repeat_as_copy_encodeBlockAsm:
	// Copy forms: offsets below 65536 use a 2-byte offset, others 4 bytes.
	CMPL SI, $0x00010000
	JL   two_byte_offset_repeat_as_copy_encodeBlockAsm

four_bytes_loop_back_repeat_as_copy_encodeBlockAsm:
	// Length above 64: write 0xff plus the 4-byte offset, subtract 64, and
	// encode what is left as a repeat (or as a final copy if below 4).
	CMPL BP, $0x40
	JLE  four_bytes_remain_repeat_as_copy_encodeBlockAsm
	MOVB $0xff, (AX)
	MOVL SI, 1(AX)
	LEAL -64(BP), BP
	ADDQ $0x05, AX
	CMPL BP, $0x04
	JL   four_bytes_remain_repeat_as_copy_encodeBlockAsm

emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
	// Same repeat encoding as emit_repeat_again_match_repeat above.
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL DI, $0x0c
	JGE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL SI, $0x00000800
	JLT  repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
	CMPL BP, $0x00000104
	JLT  repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL BP, $0x00010100
	JLT  repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
	CMPL BP, $0x0100ffff
	JLT  repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy

repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP  repeat_end_emit_encodeBlockAsm

repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP  repeat_end_emit_encodeBlockAsm

repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP  repeat_end_emit_encodeBlockAsm

repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
	SHLL $0x02, BP
	ORL  $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm

repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm
	// Not reachable: directly preceded by an unconditional JMP.
	JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm

four_bytes_remain_repeat_as_copy_encodeBlockAsm:
	// Final copy with 4-byte offset: tag low byte = 3 + (length-1)*4.
	// Nothing is written when the remaining length is 0.
	TESTL BP, BP
	JZ    repeat_end_emit_encodeBlockAsm
	MOVB  $0x03, BL
	LEAL  -4(BX)(BP*4), BP
	MOVB  BP, (AX)
	MOVL  SI, 1(AX)
	ADDQ  $0x05, AX
	JMP   repeat_end_emit_encodeBlockAsm

two_byte_offset_repeat_as_copy_encodeBlockAsm:
	// Length above 64: write 0xee plus the 2-byte offset, subtract 60, and
	// encode what is left as a repeat.
	CMPL BP, $0x40
	JLE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(BP), BP
	ADDQ $0x03, AX

emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	// Same repeat encoding as emit_repeat_again_match_repeat above.
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL DI, $0x0c
	JGE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL SI, $0x00000800
	JLT  repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	CMPL BP, $0x00000104
	JLT  repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL BP, $0x00010100
	JLT  repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
	CMPL BP, $0x0100ffff
	JLT  repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short

repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP  repeat_end_emit_encodeBlockAsm

repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP  repeat_end_emit_encodeBlockAsm

repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP  repeat_end_emit_encodeBlockAsm

repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	SHLL $0x02, BP
	ORL  $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm

repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm
	// Not reachable: directly preceded by an unconditional JMP.
	JMP two_byte_offset_repeat_as_copy_encodeBlockAsm

two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
	// Length below 12 and offset below 2048: 2-byte copy with
	// tag = 1 + (length-4)*4 | (offset >> 8) << 5, then the low offset byte.
	CMPL BP, $0x0c
	JGE  emit_copy_three_repeat_as_copy_encodeBlockAsm
	CMPL SI, $0x00000800
	JGE  emit_copy_three_repeat_as_copy_encodeBlockAsm
	MOVB $0x01, BL
	LEAL -16(BX)(BP*4), BP
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm

emit_copy_three_repeat_as_copy_encodeBlockAsm:
	// 3-byte copy: tag low byte = 2 + (length-1)*4, then the 2-byte offset.
	MOVB $0x02, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeBlockAsm:
	// Everything up to CX is now emitted. Finish with the remainder once CX
	// has reached 8(SP), otherwise keep searching.
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE  emit_remainder_encodeBlockAsm
	JMP  search_loop_encodeBlockAsm

no_repeat_found_encodeBlockAsm:
	// Test the table candidates by comparing 4 source bytes each:
	// BP against the bytes at CX, DI against CX+1, and the previous entry of
	// slot R9 against CX+2. Slot R9 is set to CX+2 either way.
	CMPL (DX)(BP*1), SI
	JEQ  candidate_match_encodeBlockAsm
	SHRQ $0x08, SI
	MOVL 24(SP)(R9*4), BP
	LEAL 2(CX), R8
	CMPL (DX)(DI*1), SI
	JEQ  candidate2_match_encodeBlockAsm
	MOVL R8, 24(SP)(R9*4)
	SHRQ $0x08, SI
	CMPL (DX)(BP*1), SI
	JEQ  candidate3_match_encodeBlockAsm

	// No candidate matched: continue at the index saved in 20(SP).
	MOVL 20(SP), CX
	JMP  search_loop_encodeBlockAsm

candidate3_match_encodeBlockAsm:
	ADDL $0x02, CX
	JMP  candidate_match_encodeBlockAsm

candidate2_match_encodeBlockAsm:
	MOVL R8, 24(SP)(R9*4)
	INCL CX
	MOVL DI, BP

candidate_match_encodeBlockAsm:
	// Match between source index CX and candidate index BP. Extend it
	// backwards while CX > 12(SP), BP > 0 and the preceding bytes are equal.
	MOVL  12(SP), SI
	TESTL BP, BP
	JZ    match_extend_back_end_encodeBlockAsm

match_extend_back_loop_encodeBlockAsm:
	CMPL CX, SI
	JLE  match_extend_back_end_encodeBlockAsm
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE  match_extend_back_end_encodeBlockAsm
	LEAL -1(CX), CX
	DECL BP
	JZ   match_extend_back_end_encodeBlockAsm
	JMP  match_extend_back_loop_encodeBlockAsm

match_extend_back_end_encodeBlockAsm:
	// Return 0 unless AX + 4 + (CX - 12(SP)) is below the dst limit.
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 4(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL   match_dst_size_check_encodeBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeBlockAsm:
	// Emit source bytes [12(SP), CX) as a literal run unless it is empty.
	// SI = source pointer, R8 = length, DI = length-1 selects the header.
	// Header forms are the same as for the literal run emitted before a
	// repeat.
	MOVL CX, SI
	MOVL 12(SP), DI
	CMPL DI, SI
	JEQ  emit_literal_done_match_emit_encodeBlockAsm
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(DI*1), SI
	SUBL DI, R8
	LEAL -1(R8), DI
	CMPL DI, $0x3c
	JLT  one_byte_match_emit_encodeBlockAsm
	CMPL DI, $0x00000100
	JLT  two_bytes_match_emit_encodeBlockAsm
	CMPL DI, $0x00010000
	JLT  three_bytes_match_emit_encodeBlockAsm
	CMPL DI, $0x01000000
	JLT  four_bytes_match_emit_encodeBlockAsm
	MOVB $0xfc, (AX)
	MOVL DI, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_match_emit_encodeBlockAsm

four_bytes_match_emit_encodeBlockAsm:
	MOVL DI, R9
	SHRL $0x10, R9
	MOVB $0xf8, (AX)
	MOVW DI, 1(AX)
	MOVB R9, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_match_emit_encodeBlockAsm

three_bytes_match_emit_encodeBlockAsm:
	MOVB $0xf4, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_match_emit_encodeBlockAsm

two_bytes_match_emit_encodeBlockAsm:
	MOVB $0xf0, (AX)
	MOVB DI, 1(AX)
	ADDQ $0x02, AX
	CMPL DI, $0x40
	JL   memmove_match_emit_encodeBlockAsm
	JMP  memmove_long_match_emit_encodeBlockAsm

one_byte_match_emit_encodeBlockAsm:
	SHLB $0x02, DI
	MOVB DI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeBlockAsm:
	// Short copy of R8 (at most 64) bytes from (SI) to (AX);
	// DI = AX + R8 becomes the new write pointer.
	LEAQ (AX)(R8*1), DI
	CMPQ R8, $0x03
	JB   emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2
	JE   emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3
	CMPQ R8, $0x08
	JB   emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4through7
	CMPQ R8, $0x10
	JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16
	CMPQ R8, $0x20
	JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
	JMP  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (AX)
	MOVB SI, -1(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (AX)
	MOVB SI, 2(AX)
	JMP  memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_4through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (AX)
	MOVL SI, -4(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (AX)
	MOVQ SI, -8(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP   memmove_end_copy_match_emit_encodeBlockAsm

emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_encodeBlockAsm:
	MOVQ DI, AX
	JMP  emit_literal_done_match_emit_encodeBlockAsm

memmove_long_match_emit_encodeBlockAsm:
	// Long copy of R8 (more than 64) bytes from (SI) to (AX). Head and tail
	// 32 bytes are loaded first and stored last; the middle is copied in
	// 32-byte steps to 32-byte-aligned destinations, with R11 as the end
	// offset of each step (first value 64 - (AX & 31)).
	// The former 128-byte block path (lengths 128..255) copied up to 64
	// bytes past the end of the run and has been removed.
	LEAQ  (AX)(R8*1), DI
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVQ  AX, R9
	ANDL  $0x0000001f, R9
	MOVQ  $0x00000040, R11
	SUBQ  R9, R11

emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(SI)(R11*1), X4
	MOVOU -16(SI)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ  $0x20, R11
	CMPQ  R8, R11
	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ  DI, AX

emit_literal_done_match_emit_encodeBlockAsm:
match_nolit_loop_encodeBlockAsm:
	// Store the offset CX - BP in 16(SP), skip the 4 bytes already known to
	// match, and measure the further match length into R9.
	// SI = bytes left in src, DI / BP = pointers being compared.
	MOVL CX, SI
	SUBL BP, SI
	MOVL SI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), SI
	SUBL CX, SI
	LEAQ (DX)(CX*1), DI
	LEAQ (DX)(BP*1), BP
	XORL R9, R9
	CMPL SI, $0x08
	JL   matchlen_single_match_nolit_encodeBlockAsm

matchlen_loopback_match_nolit_encodeBlockAsm:
	// 8 bytes at a time; on a difference, lowest set bit of the XOR / 8 is
	// the number of equal bytes.
	MOVQ  (DI)(R9*1), R8
	XORQ  (BP)(R9*1), R8
	TESTQ R8, R8
	JZ    matchlen_loop_match_nolit_encodeBlockAsm
	BSFQ  R8, R8
	SARQ  $0x03, R8
	LEAL  (R9)(R8*1), R9
	JMP   match_nolit_end_encodeBlockAsm

matchlen_loop_match_nolit_encodeBlockAsm:
	LEAL -8(SI), SI
	LEAL 8(R9), R9
	CMPL SI, $0x08
	JGE  matchlen_loopback_match_nolit_encodeBlockAsm

matchlen_single_match_nolit_encodeBlockAsm:
	TESTL SI, SI
	JZ    match_nolit_end_encodeBlockAsm

matchlen_single_loopback_match_nolit_encodeBlockAsm:
	// Fewer than 8 bytes left: compare byte by byte.
	MOVB (DI)(R9*1), R8
	CMPB (BP)(R9*1), R8
	JNE  match_nolit_end_encodeBlockAsm
	LEAL 1(R9), R9
	DECL SI
	JNZ  matchlen_single_loopback_match_nolit_encodeBlockAsm

match_nolit_end_encodeBlockAsm:
	// CX = end of the match, R9 = full match length (including the first 4
	// bytes), BP = offset. Offsets below 65536 use the 2-byte-offset forms.
	ADDL R9, CX
	MOVL 16(SP), BP
	ADDL $0x04, R9
	CMPL BP, $0x00010000
	JL   two_byte_offset_match_nolit_encodeBlockAsm

four_bytes_loop_back_match_nolit_encodeBlockAsm:
	// Length above 64: write 0xff plus the 4-byte offset, subtract 64, and
	// encode what is left as a repeat (or as a final copy if below 4).
	CMPL R9, $0x40
	JLE  four_bytes_remain_match_nolit_encodeBlockAsm
	MOVB $0xff, (AX)
	MOVL BP, 1(AX)
	LEAL -64(R9), R9
	ADDQ $0x05, AX
	CMPL R9, $0x04
	JL   four_bytes_remain_match_nolit_encodeBlockAsm

emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
	// Repeat encoding of length SI (R9 = SI - 4) with offset BP; same byte
	// forms as emit_repeat_again_match_repeat above.
	MOVL R9, SI
	LEAL -4(R9), R9
	CMPL SI, $0x08
	JLE  repeat_two_match_nolit_encodeBlockAsm_emit_copy
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
	CMPL BP, $0x00000800
	JLT  repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy

cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
	CMPL R9, $0x00000104
	JLT  repeat_three_match_nolit_encodeBlockAsm_emit_copy
	CMPL R9, $0x00010100
	JLT  repeat_four_match_nolit_encodeBlockAsm_emit_copy
	CMPL R9, $0x0100ffff
	JLT  repeat_five_match_nolit_encodeBlockAsm_emit_copy
	LEAL -16842747(R9), R9
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy

repeat_five_match_nolit_encodeBlockAsm_emit_copy:
	LEAL -65536(R9), R9
	MOVL R9, BP
	MOVW $0x001d, (AX)
	MOVW R9, 2(AX)
	SARL $0x10, BP
	MOVB BP, 4(AX)
	ADDQ $0x05, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm

repeat_four_match_nolit_encodeBlockAsm_emit_copy:
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm

repeat_three_match_nolit_encodeBlockAsm_emit_copy:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_match_nolit_encodeBlockAsm_emit_copy:
	SHLL $0x02, R9
	ORL  $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL  BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm
	// Not reachable: directly preceded by an unconditional JMP.
	JMP four_bytes_loop_back_match_nolit_encodeBlockAsm

four_bytes_remain_match_nolit_encodeBlockAsm:
	// Final copy with 4-byte offset: tag low byte = 3 + (length-1)*4.
	// Nothing is written when the remaining length is 0.
	TESTL R9, R9
	JZ    match_nolit_emitcopy_end_encodeBlockAsm
	MOVB  $0x03, BL
	LEAL  -4(BX)(R9*4), R9
	MOVB  R9, (AX)
	MOVL  BP, 1(AX)
	ADDQ  $0x05, AX
	JMP   match_nolit_emitcopy_end_encodeBlockAsm

two_byte_offset_match_nolit_encodeBlockAsm:
	// Length above 64: write 0xee plus the 2-byte offset, subtract 60, and
	// encode what is left as a repeat.
	CMPL R9, $0x40
	JLE  two_byte_offset_short_match_nolit_encodeBlockAsm
	MOVB $0xee, (AX)
	MOVW BP, 1(AX)
	LEAL -60(R9), R9
	ADDQ $0x03, AX

emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
	// Same repeat encoding as emit_repeat_again_match_repeat above.
	MOVL R9, SI
	LEAL -4(R9), R9
	CMPL SI, $0x08
	JLE  repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL BP, $0x00000800
	JLT  repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
	CMPL R9, $0x00000104
	JLT  repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL R9, $0x00010100
	JLT  repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
	CMPL R9, $0x0100ffff
	JLT  repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
	LEAL -16842747(R9), R9
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short

repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
	LEAL -65536(R9), R9
	MOVL R9, BP
	MOVW $0x001d, (AX)
	MOVW R9, 2(AX)
	SARL $0x10, BP
	MOVB BP, 4(AX)
	ADDQ $0x05, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm

repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm

repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
	SHLL $0x02, R9
	ORL  $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm

repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL  BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm
	// Not reachable: directly preceded by an unconditional JMP.
	JMP two_byte_offset_match_nolit_encodeBlockAsm

two_byte_offset_short_match_nolit_encodeBlockAsm:
	// Length below 12 and offset below 2048: 2-byte copy with
	// tag = 1 + (length-4)*4 | (offset >> 8) << 5, then the low offset byte.
	CMPL R9, $0x0c
	JGE  emit_copy_three_match_nolit_encodeBlockAsm
	CMPL BP, $0x00000800
	JGE  emit_copy_three_match_nolit_encodeBlockAsm
	MOVB $0x01, BL
	LEAL -16(BX)(R9*4), R9
	MOVB BP, 1(AX)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL  BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm

emit_copy_three_match_nolit_encodeBlockAsm:
	// 3-byte copy: tag low byte = 2 + (length-1)*4, then the 2-byte offset.
	MOVB $0x02, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeBlockAsm:
	// Everything up to CX is emitted. Finish with the remainder once CX has
	// reached 8(SP); return 0 if AX has reached the dst limit.
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE  emit_remainder_encodeBlockAsm
	CMPQ AX, (SP)
	JL   match_nolit_dst_ok_encodeBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeBlockAsm:
	// Load 8 bytes at CX-2 and update the table for indices CX-2 (slot DI)
	// and CX (slot R8). BP = previous entry of the slot for CX; if its 4
	// bytes equal the 4 bytes at CX, start another match right away.
	MOVQ  -2(DX)(CX*1), SI
	MOVQ  $0x0000cf1bbcdcbf9b, BP
	MOVQ  SI, DI
	SHRQ  $0x10, SI
	MOVQ  SI, R8
	SHLQ  $0x10, DI
	IMULQ BP, DI
	SHRQ  $0x32, DI
	SHLQ  $0x10, R8
	IMULQ BP, R8
	SHRQ  $0x32, R8
	LEAL  -2(CX), R9
	MOVL  24(SP)(R8*4), BP
	MOVL  R9, 24(SP)(DI*4)
	MOVL  CX, 24(SP)(R8*4)
	CMPL  (DX)(BP*1), SI
	JEQ   match_nolit_loop_encodeBlockAsm
	INCL  CX
	JMP   search_loop_encodeBlockAsm

emit_remainder_encodeBlockAsm:
	// Return 0 unless AX + 4 + (src_len - 12(SP)) is below the dst limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL   emit_remainder_ok_encodeBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeBlockAsm:
	// Emit source bytes [12(SP), src_len) as the final literal run unless it
	// is empty. CX = source pointer, BP = length, DX = length-1 selects the
	// header (DX no longer holds src_base from here on).
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	LEAL -1(BP), DX
	CMPL DX, $0x3c
	JLT  one_byte_emit_remainder_encodeBlockAsm
	CMPL DX, $0x00000100
	JLT  two_bytes_emit_remainder_encodeBlockAsm
	CMPL DX, $0x00010000
	JLT  three_bytes_emit_remainder_encodeBlockAsm
	CMPL DX, $0x01000000
	JLT  four_bytes_emit_remainder_encodeBlockAsm
	MOVB $0xfc, (AX)
	MOVL DX, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_emit_remainder_encodeBlockAsm

four_bytes_emit_remainder_encodeBlockAsm:
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_emit_remainder_encodeBlockAsm

three_bytes_emit_remainder_encodeBlockAsm:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_emit_remainder_encodeBlockAsm

two_bytes_emit_remainder_encodeBlockAsm:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL   memmove_emit_remainder_encodeBlockAsm
	JMP  memmove_long_emit_remainder_encodeBlockAsm

one_byte_emit_remainder_encodeBlockAsm:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeBlockAsm:
	// Short copy of BX (at most 64) bytes from (CX) to (AX);
	// DX = AX + length becomes the new write pointer.
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX
	CMPQ BX, $0x03
	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2
	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3
	CMPQ BX, $0x08
	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7
	CMPQ BX, $0x10
	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16
	CMPQ BX, $0x20
	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32
	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm

emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBlockAsm:
	MOVQ DX, AX
	JMP  emit_literal_done_emit_remainder_encodeBlockAsm

memmove_long_emit_remainder_encodeBlockAsm:
	// Long copy of BX (more than 64) bytes from (CX) to (AX). Head and tail
	// 32 bytes are loaded first and stored last; the middle is copied in
	// 32-byte steps to 32-byte-aligned destinations, with DI as the end
	// offset of each step (first value 64 - (AX & 31)).
	// This run ends at the end of src, so the former 128-byte block path
	// (lengths 128..255), which copied up to 64 bytes past the end of the
	// run, read beyond the source slice; it has been removed.
	LEAQ  (AX)(BP*1), DX
	MOVL  BP, BX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ  AX, BP
	ANDL  $0x0000001f, BP
	MOVQ  $0x00000040, DI
	SUBQ  BP, DI

emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ  $0x20, DI
	CMPQ  BX, DI
	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ  DX, AX

emit_literal_done_emit_remainder_encodeBlockAsm:
	// Return the number of bytes written: AX - dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBlockAsm12B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst using a 4096-entry (12-bit index) hash table kept on
// the stack. Returns the number of bytes written to dst, or 0 when the
// output would reach the computed dst limit (see 0(SP) below).
//
// Stack frame layout:
//   0(SP)   8 bytes  dst limit pointer: dst_base + src_len - 5 - (src_len>>5)
//   8(SP)   4 bytes  sLimit = src_len - 8; searching stops once nextS exceeds it
//   12(SP)  4 bytes  nextEmit: src index of the first byte not yet emitted
//   16(SP)  4 bytes  repeat: current repeat offset (initialised to 1)
//   20(SP)  4 bytes  nextS: src index the search resumes from when nothing matches
//   24(SP)  16384 bytes  hash table, 4096 uint32 src indexes
//
// Registers that keep one role for the whole function:
//   AX = dst write pointer, CX = s (current src index), DX = src base pointer.
//
// NOTE: the file header states this file is generated by gen.go; any change
// made here has to be mirrored in the generator.
TEXT ·encodeBlockAsm12B(SB), $16408-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000080, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Clear the hash table: 0x80 iterations of 128 bytes = 16384 bytes.
zero_loop_encodeBlockAsm12B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ  $0x80, DX
	DECQ  CX
	JNZ   zero_loop_encodeBlockAsm12B

	// nextEmit = 0; sLimit = src_len - 8;
	// dst limit = dst_base + src_len - 5 - (src_len >> 5);
	// s = 1; repeat = 1; DX = src base.
	MOVL  $0x00000000, 12(SP)
	MOVQ  src_len+32(FP), CX
	LEAQ  -5(CX), DX
	LEAQ  -8(CX), BP
	MOVL  BP, 8(SP)
	SHRQ  $0x05, CX
	SUBL  CX, DX
	LEAQ  (AX)(DX*1), DX
	MOVQ  DX, (SP)
	MOVL  $0x00000001, CX
	MOVL  CX, 16(SP)
	MOVQ  src_base+24(FP), DX

	// Main search loop. SI holds the 8 bytes at src[s].
search_loop_encodeBlockAsm12B:
	MOVQ  (DX)(CX*1), SI

	// nextS = s + 4 + ((s - nextEmit) >> 5): the skip grows with the length of
	// the pending literal run. Leave the loop once nextS > sLimit.
	MOVL  CX, BP
	SUBL  12(SP), BP
	SHRL  $0x05, BP
	LEAL  4(CX)(BP*1), BP
	MOVL  8(SP), DI
	CMPL  BP, DI
	JGT   emit_remainder_encodeBlockAsm12B
	MOVL  BP, 20(SP)

	// Hash = ((v << 24) * 0xcf1bbcdcbb) >> 52: the low 5 bytes of v are mixed
	// down to a 12-bit table index. R9 = hash of src[s:], R10 = hash of src[s+1:].
	MOVQ  $0x000000cf1bbcdcbb, R8
	MOVQ  SI, R9
	MOVQ  SI, R10
	SHRQ  $0x08, R10
	SHLQ  $0x18, R9
	IMULQ R8, R9
	SHRQ  $0x34, R9
	SHLQ  $0x18, R10
	IMULQ R8, R10
	SHRQ  $0x34, R10

	// BP = candidate for s, DI = candidate for s+1; then record s and s+1.
	MOVL  24(SP)(R9*4), BP
	MOVL  24(SP)(R10*4), DI
	MOVL  CX, 24(SP)(R9*4)
	LEAL  1(CX), R9
	MOVL  R9, 24(SP)(R10*4)

	// R9 = hash of src[s+2:], consumed in no_repeat_found.
	MOVQ  SI, R9
	SHRQ  $0x10, R9
	SHLQ  $0x18, R9
	IMULQ R8, R9
	SHRQ  $0x34, R9

	// Repeat check: compare the 4 bytes at src[s+1-repeat] with the 4 bytes
	// at src[s+1].
	MOVL  CX, R8
	SUBL  16(SP), R8
	MOVL  1(DX)(R8*1), R10
	MOVQ  SI, R8
	SHRQ  $0x08, R8
	CMPL  R8, R10
	JNE   no_repeat_found_encodeBlockAsm12B

	// Repeat found at base = s+1 (SI). DI = nextEmit, BP = base - repeat.
	LEAL  1(CX), SI
	MOVL  12(SP), DI
	MOVL  SI, BP
	SUBL  16(SP), BP
	JZ    repeat_extend_back_end_encodeBlockAsm12B

	// Extend the repeat backwards while base > nextEmit, BP > 0 and the
	// preceding bytes are equal.
repeat_extend_back_loop_encodeBlockAsm12B:
	CMPL SI, DI
	JLE  repeat_extend_back_end_encodeBlockAsm12B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(SI*1), R8
	CMPB BL, R8
	JNE  repeat_extend_back_end_encodeBlockAsm12B
	LEAL -1(SI), SI
	DECL BP
	JNZ  repeat_extend_back_loop_encodeBlockAsm12B

	// Emit src[nextEmit:base] as a literal run (skipped when empty).
	// R9 = source pointer, R8 = length, BP = length - 1, 12(SP) = base.
repeat_extend_back_end_encodeBlockAsm12B:
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm12B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R9
	SUBL BP, R8
	LEAL -1(R8), BP

	// Literal header, selected by length-1:
	//   < 60      one byte:  (length-1) << 2
	//   < 1<<8    0xf0 followed by 1 length byte
	//   < 1<<16   0xf4 followed by 2 length bytes
	//   < 1<<24   0xf8 followed by 3 length bytes
	//   otherwise 0xfc followed by 4 length bytes
	CMPL BP, $0x3c
	JLT  one_byte_repeat_emit_encodeBlockAsm12B
	CMPL BP, $0x00000100
	JLT  two_bytes_repeat_emit_encodeBlockAsm12B
	CMPL BP, $0x00010000
	JLT  three_bytes_repeat_emit_encodeBlockAsm12B
	CMPL BP, $0x01000000
	JLT  four_bytes_repeat_emit_encodeBlockAsm12B
	MOVB $0xfc, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_repeat_emit_encodeBlockAsm12B

four_bytes_repeat_emit_encodeBlockAsm12B:
	MOVL BP, R10
	SHRL $0x10, R10
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB R10, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_repeat_emit_encodeBlockAsm12B

three_bytes_repeat_emit_encodeBlockAsm12B:
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_repeat_emit_encodeBlockAsm12B

	// length-1 < 64 (length <= 64) takes the short copy; longer runs take
	// memmove_long, which is therefore only entered with length >= 65.
two_bytes_repeat_emit_encodeBlockAsm12B:
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	CMPL BP, $0x40
	JL   memmove_repeat_emit_encodeBlockAsm12B
	JMP  memmove_long_repeat_emit_encodeBlockAsm12B

one_byte_repeat_emit_encodeBlockAsm12B:
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

	// Short copy (1..64 bytes): each size class loads the first and the last
	// chunk before storing either, so the two chunks may overlap.
memmove_repeat_emit_encodeBlockAsm12B:
	LEAQ (AX)(R8*1), BP
	CMPQ R8, $0x03
	JB   emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2
	JE   emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3
	CMPQ R8, $0x08
	JB   emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32
	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_1or2:
	MOVB (R9), R10
	MOVB -1(R9)(R8*1), R9
	MOVB R10, (AX)
	MOVB R9, -1(AX)(R8*1)
	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm12B

emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_3:
	MOVW (R9), R10
	MOVB 2(R9), R9
	MOVW R10, (AX)
	MOVB R9, 2(AX)
	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm12B

emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_4through7:
	MOVL (R9), R10
	MOVL -4(R9)(R8*1), R9
	MOVL R10, (AX)
	MOVL R9, -4(AX)(R8*1)
	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm12B

emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm12B

emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm12B

emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_repeat_emit_encodeBlockAsm12B:
	MOVQ BP, AX
	JMP  emit_literal_done_repeat_emit_encodeBlockAsm12B

	// Long copy (length >= 65). The first and last 32 bytes are loaded up
	// front (X0..X3) and stored last. The middle is copied 32 bytes at a time
	// with aligned stores: R12 starts at 64 - (AX & 31), so AX+R12-32 is
	// 32-byte aligned, and the loop only continues while length >= R12, so
	// every access stays inside [0, length).
	// A 128-byte unrolled loop used to run here when length>>7 == 1 without
	// checking that 128 more bytes remained; it could read and write up to
	// 64 bytes past the run and has been dropped.
memmove_long_repeat_emit_encodeBlockAsm12B:
	LEAQ  (AX)(R8*1), BP
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ  AX, R10
	ANDL  $0x0000001f, R10
	MOVQ  $0x00000040, R12
	SUBQ  R10, R12

emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
	MOVOU -32(R9)(R12*1), X4
	MOVOU -16(R9)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ  $0x20, R12
	CMPQ  R8, R12
	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ  BP, AX

	// Literals done. s += 5 (base is s+1 and 4 bytes are already known to
	// match). R9 = &src[s], BP = &src[s-repeat], R8 = src_len - s,
	// R11 = number of additional matching bytes.
emit_literal_done_repeat_emit_encodeBlockAsm12B:
	ADDL $0x05, CX
	MOVL CX, BP
	SUBL 16(SP), BP
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(BP*1), BP
	XORL R11, R11
	CMPL R8, $0x08
	JL   matchlen_single_repeat_extend

	// Compare 8 bytes at a time; on a difference the index of the lowest set
	// bit of the XOR, divided by 8, is the number of equal leading bytes.
matchlen_loopback_repeat_extend:
	MOVQ  (R9)(R11*1), R10
	XORQ  (BP)(R11*1), R10
	TESTQ R10, R10
	JZ    matchlen_loop_repeat_extend
	BSFQ  R10, R10
	SARQ  $0x03, R10
	LEAL  (R11)(R10*1), R11
	JMP   repeat_extend_forward_end_encodeBlockAsm12B

matchlen_loop_repeat_extend:
	LEAL -8(R8), R8
	LEAL 8(R11), R11
	CMPL R8, $0x08
	JGE  matchlen_loopback_repeat_extend

	// Fewer than 8 bytes left: compare byte by byte.
matchlen_single_repeat_extend:
	TESTL R8, R8
	JZ    repeat_extend_forward_end_encodeBlockAsm12B

matchlen_single_loopback_repeat_extend:
	MOVB (R9)(R11*1), R10
	CMPB (BP)(R11*1), R10
	JNE  repeat_extend_forward_end_encodeBlockAsm12B
	LEAL 1(R11), R11
	DECL R8
	JNZ  matchlen_single_loopback_repeat_extend

	// s += matched; BP = s - base (match length); SI = repeat offset.
	// DI still holds nextEmit as it was before this match: when it is 0 the
	// match is emitted as a copy instead of a repeat.
repeat_extend_forward_end_encodeBlockAsm12B:
	ADDL  R11, CX
	MOVL  CX, BP
	SUBL  SI, BP
	MOVL  16(SP), SI
	TESTL DI, DI
	JZ    repeat_as_copy_encodeBlockAsm12B

	// Emit a repeat of length BP (offset in SI). DI = length, BP = length - 4.
	// The form is chosen by length; lengths too large for the 5-byte form emit
	// a maximal 5-byte repeat and loop.
emit_repeat_again_match_repeat_encodeBlockAsm12B:
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE  repeat_two_match_repeat_encodeBlockAsm12B
	CMPL DI, $0x0c
	JGE  cant_repeat_two_offset_match_repeat_encodeBlockAsm12B
	CMPL SI, $0x00000800
	JLT  repeat_two_offset_match_repeat_encodeBlockAsm12B

cant_repeat_two_offset_match_repeat_encodeBlockAsm12B:
	CMPL BP, $0x00000104
	JLT  repeat_three_match_repeat_encodeBlockAsm12B
	CMPL BP, $0x00010100
	JLT  repeat_four_match_repeat_encodeBlockAsm12B
	CMPL BP, $0x0100ffff
	JLT  repeat_five_match_repeat_encodeBlockAsm12B
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_match_repeat_encodeBlockAsm12B

repeat_five_match_repeat_encodeBlockAsm12B:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP  repeat_end_emit_encodeBlockAsm12B

repeat_four_match_repeat_encodeBlockAsm12B:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP  repeat_end_emit_encodeBlockAsm12B

repeat_three_match_repeat_encodeBlockAsm12B:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP  repeat_end_emit_encodeBlockAsm12B

repeat_two_match_repeat_encodeBlockAsm12B:
	SHLL $0x02, BP
	ORL  $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm12B

repeat_two_offset_match_repeat_encodeBlockAsm12B:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm12B

	// Emit the match as a copy: SI = offset, BP = length.
	// Offsets >= 65536 use the 5-byte form (4-byte offset); smaller offsets
	// use the 2-byte-offset forms.
repeat_as_copy_encodeBlockAsm12B:
	CMPL SI, $0x00010000
	JL   two_byte_offset_repeat_as_copy_encodeBlockAsm12B

four_bytes_loop_back_repeat_as_copy_encodeBlockAsm12B:
	CMPL BP, $0x40
	JLE  four_bytes_remain_repeat_as_copy_encodeBlockAsm12B
	MOVB $0xff, (AX)
	MOVL SI, 1(AX)
	LEAL -64(BP), BP
	ADDQ $0x05, AX
	CMPL BP, $0x04
	JL   four_bytes_remain_repeat_as_copy_encodeBlockAsm12B

	// After a 64-byte copy, the rest of the match (>= 4 bytes) is emitted as a
	// repeat of the same offset.
emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy:
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE  repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy
	CMPL DI, $0x0c
	JGE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy
	CMPL SI, $0x00000800
	JLT  repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy:
	CMPL BP, $0x00000104
	JLT  repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy
	CMPL BP, $0x00010100
	JLT  repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy
	CMPL BP, $0x0100ffff
	JLT  repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy

repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP  repeat_end_emit_encodeBlockAsm12B

repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP  repeat_end_emit_encodeBlockAsm12B

repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP  repeat_end_emit_encodeBlockAsm12B

repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy:
	SHLL $0x02, BP
	ORL  $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm12B

repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm12B
	// Not reachable: follows an unconditional JMP with no label in between.
	JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm12B

	// Remaining length (0..64) with a 4-byte offset: tag = ((length-1) << 2) | 3.
four_bytes_remain_repeat_as_copy_encodeBlockAsm12B:
	TESTL BP, BP
	JZ    repeat_end_emit_encodeBlockAsm12B
	MOVB  $0x03, BL
	LEAL  -4(BX)(BP*4), BP
	MOVB  BP, (AX)
	MOVL  SI, 1(AX)
	ADDQ  $0x05, AX
	JMP   repeat_end_emit_encodeBlockAsm12B

	// Offset < 65536. A match longer than 64 bytes first emits a 60-byte copy
	// (tag 0xee, 2-byte offset); the rest is emitted as a repeat.
two_byte_offset_repeat_as_copy_encodeBlockAsm12B:
	CMPL BP, $0x40
	JLE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(BP), BP
	ADDQ $0x03, AX

emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE  repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
	CMPL DI, $0x0c
	JGE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
	CMPL SI, $0x00000800
	JLT  repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
	CMPL BP, $0x00000104
	JLT  repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
	CMPL BP, $0x00010100
	JLT  repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
	CMPL BP, $0x0100ffff
	JLT  repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short

repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP  repeat_end_emit_encodeBlockAsm12B

repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP  repeat_end_emit_encodeBlockAsm12B

repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP  repeat_end_emit_encodeBlockAsm12B

repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
	SHLL $0x02, BP
	ORL  $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm12B

repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm12B
	// Not reachable: follows an unconditional JMP with no label in between.
	JMP two_byte_offset_repeat_as_copy_encodeBlockAsm12B

	// Length <= 64, offset < 65536. The 2-byte form is used when length < 12
	// and offset < 2048; otherwise the 3-byte form with a 2-byte offset.
two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B:
	CMPL BP, $0x0c
	JGE  emit_copy_three_repeat_as_copy_encodeBlockAsm12B
	CMPL SI, $0x00000800
	JGE  emit_copy_three_repeat_as_copy_encodeBlockAsm12B
	MOVB $0x01, BL
	LEAL -16(BX)(BP*4), BP
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm12B

emit_copy_three_repeat_as_copy_encodeBlockAsm12B:
	MOVB $0x02, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

	// nextEmit = s; finish when s >= sLimit, otherwise keep searching.
repeat_end_emit_encodeBlockAsm12B:
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE  emit_remainder_encodeBlockAsm12B
	JMP  search_loop_encodeBlockAsm12B

	// No repeat. Test the candidates for s, s+1 and s+2 in turn by comparing
	// 4 bytes; SI is shifted one byte per step. If none matches, s = nextS.
no_repeat_found_encodeBlockAsm12B:
	CMPL (DX)(BP*1), SI
	JEQ  candidate_match_encodeBlockAsm12B
	SHRQ $0x08, SI
	MOVL 24(SP)(R9*4), BP
	LEAL 2(CX), R8
	CMPL (DX)(DI*1), SI
	JEQ  candidate2_match_encodeBlockAsm12B
	MOVL R8, 24(SP)(R9*4)
	SHRQ $0x08, SI
	CMPL (DX)(BP*1), SI
	JEQ  candidate3_match_encodeBlockAsm12B
	MOVL 20(SP), CX
	JMP  search_loop_encodeBlockAsm12B

candidate3_match_encodeBlockAsm12B:
	ADDL $0x02, CX
	JMP  candidate_match_encodeBlockAsm12B

candidate2_match_encodeBlockAsm12B:
	MOVL R8, 24(SP)(R9*4)
	INCL CX
	MOVL DI, BP

	// Match at s (CX) against candidate (BP). Extend backwards while
	// s > nextEmit, candidate > 0 and the preceding bytes are equal.
candidate_match_encodeBlockAsm12B:
	MOVL  12(SP), SI
	TESTL BP, BP
	JZ    match_extend_back_end_encodeBlockAsm12B

match_extend_back_loop_encodeBlockAsm12B:
	CMPL CX, SI
	JLE  match_extend_back_end_encodeBlockAsm12B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE  match_extend_back_end_encodeBlockAsm12B
	LEAL -1(CX), CX
	DECL BP
	JZ   match_extend_back_end_encodeBlockAsm12B
	JMP  match_extend_back_loop_encodeBlockAsm12B

	// Output bound check: return 0 unless AX + 4 + (s - nextEmit) is below
	// the dst limit.
match_extend_back_end_encodeBlockAsm12B:
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 4(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL   match_dst_size_check_encodeBlockAsm12B
	MOVQ $0x00000000, ret+48(FP)
	RET

	// Emit src[nextEmit:s] as a literal run (skipped when empty).
	// SI = source pointer, R8 = length, DI = length - 1, 12(SP) = s.
	// Header selection is the same as for the repeat literals above.
match_dst_size_check_encodeBlockAsm12B:
	MOVL CX, SI
	MOVL 12(SP), DI
	CMPL DI, SI
	JEQ  emit_literal_done_match_emit_encodeBlockAsm12B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(DI*1), SI
	SUBL DI, R8
	LEAL -1(R8), DI
	CMPL DI, $0x3c
	JLT  one_byte_match_emit_encodeBlockAsm12B
	CMPL DI, $0x00000100
	JLT  two_bytes_match_emit_encodeBlockAsm12B
	CMPL DI, $0x00010000
	JLT  three_bytes_match_emit_encodeBlockAsm12B
	CMPL DI, $0x01000000
	JLT  four_bytes_match_emit_encodeBlockAsm12B
	MOVB $0xfc, (AX)
	MOVL DI, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_match_emit_encodeBlockAsm12B

four_bytes_match_emit_encodeBlockAsm12B:
	MOVL DI, R9
	SHRL $0x10, R9
	MOVB $0xf8, (AX)
	MOVW DI, 1(AX)
	MOVB R9, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_match_emit_encodeBlockAsm12B

three_bytes_match_emit_encodeBlockAsm12B:
	MOVB $0xf4, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_match_emit_encodeBlockAsm12B

two_bytes_match_emit_encodeBlockAsm12B:
	MOVB $0xf0, (AX)
	MOVB DI, 1(AX)
	ADDQ $0x02, AX
	CMPL DI, $0x40
	JL   memmove_match_emit_encodeBlockAsm12B
	JMP  memmove_long_match_emit_encodeBlockAsm12B

one_byte_match_emit_encodeBlockAsm12B:
	SHLB $0x02, DI
	MOVB DI, (AX)
	ADDQ $0x01, AX

	// Short copy (1..64 bytes), first and last chunk loaded before storing.
memmove_match_emit_encodeBlockAsm12B:
	LEAQ (AX)(R8*1), DI
	CMPQ R8, $0x03
	JB   emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2
	JE   emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3
	CMPQ R8, $0x08
	JB   emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32
	JMP  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (AX)
	MOVB SI, -1(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeBlockAsm12B

emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (AX)
	MOVB SI, 2(AX)
	JMP  memmove_end_copy_match_emit_encodeBlockAsm12B

emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_4through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (AX)
	MOVL SI, -4(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeBlockAsm12B

emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (AX)
	MOVQ SI, -8(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeBlockAsm12B

emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP   memmove_end_copy_match_emit_encodeBlockAsm12B

emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_encodeBlockAsm12B:
	MOVQ DI, AX
	JMP  emit_literal_done_match_emit_encodeBlockAsm12B

	// Long copy (length >= 65): head and tail are loaded up front (X0..X3)
	// and stored last; the middle is copied 32 bytes at a time with aligned
	// stores starting at offset R11 - 32, where R11 = 64 - (AX & 31). The loop
	// continues only while length >= R11, so no access leaves [0, length).
	// The former 128-byte unrolled loop could overrun the run and was dropped.
memmove_long_match_emit_encodeBlockAsm12B:
	LEAQ  (AX)(R8*1), DI
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVQ  AX, R9
	ANDL  $0x0000001f, R9
	MOVQ  $0x00000040, R11
	SUBQ  R9, R11

emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
	MOVOU -32(SI)(R11*1), X4
	MOVOU -16(SI)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ  $0x20, R11
	CMPQ  R8, R11
	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ  DI, AX

	// Match with no pending literals. repeat = s - candidate; skip the 4
	// bytes already known to match, then extend forward.
	// DI = &src[s], BP = &src[candidate], SI = src_len - s, R9 = extra length.
emit_literal_done_match_emit_encodeBlockAsm12B:
match_nolit_loop_encodeBlockAsm12B:
	MOVL CX, SI
	SUBL BP, SI
	MOVL SI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), SI
	SUBL CX, SI
	LEAQ (DX)(CX*1), DI
	LEAQ (DX)(BP*1), BP
	XORL R9, R9
	CMPL SI, $0x08
	JL   matchlen_single_match_nolit_encodeBlockAsm12B

	// 8 bytes at a time; BSFQ of the XOR, divided by 8, counts equal bytes.
matchlen_loopback_match_nolit_encodeBlockAsm12B:
	MOVQ  (DI)(R9*1), R8
	XORQ  (BP)(R9*1), R8
	TESTQ R8, R8
	JZ    matchlen_loop_match_nolit_encodeBlockAsm12B
	BSFQ  R8, R8
	SARQ  $0x03, R8
	LEAL  (R9)(R8*1), R9
	JMP   match_nolit_end_encodeBlockAsm12B

matchlen_loop_match_nolit_encodeBlockAsm12B:
	LEAL -8(SI), SI
	LEAL 8(R9), R9
	CMPL SI, $0x08
	JGE  matchlen_loopback_match_nolit_encodeBlockAsm12B

matchlen_single_match_nolit_encodeBlockAsm12B:
	TESTL SI, SI
	JZ    match_nolit_end_encodeBlockAsm12B

matchlen_single_loopback_match_nolit_encodeBlockAsm12B:
	MOVB (DI)(R9*1), R8
	CMPB (BP)(R9*1), R8
	JNE  match_nolit_end_encodeBlockAsm12B
	LEAL 1(R9), R9
	DECL SI
	JNZ  matchlen_single_loopback_match_nolit_encodeBlockAsm12B

	// s += extra; BP = offset (repeat); R9 = total match length (extra + 4).
	// The copy is emitted with the same scheme as repeat_as_copy above.
match_nolit_end_encodeBlockAsm12B:
	ADDL R9, CX
	MOVL 16(SP), BP
	ADDL $0x04, R9
	CMPL BP, $0x00010000
	JL   two_byte_offset_match_nolit_encodeBlockAsm12B

four_bytes_loop_back_match_nolit_encodeBlockAsm12B:
	CMPL R9, $0x40
	JLE  four_bytes_remain_match_nolit_encodeBlockAsm12B
	MOVB $0xff, (AX)
	MOVL BP, 1(AX)
	LEAL -64(R9), R9
	ADDQ $0x05, AX
	CMPL R9, $0x04
	JL   four_bytes_remain_match_nolit_encodeBlockAsm12B

emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy:
	MOVL R9, SI
	LEAL -4(R9), R9
	CMPL SI, $0x08
	JLE  repeat_two_match_nolit_encodeBlockAsm12B_emit_copy
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy
	CMPL BP, $0x00000800
	JLT  repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy

cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy:
	CMPL R9, $0x00000104
	JLT  repeat_three_match_nolit_encodeBlockAsm12B_emit_copy
	CMPL R9, $0x00010100
	JLT  repeat_four_match_nolit_encodeBlockAsm12B_emit_copy
	CMPL R9, $0x0100ffff
	JLT  repeat_five_match_nolit_encodeBlockAsm12B_emit_copy
	LEAL -16842747(R9), R9
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy

repeat_five_match_nolit_encodeBlockAsm12B_emit_copy:
	LEAL -65536(R9), R9
	MOVL R9, BP
	MOVW $0x001d, (AX)
	MOVW R9, 2(AX)
	SARL $0x10, BP
	MOVB BP, 4(AX)
	ADDQ $0x05, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B

repeat_four_match_nolit_encodeBlockAsm12B_emit_copy:
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B

repeat_three_match_nolit_encodeBlockAsm12B_emit_copy:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B

repeat_two_match_nolit_encodeBlockAsm12B_emit_copy:
	SHLL $0x02, R9
	ORL  $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B

repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy:
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL  BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
	// Not reachable: follows an unconditional JMP with no label in between.
	JMP four_bytes_loop_back_match_nolit_encodeBlockAsm12B

four_bytes_remain_match_nolit_encodeBlockAsm12B:
	TESTL R9, R9
	JZ    match_nolit_emitcopy_end_encodeBlockAsm12B
	MOVB  $0x03, BL
	LEAL  -4(BX)(R9*4), R9
	MOVB  R9, (AX)
	MOVL  BP, 1(AX)
	ADDQ  $0x05, AX
	JMP   match_nolit_emitcopy_end_encodeBlockAsm12B

two_byte_offset_match_nolit_encodeBlockAsm12B:
	CMPL R9, $0x40
	JLE  two_byte_offset_short_match_nolit_encodeBlockAsm12B
	MOVB $0xee, (AX)
	MOVW BP, 1(AX)
	LEAL -60(R9), R9
	ADDQ $0x03, AX

emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short:
	MOVL R9, SI
	LEAL -4(R9), R9
	CMPL SI, $0x08
	JLE  repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
	CMPL BP, $0x00000800
	JLT  repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
	CMPL R9, $0x00000104
	JLT  repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short
	CMPL R9, $0x00010100
	JLT  repeat_four_match_nolit_encodeBlockAsm12B_emit_copy_short
	CMPL R9, $0x0100ffff
	JLT  repeat_five_match_nolit_encodeBlockAsm12B_emit_copy_short
	LEAL -16842747(R9), R9
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short

repeat_five_match_nolit_encodeBlockAsm12B_emit_copy_short:
	LEAL -65536(R9), R9
	MOVL R9, BP
	MOVW $0x001d, (AX)
	MOVW R9, 2(AX)
	SARL $0x10, BP
	MOVB BP, 4(AX)
	ADDQ $0x05, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B

repeat_four_match_nolit_encodeBlockAsm12B_emit_copy_short:
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B

repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B

repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short:
	SHLL $0x02, R9
	ORL  $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B

repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL  BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
	// Not reachable: follows an unconditional JMP with no label in between.
	JMP two_byte_offset_match_nolit_encodeBlockAsm12B

two_byte_offset_short_match_nolit_encodeBlockAsm12B:
	CMPL R9, $0x0c
	JGE  emit_copy_three_match_nolit_encodeBlockAsm12B
	CMPL BP, $0x00000800
	JGE  emit_copy_three_match_nolit_encodeBlockAsm12B
	MOVB $0x01, BL
	LEAL -16(BX)(R9*4), R9
	MOVB BP, 1(AX)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL  BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B

emit_copy_three_match_nolit_encodeBlockAsm12B:
	MOVB $0x02, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX

	// nextEmit = s. Finish when s >= sLimit; return 0 when AX has reached the
	// dst limit.
match_nolit_emitcopy_end_encodeBlockAsm12B:
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE  emit_remainder_encodeBlockAsm12B
	CMPQ AX, (SP)
	JL   match_nolit_dst_ok_encodeBlockAsm12B
	MOVQ $0x00000000, ret+48(FP)
	RET

	// Load 8 bytes at src[s-2]; DI = hash of src[s-2:], R8 = hash of src[s:].
	// Record s-2, fetch the candidate for s (BP) and record s. If the 4 bytes
	// at the candidate equal those at s, continue with another match;
	// otherwise s++ and resume searching.
match_nolit_dst_ok_encodeBlockAsm12B:
	MOVQ  -2(DX)(CX*1), SI
	MOVQ  $0x000000cf1bbcdcbb, BP
	MOVQ  SI, DI
	SHRQ  $0x10, SI
	MOVQ  SI, R8
	SHLQ  $0x18, DI
	IMULQ BP, DI
	SHRQ  $0x34, DI
	SHLQ  $0x18, R8
	IMULQ BP, R8
	SHRQ  $0x34, R8
	LEAL  -2(CX), R9
	MOVL  24(SP)(R8*4), BP
	MOVL  R9, 24(SP)(DI*4)
	MOVL  CX, 24(SP)(R8*4)
	CMPL  (DX)(BP*1), SI
	JEQ   match_nolit_loop_encodeBlockAsm12B
	INCL  CX
	JMP   search_loop_encodeBlockAsm12B

	// Emit src[nextEmit:] as the final literal run. Return 0 unless
	// AX + 4 + (src_len - nextEmit) is below the dst limit.
emit_remainder_encodeBlockAsm12B:
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL   emit_remainder_ok_encodeBlockAsm12B
	MOVQ $0x00000000, ret+48(FP)
	RET

	// CX = source pointer, BP = length, DX = length - 1 (DX no longer holds
	// the src base from here on). Header selection as for the other literals.
emit_remainder_ok_encodeBlockAsm12B:
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm12B
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	LEAL -1(BP), DX
	CMPL DX, $0x3c
	JLT  one_byte_emit_remainder_encodeBlockAsm12B
	CMPL DX, $0x00000100
	JLT  two_bytes_emit_remainder_encodeBlockAsm12B
	CMPL DX, $0x00010000
	JLT  three_bytes_emit_remainder_encodeBlockAsm12B
	CMPL DX, $0x01000000
	JLT  four_bytes_emit_remainder_encodeBlockAsm12B
	MOVB $0xfc, (AX)
	MOVL DX, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_emit_remainder_encodeBlockAsm12B

four_bytes_emit_remainder_encodeBlockAsm12B:
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_emit_remainder_encodeBlockAsm12B

three_bytes_emit_remainder_encodeBlockAsm12B:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_emit_remainder_encodeBlockAsm12B

two_bytes_emit_remainder_encodeBlockAsm12B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL   memmove_emit_remainder_encodeBlockAsm12B
	JMP  memmove_long_emit_remainder_encodeBlockAsm12B

one_byte_emit_remainder_encodeBlockAsm12B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

	// Short copy (1..64 bytes): DX = dst end pointer, BX = length.
memmove_emit_remainder_encodeBlockAsm12B:
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX
	CMPQ BX, $0x03
	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2
	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3
	CMPQ BX, $0x08
	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32
	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B

emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B

emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B

emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B

emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm12B

emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBlockAsm12B:
	MOVQ DX, AX
	JMP  emit_literal_done_emit_remainder_encodeBlockAsm12B

	// Long copy (length >= 65) of the final literal run, which ends at the
	// end of src. Head and tail are loaded up front (X0..X3) and stored last;
	// the middle is copied 32 bytes at a time with aligned stores starting at
	// offset DI - 32, where DI = 64 - (AX & 31). The loop continues only
	// while length >= DI, so src is never read past its end.
	// The former 128-byte unrolled loop could read up to 64 bytes beyond src
	// for lengths 128..191 and was dropped.
memmove_long_emit_remainder_encodeBlockAsm12B:
	LEAQ  (AX)(BP*1), DX
	MOVL  BP, BX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ  AX, BP
	ANDL  $0x0000001f, BP
	MOVQ  $0x00000040, DI
	SUBQ  BP, DI

emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ  $0x20, DI
	CMPQ  BX, DI
	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ  DX, AX

	// Return the number of bytes written: AX - dst_base.
emit_literal_done_emit_remainder_encodeBlockAsm12B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBlockAsm10B(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst using a 1024-entry (10-bit hash) table of src
// positions kept on the stack. Returns the number of bytes written to dst,
// or 0 when the output would reach the dst limit computed in the prologue.
//
// Stack frame layout, as used by this function:
//   0(SP)   8 bytes  dst limit pointer: dst_base + (src_len-5 - src_len>>5)
//   8(SP)   4 bytes  sLimit = src_len - 8
//   12(SP)  4 bytes  nextEmit: src offset of the first byte not yet emitted
//   16(SP)  4 bytes  repeat: offset of the most recent match (starts at 1)
//   20(SP)  4 bytes  nextS: position to resume at when no candidate matches
//   24(SP)  4096 B   hash table, 1024 x uint32 src positions
//
// Register roles in the main loop: AX = dst write pointer, DX = src base,
// CX = current src position (s). All other registers are scratch and are
// reassigned from section to section.
TEXT ·encodeBlockAsm10B(SB), $4120-56
	MOVQ dst_base+0(FP), AX
	// Zero the hash table: 0x20 iterations of 128 bytes = 4096 bytes.
	MOVQ $0x00000020, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeBlockAsm10B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ  $0x80, DX
	DECQ  CX
	JNZ   zero_loop_encodeBlockAsm10B
	// nextEmit = 0.
	MOVL  $0x00000000, 12(SP)
	// sLimit = src_len - 8; dst limit = dst_base + (src_len - 5 - src_len/32).
	MOVQ  src_len+32(FP), CX
	LEAQ  -5(CX), DX
	LEAQ  -8(CX), BP
	MOVL  BP, 8(SP)
	SHRQ  $0x05, CX
	SUBL  CX, DX
	LEAQ  (AX)(DX*1), DX
	MOVQ  DX, (SP)
	// s = 1 and repeat = 1; DX becomes the src base for the rest of the function.
	MOVL  $0x00000001, CX
	MOVL  CX, 16(SP)
	MOVQ  src_base+24(FP), DX

search_loop_encodeBlockAsm10B:
	// SI = 8 bytes at src[s].
	MOVQ  (DX)(CX*1), SI
	// nextS = s + 4 + ((s - nextEmit) >> 5): the step grows with the distance
	// since the last emit. Stop searching once nextS exceeds sLimit.
	MOVL  CX, BP
	SUBL  12(SP), BP
	SHRL  $0x05, BP
	LEAL  4(CX)(BP*1), BP
	MOVL  8(SP), DI
	CMPL  BP, DI
	JGT   emit_remainder_encodeBlockAsm10B
	MOVL  BP, 20(SP)
	// Hash of 4 bytes: ((x << 32) * 0x9e3779b1) >> 54, giving a 10-bit index.
	// R9 = hash of bytes at s, R10 = hash of bytes at s+1.
	MOVQ  $0x9e3779b1, R8
	MOVQ  SI, R9
	MOVQ  SI, R10
	SHRQ  $0x08, R10
	SHLQ  $0x20, R9
	IMULQ R8, R9
	SHRQ  $0x36, R9
	SHLQ  $0x20, R10
	IMULQ R8, R10
	SHRQ  $0x36, R10
	// BP = candidate for s, DI = candidate for s+1; then store s and s+1
	// into their table slots.
	MOVL  24(SP)(R9*4), BP
	MOVL  24(SP)(R10*4), DI
	MOVL  CX, 24(SP)(R9*4)
	LEAL  1(CX), R9
	MOVL  R9, 24(SP)(R10*4)
	// R9 = hash of the 4 bytes at s+2 (used later in no_repeat_found).
	MOVQ  SI, R9
	SHRQ  $0x10, R9
	SHLQ  $0x20, R9
	IMULQ R8, R9
	SHRQ  $0x36, R9
	// Repeat check: compare the 4 bytes at s+1 with the 4 bytes at s+1-repeat.
	MOVL  CX, R8
	SUBL  16(SP), R8
	MOVL  1(DX)(R8*1), R10
	MOVQ  SI, R8
	SHRQ  $0x08, R8
	CMPL  R8, R10
	JNE   no_repeat_found_encodeBlockAsm10B
	// Repeat found. SI = base (s+1), DI = nextEmit, BP = base - repeat.
	// DI is kept live until repeat_extend_forward_end below.
	LEAL  1(CX), SI
	MOVL  12(SP), DI
	MOVL  SI, BP
	SUBL  16(SP), BP
	JZ    repeat_extend_back_end_encodeBlockAsm10B

repeat_extend_back_loop_encodeBlockAsm10B:
	// Extend the match backwards while base > nextEmit, the preceding bytes
	// are equal, and the candidate position has not reached 0.
	CMPL SI, DI
	JLE  repeat_extend_back_end_encodeBlockAsm10B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(SI*1), R8
	CMPB BL, R8
	JNE  repeat_extend_back_end_encodeBlockAsm10B
	LEAL -1(SI), SI
	DECL BP
	JNZ  repeat_extend_back_loop_encodeBlockAsm10B

repeat_extend_back_end_encodeBlockAsm10B:
	// Emit src[nextEmit:base] as a literal (skipped when empty).
	// R9 = literal source pointer, R8 = literal length, BP = length - 1.
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm10B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R9
	SUBL BP, R8
	LEAL -1(R8), BP
	// Choose the literal header size from length-1.
	CMPL BP, $0x3c
	JLT  one_byte_repeat_emit_encodeBlockAsm10B
	CMPL BP, $0x00000100
	JLT  two_bytes_repeat_emit_encodeBlockAsm10B
	CMPL BP, $0x00010000
	JLT  three_bytes_repeat_emit_encodeBlockAsm10B
	CMPL BP, $0x01000000
	JLT  four_bytes_repeat_emit_encodeBlockAsm10B
	// Header: 0xfc followed by a 32-bit length-1.
	MOVB $0xfc, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_repeat_emit_encodeBlockAsm10B

four_bytes_repeat_emit_encodeBlockAsm10B:
	// Header: 0xf8 followed by a 24-bit length-1 (16 bits, then bits 16..23).
	MOVL BP, R10
	SHRL $0x10, R10
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB R10, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_repeat_emit_encodeBlockAsm10B

three_bytes_repeat_emit_encodeBlockAsm10B:
	// Header: 0xf4 followed by a 16-bit length-1.
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_repeat_emit_encodeBlockAsm10B

two_bytes_repeat_emit_encodeBlockAsm10B:
	// Header: 0xf0 followed by an 8-bit length-1. Lengths up to 64 use the
	// short copy, longer ones the long copy.
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	CMPL BP, $0x40
	JL   memmove_repeat_emit_encodeBlockAsm10B
	JMP  memmove_long_repeat_emit_encodeBlockAsm10B

one_byte_repeat_emit_encodeBlockAsm10B:
	// Header: single byte (length-1) << 2.
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeBlockAsm10B:
	// Short copy of R8 bytes from R9 to AX, dispatched on size. Each case
	// loads the head and the tail (which may overlap) before storing.
	// BP = dst pointer after the copy.
	LEAQ (AX)(R8*1), BP
	CMPQ R8, $0x03
	JB   emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_1or2
	JE   emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_3
	CMPQ R8, $0x08
	JB   emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32
	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_1or2:
	MOVB (R9), R10
	MOVB -1(R9)(R8*1), R9
	MOVB R10, (AX)
	MOVB R9, -1(AX)(R8*1)
	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm10B

emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_3:
	MOVW (R9), R10
	MOVB 2(R9), R9
	MOVW R10, (AX)
	MOVB R9, 2(AX)
	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm10B

emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_4through7:
	MOVL (R9), R10
	MOVL -4(R9)(R8*1), R9
	MOVL R10, (AX)
	MOVL R9, -4(AX)(R8*1)
	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm10B

emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm10B

emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm10B

emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_repeat_emit_encodeBlockAsm10B:
	MOVQ BP, AX
	JMP  emit_literal_done_repeat_emit_encodeBlockAsm10B

memmove_long_repeat_emit_encodeBlockAsm10B:
	// Long copy of R8 bytes from R9 to AX. The first and last 32 bytes are
	// loaded up front (X0..X3) and stored last; the middle is copied with
	// aligned stores starting at offset R12 = 64 - (AX & 31).
	// R11 = length >> 7 (count of 128-byte chunks).
	LEAQ  (AX)(R8*1), BP
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ  R8, R11
	SHRQ  $0x07, R11
	MOVQ  AX, R10
	ANDL  $0x0000001f, R10
	MOVQ  $0x00000040, R12
	SUBQ  R10, R12
	// NOTE(review): DECQ does not modify CF, so the JA below (and the JNA that
	// closes the 128-byte loop) depend only on ZF from the decrement. Confirm
	// this entry/exit condition is what the generator intends.
	DECQ  R11
	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
	LEAQ  -32(R9)(R12*1), R10
	LEAQ  -32(AX)(R12*1), R13

emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back:
	// 128 bytes per iteration: unaligned loads, aligned stores.
	MOVOU (R10), X4
	MOVOU 16(R10), X5
	MOVOU 32(R10), X6
	MOVOU 48(R10), X7
	MOVOU 64(R10), X8
	MOVOU 80(R10), X9
	MOVOU 96(R10), X10
	MOVOU 112(R10), X11
	MOVOA X4, (R13)
	MOVOA X5, 16(R13)
	MOVOA X6, 32(R13)
	MOVOA X7, 48(R13)
	MOVOA X8, 64(R13)
	MOVOA X9, 80(R13)
	MOVOA X10, 96(R13)
	MOVOA X11, 112(R13)
	ADDQ  $0x80, R13
	ADDQ  $0x80, R10
	ADDQ  $0x80, R12
	DECQ  R11
	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
	// 32 bytes per iteration while length >= R12, then store the saved
	// head and tail blocks.
	MOVOU -32(R9)(R12*1), X4
	MOVOU -16(R9)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ  $0x20, R12
	CMPQ  R8, R12
	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ  BP, AX

emit_literal_done_repeat_emit_encodeBlockAsm10B:
	// Extend the repeat forward. s += 5 (s+1 plus the 4 bytes already
	// compared); R9 = &src[s], BP = &src[s-repeat], R8 = bytes left in src,
	// R11 = number of additional matching bytes.
	ADDL $0x05, CX
	MOVL CX, BP
	SUBL 16(SP), BP
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(BP*1), BP
	XORL R11, R11
	CMPL R8, $0x08
	JL   matchlen_single_repeat_extend

matchlen_loopback_repeat_extend:
	// Compare 8 bytes at a time; on a difference, the index of the lowest
	// set bit of the XOR divided by 8 is the count of equal leading bytes.
	MOVQ  (R9)(R11*1), R10
	XORQ  (BP)(R11*1), R10
	TESTQ R10, R10
	JZ    matchlen_loop_repeat_extend
	BSFQ  R10, R10
	SARQ  $0x03, R10
	LEAL  (R11)(R10*1), R11
	JMP   repeat_extend_forward_end_encodeBlockAsm10B

matchlen_loop_repeat_extend:
	LEAL -8(R8), R8
	LEAL 8(R11), R11
	CMPL R8, $0x08
	JGE  matchlen_loopback_repeat_extend

matchlen_single_repeat_extend:
	// Fewer than 8 bytes left: compare byte by byte.
	TESTL R8, R8
	JZ    repeat_extend_forward_end_encodeBlockAsm10B

matchlen_single_loopback_repeat_extend:
	MOVB (R9)(R11*1), R10
	CMPB (BP)(R11*1), R10
	JNE  repeat_extend_forward_end_encodeBlockAsm10B
	LEAL 1(R11), R11
	DECL R8
	JNZ  matchlen_single_loopback_repeat_extend

repeat_extend_forward_end_encodeBlockAsm10B:
	// s += matched bytes; BP = s - base = match length; SI = repeat offset.
	// DI still holds the nextEmit value read before the literal was emitted;
	// when it was 0 the match is encoded as a copy with an explicit offset
	// instead of a repeat code.
	ADDL  R11, CX
	MOVL  CX, BP
	SUBL  SI, BP
	MOVL  16(SP), SI
	TESTL DI, DI
	JZ    repeat_as_copy_encodeBlockAsm10B

emit_repeat_again_match_repeat_encodeBlockAsm10B:
	// Emit a repeat code. DI = length, BP = length - 4, SI = offset.
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE  repeat_two_match_repeat_encodeBlockAsm10B
	CMPL DI, $0x0c
	JGE  cant_repeat_two_offset_match_repeat_encodeBlockAsm10B
	CMPL SI, $0x00000800
	JLT  repeat_two_offset_match_repeat_encodeBlockAsm10B

cant_repeat_two_offset_match_repeat_encodeBlockAsm10B:
	CMPL BP, $0x00000104
	JLT  repeat_three_match_repeat_encodeBlockAsm10B
	CMPL BP, $0x00010100
	JLT  repeat_four_match_repeat_encodeBlockAsm10B
	CMPL BP, $0x0100ffff
	JLT  repeat_five_match_repeat_encodeBlockAsm10B
	// Too long for one code: emit a 5-byte code with all length bits set,
	// subtract 16842747 from the remaining length and go around again.
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_match_repeat_encodeBlockAsm10B

repeat_five_match_repeat_encodeBlockAsm10B:
	// 5 bytes: word 0x001d, then 24 bits of (length - 4 - 65536).
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP  repeat_end_emit_encodeBlockAsm10B

repeat_four_match_repeat_encodeBlockAsm10B:
	// 4 bytes: word 0x0019, then 16 bits of (length - 4 - 256).
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP  repeat_end_emit_encodeBlockAsm10B

repeat_three_match_repeat_encodeBlockAsm10B:
	// 3 bytes: word 0x0015, then 8 bits of (length - 4 - 4).
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP  repeat_end_emit_encodeBlockAsm10B

repeat_two_match_repeat_encodeBlockAsm10B:
	// 2 bytes: 16-bit value ((length - 4) << 2) | 1.
	SHLL $0x02, BP
	ORL  $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm10B

repeat_two_offset_match_repeat_encodeBlockAsm10B:
	// 2 bytes with offset < 2048: byte 1 = low 8 offset bits,
	// byte 0 = 1 | (length-4) << 2 | (offset >> 8) << 5.
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm10B

repeat_as_copy_encodeBlockAsm10B:
	// Encode the match as a copy. BP = length, SI = offset.
	CMPL SI, $0x00010000
	JL   two_byte_offset_repeat_as_copy_encodeBlockAsm10B

four_bytes_loop_back_repeat_as_copy_encodeBlockAsm10B:
	// Offset >= 65536: for length > 64 emit tag 0xff + 32-bit offset (AX
	// advances 5, length drops by 64), then cover the rest with repeat codes.
	CMPL BP, $0x40
	JLE  four_bytes_remain_repeat_as_copy_encodeBlockAsm10B
	MOVB $0xff, (AX)
	MOVL SI, 1(AX)
	LEAL -64(BP), BP
	ADDQ $0x05, AX
	CMPL BP, $0x04
	JL   four_bytes_remain_repeat_as_copy_encodeBlockAsm10B

emit_repeat_again_repeat_as_copy_encodeBlockAsm10B_emit_copy:
	// Same repeat-code emission as emit_repeat_again_match_repeat above.
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE  repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy
	CMPL DI, $0x0c
	JGE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy
	CMPL SI, $0x00000800
	JLT  repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy:
	CMPL BP, $0x00000104
	JLT  repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy
	CMPL BP, $0x00010100
	JLT  repeat_four_repeat_as_copy_encodeBlockAsm10B_emit_copy
	CMPL BP, $0x0100ffff
	JLT  repeat_five_repeat_as_copy_encodeBlockAsm10B_emit_copy
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm10B_emit_copy

repeat_five_repeat_as_copy_encodeBlockAsm10B_emit_copy:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP  repeat_end_emit_encodeBlockAsm10B

repeat_four_repeat_as_copy_encodeBlockAsm10B_emit_copy:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP  repeat_end_emit_encodeBlockAsm10B

repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP  repeat_end_emit_encodeBlockAsm10B

repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy:
	SHLL $0x02, BP
	ORL  $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm10B

repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm10B
	// NOTE(review): the JMP below is unreachable (it follows an unconditional
	// JMP and precedes a label); it looks like a generator artifact.
	JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm10B

four_bytes_remain_repeat_as_copy_encodeBlockAsm10B:
	// Remaining length (0 means nothing left): tag byte 3 | (length-1) << 2
	// followed by the 32-bit offset. Only the low byte of the LEAL result is
	// stored, so the upper bits of BX do not matter.
	TESTL BP, BP
	JZ    repeat_end_emit_encodeBlockAsm10B
	MOVB  $0x03, BL
	LEAL  -4(BX)(BP*4), BP
	MOVB  BP, (AX)
	MOVL  SI, 1(AX)
	ADDQ  $0x05, AX
	JMP   repeat_end_emit_encodeBlockAsm10B

two_byte_offset_repeat_as_copy_encodeBlockAsm10B:
	// Offset < 65536: for length > 64 emit tag 0xee + 16-bit offset (AX
	// advances 3, length drops by 60), then cover the rest with repeat codes.
	CMPL BP, $0x40
	JLE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(BP), BP
	ADDQ $0x03, AX

emit_repeat_again_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
	// Same repeat-code emission as emit_repeat_again_match_repeat above.
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE  repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
	CMPL DI, $0x0c
	JGE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
	CMPL SI, $0x00000800
	JLT  repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
	CMPL BP, $0x00000104
	JLT  repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
	CMPL BP, $0x00010100
	JLT  repeat_four_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
	CMPL BP, $0x0100ffff
	JLT  repeat_five_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm10B_emit_copy_short

repeat_five_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP  repeat_end_emit_encodeBlockAsm10B

repeat_four_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP  repeat_end_emit_encodeBlockAsm10B

repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP  repeat_end_emit_encodeBlockAsm10B

repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
	SHLL $0x02, BP
	ORL  $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm10B

repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm10B
	// NOTE(review): the JMP below is unreachable (it follows an unconditional
	// JMP and precedes a label); it looks like a generator artifact.
	JMP two_byte_offset_repeat_as_copy_encodeBlockAsm10B

two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B:
	// Length <= 64. With length < 12 and offset < 2048 use the 2-byte form:
	// byte 0 = 1 | (length-4) << 2 | (offset >> 8) << 5, byte 1 = low 8
	// offset bits. Otherwise fall to the 3-byte form.
	CMPL BP, $0x0c
	JGE  emit_copy_three_repeat_as_copy_encodeBlockAsm10B
	CMPL SI, $0x00000800
	JGE  emit_copy_three_repeat_as_copy_encodeBlockAsm10B
	MOVB $0x01, BL
	LEAL -16(BX)(BP*4), BP
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm10B

emit_copy_three_repeat_as_copy_encodeBlockAsm10B:
	// 3-byte form: tag byte 2 | (length-1) << 2, then the 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeBlockAsm10B:
	// nextEmit = s; finish if s reached sLimit, otherwise keep searching.
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE  emit_remainder_encodeBlockAsm10B
	JMP  search_loop_encodeBlockAsm10B

no_repeat_found_encodeBlockAsm10B:
	// Try the three hash candidates in turn. On entry: SI = 8 bytes at s,
	// BP = candidate for s, DI = candidate for s+1, R9 = hash for s+2.
	CMPL (DX)(BP*1), SI
	JEQ  candidate_match_encodeBlockAsm10B
	SHRQ $0x08, SI
	MOVL 24(SP)(R9*4), BP
	LEAL 2(CX), R8
	CMPL (DX)(DI*1), SI
	JEQ  candidate2_match_encodeBlockAsm10B
	MOVL R8, 24(SP)(R9*4)
	SHRQ $0x08, SI
	CMPL (DX)(BP*1), SI
	JEQ  candidate3_match_encodeBlockAsm10B
	// No candidate matched: s = nextS.
	MOVL 20(SP), CX
	JMP  search_loop_encodeBlockAsm10B

candidate3_match_encodeBlockAsm10B:
	// Match found at s+2.
	ADDL $0x02, CX
	JMP  candidate_match_encodeBlockAsm10B

candidate2_match_encodeBlockAsm10B:
	// Match found at s+1; the table store for s+2 is still performed.
	MOVL R8, 24(SP)(R9*4)
	INCL CX
	MOVL DI, BP

candidate_match_encodeBlockAsm10B:
	// CX = match position, BP = candidate position, SI = nextEmit.
	MOVL  12(SP), SI
	TESTL BP, BP
	JZ    match_extend_back_end_encodeBlockAsm10B

match_extend_back_loop_encodeBlockAsm10B:
	// Extend the match backwards while s > nextEmit, the preceding bytes
	// are equal, and the candidate position has not reached 0.
	CMPL CX, SI
	JLE  match_extend_back_end_encodeBlockAsm10B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE  match_extend_back_end_encodeBlockAsm10B
	LEAL -1(CX), CX
	DECL BP
	JZ   match_extend_back_end_encodeBlockAsm10B
	JMP  match_extend_back_loop_encodeBlockAsm10B

match_extend_back_end_encodeBlockAsm10B:
	// Bail out with 0 unless AX + 4 + (s - nextEmit) is below the dst limit.
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 4(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL   match_dst_size_check_encodeBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeBlockAsm10B:
	// Emit src[nextEmit:s] as a literal (skipped when empty).
	// SI = literal source pointer, R8 = literal length, DI = length - 1.
	// Header selection and copy mirror the repeat_emit section above.
	MOVL CX, SI
	MOVL 12(SP), DI
	CMPL DI, SI
	JEQ  emit_literal_done_match_emit_encodeBlockAsm10B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(DI*1), SI
	SUBL DI, R8
	LEAL -1(R8), DI
	CMPL DI, $0x3c
	JLT  one_byte_match_emit_encodeBlockAsm10B
	CMPL DI, $0x00000100
	JLT  two_bytes_match_emit_encodeBlockAsm10B
	CMPL DI, $0x00010000
	JLT  three_bytes_match_emit_encodeBlockAsm10B
	CMPL DI, $0x01000000
	JLT  four_bytes_match_emit_encodeBlockAsm10B
	MOVB $0xfc, (AX)
	MOVL DI, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_match_emit_encodeBlockAsm10B

four_bytes_match_emit_encodeBlockAsm10B:
	MOVL DI, R9
	SHRL $0x10, R9
	MOVB $0xf8, (AX)
	MOVW DI, 1(AX)
	MOVB R9, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_match_emit_encodeBlockAsm10B

three_bytes_match_emit_encodeBlockAsm10B:
	MOVB $0xf4, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_match_emit_encodeBlockAsm10B

two_bytes_match_emit_encodeBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB DI, 1(AX)
	ADDQ $0x02, AX
	CMPL DI, $0x40
	JL   memmove_match_emit_encodeBlockAsm10B
	JMP  memmove_long_match_emit_encodeBlockAsm10B

one_byte_match_emit_encodeBlockAsm10B:
	SHLB $0x02, DI
	MOVB DI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeBlockAsm10B:
	// Short copy of R8 bytes from SI to AX; DI = dst pointer after the copy.
	LEAQ (AX)(R8*1), DI
	CMPQ R8, $0x03
	JB   emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_1or2
	JE   emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_3
	CMPQ R8, $0x08
	JB   emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32
	JMP  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (AX)
	MOVB SI, -1(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeBlockAsm10B

emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (AX)
	MOVB SI, 2(AX)
	JMP  memmove_end_copy_match_emit_encodeBlockAsm10B

emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_4through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (AX)
	MOVL SI, -4(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeBlockAsm10B

emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (AX)
	MOVQ SI, -8(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeBlockAsm10B

emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP   memmove_end_copy_match_emit_encodeBlockAsm10B

emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_encodeBlockAsm10B:
	MOVQ DI, AX
	JMP  emit_literal_done_match_emit_encodeBlockAsm10B

memmove_long_match_emit_encodeBlockAsm10B:
	// Long copy of R8 bytes from SI to AX; same structure as
	// memmove_long_repeat_emit above (head/tail saved in X0..X3, aligned
	// stores from offset R11 = 64 - (AX & 31), R10 = length >> 7).
	// NOTE(review): the DECQ/JA and DECQ/JNA pairs depend only on ZF, as DECQ
	// leaves CF unchanged; confirm the intended loop conditions.
	LEAQ  (AX)(R8*1), DI
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVQ  R8, R10
	SHRQ  $0x07, R10
	MOVQ  AX, R9
	ANDL  $0x0000001f, R9
	MOVQ  $0x00000040, R11
	SUBQ  R9, R11
	DECQ  R10
	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
	LEAQ  -32(SI)(R11*1), R9
	LEAQ  -32(AX)(R11*1), R12

emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back:
	MOVOU (R9), X4
	MOVOU 16(R9), X5
	MOVOU 32(R9), X6
	MOVOU 48(R9), X7
	MOVOU 64(R9), X8
	MOVOU 80(R9), X9
	MOVOU 96(R9), X10
	MOVOU 112(R9), X11
	MOVOA X4, (R12)
	MOVOA X5, 16(R12)
	MOVOA X6, 32(R12)
	MOVOA X7, 48(R12)
	MOVOA X8, 64(R12)
	MOVOA X9, 80(R12)
	MOVOA X10, 96(R12)
	MOVOA X11, 112(R12)
	ADDQ  $0x80, R12
	ADDQ  $0x80, R9
	ADDQ  $0x80, R11
	DECQ  R10
	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(SI)(R11*1), X4
	MOVOU -16(SI)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ  $0x20, R11
	CMPQ  R8, R11
	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ  DI, AX

emit_literal_done_match_emit_encodeBlockAsm10B:
match_nolit_loop_encodeBlockAsm10B:
	// Match at s against candidate BP with no pending literal.
	// repeat = s - candidate; skip the 4 bytes already known to match and
	// count further equal bytes in R9 (DI = &src[s], BP = &src[candidate],
	// SI = bytes left in src).
	MOVL CX, SI
	SUBL BP, SI
	MOVL SI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), SI
	SUBL CX, SI
	LEAQ (DX)(CX*1), DI
	LEAQ (DX)(BP*1), BP
	XORL R9, R9
	CMPL SI, $0x08
	JL   matchlen_single_match_nolit_encodeBlockAsm10B

matchlen_loopback_match_nolit_encodeBlockAsm10B:
	// 8 bytes at a time; lowest set bit of the XOR / 8 = equal leading bytes.
	MOVQ  (DI)(R9*1), R8
	XORQ  (BP)(R9*1), R8
	TESTQ R8, R8
	JZ    matchlen_loop_match_nolit_encodeBlockAsm10B
	BSFQ  R8, R8
	SARQ  $0x03, R8
	LEAL  (R9)(R8*1), R9
	JMP   match_nolit_end_encodeBlockAsm10B

matchlen_loop_match_nolit_encodeBlockAsm10B:
	LEAL -8(SI), SI
	LEAL 8(R9), R9
	CMPL SI, $0x08
	JGE  matchlen_loopback_match_nolit_encodeBlockAsm10B

matchlen_single_match_nolit_encodeBlockAsm10B:
	TESTL SI, SI
	JZ    match_nolit_end_encodeBlockAsm10B

matchlen_single_loopback_match_nolit_encodeBlockAsm10B:
	MOVB (DI)(R9*1), R8
	CMPB (BP)(R9*1), R8
	JNE  match_nolit_end_encodeBlockAsm10B
	LEAL 1(R9), R9
	DECL SI
	JNZ  matchlen_single_loopback_match_nolit_encodeBlockAsm10B

match_nolit_end_encodeBlockAsm10B:
	// s += extra matched bytes; BP = offset; R9 = total match length
	// (extra + 4). Emit a copy using the same encodings as repeat_as_copy.
	ADDL R9, CX
	MOVL 16(SP), BP
	ADDL $0x04, R9
	CMPL BP, $0x00010000
	JL   two_byte_offset_match_nolit_encodeBlockAsm10B

four_bytes_loop_back_match_nolit_encodeBlockAsm10B:
	// Offset >= 65536: tag 0xff + 32-bit offset for length > 64, length
	// drops by 64, rest covered by repeat codes.
	CMPL R9, $0x40
	JLE  four_bytes_remain_match_nolit_encodeBlockAsm10B
	MOVB $0xff, (AX)
	MOVL BP, 1(AX)
	LEAL -64(R9), R9
	ADDQ $0x05, AX
	CMPL R9, $0x04
	JL   four_bytes_remain_match_nolit_encodeBlockAsm10B

emit_repeat_again_match_nolit_encodeBlockAsm10B_emit_copy:
	// Repeat-code emission: SI = length, R9 = length - 4, BP = offset.
	MOVL R9, SI
	LEAL -4(R9), R9
	CMPL SI, $0x08
	JLE  repeat_two_match_nolit_encodeBlockAsm10B_emit_copy
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy
	CMPL BP, $0x00000800
	JLT  repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy

cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy:
	CMPL R9, $0x00000104
	JLT  repeat_three_match_nolit_encodeBlockAsm10B_emit_copy
	CMPL R9, $0x00010100
	JLT  repeat_four_match_nolit_encodeBlockAsm10B_emit_copy
	CMPL R9, $0x0100ffff
	JLT  repeat_five_match_nolit_encodeBlockAsm10B_emit_copy
	LEAL -16842747(R9), R9
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_match_nolit_encodeBlockAsm10B_emit_copy

repeat_five_match_nolit_encodeBlockAsm10B_emit_copy:
	LEAL -65536(R9), R9
	MOVL R9, BP
	MOVW $0x001d, (AX)
	MOVW R9, 2(AX)
	SARL $0x10, BP
	MOVB BP, 4(AX)
	ADDQ $0x05, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B

repeat_four_match_nolit_encodeBlockAsm10B_emit_copy:
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B

repeat_three_match_nolit_encodeBlockAsm10B_emit_copy:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B

repeat_two_match_nolit_encodeBlockAsm10B_emit_copy:
	SHLL $0x02, R9
	ORL  $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B

repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy:
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL  BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
	// NOTE(review): the JMP below is unreachable (it follows an unconditional
	// JMP and precedes a label); it looks like a generator artifact.
	JMP four_bytes_loop_back_match_nolit_encodeBlockAsm10B

four_bytes_remain_match_nolit_encodeBlockAsm10B:
	// Remaining length (0 means nothing left): tag byte 3 | (length-1) << 2
	// followed by the 32-bit offset.
	TESTL R9, R9
	JZ    match_nolit_emitcopy_end_encodeBlockAsm10B
	MOVB  $0x03, BL
	LEAL  -4(BX)(R9*4), R9
	MOVB  R9, (AX)
	MOVL  BP, 1(AX)
	ADDQ  $0x05, AX
	JMP   match_nolit_emitcopy_end_encodeBlockAsm10B

two_byte_offset_match_nolit_encodeBlockAsm10B:
	// Offset < 65536: tag 0xee + 16-bit offset for length > 64, length
	// drops by 60, rest covered by repeat codes.
	CMPL R9, $0x40
	JLE  two_byte_offset_short_match_nolit_encodeBlockAsm10B
	MOVB $0xee, (AX)
	MOVW BP, 1(AX)
	LEAL -60(R9), R9
	ADDQ $0x03, AX

emit_repeat_again_match_nolit_encodeBlockAsm10B_emit_copy_short:
	// Repeat-code emission: SI = length, R9 = length - 4, BP = offset.
	MOVL R9, SI
	LEAL -4(R9), R9
	CMPL SI, $0x08
	JLE  repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
	CMPL BP, $0x00000800
	JLT  repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
	CMPL R9, $0x00000104
	JLT  repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short
	CMPL R9, $0x00010100
	JLT  repeat_four_match_nolit_encodeBlockAsm10B_emit_copy_short
	CMPL R9, $0x0100ffff
	JLT  repeat_five_match_nolit_encodeBlockAsm10B_emit_copy_short
	LEAL -16842747(R9), R9
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_match_nolit_encodeBlockAsm10B_emit_copy_short

repeat_five_match_nolit_encodeBlockAsm10B_emit_copy_short:
	LEAL -65536(R9), R9
	MOVL R9, BP
	MOVW $0x001d, (AX)
	MOVW R9, 2(AX)
	SARL $0x10, BP
	MOVB BP, 4(AX)
	ADDQ $0x05, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B

repeat_four_match_nolit_encodeBlockAsm10B_emit_copy_short:
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B

repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B

repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short:
	SHLL $0x02, R9
	ORL  $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B

repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL  BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
	// NOTE(review): the JMP below is unreachable (it follows an unconditional
	// JMP and precedes a label); it looks like a generator artifact.
	JMP two_byte_offset_match_nolit_encodeBlockAsm10B

two_byte_offset_short_match_nolit_encodeBlockAsm10B:
	// Length <= 64. With length < 12 and offset < 2048 use the 2-byte form:
	// byte 0 = 1 | (length-4) << 2 | (offset >> 8) << 5, byte 1 = low 8
	// offset bits. Otherwise fall to the 3-byte form.
	CMPL R9, $0x0c
	JGE  emit_copy_three_match_nolit_encodeBlockAsm10B
	CMPL BP, $0x00000800
	JGE  emit_copy_three_match_nolit_encodeBlockAsm10B
	MOVB $0x01, BL
	LEAL -16(BX)(R9*4), R9
	MOVB BP, 1(AX)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL  BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B

emit_copy_three_match_nolit_encodeBlockAsm10B:
	// 3-byte form: tag byte 2 | (length-1) << 2, then the 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeBlockAsm10B:
	// nextEmit = s. Finish if s reached sLimit; return 0 if the dst write
	// pointer reached the dst limit.
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE  emit_remainder_encodeBlockAsm10B
	CMPQ AX, (SP)
	JL   match_nolit_dst_ok_encodeBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeBlockAsm10B:
	// Load 8 bytes at s-2, hash the 4 bytes at s-2 (DI) and at s (R8),
	// read the candidate for s into BP, and store s-2 and s in the table.
	// If the candidate's 4 bytes equal those at s, handle it immediately as
	// another match; otherwise advance s by one and resume searching.
	MOVQ  -2(DX)(CX*1), SI
	MOVQ  $0x9e3779b1, BP
	MOVQ  SI, DI
	SHRQ  $0x10, SI
	MOVQ  SI, R8
	SHLQ  $0x20, DI
	IMULQ BP, DI
	SHRQ  $0x36, DI
	SHLQ  $0x20, R8
	IMULQ BP, R8
	SHRQ  $0x36, R8
	LEAL  -2(CX), R9
	MOVL  24(SP)(R8*4), BP
	MOVL  R9, 24(SP)(DI*4)
	MOVL  CX, 24(SP)(R8*4)
	CMPL  (DX)(BP*1), SI
	JEQ   match_nolit_loop_encodeBlockAsm10B
	INCL  CX
	JMP   search_loop_encodeBlockAsm10B

emit_remainder_encodeBlockAsm10B:
	// Return 0 unless AX + 4 + (src_len - nextEmit) is below the dst limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL   emit_remainder_ok_encodeBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeBlockAsm10B:
	// Emit src[nextEmit:src_len] as a final literal (skipped when empty).
	// CX = literal source pointer, BP = literal length, DX = length - 1
	// (the src base in DX is no longer needed from here on).
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm10B
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	LEAL -1(BP), DX
	CMPL DX, $0x3c
	JLT  one_byte_emit_remainder_encodeBlockAsm10B
	CMPL DX, $0x00000100
	JLT  two_bytes_emit_remainder_encodeBlockAsm10B
	CMPL DX, $0x00010000
	JLT  three_bytes_emit_remainder_encodeBlockAsm10B
	CMPL DX, $0x01000000
	JLT  four_bytes_emit_remainder_encodeBlockAsm10B
	MOVB $0xfc, (AX)
	MOVL DX, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_emit_remainder_encodeBlockAsm10B

four_bytes_emit_remainder_encodeBlockAsm10B:
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_emit_remainder_encodeBlockAsm10B

three_bytes_emit_remainder_encodeBlockAsm10B:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_emit_remainder_encodeBlockAsm10B

two_bytes_emit_remainder_encodeBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL   memmove_emit_remainder_encodeBlockAsm10B
	JMP  memmove_long_emit_remainder_encodeBlockAsm10B

one_byte_emit_remainder_encodeBlockAsm10B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeBlockAsm10B:
	// Short copy of BX bytes from CX to AX; DX = dst pointer after the copy.
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX
	CMPQ BX, $0x03
	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2
	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3
	CMPQ BX, $0x08
	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32
	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm10B

emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBlockAsm10B:
	MOVQ DX, AX
	JMP  emit_literal_done_emit_remainder_encodeBlockAsm10B

memmove_long_emit_remainder_encodeBlockAsm10B:
	// Long copy of BX bytes from CX to AX; same structure as
	// memmove_long_repeat_emit above (head/tail saved in X0..X3, aligned
	// stores from offset DI = 64 - (AX & 31), SI = length >> 7).
	// NOTE(review): the DECQ/JA and DECQ/JNA pairs depend only on ZF, as DECQ
	// leaves CF unchanged; confirm the intended loop conditions.
	LEAQ  (AX)(BP*1), DX
	MOVL  BP, BX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ  BX, SI
	SHRQ  $0x07, SI
	MOVQ  AX, BP
	ANDL  $0x0000001f, BP
	MOVQ  $0x00000040, DI
	SUBQ  BP, DI
	DECQ  SI
	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
	LEAQ  -32(CX)(DI*1), BP
	LEAQ  -32(AX)(DI*1), R8

emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back:
	MOVOU (BP), X4
	MOVOU 16(BP), X5
	MOVOU 32(BP), X6
	MOVOU 48(BP), X7
	MOVOU 64(BP), X8
	MOVOU 80(BP), X9
	MOVOU 96(BP), X10
	MOVOU 112(BP), X11
	MOVOA X4, (R8)
	MOVOA X5, 16(R8)
	MOVOA X6, 32(R8)
	MOVOA X7, 48(R8)
	MOVOA X8, 64(R8)
	MOVOA X9, 80(R8)
	MOVOA X10, 96(R8)
	MOVOA X11, 112(R8)
	ADDQ  $0x80, R8
	ADDQ  $0x80, BP
	ADDQ  $0x80, DI
	DECQ  SI
	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ  $0x20, DI
	CMPQ  BX, DI
	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ  DX, AX

emit_literal_done_emit_remainder_encodeBlockAsm10B:
	// Return the number of bytes written: AX - dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
//
// encodeBlockAsm8B compresses src into dst using a 256-entry position table
// (8-bit hash) kept on the stack and returns the number of bytes written.
// It returns 0 as soon as the output would reach the destination limit
// computed at entry: dst + (len(src)-5) - len(src)>>5.
//
// Long-lived registers:
//   AX  current write pointer into dst
//   CX  current read index s into src
//   DX  src base pointer
// Stack frame:
//   0(SP)   destination limit pointer
//   8(SP)   search limit, len(src)-8
//   12(SP)  index of the first src byte not yet emitted (nextEmit)
//   16(SP)  current repeat offset
//   20(SP)  next search position
//   24(SP)  hash table, 256 uint32 src indices (1024 bytes)
//
// NOTE(review): the first search-loop load reads 8 bytes at src+1 before any
// length check, so callers presumably guarantee a minimum input size - confirm.
// NOTE(review): this file is generated; the memmove_long change below (no
// unchecked 128-byte pass) and the removal of unreachable JMPs must also be
// made in the generator, otherwise they are lost on regeneration.
TEXT ·encodeBlockAsm8B(SB), $1048-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000008, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Clear the hash table: 8 iterations x 128 bytes = 1024 bytes.
zero_loop_encodeBlockAsm8B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ  $0x80, DX
	DECQ  CX
	JNZ   zero_loop_encodeBlockAsm8B

	// nextEmit = 0; search limit = len(src)-8;
	// dst limit = dst + (len(src)-5) - len(src)>>5; s = 1; repeat = 1.
	MOVL  $0x00000000, 12(SP)
	MOVQ  src_len+32(FP), CX
	LEAQ  -5(CX), DX
	LEAQ  -8(CX), BP
	MOVL  BP, 8(SP)
	SHRQ  $0x05, CX
	SUBL  CX, DX
	LEAQ  (AX)(DX*1), DX
	MOVQ  DX, (SP)
	MOVL  $0x00000001, CX
	MOVL  CX, 16(SP)
	MOVQ  src_base+24(FP), DX

	// Main search loop. SI = 8 bytes loaded at src[s].
	// Next position = s + 4 + (s-nextEmit)>>4; stop searching once it passes
	// the search limit.
search_loop_encodeBlockAsm8B:
	MOVQ  (DX)(CX*1), SI
	MOVL  CX, BP
	SUBL  12(SP), BP
	SHRL  $0x04, BP
	LEAL  4(CX)(BP*1), BP
	MOVL  8(SP), DI
	CMPL  BP, DI
	JGT   emit_remainder_encodeBlockAsm8B
	MOVL  BP, 20(SP)

	// hash(x) = ((x << 32) * 0x9e3779b1) >> 56, i.e. 8 bits from the low
	// 4 bytes. R9/R10 hash the values at s and s+1; the previous table
	// entries become candidates BP and DI, and s / s+1 are stored.
	MOVQ  $0x9e3779b1, R8
	MOVQ  SI, R9
	MOVQ  SI, R10
	SHRQ  $0x08, R10
	SHLQ  $0x20, R9
	IMULQ R8, R9
	SHRQ  $0x38, R9
	SHLQ  $0x20, R10
	IMULQ R8, R10
	SHRQ  $0x38, R10
	MOVL  24(SP)(R9*4), BP
	MOVL  24(SP)(R10*4), DI
	MOVL  CX, 24(SP)(R9*4)
	LEAL  1(CX), R9
	MOVL  R9, 24(SP)(R10*4)

	// R9 = hash of the value at s+2 (used by the third candidate check).
	MOVQ  SI, R9
	SHRQ  $0x10, R9
	SHLQ  $0x20, R9
	IMULQ R8, R9
	SHRQ  $0x38, R9

	// Repeat check: do the 4 bytes at s+1 equal the 4 bytes at s+1-repeat?
	MOVL  CX, R8
	SUBL  16(SP), R8
	MOVL  1(DX)(R8*1), R10
	MOVQ  SI, R8
	SHRQ  $0x08, R8
	CMPL  R8, R10
	JNE   no_repeat_found_encodeBlockAsm8B

	// Repeat found. SI = base = s+1, DI = nextEmit, BP = base-repeat.
	LEAL  1(CX), SI
	MOVL  12(SP), DI
	MOVL  SI, BP
	SUBL  16(SP), BP
	JZ    repeat_extend_back_end_encodeBlockAsm8B

	// Extend the repeat backwards while base > nextEmit, the source index
	// stays above zero and the preceding bytes match.
repeat_extend_back_loop_encodeBlockAsm8B:
	CMPL SI, DI
	JLE  repeat_extend_back_end_encodeBlockAsm8B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(SI*1), R8
	CMPB BL, R8
	JNE  repeat_extend_back_end_encodeBlockAsm8B
	LEAL -1(SI), SI
	DECL BP
	JNZ  repeat_extend_back_loop_encodeBlockAsm8B

	// Emit src[nextEmit:base] as a literal (skipped when empty).
	// R8 = literal length, R9 = literal source pointer, BP = length-1.
	// The header size is chosen from length-1: <60 one byte, <1<<8 two,
	// <1<<16 three, <1<<24 four, otherwise five bytes.
repeat_extend_back_end_encodeBlockAsm8B:
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm8B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R9
	SUBL BP, R8
	LEAL -1(R8), BP
	CMPL BP, $0x3c
	JLT  one_byte_repeat_emit_encodeBlockAsm8B
	CMPL BP, $0x00000100
	JLT  two_bytes_repeat_emit_encodeBlockAsm8B
	CMPL BP, $0x00010000
	JLT  three_bytes_repeat_emit_encodeBlockAsm8B
	CMPL BP, $0x01000000
	JLT  four_bytes_repeat_emit_encodeBlockAsm8B
	MOVB $0xfc, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_repeat_emit_encodeBlockAsm8B

four_bytes_repeat_emit_encodeBlockAsm8B:
	MOVL BP, R10
	SHRL $0x10, R10
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB R10, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_repeat_emit_encodeBlockAsm8B

three_bytes_repeat_emit_encodeBlockAsm8B:
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_repeat_emit_encodeBlockAsm8B

	// Two-byte header; lengths of 64 bytes or less use the short copy.
two_bytes_repeat_emit_encodeBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	CMPL BP, $0x40
	JL   memmove_repeat_emit_encodeBlockAsm8B
	JMP  memmove_long_repeat_emit_encodeBlockAsm8B

one_byte_repeat_emit_encodeBlockAsm8B:
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

	// Short literal copy (1..64 bytes): load head and tail first, then store
	// both; the two halves may overlap. BP = dst pointer after the copy.
memmove_repeat_emit_encodeBlockAsm8B:
	LEAQ (AX)(R8*1), BP
	CMPQ R8, $0x03
	JB   emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_1or2
	JE   emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_3
	CMPQ R8, $0x08
	JB   emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32
	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_1or2:
	MOVB (R9), R10
	MOVB -1(R9)(R8*1), R9
	MOVB R10, (AX)
	MOVB R9, -1(AX)(R8*1)
	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_3:
	MOVW (R9), R10
	MOVB 2(R9), R9
	MOVW R10, (AX)
	MOVB R9, 2(AX)
	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_4through7:
	MOVL (R9), R10
	MOVL -4(R9)(R8*1), R9
	MOVL R10, (AX)
	MOVL R9, -4(AX)(R8*1)
	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16:
	MOVQ (R9), R10
	MOVQ -8(R9)(R8*1), R9
	MOVQ R10, (AX)
	MOVQ R9, -8(AX)(R8*1)
	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32:
	MOVOU (R9), X0
	MOVOU -16(R9)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm8B

emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64:
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_repeat_emit_encodeBlockAsm8B:
	MOVQ BP, AX
	JMP  emit_literal_done_repeat_emit_encodeBlockAsm8B

	// Long literal copy (more than 64 bytes). The first and last 32 bytes
	// are kept in X0-X3 and stored unaligned at the end. R12 starts at
	// 64-(dst&31), so the stores at dst+R12-32 are 32-byte aligned, and the
	// loop copies 32 bytes per pass while R12 <= length, so it never touches
	// bytes outside [0, length).
	// The earlier 128-byte unrolled pass was removed: it ran only for
	// lengths 128..255, copied up to offset R12+96 without a bounds check,
	// and so could read and write past the end of runs shorter than 160.
memmove_long_repeat_emit_encodeBlockAsm8B:
	LEAQ  (AX)(R8*1), BP
	MOVOU (R9), X0
	MOVOU 16(R9), X1
	MOVOU -32(R9)(R8*1), X2
	MOVOU -16(R9)(R8*1), X3
	MOVQ  AX, R10
	ANDL  $0x0000001f, R10
	MOVQ  $0x00000040, R12
	SUBQ  R10, R12

emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(R9)(R12*1), X4
	MOVOU -16(R9)(R12*1), X5
	MOVOA X4, -32(AX)(R12*1)
	MOVOA X5, -16(AX)(R12*1)
	ADDQ  $0x20, R12
	CMPQ  R8, R12
	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ  BP, AX

	// Literal done. Advance s by 5 and count how many bytes at src[s:] match
	// src[s-repeat:]. R8 = bytes left in src, R11 = match length so far.
emit_literal_done_repeat_emit_encodeBlockAsm8B:
	ADDL $0x05, CX
	MOVL CX, BP
	SUBL 16(SP), BP
	MOVQ src_len+32(FP), R8
	SUBL CX, R8
	LEAQ (DX)(CX*1), R9
	LEAQ (DX)(BP*1), BP
	XORL R11, R11
	CMPL R8, $0x08
	JL   matchlen_single_repeat_extend

	// Compare 8 bytes at a time; on a difference, the lowest set bit of the
	// XOR divided by 8 is the number of additional matching bytes.
matchlen_loopback_repeat_extend:
	MOVQ  (R9)(R11*1), R10
	XORQ  (BP)(R11*1), R10
	TESTQ R10, R10
	JZ    matchlen_loop_repeat_extend
	BSFQ  R10, R10
	SARQ  $0x03, R10
	LEAL  (R11)(R10*1), R11
	JMP   repeat_extend_forward_end_encodeBlockAsm8B

matchlen_loop_repeat_extend:
	LEAL -8(R8), R8
	LEAL 8(R11), R11
	CMPL R8, $0x08
	JGE  matchlen_loopback_repeat_extend

	// Fewer than 8 bytes left: compare byte by byte.
matchlen_single_repeat_extend:
	TESTL R8, R8
	JZ    repeat_extend_forward_end_encodeBlockAsm8B

matchlen_single_loopback_repeat_extend:
	MOVB (R9)(R11*1), R10
	CMPB (BP)(R11*1), R10
	JNE  repeat_extend_forward_end_encodeBlockAsm8B
	LEAL 1(R11), R11
	DECL R8
	JNZ  matchlen_single_loopback_repeat_extend

	// s += matched; BP = s - base (total length), SI = repeat offset.
	// DI still holds nextEmit as it was before the literal: when it is zero
	// the match is emitted as a copy with an explicit offset instead of a
	// repeat.
repeat_extend_forward_end_encodeBlockAsm8B:
	ADDL  R11, CX
	MOVL  CX, BP
	SUBL  SI, BP
	MOVL  16(SP), SI
	TESTL DI, DI
	JZ    repeat_as_copy_encodeBlockAsm8B

	// Emit a repeat of length BP (offset SI). DI keeps the full length,
	// BP becomes length-4. The encoding size is chosen from the length.
emit_repeat_again_match_repeat_encodeBlockAsm8B:
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE  repeat_two_match_repeat_encodeBlockAsm8B
	CMPL DI, $0x0c
	JGE  cant_repeat_two_offset_match_repeat_encodeBlockAsm8B
	CMPL SI, $0x00000800
	JLT  repeat_two_offset_match_repeat_encodeBlockAsm8B

cant_repeat_two_offset_match_repeat_encodeBlockAsm8B:
	CMPL BP, $0x00000104
	JLT  repeat_three_match_repeat_encodeBlockAsm8B
	CMPL BP, $0x00010100
	JLT  repeat_four_match_repeat_encodeBlockAsm8B
	CMPL BP, $0x0100ffff
	JLT  repeat_five_match_repeat_encodeBlockAsm8B

	// Too long for one 5-byte repeat: emit one with the length field set to
	// 0xfffffb and loop with the remaining length.
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_match_repeat_encodeBlockAsm8B

repeat_five_match_repeat_encodeBlockAsm8B:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP  repeat_end_emit_encodeBlockAsm8B

repeat_four_match_repeat_encodeBlockAsm8B:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP  repeat_end_emit_encodeBlockAsm8B

repeat_three_match_repeat_encodeBlockAsm8B:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP  repeat_end_emit_encodeBlockAsm8B

repeat_two_match_repeat_encodeBlockAsm8B:
	SHLL $0x02, BP
	ORL  $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm8B

	// Two bytes including the offset: byte 1 = low 8 offset bits,
	// byte 0 = (offset>>8)<<5 | (length-4)<<2 | 1.
repeat_two_offset_match_repeat_encodeBlockAsm8B:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm8B

	// Emit the match as a copy: SI = offset, BP = length.
	// Offsets of 65536 or more take the 4-byte-offset form.
repeat_as_copy_encodeBlockAsm8B:
	CMPL SI, $0x00010000
	JL   two_byte_offset_repeat_as_copy_encodeBlockAsm8B

	// Length above 64: emit a 64-byte copy with 4-byte offset (tag 0xff);
	// the rest is emitted as repeats when at least 4 bytes remain.
four_bytes_loop_back_repeat_as_copy_encodeBlockAsm8B:
	CMPL BP, $0x40
	JLE  four_bytes_remain_repeat_as_copy_encodeBlockAsm8B
	MOVB $0xff, (AX)
	MOVL SI, 1(AX)
	LEAL -64(BP), BP
	ADDQ $0x05, AX
	CMPL BP, $0x04
	JL   four_bytes_remain_repeat_as_copy_encodeBlockAsm8B

emit_repeat_again_repeat_as_copy_encodeBlockAsm8B_emit_copy:
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE  repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy
	CMPL DI, $0x0c
	JGE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy
	CMPL SI, $0x00000800
	JLT  repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy:
	CMPL BP, $0x00000104
	JLT  repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy
	CMPL BP, $0x00010100
	JLT  repeat_four_repeat_as_copy_encodeBlockAsm8B_emit_copy
	CMPL BP, $0x0100ffff
	JLT  repeat_five_repeat_as_copy_encodeBlockAsm8B_emit_copy
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm8B_emit_copy

repeat_five_repeat_as_copy_encodeBlockAsm8B_emit_copy:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP  repeat_end_emit_encodeBlockAsm8B

repeat_four_repeat_as_copy_encodeBlockAsm8B_emit_copy:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP  repeat_end_emit_encodeBlockAsm8B

repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP  repeat_end_emit_encodeBlockAsm8B

repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy:
	SHLL $0x02, BP
	ORL  $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm8B

repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm8B

	// Remaining length (at most 64) with a 4-byte offset:
	// tag = (length-1)<<2 | 3. Only the low byte of BP is stored, so the
	// stale upper bits of BX do not matter.
four_bytes_remain_repeat_as_copy_encodeBlockAsm8B:
	TESTL BP, BP
	JZ    repeat_end_emit_encodeBlockAsm8B
	MOVB  $0x03, BL
	LEAL  -4(BX)(BP*4), BP
	MOVB  BP, (AX)
	MOVL  SI, 1(AX)
	ADDQ  $0x05, AX
	JMP   repeat_end_emit_encodeBlockAsm8B

	// Offset fits in 2 bytes. Length above 64: emit a 60-byte copy
	// (tag 0xee) and the rest as repeats.
two_byte_offset_repeat_as_copy_encodeBlockAsm8B:
	CMPL BP, $0x40
	JLE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(BP), BP
	ADDQ $0x03, AX

emit_repeat_again_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	MOVL BP, DI
	LEAL -4(BP), BP
	CMPL DI, $0x08
	JLE  repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
	CMPL DI, $0x0c
	JGE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
	CMPL SI, $0x00000800
	JLT  repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short

cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	CMPL BP, $0x00000104
	JLT  repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
	CMPL BP, $0x00010100
	JLT  repeat_four_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
	CMPL BP, $0x0100ffff
	JLT  repeat_five_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
	LEAL -16842747(BP), BP
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm8B_emit_copy_short

repeat_five_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	LEAL -65536(BP), BP
	MOVL BP, SI
	MOVW $0x001d, (AX)
	MOVW BP, 2(AX)
	SARL $0x10, SI
	MOVB SI, 4(AX)
	ADDQ $0x05, AX
	JMP  repeat_end_emit_encodeBlockAsm8B

repeat_four_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	LEAL -256(BP), BP
	MOVW $0x0019, (AX)
	MOVW BP, 2(AX)
	ADDQ $0x04, AX
	JMP  repeat_end_emit_encodeBlockAsm8B

repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	LEAL -4(BP), BP
	MOVW $0x0015, (AX)
	MOVB BP, 2(AX)
	ADDQ $0x03, AX
	JMP  repeat_end_emit_encodeBlockAsm8B

repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	SHLL $0x02, BP
	ORL  $0x01, BP
	MOVW BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm8B

repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
	XORQ DI, DI
	LEAL 1(DI)(BP*4), BP
	MOVB SI, 1(AX)
	SARL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm8B

	// Length at most 64. Length < 12 and offset < 2048 fit the 2-byte form:
	// byte 0 = (offset>>8)<<5 | (length-4)<<2 | 1, byte 1 = low offset byte.
two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B:
	CMPL BP, $0x0c
	JGE  emit_copy_three_repeat_as_copy_encodeBlockAsm8B
	CMPL SI, $0x00000800
	JGE  emit_copy_three_repeat_as_copy_encodeBlockAsm8B
	MOVB $0x01, BL
	LEAL -16(BX)(BP*4), BP
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeBlockAsm8B

	// 3-byte form: tag = (length-1)<<2 | 2 followed by the 16-bit offset.
emit_copy_three_repeat_as_copy_encodeBlockAsm8B:
	MOVB $0x02, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

	// nextEmit = s; keep searching unless s reached the search limit.
repeat_end_emit_encodeBlockAsm8B:
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE  emit_remainder_encodeBlockAsm8B
	JMP  search_loop_encodeBlockAsm8B

	// No repeat. Test the three table candidates against the 4 bytes at
	// s, s+1 and s+2 (BP, DI, then the entry for the s+2 hash in R9).
	// The s+2 slot is updated with s+2 along the way.
no_repeat_found_encodeBlockAsm8B:
	CMPL (DX)(BP*1), SI
	JEQ  candidate_match_encodeBlockAsm8B
	SHRQ $0x08, SI
	MOVL 24(SP)(R9*4), BP
	LEAL 2(CX), R8
	CMPL (DX)(DI*1), SI
	JEQ  candidate2_match_encodeBlockAsm8B
	MOVL R8, 24(SP)(R9*4)
	SHRQ $0x08, SI
	CMPL (DX)(BP*1), SI
	JEQ  candidate3_match_encodeBlockAsm8B

	// Nothing matched: continue at the precomputed next position.
	MOVL 20(SP), CX
	JMP  search_loop_encodeBlockAsm8B

candidate3_match_encodeBlockAsm8B:
	ADDL $0x02, CX
	JMP  candidate_match_encodeBlockAsm8B

candidate2_match_encodeBlockAsm8B:
	MOVL R8, 24(SP)(R9*4)
	INCL CX
	MOVL DI, BP

	// Match at s (CX) against candidate index BP. SI = nextEmit.
candidate_match_encodeBlockAsm8B:
	MOVL  12(SP), SI
	TESTL BP, BP
	JZ    match_extend_back_end_encodeBlockAsm8B

	// Extend the match backwards while s > nextEmit, candidate > 0 and the
	// preceding bytes are equal.
match_extend_back_loop_encodeBlockAsm8B:
	CMPL CX, SI
	JLE  match_extend_back_end_encodeBlockAsm8B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE  match_extend_back_end_encodeBlockAsm8B
	LEAL -1(CX), CX
	DECL BP
	JZ   match_extend_back_end_encodeBlockAsm8B
	JMP  match_extend_back_loop_encodeBlockAsm8B

	// Give up (return 0) when dst + 4 + pending literal length would reach
	// the destination limit.
match_extend_back_end_encodeBlockAsm8B:
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 4(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL   match_dst_size_check_encodeBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

	// Emit src[nextEmit:s] as a literal (skipped when empty).
	// R8 = literal length, SI = literal source pointer, DI = length-1.
match_dst_size_check_encodeBlockAsm8B:
	MOVL CX, SI
	MOVL 12(SP), DI
	CMPL DI, SI
	JEQ  emit_literal_done_match_emit_encodeBlockAsm8B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(DI*1), SI
	SUBL DI, R8
	LEAL -1(R8), DI
	CMPL DI, $0x3c
	JLT  one_byte_match_emit_encodeBlockAsm8B
	CMPL DI, $0x00000100
	JLT  two_bytes_match_emit_encodeBlockAsm8B
	CMPL DI, $0x00010000
	JLT  three_bytes_match_emit_encodeBlockAsm8B
	CMPL DI, $0x01000000
	JLT  four_bytes_match_emit_encodeBlockAsm8B
	MOVB $0xfc, (AX)
	MOVL DI, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_match_emit_encodeBlockAsm8B

four_bytes_match_emit_encodeBlockAsm8B:
	MOVL DI, R9
	SHRL $0x10, R9
	MOVB $0xf8, (AX)
	MOVW DI, 1(AX)
	MOVB R9, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_match_emit_encodeBlockAsm8B

three_bytes_match_emit_encodeBlockAsm8B:
	MOVB $0xf4, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_match_emit_encodeBlockAsm8B

two_bytes_match_emit_encodeBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DI, 1(AX)
	ADDQ $0x02, AX
	CMPL DI, $0x40
	JL   memmove_match_emit_encodeBlockAsm8B
	JMP  memmove_long_match_emit_encodeBlockAsm8B

one_byte_match_emit_encodeBlockAsm8B:
	SHLB $0x02, DI
	MOVB DI, (AX)
	ADDQ $0x01, AX

	// Short literal copy (1..64 bytes); DI = dst pointer after the copy.
memmove_match_emit_encodeBlockAsm8B:
	LEAQ (AX)(R8*1), DI
	CMPQ R8, $0x03
	JB   emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_1or2
	JE   emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_3
	CMPQ R8, $0x08
	JB   emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32
	JMP  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (AX)
	MOVB SI, -1(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (AX)
	MOVB SI, 2(AX)
	JMP  memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_4through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (AX)
	MOVL SI, -4(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (AX)
	MOVQ SI, -8(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP   memmove_end_copy_match_emit_encodeBlockAsm8B

emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_encodeBlockAsm8B:
	MOVQ DI, AX
	JMP  emit_literal_done_match_emit_encodeBlockAsm8B

	// Long literal copy (more than 64 bytes). Head and tail are kept in
	// X0-X3; R11 starts at 64-(dst&31) so the loop stores are 32-byte
	// aligned, and the loop only runs while R11 <= length, so it stays
	// inside the run. The unchecked 128-byte pass that could overrun runs
	// of 128..159 bytes was removed.
memmove_long_match_emit_encodeBlockAsm8B:
	LEAQ  (AX)(R8*1), DI
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVQ  AX, R9
	ANDL  $0x0000001f, R9
	MOVQ  $0x00000040, R11
	SUBQ  R9, R11

emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(SI)(R11*1), X4
	MOVOU -16(SI)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ  $0x20, R11
	CMPQ  R8, R11
	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ  DI, AX

	// Match with no pending literal. repeat = s - candidate; the first
	// 4 bytes are already known to match, so skip them and measure the rest.
	// SI = bytes left in src, R9 = additional match length.
emit_literal_done_match_emit_encodeBlockAsm8B:
match_nolit_loop_encodeBlockAsm8B:
	MOVL CX, SI
	SUBL BP, SI
	MOVL SI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), SI
	SUBL CX, SI
	LEAQ (DX)(CX*1), DI
	LEAQ (DX)(BP*1), BP
	XORL R9, R9
	CMPL SI, $0x08
	JL   matchlen_single_match_nolit_encodeBlockAsm8B

matchlen_loopback_match_nolit_encodeBlockAsm8B:
	MOVQ  (DI)(R9*1), R8
	XORQ  (BP)(R9*1), R8
	TESTQ R8, R8
	JZ    matchlen_loop_match_nolit_encodeBlockAsm8B
	BSFQ  R8, R8
	SARQ  $0x03, R8
	LEAL  (R9)(R8*1), R9
	JMP   match_nolit_end_encodeBlockAsm8B

matchlen_loop_match_nolit_encodeBlockAsm8B:
	LEAL -8(SI), SI
	LEAL 8(R9), R9
	CMPL SI, $0x08
	JGE  matchlen_loopback_match_nolit_encodeBlockAsm8B

matchlen_single_match_nolit_encodeBlockAsm8B:
	TESTL SI, SI
	JZ    match_nolit_end_encodeBlockAsm8B

matchlen_single_loopback_match_nolit_encodeBlockAsm8B:
	MOVB (DI)(R9*1), R8
	CMPB (BP)(R9*1), R8
	JNE  match_nolit_end_encodeBlockAsm8B
	LEAL 1(R9), R9
	DECL SI
	JNZ  matchlen_single_loopback_match_nolit_encodeBlockAsm8B

	// s += extra length; emit a copy with offset BP (repeat) and
	// length R9 = extra + 4. Same encodings as the repeat-as-copy path.
match_nolit_end_encodeBlockAsm8B:
	ADDL R9, CX
	MOVL 16(SP), BP
	ADDL $0x04, R9
	CMPL BP, $0x00010000
	JL   two_byte_offset_match_nolit_encodeBlockAsm8B

four_bytes_loop_back_match_nolit_encodeBlockAsm8B:
	CMPL R9, $0x40
	JLE  four_bytes_remain_match_nolit_encodeBlockAsm8B
	MOVB $0xff, (AX)
	MOVL BP, 1(AX)
	LEAL -64(R9), R9
	ADDQ $0x05, AX
	CMPL R9, $0x04
	JL   four_bytes_remain_match_nolit_encodeBlockAsm8B

emit_repeat_again_match_nolit_encodeBlockAsm8B_emit_copy:
	MOVL R9, SI
	LEAL -4(R9), R9
	CMPL SI, $0x08
	JLE  repeat_two_match_nolit_encodeBlockAsm8B_emit_copy
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy
	CMPL BP, $0x00000800
	JLT  repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy

cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy:
	CMPL R9, $0x00000104
	JLT  repeat_three_match_nolit_encodeBlockAsm8B_emit_copy
	CMPL R9, $0x00010100
	JLT  repeat_four_match_nolit_encodeBlockAsm8B_emit_copy
	CMPL R9, $0x0100ffff
	JLT  repeat_five_match_nolit_encodeBlockAsm8B_emit_copy
	LEAL -16842747(R9), R9
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_match_nolit_encodeBlockAsm8B_emit_copy

repeat_five_match_nolit_encodeBlockAsm8B_emit_copy:
	LEAL -65536(R9), R9
	MOVL R9, BP
	MOVW $0x001d, (AX)
	MOVW R9, 2(AX)
	SARL $0x10, BP
	MOVB BP, 4(AX)
	ADDQ $0x05, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_four_match_nolit_encodeBlockAsm8B_emit_copy:
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_three_match_nolit_encodeBlockAsm8B_emit_copy:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_two_match_nolit_encodeBlockAsm8B_emit_copy:
	SHLL $0x02, R9
	ORL  $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy:
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL  BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B

four_bytes_remain_match_nolit_encodeBlockAsm8B:
	TESTL R9, R9
	JZ    match_nolit_emitcopy_end_encodeBlockAsm8B
	MOVB  $0x03, BL
	LEAL  -4(BX)(R9*4), R9
	MOVB  R9, (AX)
	MOVL  BP, 1(AX)
	ADDQ  $0x05, AX
	JMP   match_nolit_emitcopy_end_encodeBlockAsm8B

two_byte_offset_match_nolit_encodeBlockAsm8B:
	CMPL R9, $0x40
	JLE  two_byte_offset_short_match_nolit_encodeBlockAsm8B
	MOVB $0xee, (AX)
	MOVW BP, 1(AX)
	LEAL -60(R9), R9
	ADDQ $0x03, AX

emit_repeat_again_match_nolit_encodeBlockAsm8B_emit_copy_short:
	MOVL R9, SI
	LEAL -4(R9), R9
	CMPL SI, $0x08
	JLE  repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short
	CMPL BP, $0x00000800
	JLT  repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short

cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
	CMPL R9, $0x00000104
	JLT  repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short
	CMPL R9, $0x00010100
	JLT  repeat_four_match_nolit_encodeBlockAsm8B_emit_copy_short
	CMPL R9, $0x0100ffff
	JLT  repeat_five_match_nolit_encodeBlockAsm8B_emit_copy_short
	LEAL -16842747(R9), R9
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	JMP  emit_repeat_again_match_nolit_encodeBlockAsm8B_emit_copy_short

repeat_five_match_nolit_encodeBlockAsm8B_emit_copy_short:
	LEAL -65536(R9), R9
	MOVL R9, BP
	MOVW $0x001d, (AX)
	MOVW R9, 2(AX)
	SARL $0x10, BP
	MOVB BP, 4(AX)
	ADDQ $0x05, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_four_match_nolit_encodeBlockAsm8B_emit_copy_short:
	LEAL -256(R9), R9
	MOVW $0x0019, (AX)
	MOVW R9, 2(AX)
	ADDQ $0x04, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short:
	LEAL -4(R9), R9
	MOVW $0x0015, (AX)
	MOVB R9, 2(AX)
	ADDQ $0x03, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short:
	SHLL $0x02, R9
	ORL  $0x01, R9
	MOVW R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B

repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
	XORQ SI, SI
	LEAL 1(SI)(R9*4), R9
	MOVB BP, 1(AX)
	SARL $0x08, BP
	SHLL $0x05, BP
	ORL  BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B

two_byte_offset_short_match_nolit_encodeBlockAsm8B:
	CMPL R9, $0x0c
	JGE  emit_copy_three_match_nolit_encodeBlockAsm8B
	CMPL BP, $0x00000800
	JGE  emit_copy_three_match_nolit_encodeBlockAsm8B
	MOVB $0x01, BL
	LEAL -16(BX)(R9*4), R9
	MOVB BP, 1(AX)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL  BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B

emit_copy_three_match_nolit_encodeBlockAsm8B:
	MOVB $0x02, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX

	// nextEmit = s. Stop searching at the search limit; return 0 when the
	// write pointer has reached the destination limit.
match_nolit_emitcopy_end_encodeBlockAsm8B:
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE  emit_remainder_encodeBlockAsm8B
	CMPQ AX, (SP)
	JL   match_nolit_dst_ok_encodeBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

	// Load 8 bytes at s-2, store s-2 and s in the table under the hashes of
	// the values at s-2 and s, and test the previous entry for s as an
	// immediate new match. On a miss, resume searching at s+1.
match_nolit_dst_ok_encodeBlockAsm8B:
	MOVQ  -2(DX)(CX*1), SI
	MOVQ  $0x9e3779b1, BP
	MOVQ  SI, DI
	SHRQ  $0x10, SI
	MOVQ  SI, R8
	SHLQ  $0x20, DI
	IMULQ BP, DI
	SHRQ  $0x38, DI
	SHLQ  $0x20, R8
	IMULQ BP, R8
	SHRQ  $0x38, R8
	LEAL  -2(CX), R9
	MOVL  24(SP)(R8*4), BP
	MOVL  R9, 24(SP)(DI*4)
	MOVL  CX, 24(SP)(R8*4)
	CMPL  (DX)(BP*1), SI
	JEQ   match_nolit_loop_encodeBlockAsm8B
	INCL  CX
	JMP   search_loop_encodeBlockAsm8B

	// Emit src[nextEmit:] as the final literal. Return 0 when
	// dst + 4 + remaining length would reach the destination limit.
emit_remainder_encodeBlockAsm8B:
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL   emit_remainder_ok_encodeBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

	// BP = literal length, CX = literal source pointer, DX = length-1
	// (DX no longer holds the src base from here on).
emit_remainder_ok_encodeBlockAsm8B:
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm8B
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	LEAL -1(BP), DX
	CMPL DX, $0x3c
	JLT  one_byte_emit_remainder_encodeBlockAsm8B
	CMPL DX, $0x00000100
	JLT  two_bytes_emit_remainder_encodeBlockAsm8B
	CMPL DX, $0x00010000
	JLT  three_bytes_emit_remainder_encodeBlockAsm8B
	CMPL DX, $0x01000000
	JLT  four_bytes_emit_remainder_encodeBlockAsm8B
	MOVB $0xfc, (AX)
	MOVL DX, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_emit_remainder_encodeBlockAsm8B

four_bytes_emit_remainder_encodeBlockAsm8B:
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_emit_remainder_encodeBlockAsm8B

three_bytes_emit_remainder_encodeBlockAsm8B:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_emit_remainder_encodeBlockAsm8B

two_bytes_emit_remainder_encodeBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL   memmove_emit_remainder_encodeBlockAsm8B
	JMP  memmove_long_emit_remainder_encodeBlockAsm8B

one_byte_emit_remainder_encodeBlockAsm8B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

	// Short literal copy (1..64 bytes); BX = length, DX = dst pointer after
	// the copy.
memmove_emit_remainder_encodeBlockAsm8B:
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX
	CMPQ BX, $0x03
	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2
	JE   emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3
	CMPQ BX, $0x08
	JB   emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32
	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm8B

emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeBlockAsm8B:
	MOVQ DX, AX
	JMP  emit_literal_done_emit_remainder_encodeBlockAsm8B

	// Long literal copy (more than 64 bytes) of the final run. Head and tail
	// are kept in X0-X3; DI starts at 64-(dst&31) so the loop stores are
	// 32-byte aligned, and the loop only runs while DI <= length. This run
	// ends at the end of src, so the removed unchecked 128-byte pass could
	// read beyond the end of src here (run lengths 128..159).
memmove_long_emit_remainder_encodeBlockAsm8B:
	LEAQ  (AX)(BP*1), DX
	MOVL  BP, BX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ  AX, BP
	ANDL  $0x0000001f, BP
	MOVQ  $0x00000040, DI
	SUBQ  BP, DI

emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ  $0x20, DI
	CMPQ  BX, DI
	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ  DX, AX

	// Return the number of bytes written: write pointer - dst base.
emit_literal_done_emit_remainder_encodeBlockAsm8B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBlockAsm(dst []byte, src []byte) int
// Requires: SSE2
//
// Encodes src into dst as a stream of Snappy literal and copy elements and
// stores the number of bytes written in ret. Stores 0 instead when the output
// would reach the limit pointer dst_base + len(src) - 5 - len(src)>>5.
// dst_len is never read by this routine, so the caller is responsible for
// supplying a buffer large enough for that limit plus the small overshoot of
// the fixed-width stores used below (NOTE(review): confirm against the Go
// wrapper that sizes dst).
//
// Stack frame (65560 bytes):
//    0(SP)  8 bytes      dst limit pointer
//    8(SP)  4 bytes      sLimit   = len(src) - 8, last index the search may load from
//   12(SP)  4 bytes      nextEmit = src index of the first byte not yet emitted
//   16(SP)  4 bytes      repeat   = offset of the most recent match (starts at 1)
//   20(SP)  4 bytes      nextS    = src index to resume searching at after a miss
//   24(SP)  65536 bytes  hash table: 16384 uint32 src indices
//
// Register roles in the search loop: AX = dst write pointer, CX = current src
// index (s), DX = src base pointer. BX is only used through BL as scratch.
//
// NOTE(review): this file is generated (see the banner at the top of the
// file); the long-literal copy fix below should also be applied in gen.go.
TEXT ·encodeSnappyBlockAsm(SB), $65560-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000200, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Clear the hash table: 0x200 iterations of 128 bytes = 65536 bytes.
zero_loop_encodeSnappyBlockAsm:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ  $0x80, DX
	DECQ  CX
	JNZ   zero_loop_encodeSnappyBlockAsm

	// nextEmit = 0, sLimit = len(src)-8,
	// dst limit = dst_base + len(src) - 5 - len(src)>>5,
	// s = 1, repeat = 1, DX = src base.
	MOVL  $0x00000000, 12(SP)
	MOVQ  src_len+32(FP), CX
	LEAQ  -5(CX), DX
	LEAQ  -8(CX), BP
	MOVL  BP, 8(SP)
	SHRQ  $0x05, CX
	SUBL  CX, DX
	LEAQ  (AX)(DX*1), DX
	MOVQ  DX, (SP)
	MOVL  $0x00000001, CX
	MOVL  CX, 16(SP)
	MOVQ  src_base+24(FP), DX

search_loop_encodeSnappyBlockAsm:
	// SI = 8 bytes at src[s]. nextS = s + 4 + (s-nextEmit)>>6, so the step
	// grows the longer no match has been found. Stop once nextS > sLimit.
	MOVQ  (DX)(CX*1), SI
	MOVL  CX, BP
	SUBL  12(SP), BP
	SHRL  $0x06, BP
	LEAL  4(CX)(BP*1), BP
	MOVL  8(SP), DI
	CMPL  BP, DI
	JGT   emit_remainder_encodeSnappyBlockAsm
	MOVL  BP, 20(SP)

	// Hash the low 6 bytes (SHLQ 16 drops the top two) of the values at s and
	// s+1: multiply by the constant and keep the top 14 bits as table index.
	MOVQ  $0x0000cf1bbcdcbf9b, R8
	MOVQ  SI, R9
	MOVQ  SI, R10
	SHRQ  $0x08, R10
	SHLQ  $0x10, R9
	IMULQ R8, R9
	SHRQ  $0x32, R9
	SHLQ  $0x10, R10
	IMULQ R8, R10
	SHRQ  $0x32, R10

	// BP = candidate for s, DI = candidate for s+1; record s and s+1.
	MOVL  24(SP)(R9*4), BP
	MOVL  24(SP)(R10*4), DI
	MOVL  CX, 24(SP)(R9*4)
	LEAL  1(CX), R9
	MOVL  R9, 24(SP)(R10*4)

	// R9 = hash of the value at s+2 (looked up later in no_repeat_found).
	MOVQ  SI, R9
	SHRQ  $0x10, R9
	SHLQ  $0x10, R9
	IMULQ R8, R9
	SHRQ  $0x32, R9

	// Repeat check: compare the 4 bytes at s+1 with those at s+1-repeat.
	MOVL  CX, R8
	SUBL  16(SP), R8
	MOVL  1(DX)(R8*1), R10
	MOVQ  SI, R8
	SHRQ  $0x08, R8
	CMPL  R8, R10
	JNE   no_repeat_found_encodeSnappyBlockAsm

	// Repeat found at s+1. SI = start of the match, DI = start of its source;
	// walk both backwards while bytes match, not going below nextEmit (BP)
	// or below source index 0.
	LEAL  1(CX), SI
	MOVL  12(SP), BP
	MOVL  SI, DI
	SUBL  16(SP), DI
	JZ    repeat_extend_back_end_encodeSnappyBlockAsm

repeat_extend_back_loop_encodeSnappyBlockAsm:
	CMPL SI, BP
	JLE  repeat_extend_back_end_encodeSnappyBlockAsm
	MOVB -1(DX)(DI*1), BL
	MOVB -1(DX)(SI*1), R8
	CMPB BL, R8
	JNE  repeat_extend_back_end_encodeSnappyBlockAsm
	LEAL -1(SI), SI
	DECL DI
	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm

repeat_extend_back_end_encodeSnappyBlockAsm:
	// Emit src[nextEmit:SI] as a literal (skipped when empty).
	// R8 = literal source pointer, DI = literal length, BP = length-1.
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm
	MOVL SI, DI
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R8
	SUBL BP, DI
	LEAL -1(DI), BP

	// Literal header: (length-1)<<2 in one byte when length-1 < 60, else
	// tag 0xf0/0xf4/0xf8/0xfc followed by length-1 in 1/2/3/4 bytes.
	CMPL BP, $0x3c
	JLT  one_byte_repeat_emit_encodeSnappyBlockAsm
	CMPL BP, $0x00000100
	JLT  two_bytes_repeat_emit_encodeSnappyBlockAsm
	CMPL BP, $0x00010000
	JLT  three_bytes_repeat_emit_encodeSnappyBlockAsm
	CMPL BP, $0x01000000
	JLT  four_bytes_repeat_emit_encodeSnappyBlockAsm
	MOVB $0xfc, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm

four_bytes_repeat_emit_encodeSnappyBlockAsm:
	MOVL BP, R9
	SHRL $0x10, R9
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB R9, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm

three_bytes_repeat_emit_encodeSnappyBlockAsm:
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm

two_bytes_repeat_emit_encodeSnappyBlockAsm:
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	CMPL BP, $0x40
	JL   memmove_repeat_emit_encodeSnappyBlockAsm
	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm

one_byte_repeat_emit_encodeSnappyBlockAsm:
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

	// Short copy (1..64 bytes): pick a size class and copy with a pair of
	// possibly overlapping loads/stores anchored at the start and the end.
	// BP = dst pointer after the literal.
memmove_repeat_emit_encodeSnappyBlockAsm:
	LEAQ (AX)(DI*1), BP
	CMPQ DI, $0x03
	JB   emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_1or2
	JE   emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_3
	CMPQ DI, $0x08
	JB   emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_4through7
	CMPQ DI, $0x10
	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16
	CMPQ DI, $0x20
	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32
	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_1or2:
	MOVB (R8), R9
	MOVB -1(R8)(DI*1), R8
	MOVB R9, (AX)
	MOVB R8, -1(AX)(DI*1)
	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_3:
	MOVW (R8), R9
	MOVB 2(R8), R8
	MOVW R9, (AX)
	MOVB R8, 2(AX)
	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_4through7:
	MOVL (R8), R9
	MOVL -4(R8)(DI*1), R8
	MOVL R9, (AX)
	MOVL R8, -4(AX)(DI*1)
	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16:
	MOVQ (R8), R9
	MOVQ -8(R8)(DI*1), R8
	MOVQ R9, (AX)
	MOVQ R8, -8(AX)(DI*1)
	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32:
	MOVOU (R8), X0
	MOVOU -16(R8)(DI*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DI*1)
	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64:
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU -32(R8)(DI*1), X2
	MOVOU -16(R8)(DI*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DI*1)
	MOVOU X3, -16(AX)(DI*1)

memmove_end_copy_repeat_emit_encodeSnappyBlockAsm:
	MOVQ BP, AX
	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm

	// Long copy (length >= 65 on every path that reaches here).
	// X0..X3 hold the first and last 32 bytes and are stored last. R11 is
	// the running end offset, starting at 64-(dst&31) so that the MOVOA
	// stores at dst+R11-32 are 32-byte aligned. Each iteration copies
	// [R11-32, R11) and repeats only while length >= R11, so nothing is
	// read or written outside [0, length).
	// The previous version ran an unchecked 128-byte loop first for lengths
	// 128..255, which could write past the literal in dst and read past it
	// in src; that loop has been removed.
memmove_long_repeat_emit_encodeSnappyBlockAsm:
	LEAQ  (AX)(DI*1), BP
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU -32(R8)(DI*1), X2
	MOVOU -16(R8)(DI*1), X3
	MOVQ  AX, R9
	ANDL  $0x0000001f, R9
	MOVQ  $0x00000040, R11
	SUBQ  R9, R11

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(R8)(R11*1), X4
	MOVOU -16(R8)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ  $0x20, R11
	CMPQ  DI, R11
	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DI*1)
	MOVOU X3, -16(AX)(DI*1)
	MOVQ  BP, AX

emit_literal_done_repeat_emit_encodeSnappyBlockAsm:
	// s += 5 (the 4 bytes verified at s+1). Extend the repeat forward:
	// R8 = src+s, BP = src+s-repeat, DI = bytes left in src, R10 = extra
	// matched length.
	ADDL $0x05, CX
	MOVL CX, BP
	SUBL 16(SP), BP
	MOVQ src_len+32(FP), DI
	SUBL CX, DI
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(BP*1), BP
	XORL R10, R10
	CMPL DI, $0x08
	JL   matchlen_single_repeat_extend

	// Compare 8 bytes at a time; on a difference the index of the lowest set
	// bit of the XOR, divided by 8, is the number of equal leading bytes.
matchlen_loopback_repeat_extend:
	MOVQ  (R8)(R10*1), R9
	XORQ  (BP)(R10*1), R9
	TESTQ R9, R9
	JZ    matchlen_loop_repeat_extend
	BSFQ  R9, R9
	SARQ  $0x03, R9
	LEAL  (R10)(R9*1), R10
	JMP   repeat_extend_forward_end_encodeSnappyBlockAsm

matchlen_loop_repeat_extend:
	LEAL -8(DI), DI
	LEAL 8(R10), R10
	CMPL DI, $0x08
	JGE  matchlen_loopback_repeat_extend

	// Fewer than 8 bytes left: compare byte by byte.
matchlen_single_repeat_extend:
	TESTL DI, DI
	JZ    repeat_extend_forward_end_encodeSnappyBlockAsm

matchlen_single_loopback_repeat_extend:
	MOVB (R8)(R10*1), R9
	CMPB (BP)(R10*1), R9
	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm
	LEAL 1(R10), R10
	DECL DI
	JNZ  matchlen_single_loopback_repeat_extend

repeat_extend_forward_end_encodeSnappyBlockAsm:
	// BP = total match length (s - match start), SI = offset. The repeat is
	// written as an ordinary copy element.
	ADDL R10, CX
	MOVL CX, BP
	SUBL SI, BP
	MOVL 16(SP), SI
	CMPL SI, $0x00010000
	JL   two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm

	// Offset >= 65536: 5-byte copies (tag + 32-bit offset), 64 bytes each
	// (tag 0xff) while more than 64 bytes remain.
four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm:
	CMPL BP, $0x40
	JLE  four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
	MOVB $0xff, (AX)
	MOVL SI, 1(AX)
	LEAL -64(BP), BP
	ADDQ $0x05, AX
	CMPL BP, $0x04
	JL   four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
	JMP  four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm

four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm:
	// Final 5-byte copy: tag = 3 | (length-1)<<2. Only the low byte of the
	// LEAL result is stored, so the stale upper bits of BX do not matter.
	TESTL BP, BP
	JZ    repeat_end_emit_encodeSnappyBlockAsm
	MOVB  $0x03, BL
	LEAL  -4(BX)(BP*4), BP
	MOVB  BP, (AX)
	MOVL  SI, 1(AX)
	ADDQ  $0x05, AX
	JMP   repeat_end_emit_encodeSnappyBlockAsm

	// Offset < 65536: while more than 64 bytes remain emit 60-byte copies
	// with a 16-bit offset (tag 0xee = 2 | 59<<2).
two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm:
	CMPL BP, $0x40
	JLE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(BP), BP
	ADDQ $0x03, AX
	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm

two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm:
	// Length < 12 and offset < 2048: 2-byte copy,
	// tag = 1 | (length-4)<<2 | (offset>>8)<<5, then the low offset byte.
	CMPL BP, $0x0c
	JGE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
	CMPL SI, $0x00000800
	JGE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
	MOVB $0x01, BL
	LEAL -16(BX)(BP*4), BP
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeSnappyBlockAsm

emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm:
	// 3-byte copy: tag = 2 | (length-1)<<2, then the 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeSnappyBlockAsm:
	// nextEmit = s; finish if s >= sLimit, otherwise keep searching.
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE  emit_remainder_encodeSnappyBlockAsm
	JMP  search_loop_encodeSnappyBlockAsm

no_repeat_found_encodeSnappyBlockAsm:
	// Verify the table candidates by comparing 4 bytes: candidate for s
	// (BP), then for s+1 (DI), then for s+2 (loaded via the R9 hash; the
	// s+2 slot is updated with s+2). On a miss continue at nextS.
	CMPL (DX)(BP*1), SI
	JEQ  candidate_match_encodeSnappyBlockAsm
	SHRQ $0x08, SI
	MOVL 24(SP)(R9*4), BP
	LEAL 2(CX), R8
	CMPL (DX)(DI*1), SI
	JEQ  candidate2_match_encodeSnappyBlockAsm
	MOVL R8, 24(SP)(R9*4)
	SHRQ $0x08, SI
	CMPL (DX)(BP*1), SI
	JEQ  candidate3_match_encodeSnappyBlockAsm
	MOVL 20(SP), CX
	JMP  search_loop_encodeSnappyBlockAsm

candidate3_match_encodeSnappyBlockAsm:
	ADDL $0x02, CX
	JMP  candidate_match_encodeSnappyBlockAsm

candidate2_match_encodeSnappyBlockAsm:
	MOVL R8, 24(SP)(R9*4)
	INCL CX
	MOVL DI, BP

candidate_match_encodeSnappyBlockAsm:
	// CX = s (match position), BP = candidate index. Extend the match
	// backwards while bytes agree, stopping at nextEmit (SI) or candidate 0.
	MOVL  12(SP), SI
	TESTL BP, BP
	JZ    match_extend_back_end_encodeSnappyBlockAsm

match_extend_back_loop_encodeSnappyBlockAsm:
	CMPL CX, SI
	JLE  match_extend_back_end_encodeSnappyBlockAsm
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE  match_extend_back_end_encodeSnappyBlockAsm
	LEAL -1(CX), CX
	DECL BP
	JZ   match_extend_back_end_encodeSnappyBlockAsm
	JMP  match_extend_back_loop_encodeSnappyBlockAsm

match_extend_back_end_encodeSnappyBlockAsm:
	// Give up (return 0) unless dst + (s-nextEmit) + 4 is below the limit.
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 4(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL   match_dst_size_check_encodeSnappyBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeSnappyBlockAsm:
	// Emit src[nextEmit:s] as a literal (skipped when empty).
	// SI = literal source pointer, R8 = literal length, DI = length-1.
	MOVL CX, SI
	MOVL 12(SP), DI
	CMPL DI, SI
	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(DI*1), SI
	SUBL DI, R8
	LEAL -1(R8), DI

	// Literal header, same encoding as in the repeat path above.
	CMPL DI, $0x3c
	JLT  one_byte_match_emit_encodeSnappyBlockAsm
	CMPL DI, $0x00000100
	JLT  two_bytes_match_emit_encodeSnappyBlockAsm
	CMPL DI, $0x00010000
	JLT  three_bytes_match_emit_encodeSnappyBlockAsm
	CMPL DI, $0x01000000
	JLT  four_bytes_match_emit_encodeSnappyBlockAsm
	MOVB $0xfc, (AX)
	MOVL DI, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_match_emit_encodeSnappyBlockAsm

four_bytes_match_emit_encodeSnappyBlockAsm:
	MOVL DI, R9
	SHRL $0x10, R9
	MOVB $0xf8, (AX)
	MOVW DI, 1(AX)
	MOVB R9, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_match_emit_encodeSnappyBlockAsm

three_bytes_match_emit_encodeSnappyBlockAsm:
	MOVB $0xf4, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_match_emit_encodeSnappyBlockAsm

two_bytes_match_emit_encodeSnappyBlockAsm:
	MOVB $0xf0, (AX)
	MOVB DI, 1(AX)
	ADDQ $0x02, AX
	CMPL DI, $0x40
	JL   memmove_match_emit_encodeSnappyBlockAsm
	JMP  memmove_long_match_emit_encodeSnappyBlockAsm

one_byte_match_emit_encodeSnappyBlockAsm:
	SHLB $0x02, DI
	MOVB DI, (AX)
	ADDQ $0x01, AX

	// Short copy (1..64 bytes) by size class; DI = dst pointer after the
	// literal.
memmove_match_emit_encodeSnappyBlockAsm:
	LEAQ (AX)(R8*1), DI
	CMPQ R8, $0x03
	JB   emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_1or2
	JE   emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_3
	CMPQ R8, $0x08
	JB   emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_4through7
	CMPQ R8, $0x10
	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16
	CMPQ R8, $0x20
	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32
	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64

emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (AX)
	MOVB SI, -1(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm

emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (AX)
	MOVB SI, 2(AX)
	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm

emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_4through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (AX)
	MOVL SI, -4(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm

emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (AX)
	MOVQ SI, -8(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm

emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm

emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_encodeSnappyBlockAsm:
	MOVQ DI, AX
	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm

	// Long copy (length >= 65). X0..X3 hold the first and last 32 bytes and
	// are stored last; R11 starts at 64-(dst&31) so the MOVOA stores are
	// 32-byte aligned. Each iteration copies [R11-32, R11) and repeats only
	// while length >= R11, so the copy stays inside [0, length). The
	// unchecked 128-byte loop that used to precede this loop could overrun
	// for lengths 128..255 and has been removed.
memmove_long_match_emit_encodeSnappyBlockAsm:
	LEAQ  (AX)(R8*1), DI
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVQ  AX, R9
	ANDL  $0x0000001f, R9
	MOVQ  $0x00000040, R11
	SUBQ  R9, R11

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(SI)(R11*1), X4
	MOVOU -16(SI)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ  $0x20, R11
	CMPQ  R8, R11
	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ  DI, AX

emit_literal_done_match_emit_encodeSnappyBlockAsm:
match_nolit_loop_encodeSnappyBlockAsm:
	// repeat = s - candidate. Skip the 4 bytes already verified and extend
	// the match forward: DI = src+s, BP = src+candidate, SI = bytes left in
	// src, R9 = extra matched length.
	MOVL CX, SI
	SUBL BP, SI
	MOVL SI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), SI
	SUBL CX, SI
	LEAQ (DX)(CX*1), DI
	LEAQ (DX)(BP*1), BP
	XORL R9, R9
	CMPL SI, $0x08
	JL   matchlen_single_match_nolit_encodeSnappyBlockAsm

	// 8 bytes at a time; first differing byte = BSF(xor) / 8.
matchlen_loopback_match_nolit_encodeSnappyBlockAsm:
	MOVQ  (DI)(R9*1), R8
	XORQ  (BP)(R9*1), R8
	TESTQ R8, R8
	JZ    matchlen_loop_match_nolit_encodeSnappyBlockAsm
	BSFQ  R8, R8
	SARQ  $0x03, R8
	LEAL  (R9)(R8*1), R9
	JMP   match_nolit_end_encodeSnappyBlockAsm

matchlen_loop_match_nolit_encodeSnappyBlockAsm:
	LEAL -8(SI), SI
	LEAL 8(R9), R9
	CMPL SI, $0x08
	JGE  matchlen_loopback_match_nolit_encodeSnappyBlockAsm

	// Fewer than 8 bytes left: compare byte by byte.
matchlen_single_match_nolit_encodeSnappyBlockAsm:
	TESTL SI, SI
	JZ    match_nolit_end_encodeSnappyBlockAsm

matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm:
	MOVB (DI)(R9*1), R8
	CMPB (BP)(R9*1), R8
	JNE  match_nolit_end_encodeSnappyBlockAsm
	LEAL 1(R9), R9
	DECL SI
	JNZ  matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm

match_nolit_end_encodeSnappyBlockAsm:
	// s += extra length; R9 = total match length (extra + 4), BP = offset.
	ADDL R9, CX
	MOVL 16(SP), BP
	ADDL $0x04, R9
	CMPL BP, $0x00010000
	JL   two_byte_offset_match_nolit_encodeSnappyBlockAsm

	// Offset >= 65536: 5-byte copies, 64 bytes each (tag 0xff) while more
	// than 64 bytes remain.
four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm:
	CMPL R9, $0x40
	JLE  four_bytes_remain_match_nolit_encodeSnappyBlockAsm
	MOVB $0xff, (AX)
	MOVL BP, 1(AX)
	LEAL -64(R9), R9
	ADDQ $0x05, AX
	CMPL R9, $0x04
	JL   four_bytes_remain_match_nolit_encodeSnappyBlockAsm
	JMP  four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm

four_bytes_remain_match_nolit_encodeSnappyBlockAsm:
	// Final 5-byte copy: tag = 3 | (length-1)<<2 (low byte only is stored).
	TESTL R9, R9
	JZ    match_nolit_emitcopy_end_encodeSnappyBlockAsm
	MOVB  $0x03, BL
	LEAL  -4(BX)(R9*4), R9
	MOVB  R9, (AX)
	MOVL  BP, 1(AX)
	ADDQ  $0x05, AX
	JMP   match_nolit_emitcopy_end_encodeSnappyBlockAsm

	// Offset < 65536: 60-byte copies with a 16-bit offset (tag 0xee) while
	// more than 64 bytes remain.
two_byte_offset_match_nolit_encodeSnappyBlockAsm:
	CMPL R9, $0x40
	JLE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm
	MOVB $0xee, (AX)
	MOVW BP, 1(AX)
	LEAL -60(R9), R9
	ADDQ $0x03, AX
	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm

two_byte_offset_short_match_nolit_encodeSnappyBlockAsm:
	// Length < 12 and offset < 2048: 2-byte copy,
	// tag = 1 | (length-4)<<2 | (offset>>8)<<5, then the low offset byte.
	CMPL R9, $0x0c
	JGE  emit_copy_three_match_nolit_encodeSnappyBlockAsm
	CMPL BP, $0x00000800
	JGE  emit_copy_three_match_nolit_encodeSnappyBlockAsm
	MOVB $0x01, BL
	LEAL -16(BX)(R9*4), R9
	MOVB BP, 1(AX)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL  BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm

emit_copy_three_match_nolit_encodeSnappyBlockAsm:
	// 3-byte copy: tag = 2 | (length-1)<<2, then the 16-bit offset.
	MOVB $0x02, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeSnappyBlockAsm:
	// nextEmit = s. Finish if s >= sLimit; return 0 if dst reached the limit.
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE  emit_remainder_encodeSnappyBlockAsm
	CMPQ AX, (SP)
	JL   match_nolit_dst_ok_encodeSnappyBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeSnappyBlockAsm:
	// Load 8 bytes at s-2, store s-2 and s in the table under their hashes,
	// and test the previous entry for s right away: if its 4 bytes match,
	// go straight to another match with no literal in between.
	MOVQ  -2(DX)(CX*1), SI
	MOVQ  $0x0000cf1bbcdcbf9b, BP
	MOVQ  SI, DI
	SHRQ  $0x10, SI
	MOVQ  SI, R8
	SHLQ  $0x10, DI
	IMULQ BP, DI
	SHRQ  $0x32, DI
	SHLQ  $0x10, R8
	IMULQ BP, R8
	SHRQ  $0x32, R8
	LEAL  -2(CX), R9
	MOVL  24(SP)(R8*4), BP
	MOVL  R9, 24(SP)(DI*4)
	MOVL  CX, 24(SP)(R8*4)
	CMPL  (DX)(BP*1), SI
	JEQ   match_nolit_loop_encodeSnappyBlockAsm
	INCL  CX
	JMP   search_loop_encodeSnappyBlockAsm

emit_remainder_encodeSnappyBlockAsm:
	// Return 0 unless dst + (len(src)-nextEmit) + 4 is below the limit.
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL   emit_remainder_ok_encodeSnappyBlockAsm
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeSnappyBlockAsm:
	// Emit src[nextEmit:] as the final literal (skipped when empty).
	// CX = literal source pointer, BP = literal length, DX = length-1
	// (the src base in DX is no longer needed from here on).
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	LEAL -1(BP), DX

	// Literal header, same encoding as above.
	CMPL DX, $0x3c
	JLT  one_byte_emit_remainder_encodeSnappyBlockAsm
	CMPL DX, $0x00000100
	JLT  two_bytes_emit_remainder_encodeSnappyBlockAsm
	CMPL DX, $0x00010000
	JLT  three_bytes_emit_remainder_encodeSnappyBlockAsm
	CMPL DX, $0x01000000
	JLT  four_bytes_emit_remainder_encodeSnappyBlockAsm
	MOVB $0xfc, (AX)
	MOVL DX, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm

four_bytes_emit_remainder_encodeSnappyBlockAsm:
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm

three_bytes_emit_remainder_encodeSnappyBlockAsm:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm

two_bytes_emit_remainder_encodeSnappyBlockAsm:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL   memmove_emit_remainder_encodeSnappyBlockAsm
	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm

one_byte_emit_remainder_encodeSnappyBlockAsm:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

	// Short copy (1..64 bytes) by size class; DX = dst pointer after the
	// literal, BX = length.
memmove_emit_remainder_encodeSnappyBlockAsm:
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX
	CMPQ BX, $0x03
	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2
	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3
	CMPQ BX, $0x08
	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7
	CMPQ BX, $0x10
	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16
	CMPQ BX, $0x20
	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32
	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeSnappyBlockAsm:
	MOVQ DX, AX
	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm

	// Long copy (length >= 65). X0..X3 hold the first and last 32 bytes and
	// are stored last; DI starts at 64-(dst&31) so the MOVOA stores are
	// 32-byte aligned. Each iteration copies [DI-32, DI) and repeats only
	// while length >= DI. This literal ends at the end of src, so the
	// removed unchecked 128-byte loop could previously read beyond src here
	// as well as write beyond the literal in dst.
memmove_long_emit_remainder_encodeSnappyBlockAsm:
	LEAQ  (AX)(BP*1), DX
	MOVL  BP, BX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ  AX, BP
	ANDL  $0x0000001f, BP
	MOVQ  $0x00000040, DI
	SUBQ  BP, DI

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ  $0x20, DI
	CMPQ  BX, DI
	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ  DX, AX

emit_literal_done_emit_remainder_encodeSnappyBlockAsm:
	// Return the number of bytes written: dst pointer - dst_base.
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBlockAsm12B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000080, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeSnappyBlockAsm12B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ  $0x80, DX
	DECQ  CX
	JNZ   zero_loop_encodeSnappyBlockAsm12B
	MOVL  $0x00000000, 12(SP)
	MOVQ  src_len+32(FP), CX
	LEAQ  -5(CX), DX
	LEAQ  -8(CX), BP
	MOVL  BP, 8(SP)
	SHRQ  $0x05, CX
	SUBL  CX, DX
	LEAQ  (AX)(DX*1), DX
	MOVQ  DX, (SP)
	MOVL  $0x00000001, CX
	MOVL  CX, 16(SP)
	MOVQ  src_base+24(FP), DX

search_loop_encodeSnappyBlockAsm12B:
	MOVQ  (DX)(CX*1), SI
	MOVL  CX, BP
	SUBL  12(SP), BP
	SHRL  $0x05, BP
	LEAL  4(CX)(BP*1), BP
	MOVL  8(SP), DI
	CMPL  BP, DI
	JGT   emit_remainder_encodeSnappyBlockAsm12B
	MOVL  BP, 20(SP)
	MOVQ  $0x000000cf1bbcdcbb, R8
	MOVQ  SI, R9
	MOVQ  SI, R10
	SHRQ  $0x08, R10
	SHLQ  $0x18, R9
	IMULQ R8, R9
	SHRQ  $0x34, R9
	SHLQ  $0x18, R10
	IMULQ R8, R10
	SHRQ  $0x34, R10
	MOVL  24(SP)(R9*4), BP
	MOVL  24(SP)(R10*4), DI
	MOVL  CX, 24(SP)(R9*4)
	LEAL  1(CX), R9
	MOVL  R9, 24(SP)(R10*4)
	MOVQ  SI, R9
	SHRQ  $0x10, R9
	SHLQ  $0x18, R9
	IMULQ R8, R9
	SHRQ  $0x34, R9
	MOVL  CX, R8
	SUBL  16(SP), R8
	MOVL  1(DX)(R8*1), R10
	MOVQ  SI, R8
	SHRQ  $0x08, R8
	CMPL  R8, R10
	JNE   no_repeat_found_encodeSnappyBlockAsm12B
	LEAL  1(CX), SI
	MOVL  12(SP), BP
	MOVL  SI, DI
	SUBL  16(SP), DI
	JZ    repeat_extend_back_end_encodeSnappyBlockAsm12B

repeat_extend_back_loop_encodeSnappyBlockAsm12B:
	CMPL SI, BP
	JLE  repeat_extend_back_end_encodeSnappyBlockAsm12B
	MOVB -1(DX)(DI*1), BL
	MOVB -1(DX)(SI*1), R8
	CMPB BL, R8
	JNE  repeat_extend_back_end_encodeSnappyBlockAsm12B
	LEAL -1(SI), SI
	DECL DI
	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm12B

repeat_extend_back_end_encodeSnappyBlockAsm12B:
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
	MOVL SI, DI
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R8
	SUBL BP, DI
	LEAL -1(DI), BP
	CMPL BP, $0x3c
	JLT  one_byte_repeat_emit_encodeSnappyBlockAsm12B
	CMPL BP, $0x00000100
	JLT  two_bytes_repeat_emit_encodeSnappyBlockAsm12B
	CMPL BP, $0x00010000
	JLT  three_bytes_repeat_emit_encodeSnappyBlockAsm12B
	CMPL BP, $0x01000000
	JLT  four_bytes_repeat_emit_encodeSnappyBlockAsm12B
	MOVB $0xfc, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm12B

four_bytes_repeat_emit_encodeSnappyBlockAsm12B:
	MOVL BP, R9
	SHRL $0x10, R9
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB R9, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm12B

three_bytes_repeat_emit_encodeSnappyBlockAsm12B:
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm12B

two_bytes_repeat_emit_encodeSnappyBlockAsm12B:
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	CMPL BP, $0x40
	JL   memmove_repeat_emit_encodeSnappyBlockAsm12B
	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm12B

one_byte_repeat_emit_encodeSnappyBlockAsm12B:
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeSnappyBlockAsm12B:
	LEAQ (AX)(DI*1), BP
	CMPQ DI, $0x03
	JB   emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_1or2
	JE   emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_3
	CMPQ DI, $0x08
	JB   emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_4through7
	CMPQ DI, $0x10
	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
	CMPQ DI, $0x20
	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_1or2:
	MOVB (R8), R9
	MOVB -1(R8)(DI*1), R8
	MOVB R9, (AX)
	MOVB R8, -1(AX)(DI*1)
	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_3:
	MOVW (R8), R9
	MOVB 2(R8), R8
	MOVW R9, (AX)
	MOVB R8, 2(AX)
	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_4through7:
	MOVL (R8), R9
	MOVL -4(R8)(DI*1), R8
	MOVL R9, (AX)
	MOVL R8, -4(AX)(DI*1)
	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
	MOVQ (R8), R9
	MOVQ -8(R8)(DI*1), R8
	MOVQ R9, (AX)
	MOVQ R8, -8(AX)(DI*1)
	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
	MOVOU (R8), X0
	MOVOU -16(R8)(DI*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DI*1)
	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU -32(R8)(DI*1), X2
	MOVOU -16(R8)(DI*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DI*1)
	MOVOU X3, -16(AX)(DI*1)

memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B:
	MOVQ BP, AX
	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B

memmove_long_repeat_emit_encodeSnappyBlockAsm12B:
	LEAQ  (AX)(DI*1), BP
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU -32(R8)(DI*1), X2
	MOVOU -16(R8)(DI*1), X3
	MOVQ  DI, R10
	SHRQ  $0x07, R10
	MOVQ  AX, R9
	ANDL  $0x0000001f, R9
	MOVQ  $0x00000040, R11
	SUBQ  R9, R11
	DECQ  R10
	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
	LEAQ  -32(R8)(R11*1), R9
	LEAQ  -32(AX)(R11*1), R12

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
	MOVOU (R9), X4
	MOVOU 16(R9), X5
	MOVOU 32(R9), X6
	MOVOU 48(R9), X7
	MOVOU 64(R9), X8
	MOVOU 80(R9), X9
	MOVOU 96(R9), X10
	MOVOU 112(R9), X11
	MOVOA X4, (R12)
	MOVOA X5, 16(R12)
	MOVOA X6, 32(R12)
	MOVOA X7, 48(R12)
	MOVOA X8, 64(R12)
	MOVOA X9, 80(R12)
	MOVOA X10, 96(R12)
	MOVOA X11, 112(R12)
	ADDQ  $0x80, R12
	ADDQ  $0x80, R9
	ADDQ  $0x80, R11
	DECQ  R10
	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
	MOVOU -32(R8)(R11*1), X4
	MOVOU -16(R8)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ  $0x20, R11
	CMPQ  DI, R11
	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DI*1)
	MOVOU X3, -16(AX)(DI*1)
	MOVQ  BP, AX

emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B:
	ADDL $0x05, CX
	MOVL CX, BP
	SUBL 16(SP), BP
	MOVQ src_len+32(FP), DI
	SUBL CX, DI
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(BP*1), BP
	XORL R10, R10
	CMPL DI, $0x08
	JL   matchlen_single_repeat_extend

matchlen_loopback_repeat_extend:
	MOVQ  (R8)(R10*1), R9
	XORQ  (BP)(R10*1), R9
	TESTQ R9, R9
	JZ    matchlen_loop_repeat_extend
	BSFQ  R9, R9
	SARQ  $0x03, R9
	LEAL  (R10)(R9*1), R10
	JMP   repeat_extend_forward_end_encodeSnappyBlockAsm12B

matchlen_loop_repeat_extend:
	LEAL -8(DI), DI
	LEAL 8(R10), R10
	CMPL DI, $0x08
	JGE  matchlen_loopback_repeat_extend

matchlen_single_repeat_extend:
	TESTL DI, DI
	JZ    repeat_extend_forward_end_encodeSnappyBlockAsm12B

matchlen_single_loopback_repeat_extend:
	MOVB (R8)(R10*1), R9
	CMPB (BP)(R10*1), R9
	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm12B
	LEAL 1(R10), R10
	DECL DI
	JNZ  matchlen_single_loopback_repeat_extend

repeat_extend_forward_end_encodeSnappyBlockAsm12B:
	ADDL R10, CX
	MOVL CX, BP
	SUBL SI, BP
	MOVL 16(SP), SI
	CMPL SI, $0x00010000
	JL   two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B

four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm12B:
	CMPL BP, $0x40
	JLE  four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm12B
	MOVB $0xff, (AX)
	MOVL SI, 1(AX)
	LEAL -64(BP), BP
	ADDQ $0x05, AX
	CMPL BP, $0x04
	JL   four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm12B
	JMP  four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm12B

four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm12B:
	TESTL BP, BP
	JZ    repeat_end_emit_encodeSnappyBlockAsm12B
	MOVB  $0x03, BL
	LEAL  -4(BX)(BP*4), BP
	MOVB  BP, (AX)
	MOVL  SI, 1(AX)
	ADDQ  $0x05, AX
	JMP   repeat_end_emit_encodeSnappyBlockAsm12B

two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B:
	CMPL BP, $0x40
	JLE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(BP), BP
	ADDQ $0x03, AX
	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B

two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B:
	CMPL BP, $0x0c
	JGE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
	CMPL SI, $0x00000800
	JGE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
	MOVB $0x01, BL
	LEAL -16(BX)(BP*4), BP
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeSnappyBlockAsm12B

emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B:
	MOVB $0x02, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeSnappyBlockAsm12B:
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE  emit_remainder_encodeSnappyBlockAsm12B
	JMP  search_loop_encodeSnappyBlockAsm12B

no_repeat_found_encodeSnappyBlockAsm12B:
	CMPL (DX)(BP*1), SI
	JEQ  candidate_match_encodeSnappyBlockAsm12B
	SHRQ $0x08, SI
	MOVL 24(SP)(R9*4), BP
	LEAL 2(CX), R8
	CMPL (DX)(DI*1), SI
	JEQ  candidate2_match_encodeSnappyBlockAsm12B
	MOVL R8, 24(SP)(R9*4)
	SHRQ $0x08, SI
	CMPL (DX)(BP*1), SI
	JEQ  candidate3_match_encodeSnappyBlockAsm12B
	MOVL 20(SP), CX
	JMP  search_loop_encodeSnappyBlockAsm12B

candidate3_match_encodeSnappyBlockAsm12B:
	ADDL $0x02, CX
	JMP  candidate_match_encodeSnappyBlockAsm12B

candidate2_match_encodeSnappyBlockAsm12B:
	MOVL R8, 24(SP)(R9*4)
	INCL CX
	MOVL DI, BP

candidate_match_encodeSnappyBlockAsm12B:
	MOVL  12(SP), SI
	TESTL BP, BP
	JZ    match_extend_back_end_encodeSnappyBlockAsm12B

match_extend_back_loop_encodeSnappyBlockAsm12B:
	CMPL CX, SI
	JLE  match_extend_back_end_encodeSnappyBlockAsm12B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE  match_extend_back_end_encodeSnappyBlockAsm12B
	LEAL -1(CX), CX
	DECL BP
	JZ   match_extend_back_end_encodeSnappyBlockAsm12B
	JMP  match_extend_back_loop_encodeSnappyBlockAsm12B

match_extend_back_end_encodeSnappyBlockAsm12B:
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 4(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL   match_dst_size_check_encodeSnappyBlockAsm12B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeSnappyBlockAsm12B:
	MOVL CX, SI
	MOVL 12(SP), DI
	CMPL DI, SI
	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm12B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(DI*1), SI
	SUBL DI, R8
	LEAL -1(R8), DI
	CMPL DI, $0x3c
	JLT  one_byte_match_emit_encodeSnappyBlockAsm12B
	CMPL DI, $0x00000100
	JLT  two_bytes_match_emit_encodeSnappyBlockAsm12B
	CMPL DI, $0x00010000
	JLT  three_bytes_match_emit_encodeSnappyBlockAsm12B
	CMPL DI, $0x01000000
	JLT  four_bytes_match_emit_encodeSnappyBlockAsm12B
	MOVB $0xfc, (AX)
	MOVL DI, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_match_emit_encodeSnappyBlockAsm12B

four_bytes_match_emit_encodeSnappyBlockAsm12B:
	MOVL DI, R9
	SHRL $0x10, R9
	MOVB $0xf8, (AX)
	MOVW DI, 1(AX)
	MOVB R9, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_match_emit_encodeSnappyBlockAsm12B

three_bytes_match_emit_encodeSnappyBlockAsm12B:
	MOVB $0xf4, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_match_emit_encodeSnappyBlockAsm12B

two_bytes_match_emit_encodeSnappyBlockAsm12B:
	MOVB $0xf0, (AX)
	MOVB DI, 1(AX)
	ADDQ $0x02, AX
	CMPL DI, $0x40
	JL   memmove_match_emit_encodeSnappyBlockAsm12B
	JMP  memmove_long_match_emit_encodeSnappyBlockAsm12B

one_byte_match_emit_encodeSnappyBlockAsm12B:
	SHLB $0x02, DI
	MOVB DI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeSnappyBlockAsm12B:
	LEAQ (AX)(R8*1), DI
	CMPQ R8, $0x03
	JB   emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_1or2
	JE   emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_3
	CMPQ R8, $0x08
	JB   emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (AX)
	MOVB SI, -1(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (AX)
	MOVB SI, 2(AX)
	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_4through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (AX)
	MOVL SI, -4(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (AX)
	MOVQ SI, -8(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm12B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_encodeSnappyBlockAsm12B:
	MOVQ DI, AX
	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm12B

memmove_long_match_emit_encodeSnappyBlockAsm12B:
	LEAQ  (AX)(R8*1), DI
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVQ  R8, R10
	SHRQ  $0x07, R10
	MOVQ  AX, R9
	ANDL  $0x0000001f, R9
	MOVQ  $0x00000040, R11
	SUBQ  R9, R11
	DECQ  R10
	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
	LEAQ  -32(SI)(R11*1), R9
	LEAQ  -32(AX)(R11*1), R12

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
	MOVOU (R9), X4
	MOVOU 16(R9), X5
	MOVOU 32(R9), X6
	MOVOU 48(R9), X7
	MOVOU 64(R9), X8
	MOVOU 80(R9), X9
	MOVOU 96(R9), X10
	MOVOU 112(R9), X11
	MOVOA X4, (R12)
	MOVOA X5, 16(R12)
	MOVOA X6, 32(R12)
	MOVOA X7, 48(R12)
	MOVOA X8, 64(R12)
	MOVOA X9, 80(R12)
	MOVOA X10, 96(R12)
	MOVOA X11, 112(R12)
	ADDQ  $0x80, R12
	ADDQ  $0x80, R9
	ADDQ  $0x80, R11
	DECQ  R10
	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
	MOVOU -32(SI)(R11*1), X4
	MOVOU -16(SI)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ  $0x20, R11
	CMPQ  R8, R11
	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ  DI, AX

emit_literal_done_match_emit_encodeSnappyBlockAsm12B:
match_nolit_loop_encodeSnappyBlockAsm12B:
	MOVL CX, SI
	SUBL BP, SI
	MOVL SI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), SI
	SUBL CX, SI
	LEAQ (DX)(CX*1), DI
	LEAQ (DX)(BP*1), BP
	XORL R9, R9
	CMPL SI, $0x08
	JL   matchlen_single_match_nolit_encodeSnappyBlockAsm12B

matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B:
	MOVQ  (DI)(R9*1), R8
	XORQ  (BP)(R9*1), R8
	TESTQ R8, R8
	JZ    matchlen_loop_match_nolit_encodeSnappyBlockAsm12B
	BSFQ  R8, R8
	SARQ  $0x03, R8
	LEAL  (R9)(R8*1), R9
	JMP   match_nolit_end_encodeSnappyBlockAsm12B

matchlen_loop_match_nolit_encodeSnappyBlockAsm12B:
	LEAL -8(SI), SI
	LEAL 8(R9), R9
	CMPL SI, $0x08
	JGE  matchlen_loopback_match_nolit_encodeSnappyBlockAsm12B

matchlen_single_match_nolit_encodeSnappyBlockAsm12B:
	TESTL SI, SI
	JZ    match_nolit_end_encodeSnappyBlockAsm12B

matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12B:
	MOVB (DI)(R9*1), R8
	CMPB (BP)(R9*1), R8
	JNE  match_nolit_end_encodeSnappyBlockAsm12B
	LEAL 1(R9), R9
	DECL SI
	JNZ  matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm12B

match_nolit_end_encodeSnappyBlockAsm12B:
	ADDL R9, CX
	MOVL 16(SP), BP
	ADDL $0x04, R9
	CMPL BP, $0x00010000
	JL   two_byte_offset_match_nolit_encodeSnappyBlockAsm12B

four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm12B:
	CMPL R9, $0x40
	JLE  four_bytes_remain_match_nolit_encodeSnappyBlockAsm12B
	MOVB $0xff, (AX)
	MOVL BP, 1(AX)
	LEAL -64(R9), R9
	ADDQ $0x05, AX
	CMPL R9, $0x04
	JL   four_bytes_remain_match_nolit_encodeSnappyBlockAsm12B
	JMP  four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm12B

four_bytes_remain_match_nolit_encodeSnappyBlockAsm12B:
	TESTL R9, R9
	JZ    match_nolit_emitcopy_end_encodeSnappyBlockAsm12B
	MOVB  $0x03, BL
	LEAL  -4(BX)(R9*4), R9
	MOVB  R9, (AX)
	MOVL  BP, 1(AX)
	ADDQ  $0x05, AX
	JMP   match_nolit_emitcopy_end_encodeSnappyBlockAsm12B

two_byte_offset_match_nolit_encodeSnappyBlockAsm12B:
	CMPL R9, $0x40
	JLE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B
	MOVB $0xee, (AX)
	MOVW BP, 1(AX)
	LEAL -60(R9), R9
	ADDQ $0x03, AX
	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm12B

two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B:
	CMPL R9, $0x0c
	JGE  emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
	CMPL BP, $0x00000800
	JGE  emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
	MOVB $0x01, BL
	LEAL -16(BX)(R9*4), R9
	MOVB BP, 1(AX)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL  BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm12B

emit_copy_three_match_nolit_encodeSnappyBlockAsm12B:
	MOVB $0x02, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeSnappyBlockAsm12B:
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE  emit_remainder_encodeSnappyBlockAsm12B
	CMPQ AX, (SP)
	JL   match_nolit_dst_ok_encodeSnappyBlockAsm12B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeSnappyBlockAsm12B:
	MOVQ  -2(DX)(CX*1), SI
	MOVQ  $0x000000cf1bbcdcbb, BP
	MOVQ  SI, DI
	SHRQ  $0x10, SI
	MOVQ  SI, R8
	SHLQ  $0x18, DI
	IMULQ BP, DI
	SHRQ  $0x34, DI
	SHLQ  $0x18, R8
	IMULQ BP, R8
	SHRQ  $0x34, R8
	LEAL  -2(CX), R9
	MOVL  24(SP)(R8*4), BP
	MOVL  R9, 24(SP)(DI*4)
	MOVL  CX, 24(SP)(R8*4)
	CMPL  (DX)(BP*1), SI
	JEQ   match_nolit_loop_encodeSnappyBlockAsm12B
	INCL  CX
	JMP   search_loop_encodeSnappyBlockAsm12B

emit_remainder_encodeSnappyBlockAsm12B:
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL   emit_remainder_ok_encodeSnappyBlockAsm12B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeSnappyBlockAsm12B:
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	LEAL -1(BP), DX
	CMPL DX, $0x3c
	JLT  one_byte_emit_remainder_encodeSnappyBlockAsm12B
	CMPL DX, $0x00000100
	JLT  two_bytes_emit_remainder_encodeSnappyBlockAsm12B
	CMPL DX, $0x00010000
	JLT  three_bytes_emit_remainder_encodeSnappyBlockAsm12B
	CMPL DX, $0x01000000
	JLT  four_bytes_emit_remainder_encodeSnappyBlockAsm12B
	MOVB $0xfc, (AX)
	MOVL DX, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm12B

four_bytes_emit_remainder_encodeSnappyBlockAsm12B:
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm12B

three_bytes_emit_remainder_encodeSnappyBlockAsm12B:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm12B

two_bytes_emit_remainder_encodeSnappyBlockAsm12B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL   memmove_emit_remainder_encodeSnappyBlockAsm12B
	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm12B

one_byte_emit_remainder_encodeSnappyBlockAsm12B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeSnappyBlockAsm12B:
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX
	CMPQ BX, $0x03
	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2
	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3
	CMPQ BX, $0x08
	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32
	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B:
	MOVQ DX, AX
	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B

memmove_long_emit_remainder_encodeSnappyBlockAsm12B:
	LEAQ  (AX)(BP*1), DX
	MOVL  BP, BX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ  BX, SI
	SHRQ  $0x07, SI
	MOVQ  AX, BP
	ANDL  $0x0000001f, BP
	MOVQ  $0x00000040, DI
	SUBQ  BP, DI
	DECQ  SI
	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
	LEAQ  -32(CX)(DI*1), BP
	LEAQ  -32(AX)(DI*1), R8

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back:
	MOVOU (BP), X4
	MOVOU 16(BP), X5
	MOVOU 32(BP), X6
	MOVOU 48(BP), X7
	MOVOU 64(BP), X8
	MOVOU 80(BP), X9
	MOVOU 96(BP), X10
	MOVOU 112(BP), X11
	MOVOA X4, (R8)
	MOVOA X5, 16(R8)
	MOVOA X6, 32(R8)
	MOVOA X7, 48(R8)
	MOVOA X8, 64(R8)
	MOVOA X9, 80(R8)
	MOVOA X10, 96(R8)
	MOVOA X11, 112(R8)
	ADDQ  $0x80, R8
	ADDQ  $0x80, BP
	ADDQ  $0x80, DI
	DECQ  SI
	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ  $0x20, DI
	CMPQ  BX, DI
	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ  DX, AX

emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBlockAsm10B(dst []byte, src []byte) int
// Requires: SSE2
TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000020, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

zero_loop_encodeSnappyBlockAsm10B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ  $0x80, DX
	DECQ  CX
	JNZ   zero_loop_encodeSnappyBlockAsm10B
	MOVL  $0x00000000, 12(SP)
	MOVQ  src_len+32(FP), CX
	LEAQ  -5(CX), DX
	LEAQ  -8(CX), BP
	MOVL  BP, 8(SP)
	SHRQ  $0x05, CX
	SUBL  CX, DX
	LEAQ  (AX)(DX*1), DX
	MOVQ  DX, (SP)
	MOVL  $0x00000001, CX
	MOVL  CX, 16(SP)
	MOVQ  src_base+24(FP), DX

search_loop_encodeSnappyBlockAsm10B:
	MOVQ  (DX)(CX*1), SI
	MOVL  CX, BP
	SUBL  12(SP), BP
	SHRL  $0x05, BP
	LEAL  4(CX)(BP*1), BP
	MOVL  8(SP), DI
	CMPL  BP, DI
	JGT   emit_remainder_encodeSnappyBlockAsm10B
	MOVL  BP, 20(SP)
	MOVQ  $0x9e3779b1, R8
	MOVQ  SI, R9
	MOVQ  SI, R10
	SHRQ  $0x08, R10
	SHLQ  $0x20, R9
	IMULQ R8, R9
	SHRQ  $0x36, R9
	SHLQ  $0x20, R10
	IMULQ R8, R10
	SHRQ  $0x36, R10
	MOVL  24(SP)(R9*4), BP
	MOVL  24(SP)(R10*4), DI
	MOVL  CX, 24(SP)(R9*4)
	LEAL  1(CX), R9
	MOVL  R9, 24(SP)(R10*4)
	MOVQ  SI, R9
	SHRQ  $0x10, R9
	SHLQ  $0x20, R9
	IMULQ R8, R9
	SHRQ  $0x36, R9
	MOVL  CX, R8
	SUBL  16(SP), R8
	MOVL  1(DX)(R8*1), R10
	MOVQ  SI, R8
	SHRQ  $0x08, R8
	CMPL  R8, R10
	JNE   no_repeat_found_encodeSnappyBlockAsm10B
	LEAL  1(CX), SI
	MOVL  12(SP), BP
	MOVL  SI, DI
	SUBL  16(SP), DI
	JZ    repeat_extend_back_end_encodeSnappyBlockAsm10B

repeat_extend_back_loop_encodeSnappyBlockAsm10B:
	CMPL SI, BP
	JLE  repeat_extend_back_end_encodeSnappyBlockAsm10B
	MOVB -1(DX)(DI*1), BL
	MOVB -1(DX)(SI*1), R8
	CMPB BL, R8
	JNE  repeat_extend_back_end_encodeSnappyBlockAsm10B
	LEAL -1(SI), SI
	DECL DI
	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm10B

repeat_extend_back_end_encodeSnappyBlockAsm10B:
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
	MOVL SI, DI
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R8
	SUBL BP, DI
	LEAL -1(DI), BP
	CMPL BP, $0x3c
	JLT  one_byte_repeat_emit_encodeSnappyBlockAsm10B
	CMPL BP, $0x00000100
	JLT  two_bytes_repeat_emit_encodeSnappyBlockAsm10B
	CMPL BP, $0x00010000
	JLT  three_bytes_repeat_emit_encodeSnappyBlockAsm10B
	CMPL BP, $0x01000000
	JLT  four_bytes_repeat_emit_encodeSnappyBlockAsm10B
	MOVB $0xfc, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm10B

four_bytes_repeat_emit_encodeSnappyBlockAsm10B:
	MOVL BP, R9
	SHRL $0x10, R9
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB R9, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm10B

three_bytes_repeat_emit_encodeSnappyBlockAsm10B:
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm10B

two_bytes_repeat_emit_encodeSnappyBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	CMPL BP, $0x40
	JL   memmove_repeat_emit_encodeSnappyBlockAsm10B
	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm10B

one_byte_repeat_emit_encodeSnappyBlockAsm10B:
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

memmove_repeat_emit_encodeSnappyBlockAsm10B:
	LEAQ (AX)(DI*1), BP
	CMPQ DI, $0x03
	JB   emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_1or2
	JE   emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_3
	CMPQ DI, $0x08
	JB   emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_4through7
	CMPQ DI, $0x10
	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
	CMPQ DI, $0x20
	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_1or2:
	MOVB (R8), R9
	MOVB -1(R8)(DI*1), R8
	MOVB R9, (AX)
	MOVB R8, -1(AX)(DI*1)
	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_3:
	MOVW (R8), R9
	MOVB 2(R8), R8
	MOVW R9, (AX)
	MOVB R8, 2(AX)
	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_4through7:
	MOVL (R8), R9
	MOVL -4(R8)(DI*1), R8
	MOVL R9, (AX)
	MOVL R8, -4(AX)(DI*1)
	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
	MOVQ (R8), R9
	MOVQ -8(R8)(DI*1), R8
	MOVQ R9, (AX)
	MOVQ R8, -8(AX)(DI*1)
	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
	MOVOU (R8), X0
	MOVOU -16(R8)(DI*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DI*1)
	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU -32(R8)(DI*1), X2
	MOVOU -16(R8)(DI*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DI*1)
	MOVOU X3, -16(AX)(DI*1)

memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B:
	MOVQ BP, AX
	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B

memmove_long_repeat_emit_encodeSnappyBlockAsm10B:
	LEAQ  (AX)(DI*1), BP
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU -32(R8)(DI*1), X2
	MOVOU -16(R8)(DI*1), X3
	MOVQ  DI, R10
	SHRQ  $0x07, R10
	MOVQ  AX, R9
	ANDL  $0x0000001f, R9
	MOVQ  $0x00000040, R11
	SUBQ  R9, R11
	DECQ  R10
	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	LEAQ  -32(R8)(R11*1), R9
	LEAQ  -32(AX)(R11*1), R12

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
	MOVOU (R9), X4
	MOVOU 16(R9), X5
	MOVOU 32(R9), X6
	MOVOU 48(R9), X7
	MOVOU 64(R9), X8
	MOVOU 80(R9), X9
	MOVOU 96(R9), X10
	MOVOU 112(R9), X11
	MOVOA X4, (R12)
	MOVOA X5, 16(R12)
	MOVOA X6, 32(R12)
	MOVOA X7, 48(R12)
	MOVOA X8, 64(R12)
	MOVOA X9, 80(R12)
	MOVOA X10, 96(R12)
	MOVOA X11, 112(R12)
	ADDQ  $0x80, R12
	ADDQ  $0x80, R9
	ADDQ  $0x80, R11
	DECQ  R10
	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(R8)(R11*1), X4
	MOVOU -16(R8)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ  $0x20, R11
	CMPQ  DI, R11
	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DI*1)
	MOVOU X3, -16(AX)(DI*1)
	MOVQ  BP, AX

emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B:
	ADDL $0x05, CX
	MOVL CX, BP
	SUBL 16(SP), BP
	MOVQ src_len+32(FP), DI
	SUBL CX, DI
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(BP*1), BP
	XORL R10, R10
	CMPL DI, $0x08
	JL   matchlen_single_repeat_extend

matchlen_loopback_repeat_extend:
	MOVQ  (R8)(R10*1), R9
	XORQ  (BP)(R10*1), R9
	TESTQ R9, R9
	JZ    matchlen_loop_repeat_extend
	BSFQ  R9, R9
	SARQ  $0x03, R9
	LEAL  (R10)(R9*1), R10
	JMP   repeat_extend_forward_end_encodeSnappyBlockAsm10B

matchlen_loop_repeat_extend:
	LEAL -8(DI), DI
	LEAL 8(R10), R10
	CMPL DI, $0x08
	JGE  matchlen_loopback_repeat_extend

matchlen_single_repeat_extend:
	TESTL DI, DI
	JZ    repeat_extend_forward_end_encodeSnappyBlockAsm10B

matchlen_single_loopback_repeat_extend:
	MOVB (R8)(R10*1), R9
	CMPB (BP)(R10*1), R9
	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm10B
	LEAL 1(R10), R10
	DECL DI
	JNZ  matchlen_single_loopback_repeat_extend

repeat_extend_forward_end_encodeSnappyBlockAsm10B:
	ADDL R10, CX
	MOVL CX, BP
	SUBL SI, BP
	MOVL 16(SP), SI
	CMPL SI, $0x00010000
	JL   two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B

four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm10B:
	CMPL BP, $0x40
	JLE  four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm10B
	MOVB $0xff, (AX)
	MOVL SI, 1(AX)
	LEAL -64(BP), BP
	ADDQ $0x05, AX
	CMPL BP, $0x04
	JL   four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm10B
	JMP  four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm10B

four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm10B:
	TESTL BP, BP
	JZ    repeat_end_emit_encodeSnappyBlockAsm10B
	MOVB  $0x03, BL
	LEAL  -4(BX)(BP*4), BP
	MOVB  BP, (AX)
	MOVL  SI, 1(AX)
	ADDQ  $0x05, AX
	JMP   repeat_end_emit_encodeSnappyBlockAsm10B

two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B:
	CMPL BP, $0x40
	JLE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(BP), BP
	ADDQ $0x03, AX
	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B

two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B:
	CMPL BP, $0x0c
	JGE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
	CMPL SI, $0x00000800
	JGE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
	MOVB $0x01, BL
	LEAL -16(BX)(BP*4), BP
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeSnappyBlockAsm10B

emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B:
	MOVB $0x02, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

repeat_end_emit_encodeSnappyBlockAsm10B:
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE  emit_remainder_encodeSnappyBlockAsm10B
	JMP  search_loop_encodeSnappyBlockAsm10B

no_repeat_found_encodeSnappyBlockAsm10B:
	CMPL (DX)(BP*1), SI
	JEQ  candidate_match_encodeSnappyBlockAsm10B
	SHRQ $0x08, SI
	MOVL 24(SP)(R9*4), BP
	LEAL 2(CX), R8
	CMPL (DX)(DI*1), SI
	JEQ  candidate2_match_encodeSnappyBlockAsm10B
	MOVL R8, 24(SP)(R9*4)
	SHRQ $0x08, SI
	CMPL (DX)(BP*1), SI
	JEQ  candidate3_match_encodeSnappyBlockAsm10B
	MOVL 20(SP), CX
	JMP  search_loop_encodeSnappyBlockAsm10B

candidate3_match_encodeSnappyBlockAsm10B:
	ADDL $0x02, CX
	JMP  candidate_match_encodeSnappyBlockAsm10B

candidate2_match_encodeSnappyBlockAsm10B:
	MOVL R8, 24(SP)(R9*4)
	INCL CX
	MOVL DI, BP

candidate_match_encodeSnappyBlockAsm10B:
	MOVL  12(SP), SI
	TESTL BP, BP
	JZ    match_extend_back_end_encodeSnappyBlockAsm10B

match_extend_back_loop_encodeSnappyBlockAsm10B:
	CMPL CX, SI
	JLE  match_extend_back_end_encodeSnappyBlockAsm10B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE  match_extend_back_end_encodeSnappyBlockAsm10B
	LEAL -1(CX), CX
	DECL BP
	JZ   match_extend_back_end_encodeSnappyBlockAsm10B
	JMP  match_extend_back_loop_encodeSnappyBlockAsm10B

match_extend_back_end_encodeSnappyBlockAsm10B:
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 4(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL   match_dst_size_check_encodeSnappyBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_dst_size_check_encodeSnappyBlockAsm10B:
	MOVL CX, SI
	MOVL 12(SP), DI
	CMPL DI, SI
	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm10B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(DI*1), SI
	SUBL DI, R8
	LEAL -1(R8), DI
	CMPL DI, $0x3c
	JLT  one_byte_match_emit_encodeSnappyBlockAsm10B
	CMPL DI, $0x00000100
	JLT  two_bytes_match_emit_encodeSnappyBlockAsm10B
	CMPL DI, $0x00010000
	JLT  three_bytes_match_emit_encodeSnappyBlockAsm10B
	CMPL DI, $0x01000000
	JLT  four_bytes_match_emit_encodeSnappyBlockAsm10B
	MOVB $0xfc, (AX)
	MOVL DI, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_match_emit_encodeSnappyBlockAsm10B

four_bytes_match_emit_encodeSnappyBlockAsm10B:
	MOVL DI, R9
	SHRL $0x10, R9
	MOVB $0xf8, (AX)
	MOVW DI, 1(AX)
	MOVB R9, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_match_emit_encodeSnappyBlockAsm10B

three_bytes_match_emit_encodeSnappyBlockAsm10B:
	MOVB $0xf4, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_match_emit_encodeSnappyBlockAsm10B

two_bytes_match_emit_encodeSnappyBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB DI, 1(AX)
	ADDQ $0x02, AX
	CMPL DI, $0x40
	JL   memmove_match_emit_encodeSnappyBlockAsm10B
	JMP  memmove_long_match_emit_encodeSnappyBlockAsm10B

one_byte_match_emit_encodeSnappyBlockAsm10B:
	SHLB $0x02, DI
	MOVB DI, (AX)
	ADDQ $0x01, AX

memmove_match_emit_encodeSnappyBlockAsm10B:
	LEAQ (AX)(R8*1), DI
	CMPQ R8, $0x03
	JB   emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_1or2
	JE   emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_3
	CMPQ R8, $0x08
	JB   emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (AX)
	MOVB SI, -1(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (AX)
	MOVB SI, 2(AX)
	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_4through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (AX)
	MOVL SI, -4(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (AX)
	MOVQ SI, -8(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm10B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_encodeSnappyBlockAsm10B:
	MOVQ DI, AX
	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm10B

memmove_long_match_emit_encodeSnappyBlockAsm10B:
	LEAQ  (AX)(R8*1), DI
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVQ  R8, R10
	SHRQ  $0x07, R10
	MOVQ  AX, R9
	ANDL  $0x0000001f, R9
	MOVQ  $0x00000040, R11
	SUBQ  R9, R11
	DECQ  R10
	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	LEAQ  -32(SI)(R11*1), R9
	LEAQ  -32(AX)(R11*1), R12

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
	MOVOU (R9), X4
	MOVOU 16(R9), X5
	MOVOU 32(R9), X6
	MOVOU 48(R9), X7
	MOVOU 64(R9), X8
	MOVOU 80(R9), X9
	MOVOU 96(R9), X10
	MOVOU 112(R9), X11
	MOVOA X4, (R12)
	MOVOA X5, 16(R12)
	MOVOA X6, 32(R12)
	MOVOA X7, 48(R12)
	MOVOA X8, 64(R12)
	MOVOA X9, 80(R12)
	MOVOA X10, 96(R12)
	MOVOA X11, 112(R12)
	ADDQ  $0x80, R12
	ADDQ  $0x80, R9
	ADDQ  $0x80, R11
	DECQ  R10
	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(SI)(R11*1), X4
	MOVOU -16(SI)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ  $0x20, R11
	CMPQ  R8, R11
	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ  DI, AX

emit_literal_done_match_emit_encodeSnappyBlockAsm10B:
match_nolit_loop_encodeSnappyBlockAsm10B:
	MOVL CX, SI
	SUBL BP, SI
	MOVL SI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), SI
	SUBL CX, SI
	LEAQ (DX)(CX*1), DI
	LEAQ (DX)(BP*1), BP
	XORL R9, R9
	CMPL SI, $0x08
	JL   matchlen_single_match_nolit_encodeSnappyBlockAsm10B

matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B:
	MOVQ  (DI)(R9*1), R8
	XORQ  (BP)(R9*1), R8
	TESTQ R8, R8
	JZ    matchlen_loop_match_nolit_encodeSnappyBlockAsm10B
	BSFQ  R8, R8
	SARQ  $0x03, R8
	LEAL  (R9)(R8*1), R9
	JMP   match_nolit_end_encodeSnappyBlockAsm10B

matchlen_loop_match_nolit_encodeSnappyBlockAsm10B:
	LEAL -8(SI), SI
	LEAL 8(R9), R9
	CMPL SI, $0x08
	JGE  matchlen_loopback_match_nolit_encodeSnappyBlockAsm10B

matchlen_single_match_nolit_encodeSnappyBlockAsm10B:
	TESTL SI, SI
	JZ    match_nolit_end_encodeSnappyBlockAsm10B

matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10B:
	MOVB (DI)(R9*1), R8
	CMPB (BP)(R9*1), R8
	JNE  match_nolit_end_encodeSnappyBlockAsm10B
	LEAL 1(R9), R9
	DECL SI
	JNZ  matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm10B

match_nolit_end_encodeSnappyBlockAsm10B:
	ADDL R9, CX
	MOVL 16(SP), BP
	ADDL $0x04, R9
	CMPL BP, $0x00010000
	JL   two_byte_offset_match_nolit_encodeSnappyBlockAsm10B

four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm10B:
	CMPL R9, $0x40
	JLE  four_bytes_remain_match_nolit_encodeSnappyBlockAsm10B
	MOVB $0xff, (AX)
	MOVL BP, 1(AX)
	LEAL -64(R9), R9
	ADDQ $0x05, AX
	CMPL R9, $0x04
	JL   four_bytes_remain_match_nolit_encodeSnappyBlockAsm10B
	JMP  four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm10B

four_bytes_remain_match_nolit_encodeSnappyBlockAsm10B:
	TESTL R9, R9
	JZ    match_nolit_emitcopy_end_encodeSnappyBlockAsm10B
	MOVB  $0x03, BL
	LEAL  -4(BX)(R9*4), R9
	MOVB  R9, (AX)
	MOVL  BP, 1(AX)
	ADDQ  $0x05, AX
	JMP   match_nolit_emitcopy_end_encodeSnappyBlockAsm10B

two_byte_offset_match_nolit_encodeSnappyBlockAsm10B:
	CMPL R9, $0x40
	JLE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B
	MOVB $0xee, (AX)
	MOVW BP, 1(AX)
	LEAL -60(R9), R9
	ADDQ $0x03, AX
	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm10B

two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B:
	CMPL R9, $0x0c
	JGE  emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
	CMPL BP, $0x00000800
	JGE  emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
	MOVB $0x01, BL
	LEAL -16(BX)(R9*4), R9
	MOVB BP, 1(AX)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL  BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm10B

emit_copy_three_match_nolit_encodeSnappyBlockAsm10B:
	MOVB $0x02, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX

match_nolit_emitcopy_end_encodeSnappyBlockAsm10B:
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE  emit_remainder_encodeSnappyBlockAsm10B
	CMPQ AX, (SP)
	JL   match_nolit_dst_ok_encodeSnappyBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

match_nolit_dst_ok_encodeSnappyBlockAsm10B:
	MOVQ  -2(DX)(CX*1), SI
	MOVQ  $0x9e3779b1, BP
	MOVQ  SI, DI
	SHRQ  $0x10, SI
	MOVQ  SI, R8
	SHLQ  $0x20, DI
	IMULQ BP, DI
	SHRQ  $0x36, DI
	SHLQ  $0x20, R8
	IMULQ BP, R8
	SHRQ  $0x36, R8
	LEAL  -2(CX), R9
	MOVL  24(SP)(R8*4), BP
	MOVL  R9, 24(SP)(DI*4)
	MOVL  CX, 24(SP)(R8*4)
	CMPL  (DX)(BP*1), SI
	JEQ   match_nolit_loop_encodeSnappyBlockAsm10B
	INCL  CX
	JMP   search_loop_encodeSnappyBlockAsm10B

emit_remainder_encodeSnappyBlockAsm10B:
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL   emit_remainder_ok_encodeSnappyBlockAsm10B
	MOVQ $0x00000000, ret+48(FP)
	RET

emit_remainder_ok_encodeSnappyBlockAsm10B:
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	LEAL -1(BP), DX
	CMPL DX, $0x3c
	JLT  one_byte_emit_remainder_encodeSnappyBlockAsm10B
	CMPL DX, $0x00000100
	JLT  two_bytes_emit_remainder_encodeSnappyBlockAsm10B
	CMPL DX, $0x00010000
	JLT  three_bytes_emit_remainder_encodeSnappyBlockAsm10B
	CMPL DX, $0x01000000
	JLT  four_bytes_emit_remainder_encodeSnappyBlockAsm10B
	MOVB $0xfc, (AX)
	MOVL DX, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm10B

four_bytes_emit_remainder_encodeSnappyBlockAsm10B:
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm10B

three_bytes_emit_remainder_encodeSnappyBlockAsm10B:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm10B

two_bytes_emit_remainder_encodeSnappyBlockAsm10B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL   memmove_emit_remainder_encodeSnappyBlockAsm10B
	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm10B

one_byte_emit_remainder_encodeSnappyBlockAsm10B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

memmove_emit_remainder_encodeSnappyBlockAsm10B:
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX
	CMPQ BX, $0x03
	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2
	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3
	CMPQ BX, $0x08
	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32
	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B:
	MOVQ DX, AX
	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B

memmove_long_emit_remainder_encodeSnappyBlockAsm10B:
	LEAQ  (AX)(BP*1), DX
	MOVL  BP, BX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ  BX, SI
	SHRQ  $0x07, SI
	MOVQ  AX, BP
	ANDL  $0x0000001f, BP
	MOVQ  $0x00000040, DI
	SUBQ  BP, DI
	DECQ  SI
	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	LEAQ  -32(CX)(DI*1), BP
	LEAQ  -32(AX)(DI*1), R8

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back:
	MOVOU (BP), X4
	MOVOU 16(BP), X5
	MOVOU 32(BP), X6
	MOVOU 48(BP), X7
	MOVOU 64(BP), X8
	MOVOU 80(BP), X9
	MOVOU 96(BP), X10
	MOVOU 112(BP), X11
	MOVOA X4, (R8)
	MOVOA X5, 16(R8)
	MOVOA X6, 32(R8)
	MOVOA X7, 48(R8)
	MOVOA X8, 64(R8)
	MOVOA X9, 80(R8)
	MOVOA X10, 96(R8)
	MOVOA X11, 112(R8)
	ADDQ  $0x80, R8
	ADDQ  $0x80, BP
	ADDQ  $0x80, DI
	DECQ  SI
	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ  $0x20, DI
	CMPQ  BX, DI
	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ  DX, AX

emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
// Requires: SSE2
//
// encodeSnappyBlockAsm8B encodes src into dst as a sequence of literal and
// copy elements and returns the number of bytes written to dst. It returns 0
// when the output would reach the limit computed at entry
// (dst_base + len(src) - 5 - len(src)>>5), i.e. when the input does not
// compress well enough.
//
// Stack frame layout:
//   0(SP)   8 bytes  dst limit pointer (see above)
//   8(SP)   4 bytes  search limit: len(src) - 8
//   12(SP)  4 bytes  nextEmit: index of the first src byte not yet emitted
//   16(SP)  4 bytes  current repeat offset (starts at 1)
//   20(SP)  4 bytes  nextS: next search position if nothing matches
//   24(SP)  1024 B   hash table, 256 entries of 4-byte src indexes
//
// Long-lived registers: AX = dst write cursor, DX = src base, CX = current
// src index.
//
// NOTE(review): this file is generated ("DO NOT EDIT"); the long-literal copy
// fix described at the memmove_long_* labels below should also be applied to
// the generator so it is not lost on regeneration.
TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56
	MOVQ dst_base+0(FP), AX
	MOVQ $0x00000008, CX
	LEAQ 24(SP), DX
	PXOR X0, X0

	// Clear the hash table: 8 iterations of 128 bytes = 1024 bytes.
zero_loop_encodeSnappyBlockAsm8B:
	MOVOU X0, (DX)
	MOVOU X0, 16(DX)
	MOVOU X0, 32(DX)
	MOVOU X0, 48(DX)
	MOVOU X0, 64(DX)
	MOVOU X0, 80(DX)
	MOVOU X0, 96(DX)
	MOVOU X0, 112(DX)
	ADDQ  $0x80, DX
	DECQ  CX
	JNZ   zero_loop_encodeSnappyBlockAsm8B

	// Initialise nextEmit, the search limit, the dst limit, the start index
	// (1) and the repeat offset (1).
	MOVL  $0x00000000, 12(SP)
	MOVQ  src_len+32(FP), CX
	LEAQ  -5(CX), DX
	LEAQ  -8(CX), BP
	MOVL  BP, 8(SP)
	SHRQ  $0x05, CX
	SUBL  CX, DX
	LEAQ  (AX)(DX*1), DX
	MOVQ  DX, (SP)
	MOVL  $0x00000001, CX
	MOVL  CX, 16(SP)
	MOVQ  src_base+24(FP), DX

	// Main search loop. SI holds the 8 bytes at src[CX]. The next search
	// position is CX + 4 + (CX - nextEmit)>>4; once it passes the search
	// limit the remaining bytes are emitted as a literal.
search_loop_encodeSnappyBlockAsm8B:
	MOVQ  (DX)(CX*1), SI
	MOVL  CX, BP
	SUBL  12(SP), BP
	SHRL  $0x04, BP
	LEAL  4(CX)(BP*1), BP
	MOVL  8(SP), DI
	CMPL  BP, DI
	JGT   emit_remainder_encodeSnappyBlockAsm8B
	MOVL  BP, 20(SP)

	// Hash the 4 bytes at CX and at CX+1 (multiply, keep the top 8 bits),
	// fetch the two candidates (BP, DI) and store CX / CX+1 in their slots.
	// R9 then receives the hash of the 4 bytes at CX+2.
	MOVQ  $0x9e3779b1, R8
	MOVQ  SI, R9
	MOVQ  SI, R10
	SHRQ  $0x08, R10
	SHLQ  $0x20, R9
	IMULQ R8, R9
	SHRQ  $0x38, R9
	SHLQ  $0x20, R10
	IMULQ R8, R10
	SHRQ  $0x38, R10
	MOVL  24(SP)(R9*4), BP
	MOVL  24(SP)(R10*4), DI
	MOVL  CX, 24(SP)(R9*4)
	LEAL  1(CX), R9
	MOVL  R9, 24(SP)(R10*4)
	MOVQ  SI, R9
	SHRQ  $0x10, R9
	SHLQ  $0x20, R9
	IMULQ R8, R9
	SHRQ  $0x38, R9

	// Repeat check: compare the 4 bytes at CX+1 with the 4 bytes one repeat
	// offset earlier.
	MOVL  CX, R8
	SUBL  16(SP), R8
	MOVL  1(DX)(R8*1), R10
	MOVQ  SI, R8
	SHRQ  $0x08, R8
	CMPL  R8, R10
	JNE   no_repeat_found_encodeSnappyBlockAsm8B

	// Repeat found at CX+1. Extend it backwards while the preceding bytes
	// match, without going below nextEmit or below index 0 of the source.
	LEAL  1(CX), SI
	MOVL  12(SP), BP
	MOVL  SI, DI
	SUBL  16(SP), DI
	JZ    repeat_extend_back_end_encodeSnappyBlockAsm8B

repeat_extend_back_loop_encodeSnappyBlockAsm8B:
	CMPL SI, BP
	JLE  repeat_extend_back_end_encodeSnappyBlockAsm8B
	MOVB -1(DX)(DI*1), BL
	MOVB -1(DX)(SI*1), R8
	CMPB BL, R8
	JNE  repeat_extend_back_end_encodeSnappyBlockAsm8B
	LEAL -1(SI), SI
	DECL DI
	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm8B

	// Emit src[nextEmit:SI] as a literal (skipped when empty). DI = literal
	// length, BP = length-1, R8 = literal source pointer. The tag byte is
	// (length-1)<<2 for short literals, otherwise 0xf0/0xf4/0xf8/0xfc followed
	// by 1/2/3/4 bytes of length-1.
repeat_extend_back_end_encodeSnappyBlockAsm8B:
	MOVL 12(SP), BP
	CMPL BP, SI
	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
	MOVL SI, DI
	MOVL SI, 12(SP)
	LEAQ (DX)(BP*1), R8
	SUBL BP, DI
	LEAL -1(DI), BP
	CMPL BP, $0x3c
	JLT  one_byte_repeat_emit_encodeSnappyBlockAsm8B
	CMPL BP, $0x00000100
	JLT  two_bytes_repeat_emit_encodeSnappyBlockAsm8B
	CMPL BP, $0x00010000
	JLT  three_bytes_repeat_emit_encodeSnappyBlockAsm8B
	CMPL BP, $0x01000000
	JLT  four_bytes_repeat_emit_encodeSnappyBlockAsm8B
	MOVB $0xfc, (AX)
	MOVL BP, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm8B

four_bytes_repeat_emit_encodeSnappyBlockAsm8B:
	MOVL BP, R9
	SHRL $0x10, R9
	MOVB $0xf8, (AX)
	MOVW BP, 1(AX)
	MOVB R9, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm8B

three_bytes_repeat_emit_encodeSnappyBlockAsm8B:
	MOVB $0xf4, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm8B

two_bytes_repeat_emit_encodeSnappyBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB BP, 1(AX)
	ADDQ $0x02, AX
	CMPL BP, $0x40
	JL   memmove_repeat_emit_encodeSnappyBlockAsm8B
	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm8B

one_byte_repeat_emit_encodeSnappyBlockAsm8B:
	SHLB $0x02, BP
	MOVB BP, (AX)
	ADDQ $0x01, AX

	// Short literal copy (at most 64 bytes): dispatch on length and copy with
	// possibly overlapping head/tail moves. BP = dst cursor after the copy.
memmove_repeat_emit_encodeSnappyBlockAsm8B:
	LEAQ (AX)(DI*1), BP
	CMPQ DI, $0x03
	JB   emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_1or2
	JE   emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_3
	CMPQ DI, $0x08
	JB   emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_4through7
	CMPQ DI, $0x10
	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
	CMPQ DI, $0x20
	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_1or2:
	MOVB (R8), R9
	MOVB -1(R8)(DI*1), R8
	MOVB R9, (AX)
	MOVB R8, -1(AX)(DI*1)
	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_3:
	MOVW (R8), R9
	MOVB 2(R8), R8
	MOVW R9, (AX)
	MOVB R8, 2(AX)
	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_4through7:
	MOVL (R8), R9
	MOVL -4(R8)(DI*1), R8
	MOVL R9, (AX)
	MOVL R8, -4(AX)(DI*1)
	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
	MOVQ (R8), R9
	MOVQ -8(R8)(DI*1), R8
	MOVQ R9, (AX)
	MOVQ R8, -8(AX)(DI*1)
	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
	MOVOU (R8), X0
	MOVOU -16(R8)(DI*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DI*1)
	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU -32(R8)(DI*1), X2
	MOVOU -16(R8)(DI*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DI*1)
	MOVOU X3, -16(AX)(DI*1)

memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B:
	MOVQ BP, AX
	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B

	// Long literal copy (length DI >= 65). The first and last 32 bytes are
	// loaded up front and stored last with unaligned moves; the middle is
	// copied in 32-byte chunks to 32-byte aligned destinations. R11 is the
	// end offset of the current chunk and starts at 64 - (AX & 31).
	//
	// A previous 128-byte block loop ran here for lengths 128..255 and could
	// read and write past the end of the literal for lengths 128..191. It has
	// been removed: the 32-byte loop below only copies chunks that end at or
	// before DI, and the saved head/tail cover everything else.
memmove_long_repeat_emit_encodeSnappyBlockAsm8B:
	LEAQ  (AX)(DI*1), BP
	MOVOU (R8), X0
	MOVOU 16(R8), X1
	MOVOU -32(R8)(DI*1), X2
	MOVOU -16(R8)(DI*1), X3
	MOVQ  AX, R9
	ANDL  $0x0000001f, R9
	MOVQ  $0x00000040, R11
	SUBQ  R9, R11

emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(R8)(R11*1), X4
	MOVOU -16(R8)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ  $0x20, R11
	CMPQ  DI, R11
	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DI*1)
	MOVOU X3, -16(AX)(DI*1)
	MOVQ  BP, AX

	// Skip past the 4 bytes already verified by the repeat check (CX+1+4) and
	// extend the match forward: 8 bytes at a time, then byte by byte. R10
	// counts the additional matching bytes.
emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B:
	ADDL $0x05, CX
	MOVL CX, BP
	SUBL 16(SP), BP
	MOVQ src_len+32(FP), DI
	SUBL CX, DI
	LEAQ (DX)(CX*1), R8
	LEAQ (DX)(BP*1), BP
	XORL R10, R10
	CMPL DI, $0x08
	JL   matchlen_single_repeat_extend

matchlen_loopback_repeat_extend:
	MOVQ  (R8)(R10*1), R9
	XORQ  (BP)(R10*1), R9
	TESTQ R9, R9
	JZ    matchlen_loop_repeat_extend
	BSFQ  R9, R9
	SARQ  $0x03, R9
	LEAL  (R10)(R9*1), R10
	JMP   repeat_extend_forward_end_encodeSnappyBlockAsm8B

matchlen_loop_repeat_extend:
	LEAL -8(DI), DI
	LEAL 8(R10), R10
	CMPL DI, $0x08
	JGE  matchlen_loopback_repeat_extend

matchlen_single_repeat_extend:
	TESTL DI, DI
	JZ    repeat_extend_forward_end_encodeSnappyBlockAsm8B

matchlen_single_loopback_repeat_extend:
	MOVB (R8)(R10*1), R9
	CMPB (BP)(R10*1), R9
	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm8B
	LEAL 1(R10), R10
	DECL DI
	JNZ  matchlen_single_loopback_repeat_extend

	// Emit the repeat as an ordinary copy: BP = match length (CX - match
	// start), SI = offset (the repeat offset).
repeat_extend_forward_end_encodeSnappyBlockAsm8B:
	ADDL R10, CX
	MOVL CX, BP
	SUBL SI, BP
	MOVL 16(SP), SI
	CMPL SI, $0x00010000
	JL   two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B

	// Offsets >= 65536: 5-byte copies (tag + 4-byte offset), 64 bytes each
	// while more than 64 bytes remain.
four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm8B:
	CMPL BP, $0x40
	JLE  four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm8B
	MOVB $0xff, (AX)
	MOVL SI, 1(AX)
	LEAL -64(BP), BP
	ADDQ $0x05, AX
	CMPL BP, $0x04
	JL   four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm8B
	JMP  four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm8B

four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm8B:
	TESTL BP, BP
	JZ    repeat_end_emit_encodeSnappyBlockAsm8B
	MOVB  $0x03, BL
	LEAL  -4(BX)(BP*4), BP
	MOVB  BP, (AX)
	MOVL  SI, 1(AX)
	ADDQ  $0x05, AX
	JMP   repeat_end_emit_encodeSnappyBlockAsm8B

	// Offsets < 65536: 3-byte copies of 60 bytes while more than 64 remain.
two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B:
	CMPL BP, $0x40
	JLE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B
	MOVB $0xee, (AX)
	MOVW SI, 1(AX)
	LEAL -60(BP), BP
	ADDQ $0x03, AX
	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B

	// Final piece: a 2-byte copy when length < 12 and offset < 2048,
	// otherwise a 3-byte copy.
two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B:
	CMPL BP, $0x0c
	JGE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B
	CMPL SI, $0x00000800
	JGE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B
	MOVB $0x01, BL
	LEAL -16(BX)(BP*4), BP
	MOVB SI, 1(AX)
	SHRL $0x08, SI
	SHLL $0x05, SI
	ORL  SI, BP
	MOVB BP, (AX)
	ADDQ $0x02, AX
	JMP  repeat_end_emit_encodeSnappyBlockAsm8B

emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B:
	MOVB $0x02, BL
	LEAL -4(BX)(BP*4), BP
	MOVB BP, (AX)
	MOVW SI, 1(AX)
	ADDQ $0x03, AX

	// Everything up to CX has been emitted; continue searching or finish.
repeat_end_emit_encodeSnappyBlockAsm8B:
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE  emit_remainder_encodeSnappyBlockAsm8B
	JMP  search_loop_encodeSnappyBlockAsm8B

	// No repeat: test the candidates for CX, CX+1 and CX+2 in turn. If none
	// matches, jump ahead to nextS.
no_repeat_found_encodeSnappyBlockAsm8B:
	CMPL (DX)(BP*1), SI
	JEQ  candidate_match_encodeSnappyBlockAsm8B
	SHRQ $0x08, SI
	MOVL 24(SP)(R9*4), BP
	LEAL 2(CX), R8
	CMPL (DX)(DI*1), SI
	JEQ  candidate2_match_encodeSnappyBlockAsm8B
	MOVL R8, 24(SP)(R9*4)
	SHRQ $0x08, SI
	CMPL (DX)(BP*1), SI
	JEQ  candidate3_match_encodeSnappyBlockAsm8B
	MOVL 20(SP), CX
	JMP  search_loop_encodeSnappyBlockAsm8B

candidate3_match_encodeSnappyBlockAsm8B:
	ADDL $0x02, CX
	JMP  candidate_match_encodeSnappyBlockAsm8B

candidate2_match_encodeSnappyBlockAsm8B:
	MOVL R8, 24(SP)(R9*4)
	INCL CX
	MOVL DI, BP

	// Match found: CX = match position, BP = candidate position. Extend the
	// match backwards while the preceding bytes agree, stopping at nextEmit
	// or when the candidate reaches index 0.
candidate_match_encodeSnappyBlockAsm8B:
	MOVL  12(SP), SI
	TESTL BP, BP
	JZ    match_extend_back_end_encodeSnappyBlockAsm8B

match_extend_back_loop_encodeSnappyBlockAsm8B:
	CMPL CX, SI
	JLE  match_extend_back_end_encodeSnappyBlockAsm8B
	MOVB -1(DX)(BP*1), BL
	MOVB -1(DX)(CX*1), DI
	CMPB BL, DI
	JNE  match_extend_back_end_encodeSnappyBlockAsm8B
	LEAL -1(CX), CX
	DECL BP
	JZ   match_extend_back_end_encodeSnappyBlockAsm8B
	JMP  match_extend_back_loop_encodeSnappyBlockAsm8B

	// Give up (return 0) if the pending literal plus 4 bytes would reach the
	// dst limit.
match_extend_back_end_encodeSnappyBlockAsm8B:
	MOVL CX, SI
	SUBL 12(SP), SI
	LEAQ 4(AX)(SI*1), SI
	CMPQ SI, (SP)
	JL   match_dst_size_check_encodeSnappyBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

	// Emit src[nextEmit:CX] as a literal (skipped when empty). R8 = literal
	// length, DI = length-1, SI = literal source pointer.
match_dst_size_check_encodeSnappyBlockAsm8B:
	MOVL CX, SI
	MOVL 12(SP), DI
	CMPL DI, SI
	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm8B
	MOVL SI, R8
	MOVL SI, 12(SP)
	LEAQ (DX)(DI*1), SI
	SUBL DI, R8
	LEAL -1(R8), DI
	CMPL DI, $0x3c
	JLT  one_byte_match_emit_encodeSnappyBlockAsm8B
	CMPL DI, $0x00000100
	JLT  two_bytes_match_emit_encodeSnappyBlockAsm8B
	CMPL DI, $0x00010000
	JLT  three_bytes_match_emit_encodeSnappyBlockAsm8B
	CMPL DI, $0x01000000
	JLT  four_bytes_match_emit_encodeSnappyBlockAsm8B
	MOVB $0xfc, (AX)
	MOVL DI, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_match_emit_encodeSnappyBlockAsm8B

four_bytes_match_emit_encodeSnappyBlockAsm8B:
	MOVL DI, R9
	SHRL $0x10, R9
	MOVB $0xf8, (AX)
	MOVW DI, 1(AX)
	MOVB R9, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_match_emit_encodeSnappyBlockAsm8B

three_bytes_match_emit_encodeSnappyBlockAsm8B:
	MOVB $0xf4, (AX)
	MOVW DI, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_match_emit_encodeSnappyBlockAsm8B

two_bytes_match_emit_encodeSnappyBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DI, 1(AX)
	ADDQ $0x02, AX
	CMPL DI, $0x40
	JL   memmove_match_emit_encodeSnappyBlockAsm8B
	JMP  memmove_long_match_emit_encodeSnappyBlockAsm8B

one_byte_match_emit_encodeSnappyBlockAsm8B:
	SHLB $0x02, DI
	MOVB DI, (AX)
	ADDQ $0x01, AX

	// Short literal copy (at most 64 bytes); DI = dst cursor after the copy.
memmove_match_emit_encodeSnappyBlockAsm8B:
	LEAQ (AX)(R8*1), DI
	CMPQ R8, $0x03
	JB   emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_1or2
	JE   emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_3
	CMPQ R8, $0x08
	JB   emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_4through7
	CMPQ R8, $0x10
	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
	CMPQ R8, $0x20
	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_1or2:
	MOVB (SI), R9
	MOVB -1(SI)(R8*1), SI
	MOVB R9, (AX)
	MOVB SI, -1(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_3:
	MOVW (SI), R9
	MOVB 2(SI), SI
	MOVW R9, (AX)
	MOVB SI, 2(AX)
	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_4through7:
	MOVL (SI), R9
	MOVL -4(SI)(R8*1), SI
	MOVL R9, (AX)
	MOVL SI, -4(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
	MOVQ (SI), R9
	MOVQ -8(SI)(R8*1), SI
	MOVQ R9, (AX)
	MOVQ SI, -8(AX)(R8*1)
	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
	MOVOU (SI), X0
	MOVOU -16(SI)(R8*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(R8*1)
	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm8B

emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)

memmove_end_copy_match_emit_encodeSnappyBlockAsm8B:
	MOVQ DI, AX
	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm8B

	// Long literal copy (length R8 >= 65): head and tail are saved first, the
	// middle is copied in aligned 32-byte chunks whose end offset R11 never
	// exceeds R8 when a chunk is started. The former 128-byte block loop,
	// which could overrun both buffers for lengths 128..191, was removed.
memmove_long_match_emit_encodeSnappyBlockAsm8B:
	LEAQ  (AX)(R8*1), DI
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU -32(SI)(R8*1), X2
	MOVOU -16(SI)(R8*1), X3
	MOVQ  AX, R9
	ANDL  $0x0000001f, R9
	MOVQ  $0x00000040, R11
	SUBQ  R9, R11

emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(SI)(R11*1), X4
	MOVOU -16(SI)(R11*1), X5
	MOVOA X4, -32(AX)(R11*1)
	MOVOA X5, -16(AX)(R11*1)
	ADDQ  $0x20, R11
	CMPQ  R8, R11
	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(R8*1)
	MOVOU X3, -16(AX)(R8*1)
	MOVQ  DI, AX

	// Record the new repeat offset (CX - candidate), skip the 4 bytes known
	// to match and extend the match forward; R9 counts the extra bytes.
emit_literal_done_match_emit_encodeSnappyBlockAsm8B:
match_nolit_loop_encodeSnappyBlockAsm8B:
	MOVL CX, SI
	SUBL BP, SI
	MOVL SI, 16(SP)
	ADDL $0x04, CX
	ADDL $0x04, BP
	MOVQ src_len+32(FP), SI
	SUBL CX, SI
	LEAQ (DX)(CX*1), DI
	LEAQ (DX)(BP*1), BP
	XORL R9, R9
	CMPL SI, $0x08
	JL   matchlen_single_match_nolit_encodeSnappyBlockAsm8B

matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B:
	MOVQ  (DI)(R9*1), R8
	XORQ  (BP)(R9*1), R8
	TESTQ R8, R8
	JZ    matchlen_loop_match_nolit_encodeSnappyBlockAsm8B
	BSFQ  R8, R8
	SARQ  $0x03, R8
	LEAL  (R9)(R8*1), R9
	JMP   match_nolit_end_encodeSnappyBlockAsm8B

matchlen_loop_match_nolit_encodeSnappyBlockAsm8B:
	LEAL -8(SI), SI
	LEAL 8(R9), R9
	CMPL SI, $0x08
	JGE  matchlen_loopback_match_nolit_encodeSnappyBlockAsm8B

matchlen_single_match_nolit_encodeSnappyBlockAsm8B:
	TESTL SI, SI
	JZ    match_nolit_end_encodeSnappyBlockAsm8B

matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8B:
	MOVB (DI)(R9*1), R8
	CMPB (BP)(R9*1), R8
	JNE  match_nolit_end_encodeSnappyBlockAsm8B
	LEAL 1(R9), R9
	DECL SI
	JNZ  matchlen_single_loopback_match_nolit_encodeSnappyBlockAsm8B

	// Emit the copy: R9 = total match length (extension + 4), BP = offset.
	// Same encoding scheme as the repeat-as-copy path above.
match_nolit_end_encodeSnappyBlockAsm8B:
	ADDL R9, CX
	MOVL 16(SP), BP
	ADDL $0x04, R9
	CMPL BP, $0x00010000
	JL   two_byte_offset_match_nolit_encodeSnappyBlockAsm8B

four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm8B:
	CMPL R9, $0x40
	JLE  four_bytes_remain_match_nolit_encodeSnappyBlockAsm8B
	MOVB $0xff, (AX)
	MOVL BP, 1(AX)
	LEAL -64(R9), R9
	ADDQ $0x05, AX
	CMPL R9, $0x04
	JL   four_bytes_remain_match_nolit_encodeSnappyBlockAsm8B
	JMP  four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm8B

four_bytes_remain_match_nolit_encodeSnappyBlockAsm8B:
	TESTL R9, R9
	JZ    match_nolit_emitcopy_end_encodeSnappyBlockAsm8B
	MOVB  $0x03, BL
	LEAL  -4(BX)(R9*4), R9
	MOVB  R9, (AX)
	MOVL  BP, 1(AX)
	ADDQ  $0x05, AX
	JMP   match_nolit_emitcopy_end_encodeSnappyBlockAsm8B

two_byte_offset_match_nolit_encodeSnappyBlockAsm8B:
	CMPL R9, $0x40
	JLE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B
	MOVB $0xee, (AX)
	MOVW BP, 1(AX)
	LEAL -60(R9), R9
	ADDQ $0x03, AX
	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm8B

two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B:
	CMPL R9, $0x0c
	JGE  emit_copy_three_match_nolit_encodeSnappyBlockAsm8B
	CMPL BP, $0x00000800
	JGE  emit_copy_three_match_nolit_encodeSnappyBlockAsm8B
	MOVB $0x01, BL
	LEAL -16(BX)(R9*4), R9
	MOVB BP, 1(AX)
	SHRL $0x08, BP
	SHLL $0x05, BP
	ORL  BP, R9
	MOVB R9, (AX)
	ADDQ $0x02, AX
	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm8B

emit_copy_three_match_nolit_encodeSnappyBlockAsm8B:
	MOVB $0x02, BL
	LEAL -4(BX)(R9*4), R9
	MOVB R9, (AX)
	MOVW BP, 1(AX)
	ADDQ $0x03, AX

	// Everything up to CX is emitted. Finish if past the search limit, and
	// give up (return 0) if dst has reached its limit.
match_nolit_emitcopy_end_encodeSnappyBlockAsm8B:
	MOVL CX, 12(SP)
	CMPL CX, 8(SP)
	JGE  emit_remainder_encodeSnappyBlockAsm8B
	CMPQ AX, (SP)
	JL   match_nolit_dst_ok_encodeSnappyBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

	// Index position CX-2 in the hash table, then look up and replace the
	// entry for position CX. If that candidate matches the 4 bytes at CX,
	// start another match immediately (no literal in between).
match_nolit_dst_ok_encodeSnappyBlockAsm8B:
	MOVQ  -2(DX)(CX*1), SI
	MOVQ  $0x9e3779b1, BP
	MOVQ  SI, DI
	SHRQ  $0x10, SI
	MOVQ  SI, R8
	SHLQ  $0x20, DI
	IMULQ BP, DI
	SHRQ  $0x38, DI
	SHLQ  $0x20, R8
	IMULQ BP, R8
	SHRQ  $0x38, R8
	LEAL  -2(CX), R9
	MOVL  24(SP)(R8*4), BP
	MOVL  R9, 24(SP)(DI*4)
	MOVL  CX, 24(SP)(R8*4)
	CMPL  (DX)(BP*1), SI
	JEQ   match_nolit_loop_encodeSnappyBlockAsm8B
	INCL  CX
	JMP   search_loop_encodeSnappyBlockAsm8B

	// Emit src[nextEmit:] as the final literal. Return 0 if it (plus 4 bytes)
	// would reach the dst limit.
emit_remainder_encodeSnappyBlockAsm8B:
	MOVQ src_len+32(FP), CX
	SUBL 12(SP), CX
	LEAQ 4(AX)(CX*1), CX
	CMPQ CX, (SP)
	JL   emit_remainder_ok_encodeSnappyBlockAsm8B
	MOVQ $0x00000000, ret+48(FP)
	RET

	// BP = literal length, DX = length-1 (DX is no longer needed as the src
	// base from here on), CX = literal source pointer.
emit_remainder_ok_encodeSnappyBlockAsm8B:
	MOVQ src_len+32(FP), CX
	MOVL 12(SP), BX
	CMPL BX, CX
	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
	MOVL CX, BP
	MOVL CX, 12(SP)
	LEAQ (DX)(BX*1), CX
	SUBL BX, BP
	LEAL -1(BP), DX
	CMPL DX, $0x3c
	JLT  one_byte_emit_remainder_encodeSnappyBlockAsm8B
	CMPL DX, $0x00000100
	JLT  two_bytes_emit_remainder_encodeSnappyBlockAsm8B
	CMPL DX, $0x00010000
	JLT  three_bytes_emit_remainder_encodeSnappyBlockAsm8B
	CMPL DX, $0x01000000
	JLT  four_bytes_emit_remainder_encodeSnappyBlockAsm8B
	MOVB $0xfc, (AX)
	MOVL DX, 1(AX)
	ADDQ $0x05, AX
	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm8B

four_bytes_emit_remainder_encodeSnappyBlockAsm8B:
	MOVL DX, BX
	SHRL $0x10, BX
	MOVB $0xf8, (AX)
	MOVW DX, 1(AX)
	MOVB BL, 3(AX)
	ADDQ $0x04, AX
	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm8B

three_bytes_emit_remainder_encodeSnappyBlockAsm8B:
	MOVB $0xf4, (AX)
	MOVW DX, 1(AX)
	ADDQ $0x03, AX
	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm8B

two_bytes_emit_remainder_encodeSnappyBlockAsm8B:
	MOVB $0xf0, (AX)
	MOVB DL, 1(AX)
	ADDQ $0x02, AX
	CMPL DX, $0x40
	JL   memmove_emit_remainder_encodeSnappyBlockAsm8B
	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm8B

one_byte_emit_remainder_encodeSnappyBlockAsm8B:
	SHLB $0x02, DL
	MOVB DL, (AX)
	ADDQ $0x01, AX

	// Short literal copy (at most 64 bytes); BX = length, DX = dst cursor
	// after the copy.
memmove_emit_remainder_encodeSnappyBlockAsm8B:
	LEAQ (AX)(BP*1), DX
	MOVL BP, BX
	CMPQ BX, $0x03
	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2
	JE   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3
	CMPQ BX, $0x08
	JB   emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7
	CMPQ BX, $0x10
	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16
	CMPQ BX, $0x20
	JBE  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32
	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2:
	MOVB (CX), BP
	MOVB -1(CX)(BX*1), CL
	MOVB BP, (AX)
	MOVB CL, -1(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3:
	MOVW (CX), BP
	MOVB 2(CX), CL
	MOVW BP, (AX)
	MOVB CL, 2(AX)
	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7:
	MOVL (CX), BP
	MOVL -4(CX)(BX*1), CX
	MOVL BP, (AX)
	MOVL CX, -4(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16:
	MOVQ (CX), BP
	MOVQ -8(CX)(BX*1), CX
	MOVQ BP, (AX)
	MOVQ CX, -8(AX)(BX*1)
	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(BX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(BX*1)
	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B

emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)

memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B:
	MOVQ DX, AX
	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B

	// Long literal copy (length BX >= 65): head and tail are saved first, the
	// middle is copied in aligned 32-byte chunks whose end offset DI never
	// exceeds BX when a chunk is started. The former 128-byte block loop,
	// which could overrun both buffers for lengths 128..191, was removed.
memmove_long_emit_remainder_encodeSnappyBlockAsm8B:
	LEAQ  (AX)(BP*1), DX
	MOVL  BP, BX
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(BX*1), X2
	MOVOU -16(CX)(BX*1), X3
	MOVQ  AX, BP
	ANDL  $0x0000001f, BP
	MOVQ  $0x00000040, DI
	SUBQ  BP, DI

emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ  $0x20, DI
	CMPQ  BX, DI
	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(BX*1)
	MOVOU X3, -16(AX)(BX*1)
	MOVQ  DX, AX

	// Return the number of bytes written: dst cursor minus dst base.
emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B:
	MOVQ dst_base+0(FP), CX
	SUBQ CX, AX
	MOVQ AX, ret+48(FP)
	RET

// func emitLiteral(dst []byte, lit []byte) int
// Requires: SSE2
//
// emitLiteral writes a literal element for lit at the start of dst: a tag of
// 1 to 5 bytes encoding len(lit)-1, followed by the bytes of lit. It returns
// the total number of bytes written (header + len(lit)), or 0 when lit is
// empty. Only dst's base pointer is read; the caller must make sure dst is
// large enough.
//
// Registers: AX = dst cursor, CX = lit pointer, DX = len(lit), BX = return
// value, R9 = len(lit)-1 and general scratch.
//
// This function is frameless (NOSPLIT, $0), so BP is deliberately not used:
// nothing would restore the caller's frame pointer on return.
TEXT ·emitLiteral(SB), NOSPLIT, $0-56
	MOVQ  lit_len+32(FP), DX
	MOVQ  dst_base+0(FP), AX
	MOVQ  lit_base+24(FP), CX
	TESTQ DX, DX
	JZ    emit_literal_end_standalone_skip
	MOVL  DX, BX
	LEAL  -1(DX), R9

	// Pick the header size from len-1: tag (len-1)<<2 for len-1 < 60,
	// otherwise 0xf0/0xf4/0xf8/0xfc followed by 1/2/3/4 bytes of len-1.
	CMPL  R9, $0x3c
	JLT   one_byte_standalone
	CMPL  R9, $0x00000100
	JLT   two_bytes_standalone
	CMPL  R9, $0x00010000
	JLT   three_bytes_standalone
	CMPL  R9, $0x01000000
	JLT   four_bytes_standalone
	MOVB  $0xfc, (AX)
	MOVL  R9, 1(AX)
	ADDQ  $0x05, BX
	ADDQ  $0x05, AX
	JMP   memmove_long_standalone

four_bytes_standalone:
	MOVL R9, SI
	SHRL $0x10, SI
	MOVB $0xf8, (AX)
	MOVW R9, 1(AX)
	MOVB SI, 3(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  memmove_long_standalone

three_bytes_standalone:
	MOVB $0xf4, (AX)
	MOVW R9, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  memmove_long_standalone

two_bytes_standalone:
	MOVB $0xf0, (AX)
	MOVB R9, 1(AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	CMPL R9, $0x40
	JL   memmove_standalone
	JMP  memmove_long_standalone

one_byte_standalone:
	SHLB $0x02, R9
	MOVB R9, (AX)
	ADDQ $0x01, BX
	ADDQ $0x01, AX

	// Short copy (at most 64 bytes): dispatch on length and copy with
	// possibly overlapping head/tail moves.
memmove_standalone:
	CMPQ DX, $0x03
	JB   emit_lit_memmove_standalone_memmove_move_1or2
	JE   emit_lit_memmove_standalone_memmove_move_3
	CMPQ DX, $0x08
	JB   emit_lit_memmove_standalone_memmove_move_4through7
	CMPQ DX, $0x10
	JBE  emit_lit_memmove_standalone_memmove_move_8through16
	CMPQ DX, $0x20
	JBE  emit_lit_memmove_standalone_memmove_move_17through32
	JMP  emit_lit_memmove_standalone_memmove_move_33through64

emit_lit_memmove_standalone_memmove_move_1or2:
	MOVB (CX), R9
	MOVB -1(CX)(DX*1), CL
	MOVB R9, (AX)
	MOVB CL, -1(AX)(DX*1)
	JMP  emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_3:
	MOVW (CX), R9
	MOVB 2(CX), CL
	MOVW R9, (AX)
	MOVB CL, 2(AX)
	JMP  emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_4through7:
	MOVL (CX), R9
	MOVL -4(CX)(DX*1), CX
	MOVL R9, (AX)
	MOVL CX, -4(AX)(DX*1)
	JMP  emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_8through16:
	MOVQ (CX), R9
	MOVQ -8(CX)(DX*1), CX
	MOVQ R9, (AX)
	MOVQ CX, -8(AX)(DX*1)
	JMP  emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_17through32:
	MOVOU (CX), X0
	MOVOU -16(CX)(DX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, -16(AX)(DX*1)
	JMP   emit_literal_end_standalone

emit_lit_memmove_standalone_memmove_move_33through64:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP   emit_literal_end_standalone

	// Long copy (length DX >= 65). The first and last 32 bytes are loaded up
	// front and stored last with unaligned moves; the middle is copied in
	// 32-byte chunks to 32-byte aligned destinations. DI is the end offset of
	// the current chunk and starts at 64 - (AX & 31), i.e. in 33..64.
	//
	// A previous 128-byte block loop ran here for lengths 128..255 and could
	// read past lit and write past the literal in dst for lengths 128..191.
	// It has been removed: the loop below only starts a chunk whose end
	// offset is <= DX, and the saved head/tail cover everything else.
memmove_long_standalone:
	MOVOU (CX), X0
	MOVOU 16(CX), X1
	MOVOU -32(CX)(DX*1), X2
	MOVOU -16(CX)(DX*1), X3
	MOVQ  AX, R9
	ANDL  $0x0000001f, R9
	MOVQ  $0x00000040, DI
	SUBQ  R9, DI

emit_lit_memmove_long_standalonelarge_forward_sse_loop_32:
	MOVOU -32(CX)(DI*1), X4
	MOVOU -16(CX)(DI*1), X5
	MOVOA X4, -32(AX)(DI*1)
	MOVOA X5, -16(AX)(DI*1)
	ADDQ  $0x20, DI
	CMPQ  DX, DI
	JAE   emit_lit_memmove_long_standalonelarge_forward_sse_loop_32
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	MOVOU X2, -32(AX)(DX*1)
	MOVOU X3, -16(AX)(DX*1)
	JMP   emit_literal_end_standalone

	// Empty literal: nothing written, return 0.
emit_literal_end_standalone_skip:
	XORQ BX, BX

emit_literal_end_standalone:
	MOVQ BX, ret+48(FP)
	RET

// func emitRepeat(dst []byte, offset int, length int) int
//
// emitRepeat writes one or more repeat ops covering length bytes to dst and
// returns the number of bytes written. dst_len is never read, so the caller
// must guarantee dst has room for everything emitted here.
//
// Register use:
//   AX = write cursor into dst
//   BX = bytes written so far (the return value)
//   CX = offset
//   DX = remaining length, reduced by 4 at the top of every pass
//   SI = length as it was on entry to the current pass
//
// SI is used for scratch instead of BP: this function has a zero-size frame,
// so the assembler emits no BP save/restore and clobbering BP would corrupt
// the caller's frame pointer.
TEXT ·emitRepeat(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX

emit_repeat_again_standalone:
	// SI = length, DX = length-4.
	MOVL DX, SI
	LEAL -4(DX), DX

	// length <= 8: two-byte form that carries no offset.
	CMPL SI, $0x08
	JLE  repeat_two_standalone

	// length < 12 and offset < 2048: two-byte form that carries the offset.
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_standalone
	CMPL CX, $0x00000800
	JLT  repeat_two_offset_standalone

cant_repeat_two_offset_standalone:
	// Pick the shortest length encoding that fits DX (= length-4).
	CMPL DX, $0x00000104
	JLT  repeat_three_standalone
	CMPL DX, $0x00010100
	JLT  repeat_four_standalone
	CMPL DX, $0x0100ffff
	JLT  repeat_five_standalone

	// Too long for a single op: write the fixed bytes 1d 00 fb ff ff,
	// take 16842747 (0x0100fffb) off DX and go around again for the rest.
	LEAL -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP  emit_repeat_again_standalone

repeat_five_standalone:
	// Five bytes: 1d 00, then the low 24 bits of (DX-65536), little-endian.
	// CX (offset) is reused as scratch; it is not needed past this point.
	LEAL -65536(DX), DX
	MOVL DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARL $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  gen_emit_repeat_end

repeat_four_standalone:
	// Four bytes: 19 00, then the low 16 bits of (DX-256), little-endian.
	LEAL -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  gen_emit_repeat_end

repeat_three_standalone:
	// Three bytes: 15 00, then the low byte of (DX-4).
	LEAL -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  gen_emit_repeat_end

repeat_two_standalone:
	// Two bytes: the low 16 bits of (DX<<2)|1, little-endian.
	SHLL $0x02, DX
	ORL  $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_repeat_end

repeat_two_offset_standalone:
	// Two bytes: byte 1 = low 8 bits of offset,
	// byte 0 = low byte of (1 + DX*4) | ((offset>>8)<<5).
	XORQ SI, SI
	LEAL 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX

gen_emit_repeat_end:
	MOVQ BX, ret+40(FP)
	RET

// func emitCopy(dst []byte, offset int, length int) int
//
// emitCopy writes a copy op for (offset, length) to dst and returns the number
// of bytes written. A length above 64 is emitted as one leading copy followed
// by repeat ops for the remainder. dst_len is never read, so the caller must
// guarantee dst has room for everything emitted here.
//
// Register use:
//   AX = write cursor into dst
//   BX = bytes written so far (the return value)
//   CX = offset
//   DX = remaining length
//   SI = scratch
//
// SI is used for scratch instead of BP: this function has a zero-size frame,
// so the assembler emits no BP save/restore and clobbering BP would corrupt
// the caller's frame pointer.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX
	CMPL CX, $0x00010000
	JL   two_byte_offset_standalone

	// offset >= 65536: the offset is stored as 4 bytes.
	CMPL DX, $0x40
	JLE  four_bytes_remain_standalone

	// length > 64: write tag 0xff plus the 32-bit offset (5 bytes) and take
	// 64 off the length. If fewer than 4 bytes remain they go out as one more
	// 4-byte-offset copy; otherwise the remainder is emitted as repeats.
	MOVB $0xff, (AX)
	MOVL CX, 1(AX)
	LEAL -64(DX), DX
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	CMPL DX, $0x04
	JL   four_bytes_remain_standalone

emit_repeat_again_standalone_emit_copy:
	// SI = length, DX = length-4.
	MOVL DX, SI
	LEAL -4(DX), DX

	// length <= 8: two-byte repeat that carries no offset.
	CMPL SI, $0x08
	JLE  repeat_two_standalone_emit_copy

	// length < 12 and offset < 2048: two-byte repeat that carries the offset.
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_standalone_emit_copy
	CMPL CX, $0x00000800
	JLT  repeat_two_offset_standalone_emit_copy

cant_repeat_two_offset_standalone_emit_copy:
	// Pick the shortest length encoding that fits DX (= length-4).
	CMPL DX, $0x00000104
	JLT  repeat_three_standalone_emit_copy
	CMPL DX, $0x00010100
	JLT  repeat_four_standalone_emit_copy
	CMPL DX, $0x0100ffff
	JLT  repeat_five_standalone_emit_copy

	// Too long for a single repeat: write the fixed bytes 1d 00 fb ff ff,
	// take 16842747 (0x0100fffb) off DX and go around again for the rest.
	LEAL -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP  emit_repeat_again_standalone_emit_copy

repeat_five_standalone_emit_copy:
	// Five bytes: 1d 00, then the low 24 bits of (DX-65536), little-endian.
	LEAL -65536(DX), DX
	MOVL DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARL $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  gen_emit_copy_end

repeat_four_standalone_emit_copy:
	// Four bytes: 19 00, then the low 16 bits of (DX-256), little-endian.
	LEAL -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  gen_emit_copy_end

repeat_three_standalone_emit_copy:
	// Three bytes: 15 00, then the low byte of (DX-4).
	LEAL -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  gen_emit_copy_end

repeat_two_standalone_emit_copy:
	// Two bytes: the low 16 bits of (DX<<2)|1, little-endian.
	SHLL $0x02, DX
	ORL  $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

repeat_two_offset_standalone_emit_copy:
	// Two bytes: byte 1 = low 8 bits of offset,
	// byte 0 = low byte of (1 + DX*4) | ((offset>>8)<<5).
	XORQ SI, SI
	LEAL 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

four_bytes_remain_standalone:
	// Nothing left: done. Otherwise write a 5-byte copy whose tag byte is
	// the low byte of 3 + (length-1)*4, followed by the 32-bit offset.
	TESTL DX, DX
	JZ    gen_emit_copy_end
	MOVL  $0x03, SI
	LEAL  -4(SI)(DX*4), DX
	MOVB  DL, (AX)
	MOVL  CX, 1(AX)
	ADDQ  $0x05, BX
	ADDQ  $0x05, AX
	JMP   gen_emit_copy_end

two_byte_offset_standalone:
	// offset < 65536.
	CMPL DX, $0x40
	JLE  two_byte_offset_short_standalone

	// length > 64: write tag 0xee plus the 16-bit offset (3 bytes), take 60
	// off the length and emit the remainder as repeats.
	MOVB $0xee, (AX)
	MOVW CX, 1(AX)
	LEAL -60(DX), DX
	ADDQ $0x03, AX
	ADDQ $0x03, BX

emit_repeat_again_standalone_emit_copy_short:
	// SI = length, DX = length-4.
	MOVL DX, SI
	LEAL -4(DX), DX

	// length <= 8: two-byte repeat that carries no offset.
	CMPL SI, $0x08
	JLE  repeat_two_standalone_emit_copy_short

	// length < 12 and offset < 2048: two-byte repeat that carries the offset.
	CMPL SI, $0x0c
	JGE  cant_repeat_two_offset_standalone_emit_copy_short
	CMPL CX, $0x00000800
	JLT  repeat_two_offset_standalone_emit_copy_short

cant_repeat_two_offset_standalone_emit_copy_short:
	// Pick the shortest length encoding that fits DX (= length-4).
	CMPL DX, $0x00000104
	JLT  repeat_three_standalone_emit_copy_short
	CMPL DX, $0x00010100
	JLT  repeat_four_standalone_emit_copy_short
	CMPL DX, $0x0100ffff
	JLT  repeat_five_standalone_emit_copy_short

	// Too long for a single repeat: write the fixed bytes 1d 00 fb ff ff,
	// take 16842747 (0x0100fffb) off DX and go around again for the rest.
	LEAL -16842747(DX), DX
	MOVW $0x001d, (AX)
	MOVW $0xfffb, 2(AX)
	MOVB $0xff, 4(AX)
	ADDQ $0x05, AX
	ADDQ $0x05, BX
	JMP  emit_repeat_again_standalone_emit_copy_short

repeat_five_standalone_emit_copy_short:
	// Five bytes: 1d 00, then the low 24 bits of (DX-65536), little-endian.
	LEAL -65536(DX), DX
	MOVL DX, CX
	MOVW $0x001d, (AX)
	MOVW DX, 2(AX)
	SARL $0x10, CX
	MOVB CL, 4(AX)
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	JMP  gen_emit_copy_end

repeat_four_standalone_emit_copy_short:
	// Four bytes: 19 00, then the low 16 bits of (DX-256), little-endian.
	LEAL -256(DX), DX
	MOVW $0x0019, (AX)
	MOVW DX, 2(AX)
	ADDQ $0x04, BX
	ADDQ $0x04, AX
	JMP  gen_emit_copy_end

repeat_three_standalone_emit_copy_short:
	// Three bytes: 15 00, then the low byte of (DX-4).
	LEAL -4(DX), DX
	MOVW $0x0015, (AX)
	MOVB DL, 2(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX
	JMP  gen_emit_copy_end

repeat_two_standalone_emit_copy_short:
	// Two bytes: the low 16 bits of (DX<<2)|1, little-endian.
	SHLL $0x02, DX
	ORL  $0x01, DX
	MOVW DX, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

repeat_two_offset_standalone_emit_copy_short:
	// Two bytes: byte 1 = low 8 bits of offset,
	// byte 0 = low byte of (1 + DX*4) | ((offset>>8)<<5).
	XORQ SI, SI
	LEAL 1(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SARL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

two_byte_offset_short_standalone:
	// length <= 64 with offset < 65536. The 2-byte form is used only when
	// length < 12 and offset < 2048; everything else takes the 3-byte form.
	CMPL DX, $0x0c
	JGE  emit_copy_three_standalone
	CMPL CX, $0x00000800
	JGE  emit_copy_three_standalone

	// Two bytes: byte 1 = low 8 bits of offset,
	// byte 0 = low byte of (1 + (length-4)*4) | ((offset>>8)<<5).
	MOVL $0x01, SI
	LEAL -16(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SHRL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end

emit_copy_three_standalone:
	// Three bytes: tag = low byte of 2 + (length-1)*4, then the 16-bit offset.
	MOVL $0x02, SI
	LEAL -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVW CX, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX

gen_emit_copy_end:
	MOVQ BX, ret+40(FP)
	RET

// func emitCopyNoRepeat(dst []byte, offset int, length int) int
//
// emitCopyNoRepeat writes copy ops for (offset, length) to dst and returns the
// number of bytes written. Unlike emitCopy it never emits repeat ops: a long
// match is split into several plain copies. dst_len is never read, so the
// caller must guarantee dst has room for everything emitted here.
//
// Register use:
//   AX = write cursor into dst
//   BX = bytes written so far (the return value)
//   CX = offset
//   DX = remaining length
//   SI = scratch
//
// SI is used for scratch instead of BP: this function has a zero-size frame,
// so the assembler emits no BP save/restore and clobbering BP would corrupt
// the caller's frame pointer.
TEXT ·emitCopyNoRepeat(SB), NOSPLIT, $0-48
	XORQ BX, BX
	MOVQ dst_base+0(FP), AX
	MOVQ offset+24(FP), CX
	MOVQ length+32(FP), DX
	CMPL CX, $0x00010000
	JL   two_byte_offset_standalone_snappy

four_bytes_loop_back_standalone_snappy:
	// offset >= 65536. While more than 64 bytes remain, write tag 0xff plus
	// the 32-bit offset (5 bytes) and take 64 off the length.
	CMPL DX, $0x40
	JLE  four_bytes_remain_standalone_snappy
	MOVB $0xff, (AX)
	MOVL CX, 1(AX)
	LEAL -64(DX), DX
	ADDQ $0x05, BX
	ADDQ $0x05, AX
	CMPL DX, $0x04
	JL   four_bytes_remain_standalone_snappy
	JMP  four_bytes_loop_back_standalone_snappy

four_bytes_remain_standalone_snappy:
	// Nothing left: done. Otherwise write a 5-byte copy whose tag byte is
	// the low byte of 3 + (length-1)*4, followed by the 32-bit offset.
	TESTL DX, DX
	JZ    gen_emit_copy_end_snappy
	MOVL  $0x03, SI
	LEAL  -4(SI)(DX*4), DX
	MOVB  DL, (AX)
	MOVL  CX, 1(AX)
	ADDQ  $0x05, BX
	ADDQ  $0x05, AX
	JMP   gen_emit_copy_end_snappy

two_byte_offset_standalone_snappy:
	// offset < 65536. While more than 64 bytes remain, write tag 0xee plus
	// the 16-bit offset (3 bytes) and take 60 off the length.
	CMPL DX, $0x40
	JLE  two_byte_offset_short_standalone_snappy
	MOVB $0xee, (AX)
	MOVW CX, 1(AX)
	LEAL -60(DX), DX
	ADDQ $0x03, AX
	ADDQ $0x03, BX
	JMP  two_byte_offset_standalone_snappy

two_byte_offset_short_standalone_snappy:
	// length <= 64. The 2-byte form is used only when length < 12 and
	// offset < 2048; everything else takes the 3-byte form.
	CMPL DX, $0x0c
	JGE  emit_copy_three_standalone_snappy
	CMPL CX, $0x00000800
	JGE  emit_copy_three_standalone_snappy

	// Two bytes: byte 1 = low 8 bits of offset,
	// byte 0 = low byte of (1 + (length-4)*4) | ((offset>>8)<<5).
	MOVL $0x01, SI
	LEAL -16(SI)(DX*4), DX
	MOVB CL, 1(AX)
	SHRL $0x08, CX
	SHLL $0x05, CX
	ORL  CX, DX
	MOVB DL, (AX)
	ADDQ $0x02, BX
	ADDQ $0x02, AX
	JMP  gen_emit_copy_end_snappy

emit_copy_three_standalone_snappy:
	// Three bytes: tag = low byte of 2 + (length-1)*4, then the 16-bit offset.
	MOVL $0x02, SI
	LEAL -4(SI)(DX*4), DX
	MOVB DL, (AX)
	MOVW CX, 1(AX)
	ADDQ $0x03, BX
	ADDQ $0x03, AX

gen_emit_copy_end_snappy:
	MOVQ BX, ret+40(FP)
	RET

// func matchLen(a []byte, b []byte) int
//
// matchLen returns the number of leading bytes that are equal in a and b,
// examining at most len(a) bytes. Only a_len is read; b_len is never
// consulted, so the caller must guarantee b holds at least len(a) bytes.
//
// Register use:
//   AX = a base, CX = b base
//   DX = bytes left to compare
//   SI = current index, and the result
//   BX = scratch
//
// SI is used for the index instead of BP: this function has a zero-size
// frame, so the assembler emits no BP save/restore and clobbering BP would
// corrupt the caller's frame pointer.
TEXT ·matchLen(SB), NOSPLIT, $0-56
	MOVQ a_base+0(FP), AX
	MOVQ b_base+24(FP), CX
	MOVQ a_len+8(FP), DX
	XORL SI, SI
	CMPL DX, $0x08
	JL   matchlen_single_standalone

matchlen_loopback_standalone:
	// Compare 8 bytes at once: XOR leaves a set bit wherever a and b differ.
	MOVQ  (AX)(SI*1), BX
	XORQ  (CX)(SI*1), BX
	TESTQ BX, BX
	JZ    matchlen_loop_standalone

	// Mismatch inside this word. The load is little-endian, so the index of
	// the lowest set bit divided by 8 is the count of equal leading bytes.
	BSFQ  BX, BX
	SARQ  $0x03, BX
	LEAL  (SI)(BX*1), SI
	JMP   gen_match_len_end

matchlen_loop_standalone:
	// All 8 bytes matched: advance and keep going while 8 or more remain.
	LEAL -8(DX), DX
	LEAL 8(SI), SI
	CMPL DX, $0x08
	JGE  matchlen_loopback_standalone

matchlen_single_standalone:
	// Fewer than 8 bytes left: finish one byte at a time.
	TESTL DX, DX
	JZ    gen_match_len_end

matchlen_single_loopback_standalone:
	MOVB (AX)(SI*1), BL
	CMPB (CX)(SI*1), BL
	JNE  gen_match_len_end
	LEAL 1(SI), SI
	DECL DX
	JNZ  matchlen_single_loopback_standalone

gen_match_len_end:
	MOVQ SI, ret+48(FP)
	RET
