/*
 * Decompiled with CFR 0.152.
 */
package org.graalvm.compiler.lir.amd64;

import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.AllocatableValue;
import jdk.vm.ci.meta.Value;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address;
import org.graalvm.compiler.asm.amd64.AMD64Assembler;
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
import org.graalvm.compiler.asm.amd64.AVXKind;
import org.graalvm.compiler.debug.GraalError;
import org.graalvm.compiler.lir.LIRInstruction;
import org.graalvm.compiler.lir.LIRInstructionClass;
import org.graalvm.compiler.lir.StubPort;
import org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper;
import org.graalvm.compiler.lir.amd64.AMD64LIRInstruction;
import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant;
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
import org.graalvm.compiler.lir.gen.LIRGeneratorTool;

@StubPort(path="src/hotspot/cpu/x86/stubGenerator_x86_64_ghash.cpp", lineStart=35, lineEnd=535, commit="090cdfc7a2e280c620a0926512fb67f0ce7f3c21", sha1="f76af2a4acc060dc34bd5c0597228a69145fd66c")
public final class AMD64GHASHProcessBlocksOp
extends AMD64LIRInstruction {
    public static final LIRInstructionClass<AMD64GHASHProcessBlocksOp> TYPE = LIRInstructionClass.create(AMD64GHASHProcessBlocksOp.class);
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value stateValue;
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value htblValue;
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value originalDataValue;
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value originalBlocksValue;
    @LIRInstruction.Temp
    protected Value dataValue;
    @LIRInstruction.Temp
    protected Value blocksValue;
    @LIRInstruction.Temp
    protected Value[] temps;
    private static ArrayDataPointerConstant ghashLongSwapMask = AMD64HotSpotHelper.pointerConstant(16, new int[]{185207048, 252579084, 50462976, 117835012});
    private static ArrayDataPointerConstant ghashByteSwapMask = AMD64HotSpotHelper.pointerConstant(16, new int[]{202182159, 134810123, 67438087, 66051});
    private static ArrayDataPointerConstant ghashShuffleMask = AMD64HotSpotHelper.pointerConstant(16, new int[]{0xF0F0F0F, 0xF0F0F0F, 0xF0F0F0F, 0xF0F0F0F});
    private static ArrayDataPointerConstant ghashPolynomial = AMD64HotSpotHelper.pointerConstant(16, new int[]{1, 0, 0, -1040187392});

    public AMD64GHASHProcessBlocksOp(LIRGeneratorTool tool, AllocatableValue stateValue, AllocatableValue htblValue, AllocatableValue originalDataValue, AllocatableValue originalBlocksValue) {
        super((LIRInstructionClass<? extends AMD64LIRInstruction>)TYPE);
        this.stateValue = stateValue;
        this.htblValue = htblValue;
        this.originalDataValue = originalDataValue;
        this.originalBlocksValue = originalBlocksValue;
        this.dataValue = tool.newVariable(originalDataValue.getValueKind());
        this.blocksValue = tool.newVariable(originalBlocksValue.getValueKind());
        this.temps = ((AMD64)tool.target().arch).getFeatures().contains(AMD64.CPUFeature.AVX) ? new Value[]{AMD64.rax.asValue(), AMD64.xmm0.asValue(), AMD64.xmm1.asValue(), AMD64.xmm2.asValue(), AMD64.xmm3.asValue(), AMD64.xmm4.asValue(), AMD64.xmm5.asValue(), AMD64.xmm6.asValue(), AMD64.xmm7.asValue(), AMD64.xmm8.asValue(), AMD64.xmm9.asValue(), AMD64.xmm10.asValue(), AMD64.xmm11.asValue(), AMD64.xmm13.asValue(), AMD64.xmm14.asValue(), AMD64.xmm15.asValue()} : new Value[]{AMD64.xmm0.asValue(), AMD64.xmm1.asValue(), AMD64.xmm2.asValue(), AMD64.xmm3.asValue(), AMD64.xmm4.asValue(), AMD64.xmm5.asValue(), AMD64.xmm6.asValue(), AMD64.xmm7.asValue(), AMD64.xmm8.asValue(), AMD64.xmm9.asValue(), AMD64.xmm10.asValue()};
    }

    @Override
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
        GraalError.guarantee(this.stateValue.getPlatformKind().equals(AMD64Kind.QWORD), "Invalid stateValue kind: %s", (Object)this.stateValue);
        GraalError.guarantee(this.htblValue.getPlatformKind().equals(AMD64Kind.QWORD), "Invalid htblValue kind: %s", (Object)this.htblValue);
        GraalError.guarantee(this.originalDataValue.getPlatformKind().equals(AMD64Kind.QWORD), "Invalid originalDataValue kind: %s", (Object)this.originalDataValue);
        GraalError.guarantee(this.originalBlocksValue.getPlatformKind().equals(AMD64Kind.DWORD), "Invalid originalBlocksValue kind: %s", (Object)this.originalBlocksValue);
        if (masm.supports(AMD64.CPUFeature.AVX)) {
            Label labelBeginProcess = new Label();
            Label labelBlock8Reduction = new Label();
            Label labelOneBlkInit = new Label();
            Label labelProcess1Block = new Label();
            Label labelProcess8Blocks = new Label();
            Label labelSaveState = new Label();
            Label labelExitGHASH = new Label();
            Register inputState = ValueUtil.asRegister((Value)this.stateValue);
            Register htbl = ValueUtil.asRegister((Value)this.htblValue);
            Register originalData = ValueUtil.asRegister((Value)this.originalDataValue);
            Register originalBlocks = ValueUtil.asRegister((Value)this.originalBlocksValue);
            Register inputData = ValueUtil.asRegister((Value)this.dataValue);
            Register blocks = ValueUtil.asRegister((Value)this.blocksValue);
            masm.movq(inputData, originalData);
            masm.movl(blocks, originalBlocks);
            Register data = AMD64.xmm1;
            Register state = AMD64.xmm0;
            Register tmp0 = AMD64.xmm3;
            Register tmp1 = AMD64.xmm4;
            Register tmp2 = AMD64.xmm5;
            Register tmp3 = AMD64.xmm6;
            Register bswapMask = AMD64.xmm2;
            Register lswapMask = AMD64.xmm14;
            masm.testlAndJcc(blocks, blocks, AMD64Assembler.ConditionFlag.Zero, labelExitGHASH, false);
            masm.movdqu(tmp2, new AMD64Address(htbl, 16));
            masm.vptest(tmp2, tmp2, AVXKind.AVXSize.XMM);
            masm.jcc(AMD64Assembler.ConditionFlag.NotZero, labelBeginProcess);
            AMD64GHASHProcessBlocksOp.generateHtblOneBlock(crb, masm, htbl);
            masm.bind(labelBeginProcess);
            masm.movdqu(lswapMask, AMD64HotSpotHelper.recordExternalAddress(crb, ghashLongSwapMask));
            masm.movdqu(state, new AMD64Address(inputState));
            masm.vpshufb(state, state, lswapMask, AVXKind.AVXSize.XMM);
            masm.cmplAndJcc(blocks, 8, AMD64Assembler.ConditionFlag.Below, labelOneBlkInit, false);
            masm.movdqu(tmp2, new AMD64Address(htbl, 128));
            masm.vptest(tmp2, tmp2, AVXKind.AVXSize.XMM);
            masm.jcc(AMD64Assembler.ConditionFlag.NotZero, labelProcess8Blocks);
            AMD64GHASHProcessBlocksOp.generateHtblEightBlocks(masm, htbl);
            masm.bind(labelProcess8Blocks);
            masm.subl(blocks, 8);
            masm.movdqu(bswapMask, AMD64HotSpotHelper.recordExternalAddress(crb, ghashByteSwapMask));
            masm.movdqu(data, new AMD64Address(inputData, 112));
            masm.vpshufb(data, data, bswapMask, AVXKind.AVXSize.XMM);
            masm.movdqu(AMD64.xmm15, new AMD64Address(htbl, 16));
            masm.vpclmulhqlqdq(tmp2, data, AMD64.xmm15);
            masm.vpclmullqlqdq(tmp0, data, AMD64.xmm15);
            masm.vpclmulhqhqdq(tmp1, data, AMD64.xmm15);
            masm.vpclmullqhqdq(tmp3, data, AMD64.xmm15);
            masm.vpxor(tmp2, tmp2, tmp3, AVXKind.AVXSize.XMM);
            masm.movdqu(data, new AMD64Address(inputData, 96));
            masm.vpshufb(data, data, bswapMask, AVXKind.AVXSize.XMM);
            AMD64GHASHProcessBlocksOp.schoolbookAAD(masm, 2, htbl, data, tmp0, tmp1, tmp2, tmp3);
            masm.movdqu(data, new AMD64Address(inputData, 80));
            masm.vpshufb(data, data, bswapMask, AVXKind.AVXSize.XMM);
            AMD64GHASHProcessBlocksOp.schoolbookAAD(masm, 3, htbl, data, tmp0, tmp1, tmp2, tmp3);
            masm.movdqu(data, new AMD64Address(inputData, 64));
            masm.vpshufb(data, data, bswapMask, AVXKind.AVXSize.XMM);
            AMD64GHASHProcessBlocksOp.schoolbookAAD(masm, 4, htbl, data, tmp0, tmp1, tmp2, tmp3);
            masm.movdqu(data, new AMD64Address(inputData, 48));
            masm.vpshufb(data, data, bswapMask, AVXKind.AVXSize.XMM);
            AMD64GHASHProcessBlocksOp.schoolbookAAD(masm, 5, htbl, data, tmp0, tmp1, tmp2, tmp3);
            masm.movdqu(data, new AMD64Address(inputData, 32));
            masm.vpshufb(data, data, bswapMask, AVXKind.AVXSize.XMM);
            AMD64GHASHProcessBlocksOp.schoolbookAAD(masm, 6, htbl, data, tmp0, tmp1, tmp2, tmp3);
            masm.movdqu(data, new AMD64Address(inputData, 16));
            masm.vpshufb(data, data, bswapMask, AVXKind.AVXSize.XMM);
            AMD64GHASHProcessBlocksOp.schoolbookAAD(masm, 7, htbl, data, tmp0, tmp1, tmp2, tmp3);
            masm.movdqu(data, new AMD64Address(inputData, 0));
            masm.vpshufb(data, data, bswapMask, AVXKind.AVXSize.XMM);
            masm.vpxor(data, data, state, AVXKind.AVXSize.XMM);
            AMD64GHASHProcessBlocksOp.schoolbookAAD(masm, 8, htbl, data, tmp0, tmp1, tmp2, tmp3);
            masm.vpslldq(tmp3, tmp2, 8, AVXKind.AVXSize.XMM);
            masm.vpsrldq(tmp2, tmp2, 8, AVXKind.AVXSize.XMM);
            masm.vpxor(tmp0, tmp0, tmp3, AVXKind.AVXSize.XMM);
            masm.vpxor(tmp1, tmp1, tmp2, AVXKind.AVXSize.XMM);
            masm.bind(labelBlock8Reduction);
            masm.vpslld(AMD64.xmm8, tmp0, 31, AVXKind.AVXSize.XMM);
            masm.vpslld(AMD64.xmm9, tmp0, 30, AVXKind.AVXSize.XMM);
            masm.vpslld(AMD64.xmm10, tmp0, 25, AVXKind.AVXSize.XMM);
            masm.vpxor(AMD64.xmm8, AMD64.xmm8, AMD64.xmm10, AVXKind.AVXSize.XMM);
            masm.vpxor(AMD64.xmm8, AMD64.xmm8, AMD64.xmm9, AVXKind.AVXSize.XMM);
            masm.vpslldq(AMD64.xmm9, AMD64.xmm8, 12, AVXKind.AVXSize.XMM);
            masm.vpsrldq(AMD64.xmm8, AMD64.xmm8, 4, AVXKind.AVXSize.XMM);
            masm.vpxor(tmp0, tmp0, AMD64.xmm9, AVXKind.AVXSize.XMM);
            masm.vpsrld(AMD64.xmm9, tmp0, 1, AVXKind.AVXSize.XMM);
            masm.vpsrld(AMD64.xmm10, tmp0, 2, AVXKind.AVXSize.XMM);
            masm.vpsrld(tmp2, tmp0, 7, AVXKind.AVXSize.XMM);
            masm.vpxor(AMD64.xmm9, AMD64.xmm9, AMD64.xmm10, AVXKind.AVXSize.XMM);
            masm.vpxor(AMD64.xmm9, AMD64.xmm9, tmp2, AVXKind.AVXSize.XMM);
            masm.vpxor(AMD64.xmm9, AMD64.xmm9, AMD64.xmm8, AVXKind.AVXSize.XMM);
            masm.vpxor(tmp0, AMD64.xmm9, tmp0, AVXKind.AVXSize.XMM);
            masm.vpxor(state, tmp0, tmp1, AVXKind.AVXSize.XMM);
            masm.leaq(inputData, new AMD64Address(inputData, 128));
            masm.cmplAndJcc(blocks, 8, AMD64Assembler.ConditionFlag.Below, labelOneBlkInit, false);
            masm.jmp(labelProcess8Blocks);
            masm.bind(labelOneBlkInit);
            masm.movdqu(tmp0, new AMD64Address(htbl, 16));
            masm.movdqu(bswapMask, AMD64HotSpotHelper.recordExternalAddress(crb, ghashByteSwapMask));
            masm.bind(labelProcess1Block);
            masm.cmplAndJcc(blocks, 0, AMD64Assembler.ConditionFlag.Equal, labelSaveState, false);
            masm.subl(blocks, 1);
            masm.movdqu(data, new AMD64Address(inputData));
            masm.vpshufb(data, data, bswapMask, AVXKind.AVXSize.XMM);
            masm.vpxor(state, state, data, AVXKind.AVXSize.XMM);
            AMD64GHASHProcessBlocksOp.gfmul(masm, tmp0, state);
            masm.addq(inputData, 16);
            masm.jmp(labelProcess1Block);
            masm.bind(labelSaveState);
            masm.vpshufb(state, state, lswapMask, AVXKind.AVXSize.XMM);
            masm.movdqu(new AMD64Address(inputState), state);
            masm.bind(labelExitGHASH);
            masm.vpxor(AMD64.xmm0, AMD64.xmm0, AMD64.xmm0, AVXKind.AVXSize.XMM);
            masm.vpxor(AMD64.xmm1, AMD64.xmm1, AMD64.xmm1, AVXKind.AVXSize.XMM);
            masm.vpxor(AMD64.xmm3, AMD64.xmm3, AMD64.xmm3, AVXKind.AVXSize.XMM);
            masm.vpxor(AMD64.xmm15, AMD64.xmm15, AMD64.xmm15, AVXKind.AVXSize.XMM);
        } else {
            Label labelGHASHLoop = new Label();
            Label labelExit = new Label();
            Register state = ValueUtil.asRegister((Value)this.stateValue);
            Register subkeyH = ValueUtil.asRegister((Value)this.htblValue);
            Register originalData = ValueUtil.asRegister((Value)this.originalDataValue);
            Register originalBlocks = ValueUtil.asRegister((Value)this.originalBlocksValue);
            Register data = ValueUtil.asRegister((Value)this.dataValue);
            Register blocks = ValueUtil.asRegister((Value)this.blocksValue);
            masm.movq(data, originalData);
            masm.movl(blocks, originalBlocks);
            Register xmmTemp0 = AMD64.xmm0;
            Register xmmTemp1 = AMD64.xmm1;
            Register xmmTemp2 = AMD64.xmm2;
            Register xmmTemp3 = AMD64.xmm3;
            Register xmmTemp4 = AMD64.xmm4;
            Register xmmTemp5 = AMD64.xmm5;
            Register xmmTemp6 = AMD64.xmm6;
            Register xmmTemp7 = AMD64.xmm7;
            Register xmmTemp8 = AMD64.xmm8;
            Register xmmTemp9 = AMD64.xmm9;
            Register xmmTemp10 = AMD64.xmm10;
            masm.movdqu(xmmTemp10, AMD64HotSpotHelper.recordExternalAddress(crb, ghashLongSwapMask));
            masm.movdqu(xmmTemp0, new AMD64Address(state));
            masm.pshufb(xmmTemp0, xmmTemp10);
            masm.bind(labelGHASHLoop);
            masm.movdqu(xmmTemp2, new AMD64Address(data));
            masm.pshufb(xmmTemp2, AMD64HotSpotHelper.recordExternalAddress(crb, ghashByteSwapMask));
            masm.movdqu(xmmTemp1, new AMD64Address(subkeyH));
            masm.pshufb(xmmTemp1, xmmTemp10);
            masm.pxor(xmmTemp0, xmmTemp2);
            masm.movdqu(xmmTemp3, xmmTemp0);
            masm.pclmulqdq(xmmTemp3, xmmTemp1, 0);
            masm.movdqu(xmmTemp4, xmmTemp0);
            masm.pclmulqdq(xmmTemp4, xmmTemp1, 16);
            masm.movdqu(xmmTemp5, xmmTemp0);
            masm.pclmulqdq(xmmTemp5, xmmTemp1, 1);
            masm.movdqu(xmmTemp6, xmmTemp0);
            masm.pclmulqdq(xmmTemp6, xmmTemp1, 17);
            masm.pxor(xmmTemp4, xmmTemp5);
            masm.movdqu(xmmTemp5, xmmTemp4);
            masm.psrldq(xmmTemp4, 8);
            masm.pslldq(xmmTemp5, 8);
            masm.pxor(xmmTemp3, xmmTemp5);
            masm.pxor(xmmTemp6, xmmTemp4);
            masm.movdqu(xmmTemp7, xmmTemp3);
            masm.movdqu(xmmTemp8, xmmTemp6);
            masm.pslld(xmmTemp3, 1);
            masm.pslld(xmmTemp6, 1);
            masm.psrld(xmmTemp7, 31);
            masm.psrld(xmmTemp8, 31);
            masm.movdqu(xmmTemp9, xmmTemp7);
            masm.pslldq(xmmTemp8, 4);
            masm.pslldq(xmmTemp7, 4);
            masm.psrldq(xmmTemp9, 12);
            masm.por(xmmTemp3, xmmTemp7);
            masm.por(xmmTemp6, xmmTemp8);
            masm.por(xmmTemp6, xmmTemp9);
            masm.movdqu(xmmTemp7, xmmTemp3);
            masm.movdqu(xmmTemp8, xmmTemp3);
            masm.movdqu(xmmTemp9, xmmTemp3);
            masm.pslld(xmmTemp7, 31);
            masm.pslld(xmmTemp8, 30);
            masm.pslld(xmmTemp9, 25);
            masm.pxor(xmmTemp7, xmmTemp8);
            masm.pxor(xmmTemp7, xmmTemp9);
            masm.movdqu(xmmTemp8, xmmTemp7);
            masm.pslldq(xmmTemp7, 12);
            masm.psrldq(xmmTemp8, 4);
            masm.pxor(xmmTemp3, xmmTemp7);
            masm.movdqu(xmmTemp2, xmmTemp3);
            masm.movdqu(xmmTemp4, xmmTemp3);
            masm.movdqu(xmmTemp5, xmmTemp3);
            masm.psrld(xmmTemp2, 1);
            masm.psrld(xmmTemp4, 2);
            masm.psrld(xmmTemp5, 7);
            masm.pxor(xmmTemp2, xmmTemp4);
            masm.pxor(xmmTemp2, xmmTemp5);
            masm.pxor(xmmTemp2, xmmTemp8);
            masm.pxor(xmmTemp3, xmmTemp2);
            masm.pxor(xmmTemp6, xmmTemp3);
            masm.declAndJcc(blocks, AMD64Assembler.ConditionFlag.Zero, labelExit, false);
            masm.movdqu(xmmTemp0, xmmTemp6);
            masm.addq(data, 16);
            masm.jmp(labelGHASHLoop);
            masm.bind(labelExit);
            masm.pshufb(xmmTemp6, xmmTemp10);
            masm.movdqu(new AMD64Address(state), xmmTemp6);
        }
    }

    private static void schoolbookAAD(AMD64MacroAssembler masm, int i, Register htbl, Register data, Register tmp0, Register tmp1, Register tmp2, Register tmp3) {
        masm.movdqu(AMD64.xmm15, new AMD64Address(htbl, i * 16));
        masm.vpclmulhqlqdq(tmp3, data, AMD64.xmm15);
        masm.vpxor(tmp2, tmp2, tmp3, AVXKind.AVXSize.XMM);
        masm.vpclmullqlqdq(tmp3, data, AMD64.xmm15);
        masm.vpxor(tmp0, tmp0, tmp3, AVXKind.AVXSize.XMM);
        masm.vpclmulhqhqdq(tmp3, data, AMD64.xmm15);
        masm.vpxor(tmp1, tmp1, tmp3, AVXKind.AVXSize.XMM);
        masm.vpclmullqhqdq(tmp3, data, AMD64.xmm15);
        masm.vpxor(tmp2, tmp2, tmp3, AVXKind.AVXSize.XMM);
    }

    private static void gfmul(AMD64MacroAssembler masm, Register tmp0, Register state) {
        Register tmp1 = AMD64.xmm4;
        Register tmp2 = AMD64.xmm5;
        Register tmp3 = AMD64.xmm6;
        Register tmp4 = AMD64.xmm7;
        masm.vpclmullqlqdq(tmp1, state, tmp0);
        masm.vpclmulhqhqdq(tmp4, state, tmp0);
        masm.vpclmullqhqdq(tmp2, state, tmp0);
        masm.vpclmulhqlqdq(tmp3, state, tmp0);
        masm.vpxor(tmp2, tmp2, tmp3, AVXKind.AVXSize.XMM);
        masm.vpslldq(tmp3, tmp2, 8, AVXKind.AVXSize.XMM);
        masm.vpsrldq(tmp2, tmp2, 8, AVXKind.AVXSize.XMM);
        masm.vpxor(tmp1, tmp1, tmp3, AVXKind.AVXSize.XMM);
        masm.vpxor(tmp4, tmp4, tmp2, AVXKind.AVXSize.XMM);
        masm.vpslld(AMD64.xmm8, tmp1, 31, AVXKind.AVXSize.XMM);
        masm.vpslld(AMD64.xmm9, tmp1, 30, AVXKind.AVXSize.XMM);
        masm.vpslld(AMD64.xmm10, tmp1, 25, AVXKind.AVXSize.XMM);
        masm.vpxor(AMD64.xmm8, AMD64.xmm8, AMD64.xmm9, AVXKind.AVXSize.XMM);
        masm.vpxor(AMD64.xmm8, AMD64.xmm8, AMD64.xmm10, AVXKind.AVXSize.XMM);
        masm.vpslldq(AMD64.xmm9, AMD64.xmm8, 12, AVXKind.AVXSize.XMM);
        masm.vpsrldq(AMD64.xmm8, AMD64.xmm8, 4, AVXKind.AVXSize.XMM);
        masm.vpxor(tmp1, tmp1, AMD64.xmm9, AVXKind.AVXSize.XMM);
        masm.vpsrld(AMD64.xmm9, tmp1, 1, AVXKind.AVXSize.XMM);
        masm.vpsrld(AMD64.xmm10, tmp1, 2, AVXKind.AVXSize.XMM);
        masm.vpsrld(AMD64.xmm11, tmp1, 7, AVXKind.AVXSize.XMM);
        masm.vpxor(AMD64.xmm9, AMD64.xmm9, AMD64.xmm10, AVXKind.AVXSize.XMM);
        masm.vpxor(AMD64.xmm9, AMD64.xmm9, AMD64.xmm11, AVXKind.AVXSize.XMM);
        masm.vpxor(AMD64.xmm9, AMD64.xmm9, AMD64.xmm8, AVXKind.AVXSize.XMM);
        masm.vpxor(tmp1, tmp1, AMD64.xmm9, AVXKind.AVXSize.XMM);
        masm.vpxor(state, tmp4, tmp1, AVXKind.AVXSize.XMM);
    }

    private static void generateHtblOneBlock(CompilationResultBuilder crb, AMD64MacroAssembler masm, Register htbl) {
        Register t = AMD64.xmm13;
        masm.movdqu(t, new AMD64Address(htbl));
        masm.movdqu(AMD64.xmm10, AMD64HotSpotHelper.recordExternalAddress(crb, ghashLongSwapMask));
        masm.vpshufb(t, t, AMD64.xmm10, AVXKind.AVXSize.XMM);
        masm.vpsrld(AMD64.xmm3, t, 7, AVXKind.AVXSize.XMM);
        masm.movdqu(AMD64.xmm4, AMD64HotSpotHelper.recordExternalAddress(crb, ghashShuffleMask));
        masm.vpshufb(AMD64.xmm3, AMD64.xmm3, AMD64.xmm4, AVXKind.AVXSize.XMM);
        masm.movl(AMD64.rax, 65280);
        masm.movdl(AMD64.xmm4, AMD64.rax);
        masm.vpshufb(AMD64.xmm4, AMD64.xmm4, AMD64.xmm3, AVXKind.AVXSize.XMM);
        masm.movdqu(AMD64.xmm5, AMD64HotSpotHelper.recordExternalAddress(crb, ghashPolynomial));
        masm.vpand(AMD64.xmm5, AMD64.xmm5, AMD64.xmm4, AVXKind.AVXSize.XMM);
        masm.vpsrld(AMD64.xmm3, t, 31, AVXKind.AVXSize.XMM);
        masm.vpslld(AMD64.xmm4, t, 1, AVXKind.AVXSize.XMM);
        masm.vpslldq(AMD64.xmm3, AMD64.xmm3, 4, AVXKind.AVXSize.XMM);
        masm.vpxor(t, AMD64.xmm4, AMD64.xmm3, AVXKind.AVXSize.XMM);
        masm.vpxor(t, t, AMD64.xmm5, AVXKind.AVXSize.XMM);
        masm.movdqu(new AMD64Address(htbl, 16), t);
    }

    private static void generateHtblEightBlocks(AMD64MacroAssembler masm, Register htbl) {
        Register t = AMD64.xmm13;
        Register tmp0 = AMD64.xmm1;
        masm.movdqu(t, new AMD64Address(htbl, 16));
        masm.movdqu(tmp0, t);
        AMD64GHASHProcessBlocksOp.gfmul(masm, tmp0, t);
        masm.movdqu(new AMD64Address(htbl, 32), t);
        AMD64GHASHProcessBlocksOp.gfmul(masm, tmp0, t);
        masm.movdqu(new AMD64Address(htbl, 48), t);
        AMD64GHASHProcessBlocksOp.gfmul(masm, tmp0, t);
        masm.movdqu(new AMD64Address(htbl, 64), t);
        AMD64GHASHProcessBlocksOp.gfmul(masm, tmp0, t);
        masm.movdqu(new AMD64Address(htbl, 80), t);
        AMD64GHASHProcessBlocksOp.gfmul(masm, tmp0, t);
        masm.movdqu(new AMD64Address(htbl, 96), t);
        AMD64GHASHProcessBlocksOp.gfmul(masm, tmp0, t);
        masm.movdqu(new AMD64Address(htbl, 112), t);
        AMD64GHASHProcessBlocksOp.gfmul(masm, tmp0, t);
        masm.movdqu(new AMD64Address(htbl, 128), t);
    }
}

