/*
 * Decompiled with CFR 0.152.
 */
package org.graalvm.compiler.lir.amd64;

import java.util.function.BiConsumer;
import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.AllocatableValue;
import jdk.vm.ci.meta.Value;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address;
import org.graalvm.compiler.asm.amd64.AMD64Assembler;
import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
import org.graalvm.compiler.asm.amd64.AVXKind;
import org.graalvm.compiler.core.common.Stride;
import org.graalvm.compiler.debug.GraalError;
import org.graalvm.compiler.lir.LIRInstruction;
import org.graalvm.compiler.lir.LIRInstructionClass;
import org.graalvm.compiler.lir.StubPort;
import org.graalvm.compiler.lir.amd64.AMD64AESEncryptOp;
import org.graalvm.compiler.lir.amd64.AMD64HotSpotHelper;
import org.graalvm.compiler.lir.amd64.AMD64LIRInstruction;
import org.graalvm.compiler.lir.asm.ArrayDataPointerConstant;
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;

/**
 * LIR instruction that emits AMD64 machine code for AES counter-mode (CTR) crypt. According to the
 * {@link StubPort} annotation the emitted sequence is a port of the corresponding HotSpot stub.
 *
 * <p>
 * The emitted code has three phases:
 * <ol>
 * <li>a byte-wise pre-loop that consumes what is left of the encrypted counter block stored at
 * {@code encryptedCounterValue} (the number of bytes already used is read from, and written back
 * to, the int at {@code usedPtrValue});</li>
 * <li>a parallel loop that handles {@value #PARALLEL_FACTOR} blocks of {@value #AES_BLOCK_SIZE}
 * bytes per iteration;</li>
 * <li>a single-block loop including a tail handler for a final partial block, which also stores
 * the freshly encrypted counter block and the number of bytes used from it.</li>
 * </ol>
 * Phases 2 and 3 are emitted three times, once per supported round count (10, 12 or 14), and the
 * variant is chosen at run time from the int found at {@code key + lengthOffset}.
 *
 * <p>
 * All inputs are {@code @Alive} register operands, so none of them is modified; the result register
 * serves as the "remaining length" scratch register and finally receives a copy of
 * {@code lenValue}. The fixed registers listed in {@link #temps} (r11, rax, rbx, xmm0-xmm14) are
 * clobbered.
 */
@StubPort(path="src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp", lineStart=323, lineEnd=630, commit="090cdfc7a2e280c620a0926512fb67f0ce7f3c21", sha1="15d222b1d71c2bf1284277ca93b3c3e5c3dc6f05")
public final class AMD64CounterModeAESCryptOp
extends AMD64LIRInstruction {
    public static final LIRInstructionClass<AMD64CounterModeAESCryptOp> TYPE = LIRInstructionClass.create(AMD64CounterModeAESCryptOp.class);

    /** Displacement, relative to the key pointer, of the int used to select the round count. */
    private final int lengthOffset;

    /** Address of the input bytes (QWORD register). */
    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    private Value inValue;
    /** Address of the output bytes (QWORD register). */
    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    private Value outValue;
    /** Address of the round keys (QWORD register). */
    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    private Value keyValue;
    /** Address of the 16-byte counter; read at entry and written back at exit (QWORD register). */
    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    private Value counterValue;
    /** Number of bytes to process (DWORD register). */
    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    private Value lenValue;
    /** Address of the saved encrypted counter block (QWORD register). */
    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    private Value encryptedCounterValue;
    /** Address of the int holding how many bytes of the saved block are used (QWORD register). */
    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    private Value usedPtrValue;
    /** Result (DWORD register); receives a copy of {@link #lenValue} at the end. */
    @LIRInstruction.Def({LIRInstruction.OperandFlag.REG})
    protected Value resultValue;
    /** Fixed registers clobbered by the emitted code. */
    @LIRInstruction.Temp
    protected Value[] temps;

    /** Number of blocks processed per iteration of the parallel loop. */
    private static final int PARALLEL_FACTOR = 6;
    /** Size in bytes of one block (one XMM register). */
    private static final int AES_BLOCK_SIZE = 16;
    /**
     * XMM registers holding the {@link #PARALLEL_FACTOR} in-flight blocks of the parallel loop. The
     * first entry is also the working register of the single-block loop.
     */
    private static final Register[] XMM_RESULTS = {AMD64.xmm5, AMD64.xmm6, AMD64.xmm7, AMD64.xmm8, AMD64.xmm9, AMD64.xmm10};
    /**
     * Shuffle control used with {@code vpshufb} on the counter. Assuming the ints are laid out
     * little-endian, the control bytes are 15, 14, ..., 0, i.e. a full 16-byte reversal.
     */
    private static final ArrayDataPointerConstant counterShuffleMask = AMD64HotSpotHelper.pointerConstant(16, new int[]{0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203});

    /**
     * Creates the instruction.
     *
     * @param inValue register holding the input address
     * @param outValue register holding the output address
     * @param keyValue register holding the round key address
     * @param counterValue register holding the counter address
     * @param lenValue register holding the number of bytes to process
     * @param encryptedCounterValue register holding the address of the saved encrypted counter
     * @param usedPtrValue register holding the address of the "used bytes" int
     * @param resultValue register receiving the result
     * @param lengthOffset displacement from the key pointer of the int that selects the round count
     */
    public AMD64CounterModeAESCryptOp(AllocatableValue inValue, AllocatableValue outValue, AllocatableValue keyValue, AllocatableValue counterValue, AllocatableValue lenValue, AllocatableValue encryptedCounterValue, AllocatableValue usedPtrValue, AllocatableValue resultValue, int lengthOffset) {
        super(TYPE);
        this.inValue = inValue;
        this.outValue = outValue;
        this.keyValue = keyValue;
        this.counterValue = counterValue;
        this.lenValue = lenValue;
        this.encryptedCounterValue = encryptedCounterValue;
        this.usedPtrValue = usedPtrValue;
        this.resultValue = resultValue;
        this.lengthOffset = lengthOffset;
        this.temps = new Value[]{AMD64.r11.asValue(), AMD64.rax.asValue(), AMD64.rbx.asValue(), AMD64.xmm0.asValue(), AMD64.xmm1.asValue(), AMD64.xmm2.asValue(), AMD64.xmm3.asValue(), AMD64.xmm4.asValue(), AMD64.xmm5.asValue(), AMD64.xmm6.asValue(), AMD64.xmm7.asValue(), AMD64.xmm8.asValue(), AMD64.xmm9.asValue(), AMD64.xmm10.asValue(), AMD64.xmm11.asValue(), AMD64.xmm12.asValue(), AMD64.xmm13.asValue(), AMD64.xmm14.asValue()};
    }

    /**
     * Creates an array of {@code len} fresh, unbound labels.
     *
     * @param len number of labels to create
     * @return the new label array
     */
    static Label[] newLabels(int len) {
        Label[] labels = new Label[len];
        for (int i = 0; i < len; ++i) {
            labels[i] = new Label();
        }
        return labels;
    }

    /**
     * Creates a {@code lenDimension1 x lenDimension2} matrix of fresh, unbound labels.
     *
     * @param lenDimension1 number of rows
     * @param lenDimension2 number of labels per row
     * @return the new label matrix
     */
    private static Label[][] newLabels(int lenDimension1, int lenDimension2) {
        // Allocate only the outer array here; each row is created exactly once below.
        Label[][] labels = new Label[lenDimension1][];
        for (int i = 0; i < lenDimension1; ++i) {
            labels[i] = newLabels(lenDimension2);
        }
        return labels;
    }

    /**
     * Emits the machine code described in the class comment.
     *
     * @param crb compilation result builder, used for loop alignment and for recording the
     *            references to the shuffle mask constants
     * @param masm assembler receiving the instructions
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
        GraalError.guarantee(this.inValue.getPlatformKind().equals(AMD64Kind.QWORD), "Invalid inValue kind: %s", this.inValue);
        GraalError.guarantee(this.outValue.getPlatformKind().equals(AMD64Kind.QWORD), "Invalid outValue kind: %s", this.outValue);
        GraalError.guarantee(this.keyValue.getPlatformKind().equals(AMD64Kind.QWORD), "Invalid keyValue kind: %s", this.keyValue);
        GraalError.guarantee(this.counterValue.getPlatformKind().equals(AMD64Kind.QWORD), "Invalid counterValue kind: %s", this.counterValue);
        GraalError.guarantee(this.lenValue.getPlatformKind().equals(AMD64Kind.DWORD), "Invalid lenValue kind: %s", this.lenValue);
        GraalError.guarantee(this.encryptedCounterValue.getPlatformKind().equals(AMD64Kind.QWORD), "Invalid encryptedCounterValue kind: %s", this.encryptedCounterValue);
        GraalError.guarantee(this.usedPtrValue.getPlatformKind().equals(AMD64Kind.QWORD), "Invalid usedPtrValue kind: %s", this.usedPtrValue);
        GraalError.guarantee(this.resultValue.getPlatformKind().equals(AMD64Kind.DWORD), "Invalid resultValue kind: %s", this.resultValue);

        Register from = ValueUtil.asRegister(this.inValue);
        Register to = ValueUtil.asRegister(this.outValue);
        Register key = ValueUtil.asRegister(this.keyValue);
        Register counter = ValueUtil.asRegister(this.counterValue);
        // lenValue is an @Alive input and must stay intact, so the result register is used as the
        // "remaining length" scratch register and is overwritten with lenValue at the very end.
        Register lenReg = ValueUtil.asRegister(this.resultValue);
        Register savedEncCounterStart = ValueUtil.asRegister(this.encryptedCounterValue);
        Register usedAddr = ValueUtil.asRegister(this.usedPtrValue);
        Register used = AMD64.r11;
        Register pos = AMD64.rax;
        Register scratch = AMD64.rbx;

        Register xmmCounterShufMask = AMD64.xmm0;
        Register xmmKeyShufMask = AMD64.xmm1;
        Register xmmCurrCounter = AMD64.xmm2;
        Register xmmKeyTmp0 = AMD64.xmm3;
        Register xmmKeyTmp1 = AMD64.xmm4;
        Register xmmResult0 = XMM_RESULTS[0];
        // The last two input registers alias xmmKeyTmp0/xmmKeyTmp1: the input is only loaded after
        // all rounds of an iteration have been emitted, and the keys are reloaded every iteration.
        Register[] xmmFrom = {AMD64.xmm11, AMD64.xmm12, AMD64.xmm13, AMD64.xmm14, xmmKeyTmp0, xmmKeyTmp1};
        Register xmmFrom0 = xmmFrom[0];

        // Round counts of the three emitted variants.
        int[] rounds = {10, 12, 14};
        int variants = rounds.length;

        Label labelExitPreLoop = new Label();
        Label labelPreLoopStart = new Label();
        Label[] labelMultiBlockLoopTop = newLabels(variants);
        Label[] labelSingleBlockLoopTop = newLabels(variants);
        Label[][] labelIncCounter = newLabels(variants, PARALLEL_FACTOR);
        Label[] labelIncCounterSingle = newLabels(variants);
        Label[] labelProcessTailInsr = newLabels(variants);
        Label[] labelProcessTail4Insr = newLabels(variants);
        Label[] labelProcessTail2Insr = newLabels(variants);
        Label[] labelProcessTail1Insr = newLabels(variants);
        Label[] labelProcessTailExitInsr = newLabels(variants);
        Label[] labelProcessTail4Extr = newLabels(variants);
        Label[] labelProcessTail2Extr = newLabels(variants);
        Label[] labelProcessTail1Extr = newLabels(variants);
        Label[] labelProcessTailExitExtr = newLabels(variants);
        Label labelExit = new Label();

        masm.movl(lenReg, ValueUtil.asRegister(this.lenValue));
        masm.movl(used, new AMD64Address(usedAddr));

        // Load the counter and shuffle it into the working byte order.
        masm.movdqu(xmmCurrCounter, new AMD64Address(counter));
        masm.movdqu(xmmCounterShufMask, AMD64HotSpotHelper.recordExternalAddress(crb, counterShuffleMask));
        masm.vpshufb(xmmCurrCounter, xmmCurrCounter, xmmCounterShufMask, AVXKind.AVXSize.XMM);
        masm.movq(pos, 0L);

        // Pre-loop: XOR input bytes with the unused bytes of the saved encrypted counter block
        // until that block is exhausted (used >= 16) or no input is left.
        masm.bind(labelPreLoopStart);
        masm.cmplAndJcc(used, AES_BLOCK_SIZE, AMD64Assembler.ConditionFlag.AboveEqual, labelExitPreLoop, false);
        masm.cmplAndJcc(lenReg, 0, AMD64Assembler.ConditionFlag.LessEqual, labelExitPreLoop, false);
        masm.movb(scratch, new AMD64Address(savedEncCounterStart, used, Stride.S1));
        masm.xorb(scratch, new AMD64Address(from, pos, Stride.S1));
        masm.movb(new AMD64Address(to, pos, Stride.S1), scratch);
        masm.addq(pos, 1);
        masm.addl(used, 1);
        masm.subl(lenReg, 1);
        masm.jmp(labelPreLoopStart);

        masm.bind(labelExitPreLoop);
        masm.movl(new AMD64Address(usedAddr), used);

        // Dispatch on the int at key + lengthOffset: 52 selects the 12-round variant, 60 the
        // 14-round variant, anything else falls through to the 10-round variant.
        masm.movdqu(xmmKeyShufMask, AMD64HotSpotHelper.recordExternalAddress(crb, AMD64AESEncryptOp.keyShuffleMask));
        masm.movl(scratch, new AMD64Address(key, this.lengthOffset));
        masm.cmplAndJcc(scratch, 52, AMD64Assembler.ConditionFlag.Equal, labelMultiBlockLoopTop[1], false);
        masm.cmplAndJcc(scratch, 60, AMD64Assembler.ConditionFlag.Equal, labelMultiBlockLoopTop[2], false);

        for (int k = 0; k < variants; ++k) {
            // ---- Parallel loop: PARALLEL_FACTOR blocks per iteration ----
            masm.align(this.preferredLoopAlignment(crb));
            masm.bind(labelMultiBlockLoopTop[k]);
            // Leave the parallel loop only when fewer than PARALLEL_FACTOR full blocks remain, so
            // that exactly PARALLEL_FACTOR remaining blocks still take the parallel path.
            masm.cmplAndJcc(lenReg, PARALLEL_FACTOR * AES_BLOCK_SIZE, AMD64Assembler.ConditionFlag.Less, labelSingleBlockLoopTop[k], false);
            AMD64AESEncryptOp.loadKey(masm, xmmKeyTmp0, key, 0, xmmKeyShufMask);

            // Copy the current counter into every result register, then bump copy j by j; the
            // counter itself advances by PARALLEL_FACTOR.
            applyCTRDoSix(masm::movdqa, xmmCurrCounter);
            for (int j = 1; j < PARALLEL_FACTOR; ++j) {
                incCounter(masm, scratch, XMM_RESULTS[j], j, labelIncCounter[k][j - 1]);
            }
            incCounter(masm, scratch, xmmCurrCounter, PARALLEL_FACTOR, labelIncCounter[k][PARALLEL_FACTOR - 1]);
            applyCTRDoSix((dst, src) -> masm.vpshufb(dst, dst, src, AVXKind.AVXSize.XMM), xmmCounterShufMask);
            // Round 0: XOR with the first round key.
            applyCTRDoSix(masm::pxor, xmmKeyTmp0);

            // Remaining rounds, two round keys at a time; the very last one uses aesenclast.
            for (int i = 1; i < rounds[k]; i += 2) {
                AMD64AESEncryptOp.loadKey(masm, xmmKeyTmp1, key, i * AES_BLOCK_SIZE, xmmKeyShufMask);
                AMD64AESEncryptOp.loadKey(masm, xmmKeyTmp0, key, (i + 1) * AES_BLOCK_SIZE, xmmKeyShufMask);
                applyCTRDoSix(masm::aesenc, xmmKeyTmp1);
                if (i + 1 != rounds[k]) {
                    applyCTRDoSix(masm::aesenc, xmmKeyTmp0);
                } else {
                    applyCTRDoSix(masm::aesenclast, xmmKeyTmp0);
                }
            }

            // Load the input blocks, XOR them with the encrypted counters and store the output.
            for (int j = 0; j < PARALLEL_FACTOR; ++j) {
                masm.movdqu(xmmFrom[j], new AMD64Address(from, pos, Stride.S1, j * AES_BLOCK_SIZE));
            }
            for (int j = 0; j < PARALLEL_FACTOR; ++j) {
                masm.pxor(XMM_RESULTS[j], xmmFrom[j]);
            }
            for (int j = 0; j < PARALLEL_FACTOR; ++j) {
                masm.movdqu(new AMD64Address(to, pos, Stride.S1, j * AES_BLOCK_SIZE), XMM_RESULTS[j]);
            }
            masm.addq(pos, PARALLEL_FACTOR * AES_BLOCK_SIZE);
            masm.subl(lenReg, PARALLEL_FACTOR * AES_BLOCK_SIZE);
            masm.jmp(labelMultiBlockLoopTop[k]);

            // ---- Single-block loop ----
            masm.align(this.preferredLoopAlignment(crb));
            masm.bind(labelSingleBlockLoopTop[k]);
            masm.cmplAndJcc(lenReg, 0, AMD64Assembler.ConditionFlag.LessEqual, labelExit, false);
            AMD64AESEncryptOp.loadKey(masm, xmmKeyTmp0, key, 0, xmmKeyShufMask);
            masm.movdqa(xmmResult0, xmmCurrCounter);
            incCounter(masm, scratch, xmmCurrCounter, 1, labelIncCounterSingle[k]);
            masm.vpshufb(xmmResult0, xmmResult0, xmmCounterShufMask, AVXKind.AVXSize.XMM);
            masm.pxor(xmmResult0, xmmKeyTmp0);
            for (int i = 1; i < rounds[k]; ++i) {
                AMD64AESEncryptOp.loadKey(masm, xmmKeyTmp0, key, i * AES_BLOCK_SIZE, xmmKeyShufMask);
                masm.aesenc(xmmResult0, xmmKeyTmp0);
            }
            AMD64AESEncryptOp.loadKey(masm, xmmKeyTmp0, key, rounds[k] * AES_BLOCK_SIZE, xmmKeyShufMask);
            masm.aesenclast(xmmResult0, xmmKeyTmp0);
            // Less than one full block left: handle it in the tail code below.
            masm.cmplAndJcc(lenReg, AES_BLOCK_SIZE, AMD64Assembler.ConditionFlag.Less, labelProcessTailInsr[k], false);
            masm.movdqu(xmmFrom0, new AMD64Address(from, pos, Stride.S1, 0));
            masm.pxor(xmmResult0, xmmFrom0);
            masm.movdqu(new AMD64Address(to, pos, Stride.S1, 0), xmmResult0);
            masm.addq(pos, AES_BLOCK_SIZE);
            masm.subl(lenReg, AES_BLOCK_SIZE);
            masm.jmp(labelSingleBlockLoopTop[k]);

            // ---- Tail: 1..15 remaining bytes ----
            // Step 1: gather the remaining input bytes into xmmFrom0. pos is moved to the end of
            // the input and walked backwards in chunks of 8/4/2/1 bytes selected by the bits of
            // lenReg, shifting earlier chunks up before inserting the next one at lane 0.
            masm.bind(labelProcessTailInsr[k]);
            masm.addq(pos, lenReg);
            masm.testlAndJcc(lenReg, 8, AMD64Assembler.ConditionFlag.Zero, labelProcessTail4Insr[k], false);
            masm.subq(pos, 8);
            AMD64Assembler.VexRVMIOp.VPINSRQ.emit(masm, AVXKind.AVXSize.XMM, xmmFrom0, xmmFrom0, new AMD64Address(from, pos, Stride.S1), 0);
            masm.bind(labelProcessTail4Insr[k]);
            masm.testlAndJcc(lenReg, 4, AMD64Assembler.ConditionFlag.Zero, labelProcessTail2Insr[k], false);
            masm.subq(pos, 4);
            masm.pslldq(xmmFrom0, 4);
            AMD64Assembler.VexRVMIOp.VPINSRD.emit(masm, AVXKind.AVXSize.XMM, xmmFrom0, xmmFrom0, new AMD64Address(from, pos, Stride.S1), 0);
            masm.bind(labelProcessTail2Insr[k]);
            masm.testlAndJcc(lenReg, 2, AMD64Assembler.ConditionFlag.Zero, labelProcessTail1Insr[k], false);
            masm.subq(pos, 2);
            masm.pslldq(xmmFrom0, 2);
            AMD64Assembler.VexRVMIOp.VPINSRW.emit(masm, AVXKind.AVXSize.XMM, xmmFrom0, xmmFrom0, new AMD64Address(from, pos, Stride.S1), 0);
            masm.bind(labelProcessTail1Insr[k]);
            masm.testlAndJcc(lenReg, 1, AMD64Assembler.ConditionFlag.Zero, labelProcessTailExitInsr[k], false);
            masm.subq(pos, 1);
            masm.pslldq(xmmFrom0, 1);
            AMD64Assembler.VexRVMIOp.VPINSRB.emit(masm, AVXKind.AVXSize.XMM, xmmFrom0, xmmFrom0, new AMD64Address(from, pos, Stride.S1), 0);
            masm.bind(labelProcessTailExitInsr[k]);

            // Step 2: save the encrypted counter block for the pre-loop of a later invocation,
            // then XOR it with the gathered input bytes.
            masm.movdqu(new AMD64Address(savedEncCounterStart), xmmResult0);
            masm.pxor(xmmResult0, xmmFrom0);

            // Step 3: scatter the result to the output, walking pos forward in 8/4/2/1 byte
            // chunks and shifting consumed bytes out of xmmResult0.
            masm.testlAndJcc(lenReg, 8, AMD64Assembler.ConditionFlag.Zero, labelProcessTail4Extr[k], false);
            AMD64Assembler.VexMRIOp.VPEXTRQ.emit(masm, AVXKind.AVXSize.XMM, new AMD64Address(to, pos, Stride.S1), xmmResult0, 0);
            masm.psrldq(xmmResult0, 8);
            masm.addq(pos, 8);
            masm.bind(labelProcessTail4Extr[k]);
            masm.testlAndJcc(lenReg, 4, AMD64Assembler.ConditionFlag.Zero, labelProcessTail2Extr[k], false);
            AMD64Assembler.VexMRIOp.VPEXTRD.emit(masm, AVXKind.AVXSize.XMM, new AMD64Address(to, pos, Stride.S1), xmmResult0, 0);
            masm.psrldq(xmmResult0, 4);
            masm.addq(pos, 4);
            masm.bind(labelProcessTail2Extr[k]);
            masm.testlAndJcc(lenReg, 2, AMD64Assembler.ConditionFlag.Zero, labelProcessTail1Extr[k], false);
            AMD64Assembler.VexMRIOp.VPEXTRW.emit(masm, AVXKind.AVXSize.XMM, new AMD64Address(to, pos, Stride.S1), xmmResult0, 0);
            masm.psrldq(xmmResult0, 2);
            masm.addq(pos, 2);
            masm.bind(labelProcessTail1Extr[k]);
            masm.testlAndJcc(lenReg, 1, AMD64Assembler.ConditionFlag.Zero, labelProcessTailExitExtr[k], false);
            AMD64Assembler.VexMRIOp.VPEXTRB.emit(masm, AVXKind.AVXSize.XMM, new AMD64Address(to, pos, Stride.S1), xmmResult0, 0);
            masm.bind(labelProcessTailExitExtr[k]);

            // Record how many bytes of the saved encrypted counter block have been consumed.
            masm.movl(new AMD64Address(usedAddr), lenReg);
            masm.jmp(labelExit);
        }

        masm.bind(labelExit);
        // Shuffle the counter back to its memory byte order and store it.
        masm.vpshufb(xmmCurrCounter, xmmCurrCounter, xmmCounterShufMask, AVXKind.AVXSize.XMM);
        masm.movdqu(new AMD64Address(counter), xmmCurrCounter);
        // The result of the instruction is the (unmodified) input length.
        masm.movl(ValueUtil.asRegister(this.resultValue), ValueUtil.asRegister(this.lenValue));
    }

    /**
     * Emits code that adds {@code incDelta} to the 128-bit value in {@code xmmdst}: the delta is
     * added to the low quadword (lane 0) and, if that addition produces a carry, 1 is added to the
     * high quadword (lane 1).
     *
     * @param masm assembler receiving the instructions
     * @param reg general purpose scratch register, clobbered
     * @param xmmdst XMM register holding the value to increment
     * @param incDelta amount to add
     * @param nextBlock unbound label; bound by this method right after the emitted sequence
     */
    private static void incCounter(AMD64MacroAssembler masm, Register reg, Register xmmdst, int incDelta, Label nextBlock) {
        AMD64Assembler.VexMRIOp.VPEXTRQ.emit(masm, AVXKind.AVXSize.XMM, reg, xmmdst, 0);
        masm.addq(reg, incDelta);
        AMD64Assembler.VexRVMIOp.VPINSRQ.emit(masm, AVXKind.AVXSize.XMM, xmmdst, xmmdst, reg, 0);
        // The branch consumes the carry flag produced by the addq above.
        masm.jcc(AMD64Assembler.ConditionFlag.CarryClear, nextBlock);
        AMD64Assembler.VexMRIOp.VPEXTRQ.emit(masm, AVXKind.AVXSize.XMM, reg, xmmdst, 1);
        masm.addq(reg, 1);
        AMD64Assembler.VexRVMIOp.VPINSRQ.emit(masm, AVXKind.AVXSize.XMM, xmmdst, xmmdst, reg, 1);
        masm.bind(nextBlock);
    }

    /**
     * Applies {@code op} to each of the {@link #PARALLEL_FACTOR} result registers in order, passing
     * the result register as first and {@code src} as second argument.
     *
     * @param op two-register operation to emit
     * @param src second operand shared by all invocations
     */
    private static void applyCTRDoSix(BiConsumer<Register, Register> op, Register src) {
        for (Register xmmResult : XMM_RESULTS) {
            op.accept(xmmResult, src);
        }
    }
}

