/*
 * Decompiled with CFR 0.152.
 */
package org.graalvm.compiler.asm.amd64;

import java.util.function.IntConsumer;
import java.util.function.Supplier;
import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.TargetDescription;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.amd64.AMD64Address;
import org.graalvm.compiler.asm.amd64.AMD64Assembler;
import org.graalvm.compiler.asm.amd64.AMD64BaseAssembler;
import org.graalvm.compiler.asm.amd64.AVXKind;
import org.graalvm.compiler.core.common.NumUtil;
import org.graalvm.compiler.core.common.Stride;
import org.graalvm.compiler.debug.GraalError;
import org.graalvm.compiler.options.OptionValues;

public class AMD64MacroAssembler
extends AMD64Assembler {
    /**
     * Value 232 (0xE8). Going by the name, this is the opcode byte of a direct call instruction.
     * NOTE(review): the visible methods compare against the literal 232 rather than this
     * constant, presumably because the constant was inlined before decompilation.
     */
    private static final int DIRECT_CALL_INSTRUCTION_CODE = 232;
    /**
     * Value 5. Going by the name, this is the encoded size in bytes of a direct call instruction.
     * NOTE(review): the visible methods use the literal 5 rather than this constant.
     */
    private static final int DIRECT_CALL_INSTRUCTION_SIZE = 5;

    /**
     * Creates a macro assembler by forwarding {@code target} to the superclass constructor.
     *
     * @param target the target description passed on to {@link AMD64Assembler}
     */
    public AMD64MacroAssembler(TargetDescription target) {
        super(target);
    }

    /**
     * Creates a macro assembler by forwarding all arguments to the superclass constructor.
     *
     * @param target the target description passed on to {@link AMD64Assembler}
     * @param optionValues option values passed on to {@link AMD64Assembler}
     * @param hasIntelJccErratum passed on unchanged; presumably enables the JCC erratum
     *            mitigation used by the fused-pair helpers of this class (confirm in the superclass)
     */
    public AMD64MacroAssembler(TargetDescription target, OptionValues optionValues, boolean hasIntelJccErratum) {
        super(target, optionValues, hasIntelJccErratum);
    }

    /**
     * Decrements {@code reg} by one; shorthand for {@code decrementq(reg, 1)}.
     *
     * @param reg the register to decrement
     */
    public final void decrementq(Register reg) {
        decrementq(reg, 1);
    }

    /**
     * Subtracts the constant {@code value} from {@code reg} using the {@code q}-suffixed
     * instructions.
     * <ul>
     * <li>{@code Integer.MIN_VALUE} cannot be negated, so it is handed to {@code subq} as is.</li>
     * <li>Other negative values are turned into an increment by {@code -value}.</li>
     * <li>Zero emits nothing, one emits {@code decq}, anything else emits {@code subq}.</li>
     * </ul>
     *
     * @param reg the register to modify
     * @param value the amount to subtract
     */
    public final void decrementq(Register reg, int value) {
        if (value == Integer.MIN_VALUE) {
            subq(reg, value);
        } else if (value < 0) {
            incrementq(reg, -value);
        } else if (value == 1) {
            decq(reg);
        } else if (value != 0) {
            subq(reg, value);
        }
    }

    /**
     * Subtracts the constant {@code value} from the memory operand {@code dst} using the
     * {@code q}-suffixed instructions. {@code Integer.MIN_VALUE} goes straight to {@code subq}
     * because it cannot be negated; other negative values become an increment; zero emits
     * nothing; one emits {@code decq}.
     *
     * @param dst the memory operand to modify
     * @param value the amount to subtract
     */
    public final void decrementq(AMD64Address dst, int value) {
        if (value == Integer.MIN_VALUE) {
            subq(dst, value);
        } else if (value < 0) {
            incrementq(dst, -value);
        } else if (value == 1) {
            decq(dst);
        } else if (value != 0) {
            subq(dst, value);
        }
    }

    /**
     * Increments {@code reg} by one; shorthand for {@code incrementq(reg, 1)}.
     *
     * @param reg the register to increment
     */
    public final void incrementq(Register reg) {
        incrementq(reg, 1);
    }

    /**
     * Adds the constant {@code value} to {@code reg} using the {@code q}-suffixed instructions.
     * {@code Integer.MIN_VALUE} goes straight to {@code addq} because it cannot be negated;
     * other negative values become a decrement by {@code -value}; zero emits nothing; one emits
     * {@code incq}; anything else emits {@code addq}.
     *
     * @param reg the register to modify
     * @param value the amount to add
     */
    public final void incrementq(Register reg, int value) {
        if (value == Integer.MIN_VALUE) {
            addq(reg, value);
        } else if (value < 0) {
            decrementq(reg, -value);
        } else if (value == 1) {
            incq(reg);
        } else if (value != 0) {
            addq(reg, value);
        }
    }

    /**
     * Adds the constant {@code value} to the memory operand {@code dst} using the
     * {@code q}-suffixed instructions. {@code Integer.MIN_VALUE} goes straight to {@code addq}
     * because it cannot be negated; other negative values become a decrement; zero emits
     * nothing; one emits {@code incq}.
     *
     * @param dst the memory operand to modify
     * @param value the amount to add
     */
    public final void incrementq(AMD64Address dst, int value) {
        if (value == Integer.MIN_VALUE) {
            addq(dst, value);
        } else if (value < 0) {
            decrementq(dst, -value);
        } else if (value == 1) {
            incq(dst);
        } else if (value != 0) {
            addq(dst, value);
        }
    }

    /**
     * Pointer-sized load: emits {@code movq} from {@code src} into {@code dst}.
     *
     * @param dst destination register
     * @param src source memory operand
     */
    public final void movptr(Register dst, AMD64Address src) {
        movq(dst, src);
    }

    /**
     * Pointer-sized store: emits {@code movq} from {@code src} into {@code dst}.
     *
     * @param dst destination memory operand
     * @param src source register
     */
    public final void movptr(AMD64Address dst, Register src) {
        movq(dst, src);
    }

    /**
     * Pointer-sized store of an {@code int} constant: emits {@code movslq} with {@code src} as
     * the immediate.
     *
     * @param dst destination memory operand
     * @param src the constant to store
     */
    public final void movptr(AMD64Address dst, int src) {
        movslq(dst, src);
    }

    /**
     * Pointer-sized compare of two registers: emits {@code cmpq}.
     *
     * @param src1 first operand
     * @param src2 second operand
     */
    public final void cmpptr(Register src1, Register src2) {
        cmpq(src1, src2);
    }

    /**
     * Pointer-sized compare of a register with a memory operand: emits {@code cmpq}.
     *
     * @param src1 first operand (register)
     * @param src2 second operand (memory)
     */
    public final void cmpptr(Register src1, AMD64Address src2) {
        cmpq(src1, src2);
    }

    /**
     * Decrements {@code reg} by one; shorthand for {@code decrementl(reg, 1)}.
     *
     * @param reg the register to decrement
     */
    public final void decrementl(Register reg) {
        decrementl(reg, 1);
    }

    /**
     * Subtracts the constant {@code value} from {@code reg} using the {@code l}-suffixed
     * instructions. {@code Integer.MIN_VALUE} goes straight to {@code subl} because it cannot be
     * negated; other negative values become an increment by {@code -value}; zero emits nothing;
     * one emits {@code decl}; anything else emits {@code subl}.
     *
     * @param reg the register to modify
     * @param value the amount to subtract
     */
    public final void decrementl(Register reg, int value) {
        if (value == Integer.MIN_VALUE) {
            subl(reg, value);
        } else if (value < 0) {
            incrementl(reg, -value);
        } else if (value == 1) {
            decl(reg);
        } else if (value != 0) {
            subl(reg, value);
        }
    }

    /**
     * Subtracts the constant {@code value} from the memory operand {@code dst} using the
     * {@code l}-suffixed instructions. {@code Integer.MIN_VALUE} goes straight to {@code subl}
     * because it cannot be negated; other negative values become an increment; zero emits
     * nothing; one emits {@code decl}.
     *
     * @param dst the memory operand to modify
     * @param value the amount to subtract
     */
    public final void decrementl(AMD64Address dst, int value) {
        if (value == Integer.MIN_VALUE) {
            subl(dst, value);
        } else if (value < 0) {
            incrementl(dst, -value);
        } else if (value == 1) {
            decl(dst);
        } else if (value != 0) {
            subl(dst, value);
        }
    }

    /**
     * Adds the constant {@code value} to {@code reg} using the {@code l}-suffixed instructions.
     * {@code Integer.MIN_VALUE} goes straight to {@code addl} because it cannot be negated;
     * other negative values become a decrement by {@code -value}; zero emits nothing; one emits
     * {@code incl}; anything else emits {@code addl}.
     *
     * @param reg the register to modify
     * @param value the amount to add
     */
    public final void incrementl(Register reg, int value) {
        if (value == Integer.MIN_VALUE) {
            addl(reg, value);
        } else if (value < 0) {
            decrementl(reg, -value);
        } else if (value == 1) {
            incl(reg);
        } else if (value != 0) {
            addl(reg, value);
        }
    }

    /**
     * Adds the constant {@code value} to the memory operand {@code dst} using the
     * {@code l}-suffixed instructions. {@code Integer.MIN_VALUE} goes straight to {@code addl}
     * because it cannot be negated; other negative values become a decrement; zero emits
     * nothing; one emits {@code incl}.
     *
     * @param dst the memory operand to modify
     * @param value the amount to add
     */
    public final void incrementl(AMD64Address dst, int value) {
        if (value == Integer.MIN_VALUE) {
            addl(dst, value);
        } else if (value < 0) {
            decrementl(dst, -value);
        } else if (value == 1) {
            incl(dst);
        } else if (value != 0) {
            addl(dst, value);
        }
    }

    /**
     * Register-to-register move between two XMM-category registers. Emits {@code movaps} unless
     * either register is reported by {@code isAVX512Register}, in which case the
     * {@code VMOVAPS} op is emitted with XMM size.
     *
     * @param dst destination register (asserted to be in the XMM category)
     * @param src source register (asserted to be in the XMM category)
     */
    public final void movflt(Register dst, Register src) {
        assert (dst.getRegisterCategory().equals((Object)AMD64.XMM) && src.getRegisterCategory().equals((Object)AMD64.XMM));
        boolean touchesAvx512Register = isAVX512Register(dst) || isAVX512Register(src);
        if (!touchesAvx512Register) {
            movaps(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVAPS.emit((AMD64Assembler)this, AVXKind.AVXSize.XMM, dst, src);
    }

    /**
     * Loads from {@code src} into an XMM-category register. Emits {@code movss} unless
     * {@code dst} is reported by {@code isAVX512Register}, in which case the {@code VMOVSS} op
     * is emitted with XMM size.
     *
     * @param dst destination register (asserted to be in the XMM category)
     * @param src source memory operand
     */
    public final void movflt(Register dst, AMD64Address src) {
        assert (dst.getRegisterCategory().equals((Object)AMD64.XMM));
        if (!isAVX512Register(dst)) {
            movss(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVSS.emit((AMD64Assembler)this, AVXKind.AVXSize.XMM, dst, src);
    }

    /**
     * Stores an XMM-category register to {@code dst}. Emits {@code movss} unless {@code src} is
     * reported by {@code isAVX512Register}, in which case the {@code VMOVSS} op is emitted with
     * XMM size.
     *
     * @param dst destination memory operand
     * @param src source register (asserted to be in the XMM category)
     */
    public final void movflt(AMD64Address dst, Register src) {
        assert (src.getRegisterCategory().equals((Object)AMD64.XMM));
        if (!isAVX512Register(src)) {
            movss(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVSS.emit((AMD64Assembler)this, AVXKind.AVXSize.XMM, dst, src);
    }

    /**
     * Register-to-register move between two XMM-category registers. Emits {@code movapd} unless
     * either register is reported by {@code isAVX512Register}, in which case the
     * {@code VMOVAPD} op is emitted with XMM size.
     *
     * @param dst destination register (asserted to be in the XMM category)
     * @param src source register (asserted to be in the XMM category)
     */
    public final void movdbl(Register dst, Register src) {
        assert (dst.getRegisterCategory().equals((Object)AMD64.XMM) && src.getRegisterCategory().equals((Object)AMD64.XMM));
        boolean touchesAvx512Register = isAVX512Register(dst) || isAVX512Register(src);
        if (!touchesAvx512Register) {
            movapd(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVAPD.emit((AMD64Assembler)this, AVXKind.AVXSize.XMM, dst, src);
    }

    /**
     * Loads from {@code src} into an XMM-category register. Emits {@code movsd} unless
     * {@code dst} is reported by {@code isAVX512Register}, in which case the {@code VMOVSD} op
     * is emitted with XMM size.
     *
     * @param dst destination register (asserted to be in the XMM category)
     * @param src source memory operand
     */
    public final void movdbl(Register dst, AMD64Address src) {
        assert (dst.getRegisterCategory().equals((Object)AMD64.XMM));
        if (!isAVX512Register(dst)) {
            movsd(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVSD.emit((AMD64Assembler)this, AVXKind.AVXSize.XMM, dst, src);
    }

    /**
     * Stores an XMM-category register to {@code dst}. Emits {@code movsd} unless {@code src} is
     * reported by {@code isAVX512Register}, in which case the {@code VMOVSD} op is emitted with
     * XMM size.
     *
     * @param dst destination memory operand
     * @param src source register (asserted to be in the XMM category)
     */
    public final void movdbl(AMD64Address dst, Register src) {
        assert (src.getRegisterCategory().equals((Object)AMD64.XMM));
        if (!isAVX512Register(src)) {
            movsd(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVSD.emit((AMD64Assembler)this, AVXKind.AVXSize.XMM, dst, src);
    }

    /**
     * Stores the 64-bit constant {@code src} to the memory operand {@code dst}.
     * <p>
     * If {@code NumUtil.isInt(src)} holds, a single QWORD-sized {@code MOV} with an {@code int}
     * immediate is emitted. Otherwise the constant is written with two {@code movl} stores: the
     * low 32 bits at {@code dst} and the high 32 bits at the same address with the displacement
     * increased by 4.
     *
     * @param dst destination memory operand
     * @param src the constant to store
     */
    public final void movlong(AMD64Address dst, long src) {
        if (NumUtil.isInt(src)) {
            AMD64Assembler.AMD64MIOp.MOV.emit((AMD64Assembler)this, AMD64BaseAssembler.OperandSize.QWORD, dst, (int)src);
            return;
        }
        // Same operand as dst, shifted by 4 bytes, for the upper half of the constant.
        AMD64Address high = new AMD64Address(dst.getBase(), dst.getIndex(), dst.getScale(), dst.getDisplacement() + 4, dst.getDisplacementAnnotation(), dst.instructionStartPosition);
        // The narrowing cast keeps exactly the low 32 bits. The previous all-ones 64-bit mask
        // was a no-op and only looked like a low-word mask.
        movl(dst, (int)src);
        movl(high, (int)(src >> 32));
    }

    /**
     * Materializes condition {@code cc} in {@code dst}: emits {@code setb} on {@code dst}
     * followed by {@code movzbl} of {@code dst} onto itself.
     *
     * @param cc the condition to test
     * @param dst the register receiving the result
     */
    public final void setl(AMD64Assembler.ConditionFlag cc, Register dst) {
        setb(cc, dst);
        movzbl(dst, dst);
    }

    /**
     * Materializes condition {@code cc} in {@code dst}: emits {@code setb} on {@code dst}
     * followed by {@code movzbq} of {@code dst} onto itself.
     *
     * @param cc the condition to test
     * @param dst the register receiving the result
     */
    public final void setq(AMD64Assembler.ConditionFlag cc, Register dst) {
        setb(cc, dst);
        movzbq(dst, dst);
    }

    /**
     * Emits an x87 logarithm sequence. A constant is loaded first ({@code fldlg2} when
     * {@code base10}, else {@code fldln2}), then {@code value} is loaded through {@code tmp},
     * {@code fyl2x} is executed, and the result is stored back through {@code tmp} into
     * {@code dest}.
     *
     * @param dest XMM register receiving the result
     * @param value XMM register holding the input
     * @param base10 selects {@code fldlg2} instead of {@code fldln2}
     * @param tmp memory slot used to move values to and from the x87 stack
     */
    public final void flog(Register dest, Register value, boolean base10, AMD64Address tmp) {
        // The constant must be loaded before the operand.
        if (!base10) {
            fldln2();
        } else {
            fldlg2();
        }
        trigPrologue(value, tmp);
        fyl2x();
        trigEpilogue(dest, tmp);
    }

    /**
     * Emits the x87 {@code fsin} instruction on {@code value}: the input is loaded through
     * {@code tmp}, {@code fsin} is executed, and the result is stored through {@code tmp} into
     * {@code dest}.
     *
     * @param dest XMM register receiving the result
     * @param value XMM register holding the input
     * @param tmp memory slot used to move values to and from the x87 stack
     */
    public final void fsin(Register dest, Register value, AMD64Address tmp) {
        trigPrologue(value, tmp);
        fsin();
        trigEpilogue(dest, tmp);
    }

    /**
     * Emits the x87 {@code fcos} instruction on {@code value}: the input is loaded through
     * {@code tmp}, {@code fcos} is executed, and the result is stored through {@code tmp} into
     * {@code dest}.
     *
     * @param dest XMM register receiving the result
     * @param value XMM register holding the input
     * @param tmp memory slot used to move values to and from the x87 stack
     */
    public final void fcos(Register dest, Register value, AMD64Address tmp) {
        trigPrologue(value, tmp);
        fcos();
        trigEpilogue(dest, tmp);
    }

    /**
     * Emits the x87 {@code fptan} instruction on {@code value}: the input is loaded through
     * {@code tmp}, {@code fptan} is executed, {@code fstp(0)} follows, and the result is stored
     * through {@code tmp} into {@code dest}.
     *
     * @param dest XMM register receiving the result
     * @param value XMM register holding the input
     * @param tmp memory slot used to move values to and from the x87 stack
     */
    public final void ftan(Register dest, Register value, AMD64Address tmp) {
        trigPrologue(value, tmp);
        fptan();
        // Presumably drops the extra value fptan leaves on top of the x87 stack.
        fstp(0);
        trigEpilogue(dest, tmp);
    }

    /**
     * Emits {@code ffree(0)} followed by {@code fincstp}; per the method name this pops the x87
     * stack.
     */
    public final void fpop() {
        ffree(0);
        fincstp();
    }

    /**
     * Moves {@code value} onto the x87 stack: stores it to {@code tmp} with {@code movdbl} and
     * loads it from there with {@code fldd}.
     *
     * @param value source register (asserted to be in the XMM category)
     * @param tmp memory slot used for the transfer
     */
    private void trigPrologue(Register value, AMD64Address tmp) {
        assert (value.getRegisterCategory().equals((Object)AMD64.XMM));
        movdbl(tmp, value);
        fldd(tmp);
    }

    /**
     * Moves the x87 result into {@code dest}: stores it to {@code tmp} with {@code fstpd} and
     * loads it from there with {@code movdbl}.
     *
     * @param dest destination register (asserted to be in the XMM category)
     * @param tmp memory slot used for the transfer
     */
    private void trigEpilogue(Register dest, AMD64Address tmp) {
        assert (dest.getRegisterCategory().equals((Object)AMD64.XMM));
        fstpd(tmp);
        movdbl(dest, tmp);
    }

    /**
     * Prepares the code position for a direct call that will be emitted after
     * {@code prefixInstructionSize} bytes of prefix instructions. Alignment padding is emitted
     * first; then the JCC erratum mitigation is applied for a 5-byte instruction starting after
     * the prefix. If the mitigation reports a non-zero result, the alignment is applied again.
     *
     * @param align whether the call displacement should be aligned
     * @param prefixInstructionSize bytes emitted between the current position and the call
     */
    public final void alignBeforeCall(boolean align, int prefixInstructionSize) {
        emitAlignmentForDirectCall(align, prefixInstructionSize);
        int callStart = position() + prefixInstructionSize;
        if (mitigateJCCErratum(callStart, 5) == 0) {
            return;
        }
        // The mitigation changed the position, so the alignment has to be redone.
        emitAlignmentForDirectCall(align, prefixInstructionSize);
    }

    /**
     * When {@code align} is set, pads with {@code nop} bytes so that the position of the call
     * displacement becomes a multiple of 4. That position is the current position plus
     * {@code getMachineCodeCallDisplacementOffset()} plus {@code additionalInstructionSize}.
     *
     * @param align whether any padding should be emitted at all
     * @param additionalInstructionSize bytes that will precede the call instruction
     */
    private void emitAlignmentForDirectCall(boolean align, int additionalInstructionSize) {
        if (!align) {
            return;
        }
        int displacementPos = position() + getMachineCodeCallDisplacementOffset() + additionalInstructionSize;
        int misalignment = displacementPos % 4;
        if (misalignment != 0) {
            nop(4 - misalignment);
        }
    }

    /**
     * Emits an indirect call through {@code callReg} without the direct-call decoding
     * mitigation.
     *
     * @param callReg register holding the call target
     * @return the code position immediately before the call instruction
     */
    public final int indirectCall(Register callReg) {
        return indirectCall(callReg, false);
    }

    /**
     * Emits an indirect call through {@code callReg}.
     * <p>
     * The call is 3 bytes when {@code needsRex(callReg)} holds and 2 bytes otherwise. With
     * {@code mitigateDecodingAsDirectCall} set, the method makes sure the 5 bytes ending at the
     * end of the call do not start with 0xE8 (the value of
     * {@code DIRECT_CALL_INSTRUCTION_CODE}). If that byte would lie before the start of the
     * buffer or currently is 0xE8, {@code nop} padding is inserted, reduced by the padding the
     * JCC erratum mitigation already inserted. The property is verified after the call is
     * emitted.
     *
     * @param callReg register holding the call target
     * @param mitigateDecodingAsDirectCall whether to guard against the call being decodable as
     *            a direct call
     * @return the code position immediately before the call instruction
     */
    public final int indirectCall(Register callReg, boolean mitigateDecodingAsDirectCall) {
        final int indirectCallSize;
        if (needsRex(callReg)) {
            indirectCallSize = 3;
        } else {
            indirectCallSize = 2;
        }
        final int insertedNops = mitigateJCCErratum(indirectCallSize);
        if (mitigateDecodingAsDirectCall) {
            // Where a 5-byte instruction ending at the end of the call would begin.
            int candidatePos = position() - (5 - indirectCallSize);
            if (candidatePos < 0 || getByte(candidatePos) == 0xE8) {
                int prefixNops = 5 - indirectCallSize - insertedNops;
                if (prefixNops > 0) {
                    nop(prefixNops);
                }
            }
        }
        final int beforeCall = position();
        call(callReg);
        assert (beforeCall + indirectCallSize == position());
        if (mitigateDecodingAsDirectCall) {
            int candidatePos = position() - 5;
            GraalError.guarantee(candidatePos >= 0 && getByte(candidatePos) != 0xE8, "This indirect call can be decoded as a direct call.");
        }
        return beforeCall;
    }

    /**
     * Calls an absolute address by loading it into {@code scratch} with {@code movq} and
     * calling through the register. The JCC erratum mitigation is applied to the whole sequence,
     * which is 13 bytes when {@code needsRex(scratch)} holds and 12 bytes otherwise.
     *
     * @param address the absolute call target
     * @param scratch register clobbered with {@code address}
     * @return the code position immediately before the {@code movq}
     */
    public final int directCall(long address, Register scratch) {
        final int sequenceSize;
        if (needsRex(scratch)) {
            sequenceSize = 13;
        } else {
            sequenceSize = 12;
        }
        mitigateJCCErratum(sequenceSize);
        final int sequenceStart = position();
        movq(scratch, address);
        call(scratch);
        assert (sequenceStart + sequenceSize == position());
        return sequenceStart;
    }

    /**
     * Jumps to an absolute address by loading it into {@code scratch} with {@code movq} and
     * emitting {@code jmpWithoutAlignment} through the register. The JCC erratum mitigation is
     * applied to the whole sequence, which is 13 bytes when {@code needsRex(scratch)} holds and
     * 12 bytes otherwise.
     *
     * @param address the absolute jump target
     * @param scratch register clobbered with {@code address}
     * @return the code position immediately before the {@code movq}
     */
    public final int directJmp(long address, Register scratch) {
        final int sequenceSize;
        if (needsRex(scratch)) {
            sequenceSize = 13;
        } else {
            sequenceSize = 12;
        }
        mitigateJCCErratum(sequenceSize);
        final int sequenceStart = position();
        movq(scratch, address);
        jmpWithoutAlignment(scratch);
        assert (sequenceStart + sequenceSize == position());
        return sequenceStart;
    }

    /**
     * Applies the JCC erratum mitigation for an instruction of {@code prevOpInBytes} bytes that
     * is immediately followed by a conditional jump. The size handed to
     * {@code mitigateJCCErratum} is {@code prevOpInBytes} plus 2 or 6. These are presumably the
     * encoded sizes of the short and long jump forms; confirm against {@code jcc}.
     *
     * @param branchTarget the jump target; may be {@code null} or unbound
     * @param isShortJmp whether the caller forces the short jump form
     * @param prevOpInBytes encoded size of the instruction preceding the jump; must be below 26
     */
    private void alignFusedPair(Label branchTarget, boolean isShortJmp, int prevOpInBytes) {
        assert (prevOpInBytes < 26) : "Fused pair may be longer than 0x20 bytes.";
        if (branchTarget == null) {
            // No target to measure against: reserve the larger jump size.
            this.mitigateJCCErratum(prevOpInBytes + 6);
        } else if (isShortJmp) {
            this.mitigateJCCErratum(prevOpInBytes + 2);
        } else if (!branchTarget.isBound()) {
            // Target position not known yet: reserve the larger jump size.
            this.mitigateJCCErratum(prevOpInBytes + 6);
        } else {
            // Bound target: measure the displacement from where the jump would start.
            long disp = branchTarget.position() - (this.position() + prevOpInBytes);
            if (NumUtil.isByte(disp - 2L)) {
                this.mitigateJCCErratum(prevOpInBytes + 2);
                // position() is read again because the mitigation call above may have moved it;
                // the byte-range check is repeated against the new position.
                disp = branchTarget.position() - (this.position() + prevOpInBytes);
                if (NumUtil.isByte(disp - 2L)) {
                    return;
                }
            }
            // The displacement does not (or no longer) fit a byte: use the larger jump size.
            this.mitigateJCCErratum(prevOpInBytes + 6);
        }
    }

    /**
     * Emits {@code op} on a register with an immediate, directly followed by a conditional
     * jump, after aligning the pair via {@code alignFusedPair}.
     *
     * @param op the register/immediate operation
     * @param size operand size
     * @param src register operand
     * @param imm32 immediate operand
     * @param cc jump condition
     * @param branchTarget jump target
     * @param isShortJmp whether to force the short jump form
     * @param annotateImm forwarded to {@code op.emit}
     * @param applyBeforeFusedPair if non-null, receives the start position of the pair before
     *            anything is emitted
     */
    private void applyMIOpAndJcc(AMD64Assembler.AMD64MIOp op, AMD64BaseAssembler.OperandSize size, Register src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp, boolean annotateImm, IntConsumer applyBeforeFusedPair) {
        final int opSize = getPrefixInBytes(size, src, op.srcIsByte) + 1 + 1 + op.immediateSize(size);
        alignFusedPair(branchTarget, isShortJmp, opSize);
        final int pairStart = position();
        if (applyBeforeFusedPair != null) {
            applyBeforeFusedPair.accept(pairStart);
        }
        op.emit((AMD64Assembler)this, size, src, imm32, annotateImm);
        // The size estimate used for alignment must match what was actually emitted.
        assert (pairStart + opSize == position());
        jcc(cc, branchTarget, isShortJmp);
        assert (ensureWithinBoundary(pairStart));
    }

    /**
     * Emits {@code op} on a memory operand with an immediate, directly followed by a
     * conditional jump, after aligning the pair via {@code alignFusedPair}.
     *
     * @param op the memory/immediate operation
     * @param size operand size
     * @param src memory operand
     * @param imm32 immediate operand
     * @param cc jump condition
     * @param branchTarget jump target
     * @param isShortJmp whether to force the short jump form
     * @param annotateImm forwarded to {@code op.emit}
     * @param applyBeforeFusedPair if non-null, receives the start position of the pair before
     *            anything is emitted
     */
    private void applyMIOpAndJcc(AMD64Assembler.AMD64MIOp op, AMD64BaseAssembler.OperandSize size, AMD64Address src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp, boolean annotateImm, IntConsumer applyBeforeFusedPair) {
        final int opSize = getPrefixInBytes(size, src) + 1 + addressInBytes(src) + op.immediateSize(size);
        alignFusedPair(branchTarget, isShortJmp, opSize);
        final int pairStart = position();
        if (applyBeforeFusedPair != null) {
            applyBeforeFusedPair.accept(pairStart);
        }
        op.emit((AMD64Assembler)this, size, src, imm32, annotateImm);
        // The size estimate used for alignment must match what was actually emitted.
        assert (pairStart + opSize == position());
        jcc(cc, branchTarget, isShortJmp);
        assert (ensureWithinBoundary(pairStart));
    }

    /**
     * Emits {@code op} on two registers, directly followed by a conditional jump, after
     * aligning the pair via {@code alignFusedPair}.
     *
     * @param op the register/register operation
     * @param size operand size
     * @param src1 first register operand
     * @param src2 second register operand
     * @param cc jump condition
     * @param branchTarget jump target
     * @param isShortJmp whether to force the short jump form
     * @return the code position of the conditional jump
     */
    private int applyRMOpAndJcc(AMD64Assembler.AMD64RMOp op, AMD64BaseAssembler.OperandSize size, Register src1, Register src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final int opSize = getPrefixInBytes(size, src1, op.dstIsByte, src2, op.srcIsByte) + 1 + 1;
        alignFusedPair(branchTarget, isShortJmp, opSize);
        final int pairStart = position();
        op.emit((AMD64Assembler)this, size, src1, src2);
        final int jccStart = position();
        // The size estimate used for alignment must match what was actually emitted.
        assert (pairStart + opSize == jccStart);
        jcc(cc, branchTarget, isShortJmp);
        assert (ensureWithinBoundary(pairStart));
        return jccStart;
    }

    /**
     * Emits {@code op} on a register and a memory operand, directly followed by a conditional
     * jump, after aligning the pair via {@code alignFusedPair}.
     *
     * @param op the register/memory operation
     * @param size operand size
     * @param src1 register operand
     * @param src2 memory operand
     * @param cc jump condition
     * @param branchTarget jump target
     * @param isShortJmp whether to force the short jump form
     * @param applyBeforeFusedPair if non-null, receives the start position of the pair before
     *            anything is emitted
     * @return the code position of the conditional jump
     */
    private int applyRMOpAndJcc(AMD64Assembler.AMD64RMOp op, AMD64BaseAssembler.OperandSize size, Register src1, AMD64Address src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp, IntConsumer applyBeforeFusedPair) {
        final int opSize = getPrefixInBytes(size, src1, op.dstIsByte, src2) + 1 + addressInBytes(src2);
        alignFusedPair(branchTarget, isShortJmp, opSize);
        final int pairStart = position();
        if (applyBeforeFusedPair != null) {
            applyBeforeFusedPair.accept(pairStart);
        }
        op.emit((AMD64Assembler)this, size, src1, src2);
        final int jccStart = position();
        // The size estimate used for alignment must match what was actually emitted.
        assert (pairStart + opSize == jccStart);
        jcc(cc, branchTarget, isShortJmp);
        assert (ensureWithinBoundary(pairStart));
        return jccStart;
    }

    /**
     * Emits the single-operand operation {@code op} on {@code dst}, directly followed by a
     * conditional jump, after aligning the pair via {@code alignFusedPair}.
     *
     * @param op the single-operand operation
     * @param size operand size
     * @param dst register operand
     * @param cc jump condition
     * @param branchTarget jump target
     * @param isShortJmp whether to force the short jump form
     */
    public void applyMOpAndJcc(AMD64Assembler.AMD64MOp op, AMD64BaseAssembler.OperandSize size, Register dst, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final int opSize = getPrefixInBytes(size, dst, op.srcIsByte) + 1 + 1;
        alignFusedPair(branchTarget, isShortJmp, opSize);
        final int pairStart = position();
        op.emit((AMD64Assembler)this, size, dst);
        // The size estimate used for alignment must match what was actually emitted.
        assert (pairStart + opSize == position());
        jcc(cc, branchTarget, isShortJmp);
        assert (ensureWithinBoundary(pairStart));
    }

    /**
     * Emits {@code TEST src, imm32} of the given size fused with a conditional jump. The
     * immediate is not annotated and no pre-emit callback is used.
     */
    public final void testAndJcc(AMD64BaseAssembler.OperandSize size, Register src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64Assembler.AMD64MIOp testOp = AMD64Assembler.AMD64MIOp.TEST;
        applyMIOpAndJcc(testOp, size, src, imm32, cc, branchTarget, isShortJmp, false, null);
    }

    /**
     * DWORD-sized {@code TEST src, imm32} fused with a conditional jump. Routed through the
     * size-generic {@code testAndJcc} overload, which issues the same {@code applyMIOpAndJcc}
     * call.
     */
    public final void testlAndJcc(Register src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        testAndJcc(AMD64BaseAssembler.OperandSize.DWORD, src, imm32, cc, branchTarget, isShortJmp);
    }

    /**
     * QWORD-sized {@code TEST src, imm32} fused with a conditional jump. Routed through the
     * size-generic {@code testAndJcc} overload, which issues the same {@code applyMIOpAndJcc}
     * call.
     */
    public final void testqAndJcc(Register src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        testAndJcc(AMD64BaseAssembler.OperandSize.QWORD, src, imm32, cc, branchTarget, isShortJmp);
    }

    /**
     * Emits {@code TEST} of a memory operand against {@code imm32} fused with a conditional
     * jump. The immediate is not annotated; {@code applyBeforeFusedPair} (may be null) is
     * handed the start position of the pair.
     */
    public final void testAndJcc(AMD64BaseAssembler.OperandSize size, AMD64Address src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp, IntConsumer applyBeforeFusedPair) {
        final AMD64Assembler.AMD64MIOp testOp = AMD64Assembler.AMD64MIOp.TEST;
        applyMIOpAndJcc(testOp, size, src, imm32, cc, branchTarget, isShortJmp, false, applyBeforeFusedPair);
    }

    /**
     * Emits {@code TEST src1, src2} of the given size fused with a conditional jump. The jump
     * position returned by the helper is discarded.
     */
    public final void testAndJcc(AMD64BaseAssembler.OperandSize size, Register src1, Register src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64Assembler.AMD64RMOp testOp = AMD64Assembler.AMD64RMOp.TEST;
        applyRMOpAndJcc(testOp, size, src1, src2, cc, branchTarget, isShortJmp);
    }

    /**
     * DWORD-sized {@code TEST src1, src2} fused with a conditional jump. Routed through the
     * size-generic {@code testAndJcc} overload, which issues the same {@code applyRMOpAndJcc}
     * call.
     */
    public final void testlAndJcc(Register src1, Register src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        testAndJcc(AMD64BaseAssembler.OperandSize.DWORD, src1, src2, cc, branchTarget, isShortJmp);
    }

    /**
     * QWORD-sized {@code TEST src1, src2} fused with a conditional jump. Routed through the
     * size-generic {@code testAndJcc} overload, which issues the same {@code applyRMOpAndJcc}
     * call.
     */
    public final void testqAndJcc(Register src1, Register src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        testAndJcc(AMD64BaseAssembler.OperandSize.QWORD, src1, src2, cc, branchTarget, isShortJmp);
    }

    /**
     * Emits {@code TEST} of a register against a memory operand fused with a conditional jump,
     * without a pre-emit callback. The jump position returned by the helper is discarded.
     */
    public final void testAndJcc(AMD64BaseAssembler.OperandSize size, Register src1, AMD64Address src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64Assembler.AMD64RMOp testOp = AMD64Assembler.AMD64RMOp.TEST;
        applyRMOpAndJcc(testOp, size, src1, src2, cc, branchTarget, isShortJmp, null);
    }

    /**
     * Emits {@code TEST} of a register against a memory operand fused with a conditional jump.
     * {@code applyBeforeFusedPair} (may be null) is handed the start position of the pair. The
     * jump position returned by the helper is discarded.
     */
    public final void testAndJcc(AMD64BaseAssembler.OperandSize size, Register src1, AMD64Address src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp, IntConsumer applyBeforeFusedPair) {
        final AMD64Assembler.AMD64RMOp testOp = AMD64Assembler.AMD64RMOp.TEST;
        applyRMOpAndJcc(testOp, size, src1, src2, cc, branchTarget, isShortJmp, applyBeforeFusedPair);
    }

    /**
     * BYTE-sized {@code TESTB src1, src2} fused with a conditional jump. The jump position
     * returned by the helper is discarded.
     */
    public final void testbAndJcc(Register src1, Register src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64BaseAssembler.OperandSize size = AMD64BaseAssembler.OperandSize.BYTE;
        applyRMOpAndJcc(AMD64Assembler.AMD64RMOp.TESTB, size, src1, src2, cc, branchTarget, isShortJmp);
    }

    /**
     * BYTE-sized {@code TESTB} of a register against a memory operand fused with a conditional
     * jump, without a pre-emit callback. The jump position returned by the helper is discarded.
     */
    public final void testbAndJcc(Register src1, AMD64Address src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64BaseAssembler.OperandSize size = AMD64BaseAssembler.OperandSize.BYTE;
        applyRMOpAndJcc(AMD64Assembler.AMD64RMOp.TESTB, size, src1, src2, cc, branchTarget, isShortJmp, null);
    }

    /**
     * Emits {@code CMP src, imm32} of the given size fused with a conditional jump. The opcode
     * variant is chosen by {@code NumUtil.isByte(imm32)}; the immediate is not annotated and no
     * pre-emit callback is used.
     */
    public final void cmpAndJcc(AMD64BaseAssembler.OperandSize size, Register src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64Assembler.AMD64MIOp cmpOp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getMIOpcode(size, NumUtil.isByte(imm32));
        applyMIOpAndJcc(cmpOp, size, src, imm32, cc, branchTarget, isShortJmp, false, null);
    }

    /**
     * Emits {@code CMP src, imm32} of the given size fused with a conditional jump. The opcode
     * variant is chosen by {@code NumUtil.isByte(imm32)}; {@code annotateImm} and
     * {@code applyBeforeFusedPair} (may be null) are forwarded to the helper.
     */
    public final void cmpAndJcc(AMD64BaseAssembler.OperandSize size, Register src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp, boolean annotateImm, IntConsumer applyBeforeFusedPair) {
        final AMD64Assembler.AMD64MIOp cmpOp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getMIOpcode(size, NumUtil.isByte(imm32));
        applyMIOpAndJcc(cmpOp, size, src, imm32, cc, branchTarget, isShortJmp, annotateImm, applyBeforeFusedPair);
    }

    /**
     * DWORD-sized {@code CMP src, imm32} fused with a conditional jump. Routed through the
     * size-generic {@code cmpAndJcc} overload, which issues the same {@code applyMIOpAndJcc}
     * call.
     */
    public final void cmplAndJcc(Register src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        cmpAndJcc(AMD64BaseAssembler.OperandSize.DWORD, src, imm32, cc, branchTarget, isShortJmp);
    }

    /**
     * QWORD-sized {@code CMP src, imm32} fused with a conditional jump. Routed through the
     * size-generic {@code cmpAndJcc} overload, which issues the same {@code applyMIOpAndJcc}
     * call.
     */
    public final void cmpqAndJcc(Register src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        cmpAndJcc(AMD64BaseAssembler.OperandSize.QWORD, src, imm32, cc, branchTarget, isShortJmp);
    }

    /**
     * Emits {@code CMP} of a memory operand against {@code imm32} fused with a conditional
     * jump. The opcode variant is chosen by {@code NumUtil.isByte(imm32)}; the immediate is not
     * annotated and no pre-emit callback is used.
     */
    public final void cmpAndJcc(AMD64BaseAssembler.OperandSize size, AMD64Address src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64Assembler.AMD64MIOp cmpOp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getMIOpcode(size, NumUtil.isByte(imm32));
        applyMIOpAndJcc(cmpOp, size, src, imm32, cc, branchTarget, isShortJmp, false, null);
    }

    /**
     * Emits {@code CMP} of a memory operand against {@code imm32} fused with a conditional
     * jump. The opcode variant is chosen by {@code NumUtil.isByte(imm32)}; {@code annotateImm}
     * and {@code applyBeforeFusedPair} (may be null) are forwarded to the helper.
     */
    public final void cmpAndJcc(AMD64BaseAssembler.OperandSize size, AMD64Address src, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp, boolean annotateImm, IntConsumer applyBeforeFusedPair) {
        final AMD64Assembler.AMD64MIOp cmpOp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getMIOpcode(size, NumUtil.isByte(imm32));
        applyMIOpAndJcc(cmpOp, size, src, imm32, cc, branchTarget, isShortJmp, annotateImm, applyBeforeFusedPair);
    }

    /**
     * Emits {@code CMP src1, src2} of the given size fused with a conditional jump. The jump
     * position returned by the helper is discarded.
     */
    public final void cmpAndJcc(AMD64BaseAssembler.OperandSize size, Register src1, Register src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64Assembler.AMD64RMOp cmpOp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(size);
        applyRMOpAndJcc(cmpOp, size, src1, src2, cc, branchTarget, isShortJmp);
    }

    /**
     * Emits {@code CMP} of a register against a memory operand fused with a conditional jump,
     * without a pre-emit callback. The jump position returned by the helper is discarded.
     */
    public final void cmpAndJcc(AMD64BaseAssembler.OperandSize size, Register src1, AMD64Address src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64Assembler.AMD64RMOp cmpOp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(size);
        applyRMOpAndJcc(cmpOp, size, src1, src2, cc, branchTarget, isShortJmp, null);
    }

    /**
     * DWORD-sized {@code CMP src1, src2} fused with a conditional jump. Routed through the
     * size-generic {@code cmpAndJcc} overload, which issues the same {@code applyRMOpAndJcc}
     * call.
     */
    public final void cmplAndJcc(Register src1, Register src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        cmpAndJcc(AMD64BaseAssembler.OperandSize.DWORD, src1, src2, cc, branchTarget, isShortJmp);
    }

    /**
     * QWORD-sized {@code CMP src1, src2} fused with a conditional jump.
     *
     * @return the code position of the conditional jump
     */
    public final int cmpqAndJcc(Register src1, Register src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64BaseAssembler.OperandSize size = AMD64BaseAssembler.OperandSize.QWORD;
        final AMD64Assembler.AMD64RMOp cmpOp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(size);
        return applyRMOpAndJcc(cmpOp, size, src1, src2, cc, branchTarget, isShortJmp);
    }

    /**
     * Emits {@code CMP} of a register against a memory operand fused with a conditional jump.
     * {@code applyBeforeFusedPair} (may be null) is handed the start position of the pair. The
     * jump position returned by the helper is discarded.
     */
    public final void cmpAndJcc(AMD64BaseAssembler.OperandSize size, Register src1, AMD64Address src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp, IntConsumer applyBeforeFusedPair) {
        final AMD64Assembler.AMD64RMOp cmpOp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(size);
        applyRMOpAndJcc(cmpOp, size, src1, src2, cc, branchTarget, isShortJmp, applyBeforeFusedPair);
    }

    /**
     * DWORD-sized {@code CMP} of a register against a memory operand fused with a conditional
     * jump. Routed through the size-generic {@code cmpAndJcc} overload, which issues the same
     * {@code applyRMOpAndJcc} call with a null callback.
     */
    public final void cmplAndJcc(Register src1, AMD64Address src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        cmpAndJcc(AMD64BaseAssembler.OperandSize.DWORD, src1, src2, cc, branchTarget, isShortJmp);
    }

    /**
     * QWORD-sized {@code CMP} of a register against a memory operand fused with a conditional
     * jump, without a pre-emit callback.
     *
     * @return the code position of the conditional jump
     */
    public final int cmpqAndJcc(Register src1, AMD64Address src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64BaseAssembler.OperandSize size = AMD64BaseAssembler.OperandSize.QWORD;
        final AMD64Assembler.AMD64RMOp cmpOp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(size);
        return applyRMOpAndJcc(cmpOp, size, src1, src2, cc, branchTarget, isShortJmp, null);
    }

    /**
     * QWORD-sized {@code CMP} of a register against a memory operand fused with a conditional
     * jump. {@code applyBeforeFusedPair} (may be null) is handed the start position of the
     * pair.
     *
     * @return the code position of the conditional jump
     */
    public final int cmpqAndJcc(Register src1, AMD64Address src2, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp, IntConsumer applyBeforeFusedPair) {
        final AMD64BaseAssembler.OperandSize size = AMD64BaseAssembler.OperandSize.QWORD;
        final AMD64Assembler.AMD64RMOp cmpOp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(size);
        return applyRMOpAndJcc(cmpOp, size, src1, src2, cc, branchTarget, isShortJmp, applyBeforeFusedPair);
    }

    /**
     * Emits {@code CMP} of a register against a memory operand that is only produced after the
     * pair has been aligned. The size of the compare is estimated from a placeholder address
     * obtained from {@code getPlaceholder} for the current position; the real address is
     * requested from {@code src2} once alignment is done. The jump is never forced to the short
     * form.
     *
     * @param size operand size
     * @param src1 register operand
     * @param src2 supplier of the memory operand, invoked after alignment
     * @param cc jump condition
     * @param branchTarget jump target
     */
    public final void cmpAndJcc(AMD64BaseAssembler.OperandSize size, Register src1, Supplier<AMD64Address> src2, AMD64Assembler.ConditionFlag cc, Label branchTarget) {
        final AMD64Address sizingAddress = getPlaceholder(position());
        final AMD64Assembler.AMD64RMOp cmpOp = AMD64Assembler.AMD64BinaryArithmetic.CMP.getRMOpcode(size);
        final int opSize = getPrefixInBytes(size, src1, cmpOp.dstIsByte, sizingAddress) + 1 + addressInBytes(sizingAddress);
        alignFusedPair(branchTarget, false, opSize);
        final int pairStart = position();
        final AMD64Address actualAddress = src2.get();
        cmpOp.emit((AMD64Assembler)this, size, src1, actualAddress);
        // The placeholder-based estimate must match the size of the real encoding.
        assert (pairStart + opSize == position());
        jcc(cc, branchTarget, false);
        assert (ensureWithinBoundary(pairStart));
    }

    /**
     * DWORD-sized {@code AND dst, imm32} fused with a conditional jump. The opcode variant is
     * chosen by {@code NumUtil.isByte(imm32)}.
     */
    public final void andlAndJcc(Register dst, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64BaseAssembler.OperandSize size = AMD64BaseAssembler.OperandSize.DWORD;
        final AMD64Assembler.AMD64MIOp andOp = AMD64Assembler.AMD64BinaryArithmetic.AND.getMIOpcode(size, NumUtil.isByte(imm32));
        applyMIOpAndJcc(andOp, size, dst, imm32, cc, branchTarget, isShortJmp, false, null);
    }

    /**
     * QWORD-sized {@code AND dst, imm32} fused with a conditional jump. The opcode variant is
     * chosen by {@code NumUtil.isByte(imm32)}.
     */
    public final void andqAndJcc(Register dst, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64BaseAssembler.OperandSize size = AMD64BaseAssembler.OperandSize.QWORD;
        final AMD64Assembler.AMD64MIOp andOp = AMD64Assembler.AMD64BinaryArithmetic.AND.getMIOpcode(size, NumUtil.isByte(imm32));
        applyMIOpAndJcc(andOp, size, dst, imm32, cc, branchTarget, isShortJmp, false, null);
    }

    /**
     * QWORD-sized {@code AND dst, src} fused with a conditional jump. The jump position
     * returned by the helper is discarded.
     */
    public final void andqAndJcc(Register dst, Register src, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64BaseAssembler.OperandSize size = AMD64BaseAssembler.OperandSize.QWORD;
        final AMD64Assembler.AMD64RMOp andOp = AMD64Assembler.AMD64BinaryArithmetic.AND.getRMOpcode(size);
        applyRMOpAndJcc(andOp, size, dst, src, cc, branchTarget, isShortJmp);
    }

    /**
     * DWORD-sized {@code ADD dst, src} fused with a conditional jump. The jump position
     * returned by the helper is discarded.
     */
    public final void addlAndJcc(Register dst, Register src, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64BaseAssembler.OperandSize size = AMD64BaseAssembler.OperandSize.DWORD;
        final AMD64Assembler.AMD64RMOp addOp = AMD64Assembler.AMD64BinaryArithmetic.ADD.getRMOpcode(size);
        applyRMOpAndJcc(addOp, size, dst, src, cc, branchTarget, isShortJmp);
    }

    /**
     * QWORD-sized {@code ADD dst, imm32} fused with a conditional jump. The opcode variant is
     * chosen by {@code NumUtil.isByte(imm32)}.
     */
    public final void addqAndJcc(Register dst, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64BaseAssembler.OperandSize size = AMD64BaseAssembler.OperandSize.QWORD;
        final AMD64Assembler.AMD64MIOp addOp = AMD64Assembler.AMD64BinaryArithmetic.ADD.getMIOpcode(size, NumUtil.isByte(imm32));
        applyMIOpAndJcc(addOp, size, dst, imm32, cc, branchTarget, isShortJmp, false, null);
    }

    /**
     * DWORD-sized {@code SUB dst, src} fused with a conditional jump. The jump position
     * returned by the helper is discarded.
     */
    public final void sublAndJcc(Register dst, Register src, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64BaseAssembler.OperandSize size = AMD64BaseAssembler.OperandSize.DWORD;
        final AMD64Assembler.AMD64RMOp subOp = AMD64Assembler.AMD64BinaryArithmetic.SUB.getRMOpcode(size);
        applyRMOpAndJcc(subOp, size, dst, src, cc, branchTarget, isShortJmp);
    }

    /**
     * QWORD-sized {@code SUB dst, src} fused with a conditional jump. The jump position
     * returned by the helper is discarded.
     */
    public final void subqAndJcc(Register dst, Register src, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64BaseAssembler.OperandSize size = AMD64BaseAssembler.OperandSize.QWORD;
        final AMD64Assembler.AMD64RMOp subOp = AMD64Assembler.AMD64BinaryArithmetic.SUB.getRMOpcode(size);
        applyRMOpAndJcc(subOp, size, dst, src, cc, branchTarget, isShortJmp);
    }

    /**
     * DWORD-sized {@code SUB dst, imm32} fused with a conditional jump. The opcode variant is
     * chosen by {@code NumUtil.isByte(imm32)}.
     */
    public final void sublAndJcc(Register dst, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64BaseAssembler.OperandSize size = AMD64BaseAssembler.OperandSize.DWORD;
        final AMD64Assembler.AMD64MIOp subOp = AMD64Assembler.AMD64BinaryArithmetic.SUB.getMIOpcode(size, NumUtil.isByte(imm32));
        applyMIOpAndJcc(subOp, size, dst, imm32, cc, branchTarget, isShortJmp, false, null);
    }

    /**
     * QWORD-sized {@code SUB dst, imm32} fused with a conditional jump. The opcode variant is
     * chosen by {@code NumUtil.isByte(imm32)}.
     */
    public final void subqAndJcc(Register dst, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64BaseAssembler.OperandSize size = AMD64BaseAssembler.OperandSize.QWORD;
        final AMD64Assembler.AMD64MIOp subOp = AMD64Assembler.AMD64BinaryArithmetic.SUB.getMIOpcode(size, NumUtil.isByte(imm32));
        applyMIOpAndJcc(subOp, size, dst, imm32, cc, branchTarget, isShortJmp, false, null);
    }

    /**
     * DWORD-sized {@code INC dst} fused with a conditional jump.
     */
    public final void inclAndJcc(Register dst, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64Assembler.AMD64MOp incOp = AMD64Assembler.AMD64MOp.INC;
        applyMOpAndJcc(incOp, AMD64BaseAssembler.OperandSize.DWORD, dst, cc, branchTarget, isShortJmp);
    }

    /**
     * QWORD-sized {@code INC dst} fused with a conditional jump.
     */
    public final void incqAndJcc(Register dst, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64Assembler.AMD64MOp incOp = AMD64Assembler.AMD64MOp.INC;
        applyMOpAndJcc(incOp, AMD64BaseAssembler.OperandSize.QWORD, dst, cc, branchTarget, isShortJmp);
    }

    /**
     * DWORD-sized {@code DEC dst} fused with a conditional jump.
     */
    public final void declAndJcc(Register dst, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64Assembler.AMD64MOp decOp = AMD64Assembler.AMD64MOp.DEC;
        applyMOpAndJcc(decOp, AMD64BaseAssembler.OperandSize.DWORD, dst, cc, branchTarget, isShortJmp);
    }

    /**
     * QWORD-sized {@code DEC dst} fused with a conditional jump.
     */
    public final void decqAndJcc(Register dst, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64Assembler.AMD64MOp decOp = AMD64Assembler.AMD64MOp.DEC;
        applyMOpAndJcc(decOp, AMD64BaseAssembler.OperandSize.QWORD, dst, cc, branchTarget, isShortJmp);
    }

    /**
     * DWORD-sized {@code XOR dst, imm32} fused with a conditional jump. The opcode variant is
     * chosen by {@code NumUtil.isByte(imm32)}.
     */
    public final void xorlAndJcc(Register dst, int imm32, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64BaseAssembler.OperandSize size = AMD64BaseAssembler.OperandSize.DWORD;
        final AMD64Assembler.AMD64MIOp xorOp = AMD64Assembler.AMD64BinaryArithmetic.XOR.getMIOpcode(size, NumUtil.isByte(imm32));
        applyMIOpAndJcc(xorOp, size, dst, imm32, cc, branchTarget, isShortJmp, false, null);
    }

    /**
     * DWORD-sized {@code XOR} of a register with a memory operand fused with a conditional
     * jump, without a pre-emit callback. The jump position returned by the helper is discarded.
     */
    public final void xorlAndJcc(Register dst, AMD64Address src, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64BaseAssembler.OperandSize size = AMD64BaseAssembler.OperandSize.DWORD;
        final AMD64Assembler.AMD64RMOp xorOp = AMD64Assembler.AMD64BinaryArithmetic.XOR.getRMOpcode(size);
        applyRMOpAndJcc(xorOp, size, dst, src, cc, branchTarget, isShortJmp, null);
    }

    /**
     * QWORD-sized {@code XOR} of a register with a memory operand fused with a conditional
     * jump, without a pre-emit callback. The jump position returned by the helper is discarded.
     */
    public final void xorqAndJcc(Register dst, AMD64Address src, AMD64Assembler.ConditionFlag cc, Label branchTarget, boolean isShortJmp) {
        final AMD64BaseAssembler.OperandSize size = AMD64BaseAssembler.OperandSize.QWORD;
        final AMD64Assembler.AMD64RMOp xorOp = AMD64Assembler.AMD64BinaryArithmetic.XOR.getRMOpcode(size);
        applyRMOpAndJcc(xorOp, size, dst, src, cc, branchTarget, isShortJmp, null);
    }

    /**
     * Converts the byte width of {@code operandSize} into a {@link Stride} via {@code Stride.fromInt}
     * and forwards to the stride-based {@code movSZx} overload.
     */
    public final void movSZx(AMD64BaseAssembler.OperandSize operandSize, ExtendMode extendMode, Register dst, AMD64Address src) {
        final Stride sourceStride = Stride.fromInt(operandSize.getBytes());
        this.movSZx(sourceStride, extendMode, dst, src);
    }

    /**
     * Loads a value of width {@code strideSrc} from {@code src} into {@code dst}, choosing the move
     * instruction by stride and extend mode.
     * <ul>
     * <li>{@code S1}: {@code movsbq} for sign extension, otherwise {@code movzbq}</li>
     * <li>{@code S2}: {@code movswq} for sign extension, otherwise {@code movzwq}</li>
     * <li>{@code S4}: {@code movslq} for sign extension, otherwise {@code movl}</li>
     * <li>{@code S8}: {@code movq}; the extend mode is not consulted</li>
     * </ul>
     *
     * @throws IllegalStateException for any other stride
     */
    public final void movSZx(Stride strideSrc, ExtendMode extendMode, Register dst, AMD64Address src) {
        final boolean signed = extendMode == ExtendMode.SIGN_EXTEND;
        switch (strideSrc) {
            case S1:
                if (signed) {
                    this.movsbq(dst, src);
                } else {
                    this.movzbq(dst, src);
                }
                return;
            case S2:
                if (signed) {
                    this.movswq(dst, src);
                } else {
                    this.movzwq(dst, src);
                }
                return;
            case S4:
                if (signed) {
                    this.movslq(dst, src);
                } else {
                    this.movl(dst, src);
                }
                return;
            case S8:
                this.movq(dst, src);
                return;
            default:
                throw new IllegalStateException();
        }
    }

    /**
     * Loads from the address {@code src + index * strideSrc + scaledDisplacement} into {@code dst},
     * widening the elements from {@code strideSrc} to {@code strideDst} when the strides differ.
     * <p>
     * Equal strides: a {@code VMOVQ} (AVX) or {@code movdq} load is emitted. Narrower source stride: only
     * a stride ratio of two is accepted; a {@code VMOVD}/{@code movdl} load into {@code dst} is followed
     * by an in-register extension of {@code dst} through {@code loadAndExtendAVX} (with
     * {@code AVXSize.QWORD}) or {@code loadAndExtendSSE}.
     *
     * @param displacement displacement before scaling; it is passed through {@code scaleDisplacement}
     */
    public final void pmovSZxQWORD(ExtendMode extendMode, Register dst, Stride strideDst, Register src, Stride strideSrc, Register index, int displacement) {
        final AMD64Address address = new AMD64Address(src, index, strideSrc, AMD64MacroAssembler.scaleDisplacement(strideDst, strideSrc, displacement));

        if (strideSrc.value >= strideDst.value) {
            // No widening: plain load.
            GraalError.guarantee(strideSrc.value == strideDst.value, "source stride must be smaller or equal to target stride");
            if (this.isAVX()) {
                AMD64Assembler.VexMoveOp.VMOVQ.emit(this, AVXKind.AVXSize.QWORD, dst, address);
            } else {
                this.movdq(dst, address);
            }
            return;
        }

        GraalError.guarantee(strideDst.log2 - strideSrc.log2 == 1, "unsupported stride pair %s %s", strideSrc, strideDst);
        if (this.isAVX()) {
            AMD64Assembler.VexMoveOp.VMOVD.emit(this, AVXKind.AVXSize.DWORD, dst, address);
            this.loadAndExtendAVX(AVXKind.AVXSize.QWORD, extendMode, dst, strideDst, dst, strideSrc);
        } else {
            this.movdl(dst, address);
            this.loadAndExtendSSE(extendMode, dst, strideDst, dst, strideSrc);
        }
    }

    /**
     * Builds the address {@code src + index * strideSrc + scaledDisplacement} and forwards to the
     * address-based {@code pmovSZx} overload.
     * <p>
     * Only {@code XMM} and {@code YMM} sizes are accepted, and {@code strideSrc} must not exceed
     * {@code strideDst}; both conditions are enforced with {@code GraalError.guarantee}.
     *
     * @param displacement displacement before scaling; it is passed through {@code scaleDisplacement}
     */
    public final void pmovSZx(AVXKind.AVXSize size, ExtendMode extendMode, Register dst, Stride strideDst, Register src, Stride strideSrc, Register index, int displacement) {
        final boolean supportedSize = size == AVXKind.AVXSize.XMM || size == AVXKind.AVXSize.YMM;
        GraalError.guarantee(supportedSize, "unsupported AVXSize %s", size);
        GraalError.guarantee(strideSrc.value <= strideDst.value, "source stride must be smaller or equal to target stride");
        final AMD64Address address = new AMD64Address(src, index, strideSrc, AMD64MacroAssembler.scaleDisplacement(strideDst, strideSrc, displacement));
        this.pmovSZx(size, extendMode, dst, strideDst, address, strideSrc);
    }

    /**
     * Loads a vector from memory into {@code dst}, widening elements from {@code strideSrc} to
     * {@code strideDst} when the source stride is narrower.
     * <p>
     * Equal strides result in a plain {@code movdqu(size, dst, src)}. A narrower source stride goes
     * through {@code loadAndExtendAVX} when AVX is available and {@code loadAndExtendSSE} otherwise
     * (the latter does not receive {@code size}). Only {@code XMM} and {@code YMM} sizes are accepted.
     */
    public final void pmovSZx(AVXKind.AVXSize size, ExtendMode extendMode, Register dst, Stride strideDst, AMD64Address src, Stride strideSrc) {
        final boolean supportedSize = size == AVXKind.AVXSize.XMM || size == AVXKind.AVXSize.YMM;
        GraalError.guarantee(supportedSize, "unsupported AVXSize %s", size);

        if (strideSrc.value >= strideDst.value) {
            GraalError.guarantee(strideSrc.value == strideDst.value, "source stride must be smaller or equal to target stride");
            this.movdqu(size, dst, src);
            return;
        }
        if (this.isAVX()) {
            this.loadAndExtendAVX(size, extendMode, dst, strideDst, src, strideSrc);
        } else {
            this.loadAndExtendSSE(extendMode, dst, strideDst, src, strideSrc);
        }
    }

    /**
     * Register-to-register variant: copies {@code src} into {@code dst}, widening elements from
     * {@code strideSrc} to {@code strideDst} when the source stride is narrower.
     * <p>
     * Equal strides result in a plain {@code movdqu(size, dst, src)}. A narrower source stride goes
     * through {@code loadAndExtendAVX} when AVX is available and {@code loadAndExtendSSE} otherwise
     * (the latter does not receive {@code size}). Only {@code XMM} and {@code YMM} sizes are accepted.
     */
    public final void pmovSZx(AVXKind.AVXSize size, ExtendMode extendMode, Register dst, Stride strideDst, Register src, Stride strideSrc) {
        final boolean supportedSize = size == AVXKind.AVXSize.XMM || size == AVXKind.AVXSize.YMM;
        GraalError.guarantee(supportedSize, "unsupported AVXSize %s", size);

        if (strideSrc.value >= strideDst.value) {
            GraalError.guarantee(strideSrc.value == strideDst.value, "source stride must be smaller or equal to target stride");
            this.movdqu(size, dst, src);
            return;
        }
        if (this.isAVX()) {
            this.loadAndExtendAVX(size, extendMode, dst, strideDst, src, strideSrc);
        } else {
            this.loadAndExtendSSE(extendMode, dst, strideDst, src, strideSrc);
        }
    }

    /**
     * Emits {@code VPMOVMSKB} with the given {@code size} when AVX is supported; otherwise calls the
     * inherited {@code pmovmskb(dst, src)}, in which case {@code size} is not used.
     */
    public final void pmovmsk(AVXKind.AVXSize size, Register dst, Register src) {
        if (!this.isAVX()) {
            this.pmovmskb(dst, src);
            return;
        }
        AMD64Assembler.VexRMOp.VPMOVMSKB.emit(this, size, dst, src);
    }

    /**
     * Memory-to-register vector move. Emits {@code VMOVDQU32} with the given {@code size} when AVX is
     * supported; otherwise calls the inherited {@code movdqu(dst, src)}, in which case {@code size}
     * is only validated. Only {@code XMM} and {@code YMM} sizes are accepted.
     */
    public final void movdqu(AVXKind.AVXSize size, Register dst, AMD64Address src) {
        final boolean supportedSize = size == AVXKind.AVXSize.XMM || size == AVXKind.AVXSize.YMM;
        GraalError.guarantee(supportedSize, "unsupported AVXSize %s", size);
        if (!this.isAVX()) {
            this.movdqu(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVDQU32.emit(this, size, dst, src);
    }

    /**
     * Register-to-memory vector move. Emits {@code VMOVDQU32} with the given {@code size} when AVX is
     * supported; otherwise calls the inherited {@code movdqu(dst, src)}, in which case {@code size}
     * is only validated. Only {@code XMM} and {@code YMM} sizes are accepted.
     */
    public final void movdqu(AVXKind.AVXSize size, AMD64Address dst, Register src) {
        final boolean supportedSize = size == AVXKind.AVXSize.XMM || size == AVXKind.AVXSize.YMM;
        GraalError.guarantee(supportedSize, "unsupported AVXSize %s", size);
        if (!this.isAVX()) {
            this.movdqu(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVDQU32.emit(this, size, dst, src);
    }

    /**
     * Register-to-register vector move. Emits {@code VMOVDQU32} with the given {@code size} when AVX
     * is supported; otherwise calls the inherited {@code movdqu(dst, src)}, in which case
     * {@code size} is only validated. Only {@code XMM} and {@code YMM} sizes are accepted.
     */
    public final void movdqu(AVXKind.AVXSize size, Register dst, Register src) {
        final boolean supportedSize = size == AVXKind.AVXSize.XMM || size == AVXKind.AVXSize.YMM;
        GraalError.guarantee(supportedSize, "unsupported AVXSize %s", size);
        if (!this.isAVX()) {
            this.movdqu(dst, src);
            return;
        }
        AMD64Assembler.VexMoveOp.VMOVDQU32.emit(this, size, dst, src);
    }

    /**
     * Dispatches to {@code pcmpeqb}, {@code pcmpeqw} or {@code pcmpeqd} for element strides
     * {@code S1}, {@code S2} and {@code S4} respectively (register source).
     *
     * @throws UnsupportedOperationException for any other element stride
     */
    public final void pcmpeq(AVXKind.AVXSize vectorSize, Stride elementSize, Register dst, Register src) {
        switch (elementSize) {
            case S1:
                this.pcmpeqb(vectorSize, dst, src);
                return;
            case S2:
                this.pcmpeqw(vectorSize, dst, src);
                return;
            case S4:
                this.pcmpeqd(vectorSize, dst, src);
                return;
            default:
                throw new UnsupportedOperationException();
        }
    }

    /**
     * Emits {@code VPCMPEQW} with operands {@code (dst, src, dst)} when AVX is supported; otherwise
     * calls the inherited two-operand {@code pcmpeqw(dst, src)}, in which case {@code vectorSize} is
     * not used.
     */
    public final void pcmpeqw(AVXKind.AVXSize vectorSize, Register dst, Register src) {
        if (!this.isAVX()) {
            this.pcmpeqw(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPCMPEQW.emit(this, vectorSize, dst, src, dst);
    }

    /**
     * Emits {@code VPCMPEQD} with operands {@code (dst, src, dst)} when AVX is supported; otherwise
     * calls the inherited two-operand {@code pcmpeqd(dst, src)}, in which case {@code vectorSize} is
     * not used.
     */
    public final void pcmpeqd(AVXKind.AVXSize vectorSize, Register dst, Register src) {
        if (!this.isAVX()) {
            this.pcmpeqd(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPCMPEQD.emit(this, vectorSize, dst, src, dst);
    }

    /**
     * Emits {@code VPCMPEQB} with operands {@code (dst, src, dst)} when AVX is supported; otherwise
     * calls the inherited two-operand {@code pcmpeqb(dst, src)}, in which case {@code size} is not
     * used.
     */
    public final void pcmpeqb(AVXKind.AVXSize size, Register dst, Register src) {
        if (!this.isAVX()) {
            this.pcmpeqb(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPCMPEQB.emit(this, size, dst, src, dst);
    }

    /**
     * Dispatches to {@code pcmpeqb}, {@code pcmpeqw} or {@code pcmpeqd} for element strides
     * {@code S1}, {@code S2} and {@code S4} respectively (memory source).
     *
     * @throws UnsupportedOperationException for any other element stride
     */
    public final void pcmpeq(AVXKind.AVXSize vectorSize, Stride elementSize, Register dst, AMD64Address src) {
        switch (elementSize) {
            case S1:
                this.pcmpeqb(vectorSize, dst, src);
                return;
            case S2:
                this.pcmpeqw(vectorSize, dst, src);
                return;
            case S4:
                this.pcmpeqd(vectorSize, dst, src);
                return;
            default:
                throw new UnsupportedOperationException();
        }
    }

    /**
     * Emits {@code VPCMPEQB} with operands {@code (dst, dst, src)} when AVX is supported; otherwise
     * calls the inherited {@code pcmpeqb(dst, src)} with the memory operand, in which case
     * {@code size} is not used.
     */
    public final void pcmpeqb(AVXKind.AVXSize size, Register dst, AMD64Address src) {
        if (!this.isAVX()) {
            this.pcmpeqb(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPCMPEQB.emit(this, size, dst, dst, src);
    }

    /**
     * Emits {@code VPCMPEQW} with operands {@code (dst, dst, src)} when AVX is supported; otherwise
     * calls the inherited {@code pcmpeqw(dst, src)} with the memory operand, in which case
     * {@code size} is not used.
     */
    public final void pcmpeqw(AVXKind.AVXSize size, Register dst, AMD64Address src) {
        if (!this.isAVX()) {
            this.pcmpeqw(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPCMPEQW.emit(this, size, dst, dst, src);
    }

    /**
     * Emits {@code VPCMPEQD} with operands {@code (dst, dst, src)} when AVX is supported; otherwise
     * calls the inherited {@code pcmpeqd(dst, src)} with the memory operand, in which case
     * {@code size} is not used.
     */
    public final void pcmpeqd(AVXKind.AVXSize size, Register dst, AMD64Address src) {
        if (!this.isAVX()) {
            this.pcmpeqd(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPCMPEQD.emit(this, size, dst, dst, src);
    }

    /**
     * Emits {@code VPCMPGTB} with operands {@code (dst, dst, src)} when AVX is supported; otherwise
     * calls the inherited two-operand {@code pcmpgtb(dst, src)}, in which case {@code size} is not
     * used.
     */
    public final void pcmpgtb(AVXKind.AVXSize size, Register dst, Register src) {
        if (!this.isAVX()) {
            this.pcmpgtb(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPCMPGTB.emit(this, size, dst, dst, src);
    }

    /**
     * Emits {@code VPCMPGTD} with operands {@code (dst, dst, src)} when AVX is supported; otherwise
     * calls the inherited two-operand {@code pcmpgtd(dst, src)}, in which case {@code size} is not
     * used.
     */
    public final void pcmpgtd(AVXKind.AVXSize size, Register dst, Register src) {
        if (!this.isAVX()) {
            this.pcmpgtd(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPCMPGTD.emit(this, size, dst, dst, src);
    }

    /**
     * Dispatches to {@code pminub}, {@code pminuw} or {@code pminud} for element strides {@code S1},
     * {@code S2} and {@code S4} respectively.
     *
     * @throws UnsupportedOperationException for any other element stride
     */
    public final void pminu(AVXKind.AVXSize vectorSize, Stride elementSize, Register dst, Register src1, Register src2) {
        switch (elementSize) {
            case S1:
                this.pminub(vectorSize, dst, src1, src2);
                return;
            case S2:
                this.pminuw(vectorSize, dst, src1, src2);
                return;
            case S4:
                this.pminud(vectorSize, dst, src1, src2);
                return;
            default:
                throw new UnsupportedOperationException();
        }
    }

    /**
     * Three-operand {@code pminub}: forwards to {@code simdRVMOp} with {@code VPMINUB} as the AVX
     * op and {@code PMINUB} as the SSE op, marked as commutative.
     */
    public final void pminub(AVXKind.AVXSize size, Register dst, Register src1, Register src2) {
        final boolean commutative = true;
        this.simdRVMOp(AMD64Assembler.VexRVMOp.VPMINUB, AMD64Assembler.SSEOp.PMINUB, size, dst, src1, src2, commutative);
    }

    /**
     * Three-operand {@code pminuw}: forwards to {@code simdRVMOp} with {@code VPMINUW} as the AVX
     * op and {@code PMINUW} as the SSE op, marked as commutative.
     */
    public final void pminuw(AVXKind.AVXSize size, Register dst, Register src1, Register src2) {
        final boolean commutative = true;
        this.simdRVMOp(AMD64Assembler.VexRVMOp.VPMINUW, AMD64Assembler.SSEOp.PMINUW, size, dst, src1, src2, commutative);
    }

    /**
     * Three-operand {@code pminud}: forwards to {@code simdRVMOp} with {@code VPMINUD} as the AVX
     * op and {@code PMINUD} as the SSE op, marked as commutative.
     */
    public final void pminud(AVXKind.AVXSize size, Register dst, Register src1, Register src2) {
        final boolean commutative = true;
        this.simdRVMOp(AMD64Assembler.VexRVMOp.VPMINUD, AMD64Assembler.SSEOp.PMINUD, size, dst, src1, src2, commutative);
    }

    /**
     * Emits a three-operand vector operation: {@code avxOp} directly when AVX is supported, otherwise
     * {@code sseOp} through {@code threeVectorOpSSE}. {@code vectorSize} is only used on the AVX path
     * and {@code isCommutative} only on the SSE path.
     */
    private void simdRVMOp(AMD64Assembler.VexRVMOp avxOp, AMD64Assembler.SSEOp sseOp, AVXKind.AVXSize vectorSize, Register dst, Register src1, Register src2, boolean isCommutative) {
        if (!this.isAVX()) {
            this.threeVectorOpSSE(sseOp, dst, src1, src2, isCommutative);
            return;
        }
        avxOp.emit(this, vectorSize, dst, src1, src2);
    }

    /**
     * Emits a three-operand operation {@code dst = op(src1, src2)} using the two-operand {@code op}
     * with {@code OperandSize.PD}.
     * <ul>
     * <li>{@code dst == src1}: {@code op(dst, src2)} is emitted directly.</li>
     * <li>{@code dst == src2}: the operands are swapped and {@code op(dst, src1)} is emitted; this is
     * only permitted when {@code isCommutative} is set.</li>
     * <li>otherwise: {@code src1} is first copied into {@code dst} with {@code movdqu}, then
     * {@code op(dst, src2)} is emitted.</li>
     * </ul>
     *
     * @throws GraalError if {@code dst} equals {@code src2} (but not {@code src1}) and the operation
     *             is not commutative
     */
    private void threeVectorOpSSE(AMD64Assembler.SSEOp op, Register dst, Register src1, Register src2, boolean isCommutative) {
        Register operand = src2;
        if (!dst.equals(src1)) {
            if (dst.equals(src2)) {
                if (!isCommutative) {
                    throw GraalError.shouldNotReachHere("can't simulate non-commutative 3-vector AVX op on SSE when dst == src2!");
                }
                operand = src1;
            } else {
                this.movdqu(dst, src1);
            }
        }
        op.emit(this, AMD64BaseAssembler.OperandSize.PD, dst, operand);
    }

    /**
     * Scales {@code displacement} for use with the source stride.
     * <p>
     * When the strides are equal the displacement is returned as is. When the source stride is
     * smaller, the displacement is shifted right by {@code strideDst.log2 - strideSrc.log2}; an
     * assertion checks that no set bits are shifted out. A source stride larger than the destination
     * stride trips an assertion.
     */
    private static int scaleDisplacement(Stride strideDst, Stride strideSrc, int displacement) {
        if (strideSrc.value >= strideDst.value) {
            assert strideSrc.value == strideDst.value;
            return displacement;
        }
        final int shift = strideDst.log2 - strideSrc.log2;
        final int droppedBits = (1 << shift) - 1;
        assert (displacement & droppedBits) == 0;
        return displacement >> shift;
    }

    /**
     * Looks up the extending VEX op for the stride pair and extend mode via
     * {@code getAVXLoadAndExtendOp} and emits it with a register source.
     */
    public final void loadAndExtendAVX(AVXKind.AVXSize size, ExtendMode extendMode, Register dst, Stride strideDst, Register src, Stride strideSrc) {
        final AMD64Assembler.VexRMOp extendOp = AMD64MacroAssembler.getAVXLoadAndExtendOp(strideDst, strideSrc, extendMode);
        extendOp.emit(this, size, dst, src);
    }

    /**
     * Looks up the extending VEX op for the stride pair and extend mode via
     * {@code getAVXLoadAndExtendOp} and emits it with a memory source.
     */
    public final void loadAndExtendAVX(AVXKind.AVXSize size, ExtendMode extendMode, Register dst, Stride strideDst, AMD64Address src, Stride strideSrc) {
        final AMD64Assembler.VexRMOp extendOp = AMD64MacroAssembler.getAVXLoadAndExtendOp(strideDst, strideSrc, extendMode);
        extendOp.emit(this, size, dst, src);
    }

    /**
     * Selects the {@code VPMOVSX*} (sign-extend) or {@code VPMOVZX*} (any other extend mode) op for
     * the given stride pair.
     * <p>
     * Supported pairs are {@code S1} to {@code S2/S4/S8} and {@code S2} to {@code S4/S8}. For a source
     * stride of {@code S4} the {@code DQ} variant is returned without inspecting {@code strideDst}.
     *
     * @throws GraalError for any other combination of strides
     */
    private static AMD64Assembler.VexRMOp getAVXLoadAndExtendOp(Stride strideDst, Stride strideSrc, ExtendMode extendMode) {
        final boolean signed = extendMode == ExtendMode.SIGN_EXTEND;
        switch (strideSrc) {
            case S1:
                switch (strideDst) {
                    case S2:
                        if (signed) {
                            return AMD64Assembler.VexRMOp.VPMOVSXBW;
                        }
                        return AMD64Assembler.VexRMOp.VPMOVZXBW;
                    case S4:
                        if (signed) {
                            return AMD64Assembler.VexRMOp.VPMOVSXBD;
                        }
                        return AMD64Assembler.VexRMOp.VPMOVZXBD;
                    case S8:
                        if (signed) {
                            return AMD64Assembler.VexRMOp.VPMOVSXBQ;
                        }
                        return AMD64Assembler.VexRMOp.VPMOVZXBQ;
                    default:
                        throw GraalError.shouldNotReachHere();
                }
            case S2:
                switch (strideDst) {
                    case S4:
                        if (signed) {
                            return AMD64Assembler.VexRMOp.VPMOVSXWD;
                        }
                        return AMD64Assembler.VexRMOp.VPMOVZXWD;
                    case S8:
                        if (signed) {
                            return AMD64Assembler.VexRMOp.VPMOVSXWQ;
                        }
                        return AMD64Assembler.VexRMOp.VPMOVZXWQ;
                    default:
                        throw GraalError.shouldNotReachHere();
                }
            case S4:
                if (signed) {
                    return AMD64Assembler.VexRMOp.VPMOVSXDQ;
                }
                return AMD64Assembler.VexRMOp.VPMOVZXDQ;
            default:
                throw GraalError.shouldNotReachHere();
        }
    }

    /**
     * Emits the {@code pmovsx*} (sign-extend) or {@code pmovzx*} (any other extend mode) instruction
     * matching the stride pair, with a memory source.
     * <p>
     * Supported pairs are {@code S1} to {@code S2/S4/S8} and {@code S2} to {@code S4/S8}. For a source
     * stride of {@code S4} the {@code dq} variant is emitted without inspecting {@code strideDst}.
     *
     * @throws GraalError for any other combination of strides
     */
    public final void loadAndExtendSSE(ExtendMode extendMode, Register dst, Stride strideDst, AMD64Address src, Stride strideSrc) {
        final boolean signed = extendMode == ExtendMode.SIGN_EXTEND;
        switch (strideSrc) {
            case S1:
                switch (strideDst) {
                    case S2:
                        if (signed) {
                            this.pmovsxbw(dst, src);
                        } else {
                            this.pmovzxbw(dst, src);
                        }
                        return;
                    case S4:
                        if (signed) {
                            this.pmovsxbd(dst, src);
                        } else {
                            this.pmovzxbd(dst, src);
                        }
                        return;
                    case S8:
                        if (signed) {
                            this.pmovsxbq(dst, src);
                        } else {
                            this.pmovzxbq(dst, src);
                        }
                        return;
                    default:
                        throw GraalError.shouldNotReachHere();
                }
            case S2:
                switch (strideDst) {
                    case S4:
                        if (signed) {
                            this.pmovsxwd(dst, src);
                        } else {
                            this.pmovzxwd(dst, src);
                        }
                        return;
                    case S8:
                        if (signed) {
                            this.pmovsxwq(dst, src);
                        } else {
                            this.pmovzxwq(dst, src);
                        }
                        return;
                    default:
                        throw GraalError.shouldNotReachHere();
                }
            case S4:
                if (signed) {
                    this.pmovsxdq(dst, src);
                } else {
                    this.pmovzxdq(dst, src);
                }
                return;
            default:
                throw GraalError.shouldNotReachHere();
        }
    }

    /**
     * Emits the {@code pmovsx*} (sign-extend) or {@code pmovzx*} (any other extend mode) instruction
     * matching the stride pair, with a register source.
     * <p>
     * Supported pairs are {@code S1} to {@code S2/S4/S8} and {@code S2} to {@code S4/S8}. For a source
     * stride of {@code S4} the {@code dq} variant is emitted without inspecting {@code strideDst}.
     *
     * @throws GraalError for any other combination of strides
     */
    public final void loadAndExtendSSE(ExtendMode extendMode, Register dst, Stride strideDst, Register src, Stride strideSrc) {
        final boolean signed = extendMode == ExtendMode.SIGN_EXTEND;
        switch (strideSrc) {
            case S1:
                switch (strideDst) {
                    case S2:
                        if (signed) {
                            this.pmovsxbw(dst, src);
                        } else {
                            this.pmovzxbw(dst, src);
                        }
                        return;
                    case S4:
                        if (signed) {
                            this.pmovsxbd(dst, src);
                        } else {
                            this.pmovzxbd(dst, src);
                        }
                        return;
                    case S8:
                        if (signed) {
                            this.pmovsxbq(dst, src);
                        } else {
                            this.pmovzxbq(dst, src);
                        }
                        return;
                    default:
                        throw GraalError.shouldNotReachHere();
                }
            case S2:
                switch (strideDst) {
                    case S4:
                        if (signed) {
                            this.pmovsxwd(dst, src);
                        } else {
                            this.pmovzxwd(dst, src);
                        }
                        return;
                    case S8:
                        if (signed) {
                            this.pmovsxwq(dst, src);
                        } else {
                            this.pmovzxwq(dst, src);
                        }
                        return;
                    default:
                        throw GraalError.shouldNotReachHere();
                }
            case S4:
                if (signed) {
                    this.pmovsxdq(dst, src);
                } else {
                    this.pmovzxdq(dst, src);
                }
                return;
            default:
                throw GraalError.shouldNotReachHere();
        }
    }

    /**
     * Two-operand form of {@code packuswb}: uses {@code dst} as both destination and first source.
     */
    public final void packuswb(AVXKind.AVXSize size, Register dst, Register src) {
        final Register firstSource = dst;
        this.packuswb(size, dst, firstSource, src);
    }

    /**
     * Three-operand {@code packuswb}: forwards to {@code simdRVMOp} with {@code VPACKUSWB} as the AVX
     * op and {@code PACKUSWB} as the SSE op, marked as non-commutative.
     */
    public final void packuswb(AVXKind.AVXSize size, Register dst, Register src1, Register src2) {
        final boolean commutative = false;
        this.simdRVMOp(AMD64Assembler.VexRVMOp.VPACKUSWB, AMD64Assembler.SSEOp.PACKUSWB, size, dst, src1, src2, commutative);
    }

    /**
     * Two-operand form of {@code packusdw}: uses {@code dst} as both destination and first source.
     */
    public final void packusdw(AVXKind.AVXSize size, Register dst, Register src) {
        final Register firstSource = dst;
        this.packusdw(size, dst, firstSource, src);
    }

    /**
     * Three-operand {@code packusdw}: forwards to {@code simdRVMOp} with {@code VPACKUSDW} as the AVX
     * op and {@code PACKUSDW} as the SSE op, marked as non-commutative.
     */
    public final void packusdw(AVXKind.AVXSize size, Register dst, Register src1, Register src2) {
        final boolean commutative = false;
        this.simdRVMOp(AMD64Assembler.VexRVMOp.VPACKUSDW, AMD64Assembler.SSEOp.PACKUSDW, size, dst, src1, src2, commutative);
    }

    /**
     * Two-source form of {@code palignr}: uses {@code dst} as both destination and first source.
     */
    public final void palignr(AVXKind.AVXSize size, Register dst, Register src, int imm8) {
        final Register firstSource = dst;
        this.palignr(size, dst, firstSource, src, imm8);
    }

    /**
     * Emits {@code palignr} in three-operand form ({@code dst} from {@code src1}, {@code src2} and
     * {@code imm8}).
     * <p>
     * With AVX, {@code VPALIGNR} is emitted directly. Otherwise {@code src1} is first copied into
     * {@code dst} (unless they are already the same register) and the destructive two-operand
     * {@code palignr(dst, src2, imm8)} follows; {@code size} is not used on that path.
     * <p>
     * On the non-AVX path {@code dst} must not alias {@code src2} unless it also aliases {@code src1}:
     * the preparatory copy would overwrite {@code src2} before it is read, and since the operands of
     * {@code palignr} cannot be swapped there is no way to emulate that case without a scratch
     * register.
     *
     * @throws GraalError on the non-AVX path if {@code dst} equals {@code src2} but not {@code src1}
     */
    public final void palignr(AVXKind.AVXSize size, Register dst, Register src1, Register src2, int imm8) {
        if (this.isAVX()) {
            AMD64Assembler.VexRVMIOp.VPALIGNR.emit(this, size, dst, src1, src2, imm8);
            return;
        }
        if (!dst.equals(src1)) {
            if (dst.equals(src2)) {
                // Copying src1 into dst would clobber src2; refuse instead of emitting wrong code.
                throw GraalError.shouldNotReachHere("can't simulate 3-operand palignr on SSE when dst == src2 and dst != src1!");
            }
            this.movdqu(dst, src1);
        }
        this.palignr(dst, src2, imm8);
    }

    /**
     * Two-operand form of {@code pand}: uses {@code dst} as both destination and first source.
     */
    public final void pand(AVXKind.AVXSize size, Register dst, Register src) {
        final Register firstSource = dst;
        this.pand(size, dst, firstSource, src);
    }

    /**
     * Emits a bitwise AND of {@code src1} and {@code src2} into {@code dst}.
     * <p>
     * With AVX the three-operand {@code VPAND} is emitted directly. Otherwise the destructive
     * two-operand {@code pand} is used and {@code size} is ignored:
     * <ul>
     * <li>{@code dst == src1}: {@code pand(dst, src2)}</li>
     * <li>{@code dst == src2}: {@code pand(dst, src1)}; the operands are swapped because copying
     * {@code src1} into {@code dst} first would destroy {@code src2}, and AND is commutative</li>
     * <li>otherwise: {@code movdqu(dst, src1)} followed by {@code pand(dst, src2)}</li>
     * </ul>
     */
    public final void pand(AVXKind.AVXSize size, Register dst, Register src1, Register src2) {
        if (this.isAVX()) {
            AMD64Assembler.VexRVMOp.VPAND.emit(this, size, dst, src1, src2);
        } else if (dst.equals(src1)) {
            this.pand(dst, src2);
        } else if (dst.equals(src2)) {
            // dst aliases src2: fold src1 into dst instead of overwriting src2 with a copy of src1.
            this.pand(dst, src1);
        } else {
            this.movdqu(dst, src1);
            this.pand(dst, src2);
        }
    }

    /**
     * Emits {@code VPAND} with operands {@code (dst, dst, src)} when AVX is supported; otherwise
     * calls the inherited {@code pand(dst, src)} with the memory operand, in which case {@code size}
     * is not used.
     */
    public final void pand(AVXKind.AVXSize size, Register dst, AMD64Address src) {
        if (!this.isAVX()) {
            this.pand(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPAND.emit(this, size, dst, dst, src);
    }

    /**
     * ANDs a memory operand into {@code dst}. With AVX, {@code VPAND} is emitted with the memory
     * operand directly and {@code tmp} is untouched. Otherwise the operand is first loaded into
     * {@code tmp} with {@code movdqu} and then combined via {@code pand(dst, tmp)}.
     *
     * @param tmp scratch register, written only on the non-AVX path
     */
    public final void pandU(AVXKind.AVXSize size, Register dst, AMD64Address src, Register tmp) {
        if (this.isAVX()) {
            AMD64Assembler.VexRVMOp.VPAND.emit(this, size, dst, dst, src);
            return;
        }
        this.movdqu(tmp, src);
        this.pand(dst, tmp);
    }

    /**
     * Emits {@code VPANDN} with operands {@code (dst, dst, src)} when AVX is supported; otherwise
     * calls the inherited two-operand {@code pandn(dst, src)}, in which case {@code size} is not
     * used.
     */
    public final void pandn(AVXKind.AVXSize size, Register dst, Register src) {
        if (!this.isAVX()) {
            this.pandn(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPANDN.emit(this, size, dst, dst, src);
    }

    /**
     * Emits {@code VPOR} with operands {@code (dst, dst, src)} when AVX is supported; otherwise calls
     * the inherited two-operand {@code por(dst, src)}, in which case {@code size} is not used.
     */
    public final void por(AVXKind.AVXSize size, Register dst, Register src) {
        if (!this.isAVX()) {
            this.por(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPOR.emit(this, size, dst, dst, src);
    }

    /**
     * Two-operand form of {@code pxor}: uses {@code dst} as both destination and first source.
     */
    public final void pxor(AVXKind.AVXSize size, Register dst, Register src) {
        final Register firstSource = dst;
        this.pxor(size, dst, firstSource, src);
    }

    /**
     * Emits a bitwise XOR of {@code src1} and {@code src2} into {@code dst}.
     * <p>
     * With AVX the three-operand {@code VPXOR} is emitted directly. Otherwise the destructive
     * two-operand {@code pxor} is used and {@code size} is ignored:
     * <ul>
     * <li>{@code dst == src1}: {@code pxor(dst, src2)}</li>
     * <li>{@code dst == src2}: {@code pxor(dst, src1)}; the operands are swapped because copying
     * {@code src1} into {@code dst} first would destroy {@code src2} (and then XOR the register with
     * itself), and XOR is commutative</li>
     * <li>otherwise: {@code movdqu(dst, src1)} followed by {@code pxor(dst, src2)}</li>
     * </ul>
     */
    public final void pxor(AVXKind.AVXSize size, Register dst, Register src1, Register src2) {
        if (this.isAVX()) {
            AMD64Assembler.VexRVMOp.VPXOR.emit(this, size, dst, src1, src2);
        } else if (dst.equals(src1)) {
            this.pxor(dst, src2);
        } else if (dst.equals(src2)) {
            // dst aliases src2: fold src1 into dst instead of overwriting src2 with a copy of src1.
            this.pxor(dst, src1);
        } else {
            this.movdqu(dst, src1);
            this.pxor(dst, src2);
        }
    }

    /**
     * Emits {@code VPSLLW} with {@code (dst, src, imm8)} when AVX is supported. Otherwise {@code src}
     * is copied into {@code dst} (unless they are the same register) and the inherited in-place
     * {@code psllw(dst, imm8)} is applied; {@code size} is not used on that path.
     */
    public final void psllw(AVXKind.AVXSize size, Register dst, Register src, int imm8) {
        if (this.isAVX()) {
            AMD64Assembler.VexShiftOp.VPSLLW.emit(this, size, dst, src, imm8);
            return;
        }
        if (!dst.equals(src)) {
            this.movdqu(dst, src);
        }
        this.psllw(dst, imm8);
    }

    /**
     * Emits {@code VPSRLW} with {@code (dst, src, imm8)} when AVX is supported. Otherwise {@code src}
     * is copied into {@code dst} (unless they are the same register) and the inherited in-place
     * {@code psrlw(dst, imm8)} is applied; {@code size} is not used on that path.
     */
    public final void psrlw(AVXKind.AVXSize size, Register dst, Register src, int imm8) {
        if (this.isAVX()) {
            AMD64Assembler.VexShiftOp.VPSRLW.emit(this, size, dst, src, imm8);
            return;
        }
        if (!dst.equals(src)) {
            this.movdqu(dst, src);
        }
        this.psrlw(dst, imm8);
    }

    /**
     * Emits {@code VPSLLD} with {@code (dst, src, imm8)} when AVX is supported. Otherwise {@code src}
     * is copied into {@code dst} (unless they are the same register) and the inherited in-place
     * {@code pslld(dst, imm8)} is applied; {@code size} is not used on that path.
     */
    public final void pslld(AVXKind.AVXSize size, Register dst, Register src, int imm8) {
        if (this.isAVX()) {
            AMD64Assembler.VexShiftOp.VPSLLD.emit(this, size, dst, src, imm8);
            return;
        }
        if (!dst.equals(src)) {
            this.movdqu(dst, src);
        }
        this.pslld(dst, imm8);
    }

    /**
     * Emits {@code VPSRLD} with {@code (dst, src, imm8)} when AVX is supported. Otherwise {@code src}
     * is copied into {@code dst} (unless they are the same register) and the inherited in-place
     * {@code psrld(dst, imm8)} is applied; {@code size} is not used on that path.
     */
    public final void psrld(AVXKind.AVXSize size, Register dst, Register src, int imm8) {
        if (this.isAVX()) {
            AMD64Assembler.VexShiftOp.VPSRLD.emit(this, size, dst, src, imm8);
            return;
        }
        if (!dst.equals(src)) {
            this.movdqu(dst, src);
        }
        this.psrld(dst, imm8);
    }

    /**
     * Three-operand {@code pshufb}: forwards to {@code simdRVMOp} with {@code VPSHUFB} as the AVX op
     * and {@code PSHUFB} as the SSE op, marked as non-commutative.
     */
    public final void pshufb(AVXKind.AVXSize size, Register dst, Register src1, Register src2) {
        final boolean commutative = false;
        this.simdRVMOp(AMD64Assembler.VexRVMOp.VPSHUFB, AMD64Assembler.SSEOp.PSHUFB, size, dst, src1, src2, commutative);
    }

    /**
     * Two-operand form of {@code pshufb}: uses {@code dst} as both destination and first source.
     */
    public final void pshufb(AVXKind.AVXSize size, Register dst, Register src) {
        final Register firstSource = dst;
        this.pshufb(size, dst, firstSource, src);
    }

    /**
     * Emits {@code VPSHUFB} with operands {@code (dst, dst, src)} when AVX is supported; otherwise
     * calls the inherited {@code pshufb(dst, src)} with the memory operand, in which case
     * {@code size} is not used.
     */
    public final void pshufb(AVXKind.AVXSize size, Register dst, AMD64Address src) {
        if (!this.isAVX()) {
            this.pshufb(dst, src);
            return;
        }
        AMD64Assembler.VexRVMOp.VPSHUFB.emit(this, size, dst, dst, src);
    }

    /**
     * Emits {@code VPTEST} with the given {@code size} when AVX is supported; otherwise calls the
     * inherited {@code ptest(dst, src)}, in which case {@code size} is not used.
     */
    public final void ptest(AVXKind.AVXSize size, Register dst, Register src) {
        if (!this.isAVX()) {
            this.ptest(dst, src);
            return;
        }
        AMD64Assembler.VexRMOp.VPTEST.emit(this, size, dst, src);
    }

    /**
     * Tests {@code dst} against a memory operand. With AVX, {@code VPTEST} is emitted with the memory
     * operand directly and {@code tmp} is untouched. Otherwise the operand is first loaded into
     * {@code tmp} with {@code movdqu} and then tested via {@code ptest(dst, tmp)}.
     *
     * @param tmp scratch register, written only on the non-AVX path
     */
    public final void ptestU(AVXKind.AVXSize size, Register dst, AMD64Address src, Register tmp) {
        if (this.isAVX()) {
            AMD64Assembler.VexRMOp.VPTEST.emit(this, size, dst, src);
            return;
        }
        this.movdqu(tmp, src);
        this.ptest(dst, tmp);
    }

    /**
     * Reports whether {@code supports(AMD64.CPUFeature.AVX)} holds for this assembler. The vector
     * helpers in this class use it to choose between the VEX-encoded op and the non-VEX fallback.
     */
    public boolean isAVX() {
        final boolean avxSupported = this.supports(AMD64.CPUFeature.AVX);
        return avxSupported;
    }

    /**
     * Selects how a narrower value is widened by the extending move helpers of this class.
     */
    public static enum ExtendMode {
        /** Selects the zero-extending instruction variants (for example {@code movzbq}, {@code pmovzx*}). */
        ZERO_EXTEND,
        /** Selects the sign-extending instruction variants (for example {@code movsbq}, {@code pmovsx*}). */
        SIGN_EXTEND
    }
}

