/* generate yieldpoint without checking threadSwith request
   */
  private static void expandUnconditionalYieldpoint(Instruction s, IR ir, RVMMethod meth) {
    // split the basic block after the yieldpoint, create a new
    // block at the end of the IR to hold the yieldpoint,
    // remove the yieldpoint (to prepare to out it in the new block at the end)
    BasicBlock thisBlock = s.getBasicBlock();
    BasicBlock nextBlock = thisBlock.splitNodeWithLinksAt(s, ir);
    BasicBlock yieldpoint = thisBlock.createSubBlock(s.getBytecodeIndex(), ir);
    thisBlock.insertOut(yieldpoint);
    yieldpoint.insertOut(nextBlock);
    ir.cfg.addLastInCodeOrder(yieldpoint);
    s.remove();

    // change thread switch instruction into call to thread switch routine
    // NOTE: must make s the call instruction: it is the GC point!
    //       must also inform the GCMap that s has been moved!!!
    Offset offset = meth.getOffset();
    LocationOperand loc = new LocationOperand(offset);
    Operand guard = TG();
    Operand target = MemoryOperand.D(Magic.getTocPointer().plus(offset), (byte) 4, loc, guard);
    MIR_Call.mutate0(s, CALL_SAVE_VOLATILE, null, null, target, MethodOperand.STATIC(meth));
    yieldpoint.appendInstruction(s);
    ir.MIRInfo.gcIRMap.moveToEnd(s);

    yieldpoint.appendInstruction(MIR_Branch.create(IA32_JMP, nextBlock.makeJumpTarget()));

    // make a jump to yield block
    thisBlock.appendInstruction(MIR_Branch.create(IA32_JMP, yieldpoint.makeJumpTarget()));
  }
  private static void expandYieldpoint(
      Instruction s, IR ir, RVMMethod meth, IA32ConditionOperand ypCond) {
    // split the basic block after the yieldpoint, create a new
    // block at the end of the IR to hold the yieldpoint,
    // remove the yieldpoint (to prepare to out it in the new block at the end)
    BasicBlock thisBlock = s.getBasicBlock();
    BasicBlock nextBlock = thisBlock.splitNodeWithLinksAt(s, ir);
    BasicBlock yieldpoint = thisBlock.createSubBlock(s.getBytecodeIndex(), ir, 0);
    thisBlock.insertOut(yieldpoint);
    yieldpoint.insertOut(nextBlock);
    ir.cfg.addLastInCodeOrder(yieldpoint);
    s.remove();

    // change thread switch instruction into call to thread switch routine
    // NOTE: must make s the call instruction: it is the GC point!
    //       must also inform the GCMap that s has been moved!!!
    Offset offset = meth.getOffset();
    LocationOperand loc = new LocationOperand(offset);
    Operand guard = TG();
    Operand target;
    if (JTOC_REGISTER == null) {
      target = MemoryOperand.D(Magic.getTocPointer().plus(offset), (byte) 4, loc, guard);
    } else {
      target = MemoryOperand.BD(ir.regpool.makeTocOp().asRegister(), offset, (byte) 8, loc, guard);
    }

    MIR_Call.mutate0(s, CALL_SAVE_VOLATILE, null, null, target, MethodOperand.STATIC(meth));
    yieldpoint.appendInstruction(s);
    ir.MIRInfo.gcIRMap.moveToEnd(s);

    yieldpoint.appendInstruction(MIR_Branch.create(IA32_JMP, nextBlock.makeJumpTarget()));

    // Check to see if threadSwitch requested
    Offset tsr = Entrypoints.takeYieldpointField.getOffset();
    MemoryOperand M = MemoryOperand.BD(ir.regpool.makeTROp(), tsr, (byte) 4, null, null);
    thisBlock.appendInstruction(MIR_Compare.create(IA32_CMP, M, IC(0)));
    thisBlock.appendInstruction(
        MIR_CondBranch.create(
            IA32_JCC, ypCond, yieldpoint.makeJumpTarget(), BranchProfileOperand.never()));
  }
  /**
   * Attempt to generate a boolean compare opcode from a conditional branch.
   *
   * <pre>
   * 1)   IF .. GOTO A          replaced by  BOOLEAN_CMP x=..
   *      x = 0
   *      GOTO B
   *   A: x = 1
   *   B: ...
   * </pre>
   *
   * <p>Precondition: <code>IfCmp.conforms(<i>cb</i>)</code>
   *
   * @param ir governing IR
   * @param bb basic block of cb
   * @param cb conditional branch instruction
   * @return true if the transformation succeeds, false otherwise
   */
  private boolean generateBooleanCompare(IR ir, BasicBlock bb, Instruction cb, BasicBlock tb) {

    if ((cb.operator() != INT_IFCMP) && (cb.operator() != REF_IFCMP)) {
      return false;
    }
    // make sure this is the last branch in the block
    if (cb.nextInstructionInCodeOrder().operator() != BBEND) {
      return false;
    }
    Operand val1 = IfCmp.getVal1(cb);
    Operand val2 = IfCmp.getVal2(cb);
    ConditionOperand condition = IfCmp.getCond(cb);
    // "not taken" path
    BasicBlock fb = cb.getBasicBlock().getNotTakenNextBlock();
    // make sure it's a diamond
    if (tb.getNumberOfNormalOut() != 1) {
      return false;
    }
    if (fb.getNumberOfNormalOut() != 1) {
      return false;
    }
    BasicBlock jb = fb.getNormalOut().nextElement(); // join block
    // make sure it's a diamond
    if (!tb.pointsOut(jb)) {
      return false;
    }
    Instruction ti = tb.firstRealInstruction();
    Instruction fi = fb.firstRealInstruction();
    // make sure the instructions in target blocks are either both moves
    // or both returns
    if (ti == null || fi == null) {
      return false;
    }
    if (ti.operator() != fi.operator()) {
      return false;
    }
    if (ti.operator != RETURN && ti.operator() != INT_MOVE) {
      return false;
    }
    //
    // WARNING: This code is currently NOT exercised!
    //
    if (ti.operator() == RETURN) {
      // make sure each of the target blocks contains only one instruction
      if (ti != tb.lastRealInstruction()) {
        return false;
      }
      if (fi != fb.lastRealInstruction()) {
        return false;
      }
      Operand tr = Return.getVal(ti);
      Operand fr = Return.getVal(fi);
      // make sure we're returning constants
      if (!(tr instanceof IntConstantOperand) || !(fr instanceof IntConstantOperand)) {
        return false;
      }
      int tv = ((IntConstantOperand) tr).value;
      int fv = ((IntConstantOperand) fr).value;
      if (!((tv == 1 && fv == 0) || (tv == 1 && fv == 0))) {
        return false;
      }
      RegisterOperand t = ir.regpool.makeTemp(TypeReference.Boolean);
      // Cases 1) and 2)
      if (tv == 0) {
        condition = condition.flipCode();
      }
      booleanCompareHelper(cb, t, val1.copy(), val2.copy(), condition);
      cb.insertAfter(Return.create(RETURN, t.copyD2U()));
    } else { // (ti.operator() == INT_MOVE)
      // make sure each of the target blocks only does the move
      if (ti != tb.lastRealInstruction() && ti.nextInstructionInCodeOrder().operator() != GOTO) {
        return false;
      }
      if (fi != fb.lastRealInstruction() && fi.nextInstructionInCodeOrder().operator() != GOTO) {
        return false;
      }
      RegisterOperand t = Move.getResult(ti);
      // make sure both moves are to the same register
      if (t.getRegister() != Move.getResult(fi).getRegister()) {
        return false;
      }
      Operand tr = Move.getVal(ti);
      Operand fr = Move.getVal(fi);
      // make sure we're assigning constants
      if (!(tr instanceof IntConstantOperand) || !(fr instanceof IntConstantOperand)) {
        return false;
      }
      int tv = ((IntConstantOperand) tr).value;
      int fv = ((IntConstantOperand) fr).value;
      if (!((tv == 1 && fv == 0) || (tv == 0 && fv == 1))) {
        return false;
      }
      // Cases 3) and 4)
      if (tv == 0) {
        condition = condition.flipCode();
      }
      booleanCompareHelper(cb, t.copyRO(), val1.copy(), val2.copy(), condition);
      Instruction next = cb.nextInstructionInCodeOrder();
      if (next.operator() == GOTO) {
        Goto.setTarget(next, jb.makeJumpTarget());
      } else {
        cb.insertAfter(jb.makeGOTO());
      }
    }
    // fixup CFG
    bb.deleteOut(tb);
    bb.deleteOut(fb);
    bb.insertOut(jb); // Note: if we processed returns,
    // jb is the exit node.
    return true;
  }
  /**
   * @param ir the IR to expand
   * @return return value is garbage for IA32
   */
  public static int expand(IR ir) {
    PhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet().asIA32();
    MachineCodeOffsets mcOffsets = ir.MIRInfo.mcOffsets;

    for (Instruction next, p = ir.firstInstructionInCodeOrder(); p != null; p = next) {
      next = p.nextInstructionInCodeOrder();
      mcOffsets.setMachineCodeOffset(p, -1);

      switch (p.getOpcode()) {
        case IA32_MOVAPS_opcode:
          // a reg-reg move turned into a memory move where we can't guarantee alignment
          if (MIR_Move.getResult(p).isMemory() || MIR_Move.getValue(p).isMemory()) {
            MIR_Move.mutate(p, IA32_MOVSS, MIR_Move.getClearResult(p), MIR_Move.getClearValue(p));
          }
          break;

        case IA32_MOVAPD_opcode:
          // a reg-reg move turned into a memory move where we can't guarantee alignment
          if (MIR_Move.getResult(p).isMemory() || MIR_Move.getValue(p).isMemory()) {
            MIR_Move.mutate(p, IA32_MOVSD, MIR_Move.getClearResult(p), MIR_Move.getClearValue(p));
          }
          break;

        case IA32_TEST_opcode:
          // don't bother telling rest of compiler that memory operand
          // must be first; we can just commute it here.
          if (MIR_Test.getVal2(p).isMemory()) {
            Operand tmp = MIR_Test.getClearVal1(p);
            MIR_Test.setVal1(p, MIR_Test.getClearVal2(p));
            MIR_Test.setVal2(p, tmp);
          }
          break;

        case NULL_CHECK_opcode:
          {
            // mutate this into a TRAPIF, and then fall through to the the
            // TRAP_IF case.
            Operand ref = NullCheck.getRef(p);
            MIR_TrapIf.mutate(
                p,
                IA32_TRAPIF,
                null,
                ref.copy(),
                IC(0),
                IA32ConditionOperand.EQ(),
                TrapCodeOperand.NullPtr());
          }
          // There is no break statement here on purpose!
        case IA32_TRAPIF_opcode:
          {
            // split the basic block right before the IA32_TRAPIF
            BasicBlock thisBlock = p.getBasicBlock();
            BasicBlock trap = thisBlock.createSubBlock(p.getBytecodeIndex(), ir, 0f);
            thisBlock.insertOut(trap);
            BasicBlock nextBlock = thisBlock.splitNodeWithLinksAt(p, ir);
            thisBlock.insertOut(trap);
            TrapCodeOperand tc = MIR_TrapIf.getClearTrapCode(p);
            p.remove();
            mcOffsets.setMachineCodeOffset(nextBlock.firstInstruction(), -1);
            // add code to thisBlock to conditionally jump to trap
            Instruction cmp =
                MIR_Compare.create(IA32_CMP, MIR_TrapIf.getVal1(p), MIR_TrapIf.getVal2(p));
            if (p.isMarkedAsPEI()) {
              // The trap if was explictly marked, which means that it has
              // a memory operand into which we've folded a null check.
              // Actually need a GC map for both the compare and the INT.
              cmp.markAsPEI();
              cmp.copyPosition(p);
              ir.MIRInfo.gcIRMap.insertTwin(p, cmp);
            }
            thisBlock.appendInstruction(cmp);
            thisBlock.appendInstruction(
                MIR_CondBranch.create(
                    IA32_JCC, MIR_TrapIf.getCond(p), trap.makeJumpTarget(), null));

            // add block at end to hold trap instruction, and
            // insert trap sequence
            ir.cfg.addLastInCodeOrder(trap);
            if (tc.isArrayBounds()) {
              // attempt to store index expression in processor object for
              // C trap handler
              Operand index = MIR_TrapIf.getVal2(p);
              if (!(index instanceof RegisterOperand || index instanceof IntConstantOperand)) {
                index = IC(0xdeadbeef); // index was spilled, and
                // we can't get it back here.
              }
              MemoryOperand mo =
                  MemoryOperand.BD(
                      ir.regpool.makeTROp(),
                      ArchEntrypoints.arrayIndexTrapParamField.getOffset(),
                      (byte) 4,
                      null,
                      null);
              trap.appendInstruction(MIR_Move.create(IA32_MOV, mo, index.copy()));
            }
            // NOTE: must make p the trap instruction: it is the GC point!
            // IMPORTANT: must also inform the GCMap that the instruction has
            // been moved!!!
            trap.appendInstruction(MIR_Trap.mutate(p, IA32_INT, null, tc));
            ir.MIRInfo.gcIRMap.moveToEnd(p);

            if (tc.isStackOverflow()) {
              // only stackoverflow traps resume at next instruction.
              trap.appendInstruction(MIR_Branch.create(IA32_JMP, nextBlock.makeJumpTarget()));
            }
          }
          break;

        case IA32_FMOV_ENDING_LIVE_RANGE_opcode:
          {
            Operand result = MIR_Move.getResult(p);
            Operand value = MIR_Move.getValue(p);
            if (result.isRegister() && value.isRegister()) {
              if (result.similar(value)) {
                // eliminate useless move
                p.remove();
              } else {
                int i = PhysicalRegisterSet.getFPRIndex(result.asRegister().getRegister());
                int j = PhysicalRegisterSet.getFPRIndex(value.asRegister().getRegister());
                if (i == 0) {
                  MIR_XChng.mutate(p, IA32_FXCH, result, value);
                } else if (j == 0) {
                  MIR_XChng.mutate(p, IA32_FXCH, value, result);
                } else {
                  expandFmov(p, phys);
                }
              }
            } else {
              expandFmov(p, phys);
            }
            break;
          }

        case DUMMY_DEF_opcode:
        case DUMMY_USE_opcode:
        case REQUIRE_ESP_opcode:
        case ADVISE_ESP_opcode:
          p.remove();
          break;

        case IA32_FMOV_opcode:
          expandFmov(p, phys);
          break;

        case IA32_MOV_opcode:
          // Replace result = IA32_MOV 0 with result = IA32_XOR result, result
          if (MIR_Move.getResult(p).isRegister()
              && MIR_Move.getValue(p).isIntConstant()
              && MIR_Move.getValue(p).asIntConstant().value == 0) {
            // Calculate what flags are defined in coming instructions before a use of a flag or
            // BBend
            Instruction x = next;
            int futureDefs = 0;
            while (!BBend.conforms(x) && !PhysicalDefUse.usesEFLAGS(x.operator())) {
              futureDefs |= x.operator().implicitDefs;
              x = x.nextInstructionInCodeOrder();
            }
            // If the flags will be destroyed prior to use or we reached the end of the basic block
            if (BBend.conforms(x)
                || (futureDefs & PhysicalDefUse.maskAF_CF_OF_PF_SF_ZF)
                    == PhysicalDefUse.maskAF_CF_OF_PF_SF_ZF) {
              Operand result = MIR_Move.getClearResult(p);
              MIR_BinaryAcc.mutate(p, IA32_XOR, result, result.copy());
            }
          }
          break;

        case IA32_SET__B_opcode:
          // Replace <cmp>, set__b, movzx__b with xor, <cmp>, set__b
          if (MIR_Set.getResult(p).isRegister()
              && MIR_Unary.conforms(next)
              && (next.operator() == IA32_MOVZX__B)
              && MIR_Unary.getResult(next).isRegister()
              && MIR_Unary.getVal(next).similar(MIR_Unary.getResult(next))
              && MIR_Unary.getVal(next).similar(MIR_Set.getResult(p))) {
            // Find instruction in this basic block that defines flags
            Instruction x = p.prevInstructionInCodeOrder();
            Operand result = MIR_Unary.getResult(next);
            boolean foundCmp = false;
            outer:
            while (!Label.conforms(x)) {
              Enumeration<Operand> e = x.getUses();
              while (e.hasMoreElements()) {
                // We can't use an xor to clear the register if that register is
                // used by the <cmp> or intervening instruction
                if (e.nextElement().similar(result)) {
                  break outer;
                }
              }
              if (PhysicalDefUse.definesEFLAGS(x.operator())
                  && !PhysicalDefUse.usesEFLAGS(x.operator())) {
                // we found a <cmp> that doesn't use the result or the flags
                // that would be clobbered by the xor
                foundCmp = true;
                break outer;
              }
              x = x.prevInstructionInCodeOrder();
            }
            if (foundCmp) {
              // We found the <cmp>, mutate the movzx__b into an xor and insert it before the <cmp>
              next.remove();
              MIR_BinaryAcc.mutate(next, IA32_XOR, result, MIR_Unary.getVal(next));
              x.insertBefore(next);
              // get ready for the next instruction
              next = p.nextInstructionInCodeOrder();
            }
          }
          break;

        case IA32_LEA_opcode:
          {
            // Sometimes we're over eager in BURS in using LEAs and after register
            // allocation we can simplify to the accumulate form
            // replace reg1 = LEA [reg1 + reg2] with reg1 = reg1 + reg2
            // replace reg1 = LEA [reg1 + c1] with reg1 = reg1 + c1
            // replace reg1 = LEA [reg1 << c1] with reg1 = reg1 << c1
            MemoryOperand value = MIR_Lea.getValue(p);
            RegisterOperand result = MIR_Lea.getResult(p);
            if ((value.base != null && value.base.getRegister() == result.getRegister())
                || (value.index != null && value.index.getRegister() == result.getRegister())) {
              // Calculate what flags are defined in coming instructions before a use of a flag or
              // BBend
              Instruction x = next;
              int futureDefs = 0;
              while (!BBend.conforms(x) && !PhysicalDefUse.usesEFLAGS(x.operator())) {
                futureDefs |= x.operator().implicitDefs;
                x = x.nextInstructionInCodeOrder();
              }
              // If the flags will be destroyed prior to use or we reached the end of the basic
              // block
              if (BBend.conforms(x)
                  || (futureDefs & PhysicalDefUse.maskAF_CF_OF_PF_SF_ZF)
                      == PhysicalDefUse.maskAF_CF_OF_PF_SF_ZF) {
                if (value.base != null
                    && value.index != null
                    && value.index.getRegister() == result.getRegister()
                    && value.disp.isZero()
                    && value.scale == 0) {
                  // reg1 = lea [base + reg1] -> add reg1, base
                  MIR_BinaryAcc.mutate(p, IA32_ADD, result, value.base);
                } else if (value.base != null
                    && value.base.getRegister() == result.getRegister()
                    && value.index != null
                    && value.disp.isZero()
                    && value.scale == 0) {
                  // reg1 = lea [reg1 + index] -> add reg1, index
                  MIR_BinaryAcc.mutate(p, IA32_ADD, result, value.index);
                } else if (value.base != null
                    && value.base.getRegister() == result.getRegister()
                    && value.index == null) {
                  if (VM.VerifyAssertions) VM._assert(fits(value.disp, 32));
                  // reg1 = lea [reg1 + disp] -> add reg1, disp
                  MIR_BinaryAcc.mutate(p, IA32_ADD, result, IC(value.disp.toInt()));
                } else if (value.base == null
                    && value.index != null
                    && value.index.getRegister() == result.getRegister()
                    && value.scale == 0) {
                  if (VM.VerifyAssertions) VM._assert(fits(value.disp, 32));
                  // reg1 = lea [reg1 + disp] -> add reg1, disp
                  MIR_BinaryAcc.mutate(p, IA32_ADD, result, IC(value.disp.toInt()));
                } else if (value.base == null
                    && value.index != null
                    && value.index.getRegister() == result.getRegister()
                    && value.disp.isZero()) {
                  // reg1 = lea [reg1 << scale] -> shl reg1, scale
                  if (value.scale == 0) {
                    p.remove();
                  } else if (value.scale == 1) {
                    MIR_BinaryAcc.mutate(p, IA32_ADD, result, value.index);
                  } else {
                    MIR_BinaryAcc.mutate(p, IA32_SHL, result, IC(value.scale));
                  }
                }
              }
            }
          }
          break;

        case IA32_FCLEAR_opcode:
          expandFClear(p, ir);
          break;

        case IA32_JCC2_opcode:
          p.insertBefore(
              MIR_CondBranch.create(
                  IA32_JCC,
                  MIR_CondBranch2.getCond1(p),
                  MIR_CondBranch2.getTarget1(p),
                  MIR_CondBranch2.getBranchProfile1(p)));
          MIR_CondBranch.mutate(
              p,
              IA32_JCC,
              MIR_CondBranch2.getCond2(p),
              MIR_CondBranch2.getTarget2(p),
              MIR_CondBranch2.getBranchProfile2(p));
          break;

        case CALL_SAVE_VOLATILE_opcode:
          p.changeOperatorTo(IA32_CALL);
          break;

        case IA32_LOCK_CMPXCHG_opcode:
          p.insertBefore(MIR_Empty.create(IA32_LOCK));
          p.changeOperatorTo(IA32_CMPXCHG);
          break;

        case IA32_LOCK_CMPXCHG8B_opcode:
          p.insertBefore(MIR_Empty.create(IA32_LOCK));
          p.changeOperatorTo(IA32_CMPXCHG8B);
          break;

        case YIELDPOINT_PROLOGUE_opcode:
          expandYieldpoint(
              p, ir, Entrypoints.optThreadSwitchFromPrologueMethod, IA32ConditionOperand.NE());
          break;

        case YIELDPOINT_EPILOGUE_opcode:
          expandYieldpoint(
              p, ir, Entrypoints.optThreadSwitchFromEpilogueMethod, IA32ConditionOperand.NE());
          break;

        case YIELDPOINT_BACKEDGE_opcode:
          expandYieldpoint(
              p, ir, Entrypoints.optThreadSwitchFromBackedgeMethod, IA32ConditionOperand.GT());
          break;

        case YIELDPOINT_OSR_opcode:
          // must yield, does not check threadSwitch request
          expandUnconditionalYieldpoint(p, ir, Entrypoints.optThreadSwitchFromOsrOptMethod);
          break;
      }
    }
    return 0;
  }