/** * Insert code before a return instruction to restore the volatile and volatile registers. * * @param inst the return instruction */ private void restoreVolatileRegisters(Instruction inst) { GenericPhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet(); // Restore every GPR int i = 0; for (Enumeration<Register> e = phys.enumerateVolatileGPRs(); e.hasMoreElements(); i++) { Register r = e.nextElement(); int location = saveVolatileGPRLocation[i]; Operand M = new StackLocationOperand(true, -location, WORDSIZE); inst.insertBefore( MIR_Move.create(IA32_MOV, new RegisterOperand(r, PRIMITIVE_TYPE_FOR_WORD), M)); } }
/**
 * Emits, immediately ahead of a return instruction, the moves that reload the nonvolatile
 * general-purpose registers used by the compiled method.
 *
 * <p>Registers are visited in the backwards enumeration order while the slot index counts down
 * from {@code getNumberOfNonvolatileGPRs() - 1} to zero; the walk stops as soon as either the
 * enumeration or the index is exhausted.
 *
 * @param inst the return instruction
 */
private void restoreNonVolatiles(Instruction inst) {
  GenericPhysicalRegisterSet registerSet = ir.regpool.getPhysicalRegisterSet();
  int index = ir.compiledMethod.getNumberOfNonvolatileGPRs() - 1;
  Enumeration<Register> nonvolatiles = registerSet.enumerateNonvolatileGPRsBackwards();

  while (nonvolatiles.hasMoreElements() && index >= 0) {
    Register gpr = nonvolatiles.nextElement();
    int saveOffset = getNonvolatileGPROffset(index);
    Operand saveSlot = new StackLocationOperand(true, -saveOffset, WORDSIZE);
    RegisterOperand target = new RegisterOperand(gpr, PRIMITIVE_TYPE_FOR_WORD);
    inst.insertBefore(MIR_Move.create(IA32_MOV, target, saveSlot));
    index--;
  }
}
/**
 * Emits the epilogue code that reloads the floating point state from the save area located at
 * {@code -fsaveLocation}.
 *
 * <p>With {@code SSE2_FULL} set, eight {@code IA32_MOVQ} moves are inserted, one per FPR index
 * 0..7, each reading a {@code BYTES_IN_DOUBLE}-sized slot; consecutive registers use
 * consecutive slots. Otherwise a single {@code IA32_FRSTOR} on the save area is inserted.
 *
 * @param inst the return instruction after the epilogue; all code is inserted before it
 */
private void restoreFloatingPointState(Instruction inst) {
  if (!SSE2_FULL) {
    // x87 path: one FRSTOR reloads the whole saved state.
    Operand saveArea = new StackLocationOperand(true, -fsaveLocation, 4);
    inst.insertBefore(MIR_FSave.create(IA32_FRSTOR, saveArea));
    return;
  }

  // SSE2 path: reload each of the eight registers from its own slot.
  GenericPhysicalRegisterSet registerSet = ir.regpool.getPhysicalRegisterSet();
  for (int fpr = 0; fpr < 8; fpr++) {
    RegisterOperand target = new RegisterOperand(registerSet.getFPR(fpr), TypeReference.Double);
    Operand slot =
        new StackLocationOperand(
            true, -fsaveLocation + (fpr * BYTES_IN_DOUBLE), BYTES_IN_DOUBLE);
    inst.insertBefore(MIR_Move.create(IA32_MOVQ, target, slot));
  }
}
/** * Insert an explicit stack overflow check in the prologue <em>before</em> buying the stack frame. * SIDE EFFECT: mutates the plg into a trap instruction. We need to mutate so that the trap * instruction is in the GC map data structures. * * @param plg the prologue instruction */ private void insertBigFrameStackOverflowCheck(Instruction plg) { if (!ir.method.isInterruptible()) { plg.remove(); return; } if (ir.compiledMethod.isSaveVolatile()) { return; } PhysicalRegisterSet phys = (PhysicalRegisterSet) ir.regpool.getPhysicalRegisterSet(); Register ESP = phys.getESP(); Register ECX = phys.getECX(); // ECX := active Thread Stack Limit MemoryOperand M = MemoryOperand.BD( ir.regpool.makeTROp(), Entrypoints.stackLimitField.getOffset(), (byte) WORDSIZE, null, null); plg.insertBefore( MIR_Move.create(IA32_MOV, new RegisterOperand((ECX), PRIMITIVE_TYPE_FOR_WORD), M)); // ECX += frame Size int frameSize = getFrameFixedSize(); plg.insertBefore( MIR_BinaryAcc.create( IA32_ADD, new RegisterOperand(ECX, PRIMITIVE_TYPE_FOR_WORD), VM.BuildFor32Addr ? IC(frameSize) : LC(frameSize))); // Trap if ESP <= ECX MIR_TrapIf.mutate( plg, IA32_TRAPIF, null, new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD), new RegisterOperand(ECX, PRIMITIVE_TYPE_FOR_WORD), IA32ConditionOperand.LE(), TrapCodeOperand.StackOverflow()); }
/**
 * Inserts, directly before {@code s}, a move that loads register {@code r} from the stack
 * location operand built from the negated {@code location}.
 *
 * <p>The move operator and the operand size are both derived from {@code type}. The register
 * operand is typed as float or double when {@code type} says so, as {@code Int} for integer
 * registers on 64-bit-address builds, and as the primitive word type in every other case.
 *
 * @param s the instruction before which the unspill is inserted
 * @param r the register to load
 * @param type the register consulted to decide the kind of value being moved
 * @param location the spill location; it is negated when forming the stack operand
 */
@Override
public void insertUnspillBefore(Instruction s, Register r, Register type, int location) {
  Operator move = getMoveOperator(type);
  byte size = getSizeOfType(type);

  RegisterOperand target;
  if (type.isFloat()) {
    target = F(r);
  } else if (type.isDouble()) {
    target = D(r);
  } else if (VM.BuildFor64Addr && type.isInteger()) {
    target = new RegisterOperand(r, TypeReference.Int);
  } else {
    target = new RegisterOperand(r, PRIMITIVE_TYPE_FOR_WORD);
  }

  StackLocationOperand spillSlot = new StackLocationOperand(true, -location, size);
  Instruction unspill = MIR_Move.create(move, target, spillSlot);
  if (VERBOSE_DEBUG) {
    System.out.println("INSERT_UNSPILL_BEFORE: " + "Inserting " + unspill + " before " + s);
  }
  s.insertBefore(unspill);
}
/** * Insert the prologue for a normal method. * * <p>Assume we are inserting the prologue for method B called from method A. * * <ul> * <li>Perform a stack overflow check. * <li>Store a back pointer to A's frame * <li>Store B's compiled method id * <li>Adjust frame pointer to point to B's frame * <li>Save any used non-volatile registers * </ul> */ @Override public void insertNormalPrologue() { PhysicalRegisterSet phys = (PhysicalRegisterSet) ir.regpool.getPhysicalRegisterSet(); Register ESP = phys.getESP(); MemoryOperand fpHome = MemoryOperand.BD( ir.regpool.makeTROp(), ArchEntrypoints.framePointerField.getOffset(), (byte) WORDSIZE, null, null); // the prologue instruction Instruction plg = ir.firstInstructionInCodeOrder().nextInstructionInCodeOrder(); // inst is the instruction immediately after the IR_PROLOGUE // instruction Instruction inst = plg.nextInstructionInCodeOrder(); int frameFixedSize = getFrameFixedSize(); ir.compiledMethod.setFrameFixedSize(frameFixedSize); // I. Buy a stackframe (including overflow check) // NOTE: We play a little game here. If the frame we are buying is // very small (less than 256) then we can be sloppy with the // stackoverflow check and actually allocate the frame in the guard // region. We'll notice when this frame calls someone and take the // stackoverflow in the callee. We can't do this if the frame is too big, // because growing the stack in the callee and/or handling a hardware trap // in this frame will require most of the guard region to complete. // See libvm.C. if (frameFixedSize >= 256) { // 1. Insert Stack overflow check. insertBigFrameStackOverflowCheck(plg); // 2. Save caller's frame pointer inst.insertBefore(MIR_UnaryNoRes.create(IA32_PUSH, fpHome)); // 3. Set my frame pointer to current value of stackpointer inst.insertBefore( MIR_Move.create( IA32_MOV, fpHome.copy(), new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD))); // 4. 
Store my compiled method id int cmid = ir.compiledMethod.getId(); inst.insertBefore(MIR_UnaryNoRes.create(IA32_PUSH, VM.BuildFor32Addr ? IC(cmid) : LC(cmid))); } else { // 1. Save caller's frame pointer inst.insertBefore(MIR_UnaryNoRes.create(IA32_PUSH, fpHome)); // 2. Set my frame pointer to current value of stackpointer inst.insertBefore( MIR_Move.create( IA32_MOV, fpHome.copy(), new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD))); // 3. Store my compiled method id int cmid = ir.compiledMethod.getId(); inst.insertBefore(MIR_UnaryNoRes.create(IA32_PUSH, VM.BuildFor32Addr ? IC(cmid) : LC(cmid))); // 4. Insert Stack overflow check. insertNormalStackOverflowCheck(plg); } // II. Save any used volatile and non-volatile registers if (ir.compiledMethod.isSaveVolatile()) { saveVolatiles(inst); saveFloatingPointState(inst); } saveNonVolatiles(inst); }
/** * expand an FMOV pseudo-insruction. * * @param s the instruction to expand * @param phys controlling physical register set */ private static void expandFmov(Instruction s, PhysicalRegisterSet phys) { Operand result = MIR_Move.getResult(s); Operand value = MIR_Move.getValue(s); if (result.isRegister() && value.isRegister()) { if (result.similar(value)) { // eliminate useless move s.remove(); } else { int i = PhysicalRegisterSet.getFPRIndex(result.asRegister().getRegister()); int j = PhysicalRegisterSet.getFPRIndex(value.asRegister().getRegister()); if (j == 0) { // We have FMOV Fi, F0 // Expand as: // FST F(i) (copy F0 to F(i)) MIR_Move.mutate(s, IA32_FST, D(phys.getFPR(i)), D(phys.getFPR(0))); } else { // We have FMOV Fi, Fj // Expand as: // FLD Fj (push Fj on FP stack). // FSTP F(i+1) (copy F0 to F(i+1) and then pop register stack) s.insertBefore(MIR_Move.create(IA32_FLD, D(phys.getFPR(0)), value)); MIR_Move.mutate(s, IA32_FSTP, D(phys.getFPR(i + 1)), D(phys.getFPR(0))); } } } else if (value instanceof MemoryOperand) { if (result instanceof MemoryOperand) { // We have FMOV M1, M2 // Expand as: // FLD M1 (push M1 on FP stack). // FSTP M2 (copy F0 to M2 and pop register stack) s.insertBefore(MIR_Move.create(IA32_FLD, D(phys.getFPR(0)), value)); MIR_Move.mutate(s, IA32_FSTP, result, D(phys.getFPR(0))); } else { // We have FMOV Fi, M // Expand as: // FLD M (push M on FP stack). 
// FSTP F(i+1) (copy F0 to F(i+1) and pop register stack) if (VM.VerifyAssertions) VM._assert(result.isRegister()); int i = PhysicalRegisterSet.getFPRIndex(result.asRegister().getRegister()); s.insertBefore(MIR_Move.create(IA32_FLD, D(phys.getFPR(0)), value)); MIR_Move.mutate(s, IA32_FSTP, D(phys.getFPR(i + 1)), D(phys.getFPR(0))); } } else { // We have FMOV M, Fi if (VM.VerifyAssertions) VM._assert(value.isRegister()); if (VM.VerifyAssertions) { VM._assert(result instanceof MemoryOperand); } int i = PhysicalRegisterSet.getFPRIndex(value.asRegister().getRegister()); if (i != 0) { // Expand as: // FLD Fi (push Fi on FP stack). // FSTP M (store F0 in M and pop register stack); s.insertBefore(MIR_Move.create(IA32_FLD, D(phys.getFPR(0)), value)); MIR_Move.mutate(s, IA32_FSTP, result, D(phys.getFPR(0))); } else { // Expand as: // FST M (store F0 in M); MIR_Move.mutate(s, IA32_FST, result, value); } } }
/**
 * Walks every instruction of the IR in code order and rewrites pseudo-instructions and a few
 * improvable patterns into their final IA32 form.
 *
 * <p>For each visited instruction the machine code offset is first reset to -1. The successor
 * is captured before the instruction is processed, so a case may freely remove or mutate the
 * current instruction. Opcodes without a case are left untouched.
 *
 * @param ir the IR to expand
 * @return return value is garbage for IA32 (always 0)
 */
public static int expand(IR ir) {
  PhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet().asIA32();
  MachineCodeOffsets mcOffsets = ir.MIRInfo.mcOffsets;

  for (Instruction next, p = ir.firstInstructionInCodeOrder(); p != null; p = next) {
    // Remember the successor now: p may be removed or moved below.
    next = p.nextInstructionInCodeOrder();
    mcOffsets.setMachineCodeOffset(p, -1);

    switch (p.getOpcode()) {
      case IA32_MOVAPS_opcode:
        // a reg-reg move turned into a memory move where we can't guarantee alignment
        if (MIR_Move.getResult(p).isMemory() || MIR_Move.getValue(p).isMemory()) {
          MIR_Move.mutate(p, IA32_MOVSS, MIR_Move.getClearResult(p), MIR_Move.getClearValue(p));
        }
        break;

      case IA32_MOVAPD_opcode:
        // a reg-reg move turned into a memory move where we can't guarantee alignment
        if (MIR_Move.getResult(p).isMemory() || MIR_Move.getValue(p).isMemory()) {
          MIR_Move.mutate(p, IA32_MOVSD, MIR_Move.getClearResult(p), MIR_Move.getClearValue(p));
        }
        break;

      case IA32_TEST_opcode:
        // don't bother telling rest of compiler that memory operand
        // must be first; we can just commute it here.
        if (MIR_Test.getVal2(p).isMemory()) {
          Operand tmp = MIR_Test.getClearVal1(p);
          MIR_Test.setVal1(p, MIR_Test.getClearVal2(p));
          MIR_Test.setVal2(p, tmp);
        }
        break;

      case NULL_CHECK_opcode:
        {
          // mutate this into a TRAPIF (trap when ref == 0), and then fall through to the
          // TRAP_IF case.
          Operand ref = NullCheck.getRef(p);
          MIR_TrapIf.mutate(
              p,
              IA32_TRAPIF,
              null,
              ref.copy(),
              IC(0),
              IA32ConditionOperand.EQ(),
              TrapCodeOperand.NullPtr());
        }
        // There is no break statement here on purpose!
      case IA32_TRAPIF_opcode:
        {
          // split the basic block right before the IA32_TRAPIF
          BasicBlock thisBlock = p.getBasicBlock();
          BasicBlock trap = thisBlock.createSubBlock(p.getBytecodeIndex(), ir, 0f);
          // NOTE(review): insertOut(trap) is issued both before and after the split;
          // presumably the split affects thisBlock's out-edges - confirm before touching.
          thisBlock.insertOut(trap);
          BasicBlock nextBlock = thisBlock.splitNodeWithLinksAt(p, ir);
          thisBlock.insertOut(trap);
          // Take the trap code out of p and unlink p; p is re-used as the trap
          // instruction further down.
          TrapCodeOperand tc = MIR_TrapIf.getClearTrapCode(p);
          p.remove();
          mcOffsets.setMachineCodeOffset(nextBlock.firstInstruction(), -1);
          // add code to thisBlock to conditionally jump to trap
          Instruction cmp =
              MIR_Compare.create(IA32_CMP, MIR_TrapIf.getVal1(p), MIR_TrapIf.getVal2(p));
          if (p.isMarkedAsPEI()) {
            // The trap if was explicitly marked, which means that it has
            // a memory operand into which we've folded a null check.
            // Actually need a GC map for both the compare and the INT.
            cmp.markAsPEI();
            cmp.copyPosition(p);
            ir.MIRInfo.gcIRMap.insertTwin(p, cmp);
          }
          thisBlock.appendInstruction(cmp);
          thisBlock.appendInstruction(
              MIR_CondBranch.create(
                  IA32_JCC, MIR_TrapIf.getCond(p), trap.makeJumpTarget(), null));

          // add block at end to hold trap instruction, and
          // insert trap sequence
          ir.cfg.addLastInCodeOrder(trap);
          if (tc.isArrayBounds()) {
            // attempt to store index expression in processor object for
            // C trap handler
            Operand index = MIR_TrapIf.getVal2(p);
            if (!(index instanceof RegisterOperand || index instanceof IntConstantOperand)) {
              // index was spilled, and we can't get it back here:
              // store a recognizable placeholder instead.
              index = IC(0xdeadbeef);
            }
            MemoryOperand mo =
                MemoryOperand.BD(
                    ir.regpool.makeTROp(),
                    ArchEntrypoints.arrayIndexTrapParamField.getOffset(),
                    (byte) 4,
                    null,
                    null);
            trap.appendInstruction(MIR_Move.create(IA32_MOV, mo, index.copy()));
          }
          // NOTE: must make p the trap instruction: it is the GC point!
          // IMPORTANT: must also inform the GCMap that the instruction has
          // been moved!!!
          trap.appendInstruction(MIR_Trap.mutate(p, IA32_INT, null, tc));
          ir.MIRInfo.gcIRMap.moveToEnd(p);

          if (tc.isStackOverflow()) {
            // only stackoverflow traps resume at next instruction.
            trap.appendInstruction(MIR_Branch.create(IA32_JMP, nextBlock.makeJumpTarget()));
          }
        }
        break;

      case IA32_FMOV_ENDING_LIVE_RANGE_opcode:
        {
          // When both operands are distinct registers and one of them is FPR index 0,
          // an FXCH is emitted instead of the regular FMOV expansion.
          Operand result = MIR_Move.getResult(p);
          Operand value = MIR_Move.getValue(p);
          if (result.isRegister() && value.isRegister()) {
            if (result.similar(value)) {
              // eliminate useless move
              p.remove();
            } else {
              int i = PhysicalRegisterSet.getFPRIndex(result.asRegister().getRegister());
              int j = PhysicalRegisterSet.getFPRIndex(value.asRegister().getRegister());
              if (i == 0) {
                MIR_XChng.mutate(p, IA32_FXCH, result, value);
              } else if (j == 0) {
                MIR_XChng.mutate(p, IA32_FXCH, value, result);
              } else {
                expandFmov(p, phys);
              }
            }
          } else {
            expandFmov(p, phys);
          }
          break;
        }

      case DUMMY_DEF_opcode:
      case DUMMY_USE_opcode:
      case REQUIRE_ESP_opcode:
      case ADVISE_ESP_opcode:
        // These opcodes are simply deleted at this stage.
        p.remove();
        break;

      case IA32_FMOV_opcode:
        expandFmov(p, phys);
        break;

      case IA32_MOV_opcode:
        // Replace result = IA32_MOV 0 with result = IA32_XOR result, result
        if (MIR_Move.getResult(p).isRegister()
            && MIR_Move.getValue(p).isIntConstant()
            && MIR_Move.getValue(p).asIntConstant().value == 0) {
          // Calculate what flags are defined in coming instructions before a use of a flag or
          // BBend
          Instruction x = next;
          int futureDefs = 0;
          while (!BBend.conforms(x) && !PhysicalDefUse.usesEFLAGS(x.operator())) {
            futureDefs |= x.operator().implicitDefs;
            x = x.nextInstructionInCodeOrder();
          }
          // If the flags will be destroyed prior to use or we reached the end of the basic block
          if (BBend.conforms(x)
              || (futureDefs & PhysicalDefUse.maskAF_CF_OF_PF_SF_ZF)
                  == PhysicalDefUse.maskAF_CF_OF_PF_SF_ZF) {
            Operand result = MIR_Move.getClearResult(p);
            MIR_BinaryAcc.mutate(p, IA32_XOR, result, result.copy());
          }
        }
        break;

      case IA32_SET__B_opcode:
        // Replace <cmp>, set__b, movzx__b with xor, <cmp>, set__b
        if (MIR_Set.getResult(p).isRegister()
            && MIR_Unary.conforms(next)
            && (next.operator() == IA32_MOVZX__B)
            && MIR_Unary.getResult(next).isRegister()
            && MIR_Unary.getVal(next).similar(MIR_Unary.getResult(next))
            && MIR_Unary.getVal(next).similar(MIR_Set.getResult(p))) {
          // Find instruction in this basic block that defines flags
          Instruction x = p.prevInstructionInCodeOrder();
          Operand result = MIR_Unary.getResult(next);
          boolean foundCmp = false;
          // Scan backwards until the label that starts the block.
          outer:
          while (!Label.conforms(x)) {
            Enumeration<Operand> e = x.getUses();
            while (e.hasMoreElements()) {
              // We can't use an xor to clear the register if that register is
              // used by the <cmp> or intervening instruction
              if (e.nextElement().similar(result)) {
                break outer;
              }
            }
            if (PhysicalDefUse.definesEFLAGS(x.operator())
                && !PhysicalDefUse.usesEFLAGS(x.operator())) {
              // we found a <cmp> that doesn't use the result or the flags
              // that would be clobbered by the xor
              foundCmp = true;
              break outer;
            }
            x = x.prevInstructionInCodeOrder();
          }
          if (foundCmp) {
            // We found the <cmp>, mutate the movzx__b into an xor and insert it before the <cmp>
            next.remove();
            MIR_BinaryAcc.mutate(next, IA32_XOR, result, MIR_Unary.getVal(next));
            x.insertBefore(next);
            // get ready for the next instruction: the old successor was moved in front of
            // the <cmp>, so continue with whatever now follows p
            next = p.nextInstructionInCodeOrder();
          }
        }
        break;

      case IA32_LEA_opcode:
        {
          // Sometimes we're over eager in BURS in using LEAs and after register
          // allocation we can simplify to the accumulate form
          // replace reg1 = LEA [reg1 + reg2] with reg1 = reg1 + reg2
          // replace reg1 = LEA [reg1 + c1] with reg1 = reg1 + c1
          // replace reg1 = LEA [reg1 << c1] with reg1 = reg1 << c1
          MemoryOperand value = MIR_Lea.getValue(p);
          RegisterOperand result = MIR_Lea.getResult(p);
          if ((value.base != null && value.base.getRegister() == result.getRegister())
              || (value.index != null && value.index.getRegister() == result.getRegister())) {
            // Calculate what flags are defined in coming instructions before a use of a flag or
            // BBend
            Instruction x = next;
            int futureDefs = 0;
            while (!BBend.conforms(x) && !PhysicalDefUse.usesEFLAGS(x.operator())) {
              futureDefs |= x.operator().implicitDefs;
              x = x.nextInstructionInCodeOrder();
            }
            // If the flags will be destroyed prior to use or we reached the end of the basic
            // block
            if (BBend.conforms(x)
                || (futureDefs & PhysicalDefUse.maskAF_CF_OF_PF_SF_ZF)
                    == PhysicalDefUse.maskAF_CF_OF_PF_SF_ZF) {
              if (value.base != null
                  && value.index != null
                  && value.index.getRegister() == result.getRegister()
                  && value.disp.isZero()
                  && value.scale == 0) {
                // reg1 = lea [base + reg1] -> add reg1, base
                MIR_BinaryAcc.mutate(p, IA32_ADD, result, value.base);
              } else if (value.base != null
                  && value.base.getRegister() == result.getRegister()
                  && value.index != null
                  && value.disp.isZero()
                  && value.scale == 0) {
                // reg1 = lea [reg1 + index] -> add reg1, index
                MIR_BinaryAcc.mutate(p, IA32_ADD, result, value.index);
              } else if (value.base != null
                  && value.base.getRegister() == result.getRegister()
                  && value.index == null) {
                if (VM.VerifyAssertions) VM._assert(fits(value.disp, 32));
                // reg1 = lea [reg1 + disp] -> add reg1, disp   (reg1 is the base)
                MIR_BinaryAcc.mutate(p, IA32_ADD, result, IC(value.disp.toInt()));
              } else if (value.base == null
                  && value.index != null
                  && value.index.getRegister() == result.getRegister()
                  && value.scale == 0) {
                if (VM.VerifyAssertions) VM._assert(fits(value.disp, 32));
                // reg1 = lea [reg1 + disp] -> add reg1, disp   (reg1 is the index, scale 0)
                MIR_BinaryAcc.mutate(p, IA32_ADD, result, IC(value.disp.toInt()));
              } else if (value.base == null
                  && value.index != null
                  && value.index.getRegister() == result.getRegister()
                  && value.disp.isZero()) {
                // reg1 = lea [reg1 << scale] -> shl reg1, scale
                // NOTE(review): the preceding branch already matches every scale == 0 case
                // with a null base, so the scale == 0 arm below looks unreachable - confirm.
                if (value.scale == 0) {
                  p.remove();
                } else if (value.scale == 1) {
                  MIR_BinaryAcc.mutate(p, IA32_ADD, result, value.index);
                } else {
                  MIR_BinaryAcc.mutate(p, IA32_SHL, result, IC(value.scale));
                }
              }
            }
          }
        }
        break;

      case IA32_FCLEAR_opcode:
        expandFClear(p, ir);
        break;

      case IA32_JCC2_opcode:
        // Split the two-way conditional branch into two JCCs: a new one for the first
        // condition/target inserted before p, and p itself mutated to carry the second.
        p.insertBefore(
            MIR_CondBranch.create(
                IA32_JCC,
                MIR_CondBranch2.getCond1(p),
                MIR_CondBranch2.getTarget1(p),
                MIR_CondBranch2.getBranchProfile1(p)));
        MIR_CondBranch.mutate(
            p,
            IA32_JCC,
            MIR_CondBranch2.getCond2(p),
            MIR_CondBranch2.getTarget2(p),
            MIR_CondBranch2.getBranchProfile2(p));
        break;

      case CALL_SAVE_VOLATILE_opcode:
        p.changeOperatorTo(IA32_CALL);
        break;

      case IA32_LOCK_CMPXCHG_opcode:
        // Emit an explicit LOCK instruction in front of the plain CMPXCHG.
        p.insertBefore(MIR_Empty.create(IA32_LOCK));
        p.changeOperatorTo(IA32_CMPXCHG);
        break;

      case IA32_LOCK_CMPXCHG8B_opcode:
        // Emit an explicit LOCK instruction in front of the plain CMPXCHG8B.
        p.insertBefore(MIR_Empty.create(IA32_LOCK));
        p.changeOperatorTo(IA32_CMPXCHG8B);
        break;

      case YIELDPOINT_PROLOGUE_opcode:
        expandYieldpoint(
            p, ir, Entrypoints.optThreadSwitchFromPrologueMethod, IA32ConditionOperand.NE());
        break;

      case YIELDPOINT_EPILOGUE_opcode:
        expandYieldpoint(
            p, ir, Entrypoints.optThreadSwitchFromEpilogueMethod, IA32ConditionOperand.NE());
        break;

      case YIELDPOINT_BACKEDGE_opcode:
        expandYieldpoint(
            p, ir, Entrypoints.optThreadSwitchFromBackedgeMethod, IA32ConditionOperand.GT());
        break;

      case YIELDPOINT_OSR_opcode:
        // must yield, does not check threadSwitch request
        expandUnconditionalYieldpoint(p, ir, Entrypoints.optThreadSwitchFromOsrOptMethod);
        break;
    }
  }
  return 0;
}