/** * Runs the instruction list translator. * * @return The translated instruction list */ private InstructionList run() { IArchitectureDescription arch = m_data.getArchitectureDescription(); // don't use register reusing for now (screws up the instruction scheduler) final boolean bUseRegisterRemover = /*true*/ false && arch.hasNonDestructiveOperations(); InstructionList ilIn = bUseRegisterRemover ? new UnneededPseudoRegistersRemover( arch, InstructionList.EInstructionListType.GENERIC, m_setReusedRegisters) .optimize(m_ilIn) : m_ilIn; for (IInstruction instruction : ilIn) translateInstruction(instruction); return m_ilOut; }
/** * Returns an aligned/unaligned move instruction depending on the operand and, if the operand is * an address, on the displacement. The method assumes that any addresses (both base addresses and * indices) are aligned at vector boundaries. * * @param arch The architecture description * @param specDatatype A datatype or <code>null</code> if a generic data type is to be used (if * all SIMD vectors have the same length in bits, <code>null</code> is fine) * @param rgOperands The operands to examine and from which to determine what move instruction to * use * @return The name of the move instruction to use to move the operands <code>rgOperands</code> */ public static TypeBaseIntrinsicEnum getMovFpr( CodeGeneratorSharedObjects data, boolean bIsLoad, Specifier specDatatype, IOperand... rgOperands) { if (data.getOptions().isAlwaysUseNonalignedMoves()) return bIsLoad ? TypeBaseIntrinsicEnum.LOAD_FPR_UNALIGNED : TypeBaseIntrinsicEnum.STORE_FPR_UNALIGNED; int nVectorLength = -1; for (IOperand op : rgOperands) { if (op instanceof IOperand.Address) { // assume that base addresses are aligned at vector boundaries // so if the displacement is not a multiple of the vector length, we need to do an // unaligned load if (nVectorLength == -1) { Specifier specType = specDatatype == null ? Globals.BASE_DATATYPES[0] : specDatatype; nVectorLength = data.getArchitectureDescription().getSIMDVectorLength(specType) * ArchitectureDescriptionManager.getTypeSize(specType); } if ((((IOperand.Address) op).getDisplacement() % nVectorLength) != 0) return bIsLoad ? TypeBaseIntrinsicEnum.LOAD_FPR_UNALIGNED : TypeBaseIntrinsicEnum.STORE_FPR_UNALIGNED; } } return bIsLoad ? TypeBaseIntrinsicEnum.LOAD_FPR_ALIGNED : TypeBaseIntrinsicEnum.STORE_FPR_ALIGNED; }
/** * Creates the architecture-specific instructions to implement the generic instruction <code> * instruction</code> for the intrinsic <code>intrinsic</code>. * * @param instruction The generic instruction * @param intrinsic The intrinsic corresponding to <code>instruction</code> * @param rgSourceOps The array of operands of the generic instruction * @param rgDestArgs The array of intrinsic arguments * @param rgPermSourceToDest The argument permutation source → destination (where source is * the generic instruction, destination is the target architecture specific instruction) * @param rgPermDestToSource The argument permutation destination → source (where source is * the generic instruction, destination is the target architecture specific instruction) * @param nOutputArgDestIndex The index of the output argument in the array of intrinsic * arguments, <code>rgDestArgs</code> * @param bIntrinsicHasSharedResult <code>true</code> iff the intrinsic requires that an argument * is a shared in/out */ private void createInstructions( Instruction instruction, Intrinsic intrinsic, IOperand[] rgSourceOps, Argument[] rgDestArgs, int[] rgPermSourceToDest, int[] rgPermDestToSource, int nOutputArgDestIndex, boolean bIntrinsicHasSharedResult) { // maps operands to substitute operands within the actual generated computation instruction Map<IOperand, IOperand> mapSubstitutions = new HashMap<>(); IOperand[] rgDestOps = new IOperand[rgDestArgs.length]; IOperand opSourceOutput = rgSourceOps[rgSourceOps.length - 1]; boolean bHasNonCompatibleResultOperand = false; IOperand opTmpResultOperand = null; if (bIntrinsicHasSharedResult) { // find the operand which, in the intrinsic, is both input and output IOperand opShared = rgSourceOps[rgPermDestToSource[nOutputArgDestIndex]]; // if the respective input and the output arguments are different, move the value of the input // to the result // the result will then be overwritten by the intrinsic if (!opSourceOutput.equals(opShared)) { IOperand 
opOut = opSourceOutput; boolean bIsOneOfNonSharedInputArgsResult = InstructionListTranslator.getIndexOfNonSharedInputArgsResult(rgSourceOps, opShared) != -1; if (!(opSourceOutput instanceof IOperand.IRegisterOperand) || bIsOneOfNonSharedInputArgsResult) { bHasNonCompatibleResultOperand = true; opTmpResultOperand = new IOperand.PseudoRegister(TypeRegisterType.SIMD); opOut = opTmpResultOperand; } // opOut can replace both opShared (the input operand, which in the architecture-specific // intrinsic // is also an output argument) and opOut (the operand, to which the result is written) mapSubstitutions.put(opShared, opOut); if (!bIsOneOfNonSharedInputArgsResult) mapSubstitutions.put(opSourceOutput, opOut); Instruction instrNewMov = new Instruction( getMovFpr(opShared instanceof IOperand.Address, opShared), opShared, opOut); instrNewMov.setParameterAssignment(instruction.getParameterAssignment()); translateInstruction(instrNewMov); } } // gather operands and issue move instructions for non-compatible operands for (int i = 0; i < rgSourceOps.length; i++) { if (rgPermSourceToDest[i] != UNDEFINED) { boolean bIsResultOperand = i == rgSourceOps.length - 1; IOperand opSubstitute = mapSubstitutions.get(rgSourceOps[i]); if (opSubstitute != null) { // if already a non-compatible result operand has been found, // substitute the corresponding operand with the temporary one rgDestOps[rgPermSourceToDest[i]] = opSubstitute; } else { boolean bIsCompatible = isCompatible(rgSourceOps[i], rgDestArgs[rgPermSourceToDest[i]]); rgDestOps[rgPermSourceToDest[i]] = bIsCompatible ? 
rgSourceOps[i] : new IOperand.PseudoRegister(TypeRegisterType.SIMD); if (!bIsCompatible) { if (bIsResultOperand) { // this is the result operand // move instruction will be generated after issuing the main instruction bHasNonCompatibleResultOperand = true; opTmpResultOperand = rgDestOps[rgPermSourceToDest[i]]; } else { // mov arg_i, tmp mapSubstitutions.put(rgSourceOps[i], rgDestOps[rgPermSourceToDest[i]]); Instruction instrNewMov = new Instruction( getMovFpr(rgSourceOps[i] instanceof IOperand.Address, rgSourceOps[i]), rgSourceOps[i], rgDestOps[rgPermSourceToDest[i]]); instrNewMov.setParameterAssignment(instruction.getParameterAssignment()); translateInstruction(instrNewMov); } } } } } // add the main instruction //// // if (instruction.getIntrinsicBaseName ().equals (TypeBaseIntrinsicEnum.DIVIDE.value ())) // { // IOperand.PseudoRegister opTmp = new IOperand.PseudoRegister (TypeRegisterType.SIMD); // m_ilOut.addInstruction (new Instruction ("vrcpps", rgDestOps[0], opTmp)); // m_ilOut.addInstruction (new Instruction ("vmulps", rgDestOps[1], opTmp, rgDestOps[2])); // } // else //// String strInstruction = intrinsic.getName(); boolean bIsLoad = intrinsic.getBaseName().equals(TypeBaseIntrinsicEnum.LOAD_FPR_ALIGNED.value()); boolean bIsStore = intrinsic.getBaseName().equals(TypeBaseIntrinsicEnum.STORE_FPR_ALIGNED.value()); if (bIsLoad || bIsStore) { Intrinsic i = m_data .getArchitectureDescription() .getIntrinsic(getMovFpr(bIsLoad, rgDestOps).value(), m_specDatatype); if (i != null) strInstruction = i.getName(); } Instruction instrNew = new Instruction(strInstruction, instruction.getIntrinsic(), rgDestOps); instrNew.setParameterAssignment(instruction.getParameterAssignment()); m_ilOut.addInstruction(instrNew); // add a move-result instruction if needed if (bHasNonCompatibleResultOperand) { // mov tmp, result Instruction instrNewMov = new Instruction( getMovFpr(opTmpResultOperand instanceof IOperand.Address, opTmpResultOperand), opTmpResultOperand, 
rgSourceOps[rgSourceOps.length - 1]); instrNewMov.setParameterAssignment(instruction.getParameterAssignment()); translateInstruction(instrNewMov); } }
/**
 * Looks up the intrinsic for the instruction <code>instruction</code> in the architecture
 * description, using the instruction's name and the datatype <code>m_specDatatype</code>.
 *
 * @param instruction The instruction for which to find the corresponding intrinsic
 * @return The intrinsic which the architecture description returns for the instruction <code>
 *     instruction</code>
 */
private Intrinsic getIntrinsicForInstruction(Instruction instruction) {
  final IArchitectureDescription arch = m_data.getArchitectureDescription();
  return arch.getIntrinsic(instruction.getInstructionName(), m_specDatatype);
}