/**
   * Picks the aligned or the unaligned floating-point register move intrinsic for a set of
   * operands.
   *
   * <p>The unaligned variant is returned if the code generation options request that non-aligned
   * moves are always used, or if at least one operand is an address whose displacement is not a
   * multiple of the vector extent (the SIMD vector length of the data type multiplied by the size
   * of the data type). The method assumes that any addresses (both base addresses and indices) are
   * aligned at vector boundaries, so only the displacement is inspected. In all other cases the
   * aligned variant is returned.
   *
   * @param data The shared code generator objects, from which the options and the architecture
   *     description are taken
   * @param bIsLoad <code>true</code> to select a load intrinsic, <code>false</code> to select a
   *     store intrinsic
   * @param specDatatype A datatype or <code>null</code> if a generic data type is to be used (if
   *     all SIMD vectors have the same length in bits, <code>null</code> is fine); for <code>null
   *     </code> the first entry of <code>Globals.BASE_DATATYPES</code> is used
   * @param rgOperands The operands to examine and from which to determine what move intrinsic to
   *     use
   * @return The base intrinsic of the move to use to move the operands <code>rgOperands</code>
   */
  public static TypeBaseIntrinsicEnum getMovFpr(
      CodeGeneratorSharedObjects data,
      boolean bIsLoad,
      Specifier specDatatype,
      IOperand... rgOperands) {
    // resolve both possible results up front; which pair applies depends on bIsLoad only
    final TypeBaseIntrinsicEnum movAligned;
    final TypeBaseIntrinsicEnum movUnaligned;
    if (bIsLoad) {
      movAligned = TypeBaseIntrinsicEnum.LOAD_FPR_ALIGNED;
      movUnaligned = TypeBaseIntrinsicEnum.LOAD_FPR_UNALIGNED;
    } else {
      movAligned = TypeBaseIntrinsicEnum.STORE_FPR_ALIGNED;
      movUnaligned = TypeBaseIntrinsicEnum.STORE_FPR_UNALIGNED;
    }

    // the options can force unaligned moves regardless of the operands
    if (data.getOptions().isAlwaysUseNonalignedMoves()) {
      return movUnaligned;
    }

    // determined lazily once the first address operand is encountered; -1 means "not yet known"
    int nVectorExtent = -1;

    for (IOperand operand : rgOperands) {
      // only address operands can break the alignment
      if (!(operand instanceof IOperand.Address)) {
        continue;
      }

      if (nVectorExtent == -1) {
        Specifier specUsed = specDatatype;
        if (specUsed == null) {
          specUsed = Globals.BASE_DATATYPES[0];
        }

        nVectorExtent =
            data.getArchitectureDescription().getSIMDVectorLength(specUsed)
                * ArchitectureDescriptionManager.getTypeSize(specUsed);
      }

      // base addresses are assumed to be aligned at vector boundaries, hence a displacement
      // which is not a multiple of the vector extent requires an unaligned move
      IOperand.Address address = (IOperand.Address) operand;
      if ((address.getDisplacement() % nVectorExtent) != 0) {
        return movUnaligned;
      }
    }

    return movAligned;
  }
  /**
   * Runs the instruction list translator: every instruction of the input instruction list is
   * passed to <code>translateInstruction</code>.
   *
   * <p>The removal of unneeded pseudo registers is currently switched off, so the input list
   * <code>m_ilIn</code> is translated as is.
   *
   * @return The translated instruction list
   */
  private InstructionList run() {
    IArchitectureDescription arch = m_data.getArchitectureDescription();

    // register reusing is disabled for now since it screws up the instruction scheduler;
    // the leading "false" short-circuits the architecture check (replace by "true" to re-enable)
    final boolean bUseRegisterRemover = false && arch.hasNonDestructiveOperations();

    InstructionList ilToTranslate = m_ilIn;
    if (bUseRegisterRemover) {
      UnneededPseudoRegistersRemover remover =
          new UnneededPseudoRegistersRemover(
              arch, InstructionList.EInstructionListType.GENERIC, m_setReusedRegisters);
      ilToTranslate = remover.optimize(m_ilIn);
    }

    for (IInstruction instrGeneric : ilToTranslate) {
      translateInstruction(instrGeneric);
    }

    return m_ilOut;
  }
  /**
   * Creates the architecture-specific instructions to implement the generic instruction <code>
   * instruction</code> for the intrinsic <code>intrinsic</code>.
   *
   * <p>The last entry of <code>rgSourceOps</code> is treated as the output operand of the generic
   * instruction. Operands which are not compatible with the corresponding intrinsic argument are
   * replaced by temporary SIMD pseudo registers: the moves into these temporaries are translated
   * before the main instruction is added to <code>m_ilOut</code>, and a move from the temporary
   * result to the actual output operand is translated afterwards if needed. All generated
   * instructions receive the parameter assignment of <code>instruction</code>.
   *
   * @param instruction The generic instruction
   * @param intrinsic The intrinsic corresponding to <code>instruction</code>
   * @param rgSourceOps The array of operands of the generic instruction; the last entry is the
   *     output operand
   * @param rgDestArgs The array of intrinsic arguments
   * @param rgPermSourceToDest The argument permutation source &rarr; destination (where source is
   *     the generic instruction, destination is the target architecture specific instruction);
   *     source operands whose entry is <code>UNDEFINED</code> are skipped
   * @param rgPermDestToSource The argument permutation destination &rarr; source (where source is
   *     the generic instruction, destination is the target architecture specific instruction)
   * @param nOutputArgDestIndex The index of the output argument in the array of intrinsic
   *     arguments, <code>rgDestArgs</code>
   * @param bIntrinsicHasSharedResult <code>true</code> iff the intrinsic requires that an argument
   *     is a shared in/out
   */
  private void createInstructions(
      Instruction instruction,
      Intrinsic intrinsic,
      IOperand[] rgSourceOps,
      Argument[] rgDestArgs,
      int[] rgPermSourceToDest,
      int[] rgPermDestToSource,
      int nOutputArgDestIndex,
      boolean bIntrinsicHasSharedResult) {
    // maps operands to substitute operands within the actual generated computation instruction
    Map<IOperand, IOperand> mapSubstitutions = new HashMap<>();

    // operands of the architecture-specific instruction, indexed like rgDestArgs
    IOperand[] rgDestOps = new IOperand[rgDestArgs.length];
    // the output operand of the generic instruction is its last operand
    IOperand opSourceOutput = rgSourceOps[rgSourceOps.length - 1];

    // set if the main instruction writes to a temporary instead of the actual output operand;
    // opTmpResultOperand then holds that temporary
    boolean bHasNonCompatibleResultOperand = false;
    IOperand opTmpResultOperand = null;

    if (bIntrinsicHasSharedResult) {
      // find the operand which, in the intrinsic, is both input and output
      IOperand opShared = rgSourceOps[rgPermDestToSource[nOutputArgDestIndex]];

      // if the respective input and the output arguments are different, move the value of the input
      // to the result
      // the result will then be overwritten by the intrinsic
      if (!opSourceOutput.equals(opShared)) {
        IOperand opOut = opSourceOutput;
        // NOTE(review): presumably true if the output operand also occurs among the input
        // arguments other than opShared -- confirm against getIndexOfNonSharedInputArgsResult
        boolean bIsOneOfNonSharedInputArgsResult =
            InstructionListTranslator.getIndexOfNonSharedInputArgsResult(rgSourceOps, opShared)
                != -1;
        // the output operand cannot be used directly if it is not a register operand or if the
        // check above succeeded; a temporary SIMD pseudo register is used in its place
        if (!(opSourceOutput instanceof IOperand.IRegisterOperand)
            || bIsOneOfNonSharedInputArgsResult) {
          bHasNonCompatibleResultOperand = true;
          opTmpResultOperand = new IOperand.PseudoRegister(TypeRegisterType.SIMD);
          opOut = opTmpResultOperand;
        }

        // opOut can replace both opShared (the input operand, which in the architecture-specific
        // intrinsic is also an output argument) and opSourceOutput (the operand, to which the
        // result is written); the latter is only substituted if bIsOneOfNonSharedInputArgsResult
        // is false
        mapSubstitutions.put(opShared, opOut);
        if (!bIsOneOfNonSharedInputArgsResult) mapSubstitutions.put(opSourceOutput, opOut);

        // mov shared, out
        // the move is itself run through translateInstruction; it is treated as a load if
        // opShared is an address
        Instruction instrNewMov =
            new Instruction(
                getMovFpr(opShared instanceof IOperand.Address, opShared), opShared, opOut);
        instrNewMov.setParameterAssignment(instruction.getParameterAssignment());
        translateInstruction(instrNewMov);
      }
    }

    // gather operands and issue move instructions for non-compatible operands
    for (int i = 0; i < rgSourceOps.length; i++) {
      // source operands without a destination argument are skipped
      if (rgPermSourceToDest[i] != UNDEFINED) {
        boolean bIsResultOperand = i == rgSourceOps.length - 1;

        IOperand opSubstitute = mapSubstitutions.get(rgSourceOps[i]);
        if (opSubstitute != null) {
          // a substitute has already been registered for this operand (either by the shared
          // in/out handling above or by an earlier iteration of this loop), so use it

          rgDestOps[rgPermSourceToDest[i]] = opSubstitute;
        } else {
          // use the operand as is if it is compatible with the intrinsic argument, otherwise
          // use a new temporary SIMD pseudo register
          boolean bIsCompatible = isCompatible(rgSourceOps[i], rgDestArgs[rgPermSourceToDest[i]]);
          rgDestOps[rgPermSourceToDest[i]] =
              bIsCompatible ? rgSourceOps[i] : new IOperand.PseudoRegister(TypeRegisterType.SIMD);

          if (!bIsCompatible) {
            if (bIsResultOperand) {
              // this is the result operand
              // move instruction will be generated after issuing the main instruction

              bHasNonCompatibleResultOperand = true;
              opTmpResultOperand = rgDestOps[rgPermSourceToDest[i]];
            } else {
              // mov arg_i, tmp
              // remember the temporary so that further occurrences of the same operand reuse it
              mapSubstitutions.put(rgSourceOps[i], rgDestOps[rgPermSourceToDest[i]]);

              Instruction instrNewMov =
                  new Instruction(
                      getMovFpr(rgSourceOps[i] instanceof IOperand.Address, rgSourceOps[i]),
                      rgSourceOps[i],
                      rgDestOps[rgPermSourceToDest[i]]);
              instrNewMov.setParameterAssignment(instruction.getParameterAssignment());

              translateInstruction(instrNewMov);
            }
          }
        }
      }
    }

    // add the main instruction
    // (the commented-out code below is a disabled special case for DIVIDE, kept for reference)
    ////
    // if (instruction.getIntrinsicBaseName ().equals (TypeBaseIntrinsicEnum.DIVIDE.value ()))
    // {
    //	IOperand.PseudoRegister opTmp = new IOperand.PseudoRegister (TypeRegisterType.SIMD);
    //	m_ilOut.addInstruction (new Instruction ("vrcpps", rgDestOps[0], opTmp));
    //	m_ilOut.addInstruction (new Instruction ("vmulps", rgDestOps[1], opTmp, rgDestOps[2]));
    // }
    // else
    ////
    String strInstruction = intrinsic.getName();

    // if the intrinsic is the aligned load or store, re-select the aligned/unaligned variant based
    // on the actual destination operands; the name of the re-selected intrinsic is only used if
    // the architecture description provides it
    boolean bIsLoad =
        intrinsic.getBaseName().equals(TypeBaseIntrinsicEnum.LOAD_FPR_ALIGNED.value());
    boolean bIsStore =
        intrinsic.getBaseName().equals(TypeBaseIntrinsicEnum.STORE_FPR_ALIGNED.value());
    if (bIsLoad || bIsStore) {
      Intrinsic i =
          m_data
              .getArchitectureDescription()
              .getIntrinsic(getMovFpr(bIsLoad, rgDestOps).value(), m_specDatatype);
      if (i != null) strInstruction = i.getName();
    }

    // unlike the generated moves, the main instruction is added to the output list directly
    Instruction instrNew = new Instruction(strInstruction, instruction.getIntrinsic(), rgDestOps);
    instrNew.setParameterAssignment(instruction.getParameterAssignment());
    m_ilOut.addInstruction(instrNew);

    // add a move-result instruction if needed
    if (bHasNonCompatibleResultOperand) {
      // mov tmp, result
      Instruction instrNewMov =
          new Instruction(
              getMovFpr(opTmpResultOperand instanceof IOperand.Address, opTmpResultOperand),
              opTmpResultOperand,
              rgSourceOps[rgSourceOps.length - 1]);
      instrNewMov.setParameterAssignment(instruction.getParameterAssignment());
      translateInstruction(instrNewMov);
    }
  }
 /**
  * Looks up the intrinsic which corresponds to the instruction <code>instruction</code>.
  *
  * <p>The architecture description is queried with the name of the instruction and the data type
  * <code>m_specDatatype</code>.
  *
  * @param instruction The instruction for which to find the corresponding intrinsic
  * @return The intrinsic corresponding to the instruction <code>instruction</code>
  */
 private Intrinsic getIntrinsicForInstruction(Instruction instruction) {
   final Intrinsic intrinsicFound =
       m_data
           .getArchitectureDescription()
           .getIntrinsic(instruction.getInstructionName(), m_specDatatype);
   return intrinsicFound;
 }