Пример #1
0
  /**
   * Resets the scratch arrays used while building the tree.
   *
   * <p>Every entry of {@code m_leaf_node_ids} becomes 0, every entry of {@code m_min_temp} becomes
   * {@code Double.MAX_VALUE} and every entry of {@code m_max_temp} becomes
   * {@code -Double.MAX_VALUE}. The whole reset is bracketed by
   * {@code begin(Buffer.READ_WRITE)} / {@code commit()} on {@code m_instance_data}.
   */
  public void prepareLevel0() {
    m_instance_data.begin(Buffer.READ_WRITE);

    // Fetched but not read in this method: its only consumer was a per-attribute min/max scan
    // over all rows, which is currently disabled.
    Buffer attributes = m_instance_data.attributes();

    // All instances start out assigned to node 0.
    int leaf_idx = 0;
    while (leaf_idx < m_leaf_node_ids.length) {
      m_leaf_node_ids[leaf_idx] = 0;
      ++leaf_idx;
    }

    // Seed the running minima with the largest double so any later "value < min" test succeeds.
    int min_idx = 0;
    while (min_idx < m_min_temp.length) {
      m_min_temp[min_idx] = Double.MAX_VALUE;
      ++min_idx;
    }

    // Seed the running maxima with the most negative double so any later "value > max" test
    // succeeds.
    int max_idx = 0;
    while (max_idx < m_max_temp.length) {
      m_max_temp[max_idx] = -Double.MAX_VALUE;
      ++max_idx;
    }

    m_instance_data.commit();
  }
Пример #2
0
  /**
   * CPU build implementation: splits every node of the level being processed and moves each
   * instance into one of that node's two children.
   *
   * <p>The method runs three passes while {@code m_instance_data} is opened with
   * {@code Buffer.READ_WRITE}:
   *
   * <ol>
   *   <li>fold every instance's attribute values into the per-node entries of
   *       {@code m_min_temp} / {@code m_max_temp};
   *   <li>for each node, pick the attribute whose node extent is largest relative to the extent
   *       stored in the first {@code m_num_attributes} entries of those tables, and record the
   *       midpoint of the node's extent on that attribute as the split value;
   *   <li>reassign each instance to {@code child(node)} or {@code child(node) + 1} depending on
   *       which side of the split value it falls.
   * </ol>
   *
   * <p>A node for which no attribute qualifies (for example a node that received no instances)
   * gets split dimension -1, and any instance found in such a node is sent to
   * {@code child(node)}.
   *
   * @param level level selector; for {@code level <= 1} only node 0 is processed, otherwise the
   *     nodes {@code max_level_id(level - 2) + 1} through {@code max_level_id(level - 1)}
   * @throws RuntimeException if an instance's current node id lies outside that node range
   */
  public void buildCPU(int level) {
    long start_node = 0;
    long end_node = 0;
    if (level > 1) {
      start_node = max_level_id(level - 2) + 1;
      end_node = max_level_id(level - 1);
    }

    m_instance_data.begin(Buffer.READ_WRITE);
    // NOTE(review): if the range check below throws, commit() is skipped. Whether the buffer has
    // to be released on failure is not visible from this method - confirm against Buffer's
    // contract.
    // Fetched once here instead of once per attribute read.
    Buffer attributes = m_instance_data.attributes();

    // Pass 1: per-node, per-attribute min / max.
    for (int i = 0; i < m_instance_data.rows(); ++i) {
      int node_id = m_leaf_node_ids[i];
      if (!(node_id >= start_node && node_id <= end_node))
        throw new RuntimeException("All instances should be in the leaves");
      for (int att = 0; att < m_num_attributes; ++att) {
        int node_offset = node_id * m_num_attributes + att;
        double value = attributes.read((att + i * m_num_attributes) * DirectMemory.DOUBLE_SIZE);
        if (value > m_max_temp[node_offset]) m_max_temp[node_offset] = value;
        if (value < m_min_temp[node_offset]) m_min_temp[node_offset] = value;
      }
    }

    // Pass 2: split each node at the midpoint of its (relatively) widest attribute.
    for (int node = (int) start_node; node <= end_node; ++node) {
      int node_base = node * m_num_attributes;

      // Reset per node so that one node's choice never leaks into the next one.
      int split_candidate = -1;
      double max_range = -1;

      for (int att = 0; att < m_num_attributes; ++att) {
        // Entries 0 .. m_num_attributes - 1 are node 0's extents; they serve as the
        // normalisation reference (presumably the extent of the whole data set - confirm).
        double range = m_max_temp[att] - m_min_temp[att];
        if (range > 0) {
          double node_range = (m_max_temp[node_base + att] - m_min_temp[node_base + att]) / range;
          if (node_range > max_range) {
            split_candidate = att;
            max_range = node_range;
          }
        }
      }

      m_node_split_dim[node] = split_candidate;
      if (split_candidate < 0) {
        // Nothing to split on: keep the -1 marker and store a threshold that no finite
        // attribute value exceeds.
        m_node_split_value[node] = Double.MAX_VALUE;
      } else {
        // The midpoint must be taken on the raw node extent; max_range is only the normalised
        // ratio used for ranking the attributes.
        double low = m_min_temp[node_base + split_candidate];
        double high = m_max_temp[node_base + split_candidate];
        m_node_split_value[node] = low + (high - low) / 2;
      }
    }

    // Pass 3: push every instance down into one of its node's two children.
    for (int i = 0; i < m_instance_data.rows(); ++i) {
      int node_id = m_leaf_node_ids[i];
      int split_dim = m_node_split_dim[node_id];
      int first_child = (int) child(node_id);

      if (split_dim < 0) {
        // No usable split dimension for this node; reading the buffer at split_dim would address
        // the wrong value, so keep the instance in the first child.
        m_leaf_node_ids[i] = first_child;
        continue;
      }

      double split_value = m_node_split_value[node_id];
      double att_value =
          attributes.read((split_dim + i * m_num_attributes) * DirectMemory.DOUBLE_SIZE);
      if (att_value > split_value) m_leaf_node_ids[i] = first_child + 1;
      else m_leaf_node_ids[i] = first_child;
    }

    m_instance_data.commit();
  }