public void parseGroup(Group g) throws Hdf5Exception, EndOfSequenceException { startGroup(g); java.util.List members = g.getMemberList(); // NOTE: parsing contents twice to ensure subgroups are handled before datasets // This is mainly because synapse_props groups will need to be parsed before dataset of // connections for (int j = 0; j < members.size(); j++) { HObject obj = (HObject) members.get(j); if (obj instanceof Group) { Group subGroup = (Group) obj; logger.logComment("--------- Found a sub group: " + subGroup.getName()); parseGroup(subGroup); } } for (int j = 0; j < members.size(); j++) { HObject obj = (HObject) members.get(j); if (obj instanceof Dataset) { Dataset ds = (Dataset) obj; logger.logComment("Found a dataset: " + ds.getName()); dataSet(ds); } } endGroup(g); }
static void calculateEigenvector(String file, String chr, int binsize) throws IOException { if (!file.endsWith("hic")) { System.err.println("Only 'hic' files are supported"); System.exit(-1); } // Load the expected density function, if it exists. Map<Integer, DensityFunction> zoomToDensityMap = null; String densityFile = file + ".densities"; if (FileUtils.resourceExists(densityFile)) { InputStream is = null; try { is = ParsingUtils.openInputStream(densityFile); zoomToDensityMap = DensityUtil.readDensities(is); } finally { if (is != null) is.close(); } } else { System.err.println("Densities file doesn't exist"); System.exit(-1); } SeekableStream ss = IGVSeekableStreamFactory.getStreamFor(file); Dataset dataset = (new DatasetReader(ss)).read(); Chromosome[] tmp = dataset.getChromosomes(); Map<String, Chromosome> chromosomeMap = new HashMap<String, Chromosome>(); for (Chromosome c : tmp) { chromosomeMap.put(c.getName(), c); } if (!chromosomeMap.containsKey(chr)) { System.err.println("Unknown chromosome: " + chr); System.exit(-1); } int zoomIdx = 0; boolean found = false; for (; zoomIdx < HiCGlobals.zoomBinSizes.length; zoomIdx++) { if (HiCGlobals.zoomBinSizes[zoomIdx] == binsize) { found = true; break; } } if (!found) { System.err.println("Unknown bin size: " + binsize); System.exit(-1); } Matrix matrix = dataset.getMatrix(chromosomeMap.get(chr), chromosomeMap.get(chr)); MatrixZoomData zd = matrix.getObservedMatrix(zoomIdx); final DensityFunction df = zoomToDensityMap.get(zd.getZoom()); double[] eigenvector = zd.computeEigenvector(df, 0); for (double ev : eigenvector) System.out.print(ev + " "); System.out.println(); }
/**
 * Creates a {@code Dataset} for the given file name, calls {@code parsestories()} on it, then
 * calls {@code Stopwords.genstopwords()} and returns the dataset.
 *
 * @param fname file name handed to the {@code Dataset} constructor
 * @return the newly created dataset, after {@code parsestories()} has been called on it
 */
public static Dataset fromfile(String fname) {
  final Dataset loaded = new Dataset(fname);
  loaded.parsestories();

  // Runs after the stories are parsed, in the same order as before.
  Stopwords.genstopwords();

  return loaded;
}
/**
 * Builds the builder, reads the dataset, creates fresh params, and then dumps every dataset
 * group to its own file.
 *
 * <p>For each group the output is opened on {@code Execution.getFile("dumped-<group>.gz")},
 * {@code processExamples} is called for the group's examples, and the output is closed. The
 * close happens in a {@code finally} so the output is not left open when
 * {@code processExamples} throws.
 */
@Override
public void run() {
  builder = new Builder();
  builder.build();

  dataset = new Dataset();
  dataset.read();

  params = new Params();

  for (String group : dataset.groups()) {
    String filename = Execution.getFile("dumped-" + group + ".gz");
    out = IOUtils.openOutHard(filename);
    try {
      processExamples(group, dataset.examples(group));
    } finally {
      // Always release the output, even if processing this group failed.
      out.close();
    }

    LogInfo.logs("Finished dumping to %s", filename);
    StopWatchSet.logStats();
  }
}
public void dataSet(Dataset d) throws Hdf5Exception { logger.logComment("----- Looking through dataset: " + d); ArrayList<Attribute> attrs = Hdf5Utils.parseDatasetForAttributes(d); for (Attribute attribute : attrs) { logger.logComment( "Dataset: " + d.getName() + " has attribute: " + attribute.getName() + " = " + Hdf5Utils.getFirstStringValAttr(attrs, attribute.getName())); } float[][] data = Hdf5Utils.parse2Ddataset(d); logger.logComment("Data has size: (" + data.length + ", " + data[0].length + ")"); if (inPopulations && currentCellGroup != null) { for (int i = 0; i < data.length; i++) { int id = (int) data[i][0]; float x = data[i][1]; float y = data[i][2]; float z = data[i][3]; PositionRecord posRec = new PositionRecord(id, x, y, z); if (data[0].length == 5) { posRec.setNodeId((int) data[i][4]); } this.project.generatedCellPositions.addPosition(currentCellGroup, posRec); } } if (inProjections && currentNetConn != null) { logger.logComment("Adding info for NetConn: " + currentNetConn); int id_col = -1; int pre_cell_id_col = -1; int pre_segment_id_col = -1; int pre_fraction_along_col = -1; int post_cell_id_col = -1; int post_segment_id_col = -1; int post_fraction_along_col = -1; int prop_delay_col = -1; for (Attribute attribute : attrs) { String storedInColumn = Hdf5Utils.getFirstStringValAttr(attrs, attribute.getName()); if (storedInColumn.equals(NetworkMLConstants.CONNECTION_ID_ATTR)) { id_col = Integer.parseInt(attribute.getName().substring("column_".length())); logger.logComment("id col: " + id_col); } else if (storedInColumn.equals(NetworkMLConstants.PRE_CELL_ID_ATTR)) { pre_cell_id_col = Integer.parseInt(attribute.getName().substring("column_".length())); } else if (storedInColumn.equals(NetworkMLConstants.PRE_SEGMENT_ID_ATTR)) { pre_segment_id_col = Integer.parseInt(attribute.getName().substring("column_".length())); logger.logComment("pre_segment_id_col: " + pre_segment_id_col); } else if (storedInColumn.equals(NetworkMLConstants.PRE_FRACT_ALONG_ATTR)) { 
pre_fraction_along_col = Integer.parseInt(attribute.getName().substring("column_".length())); logger.logComment("pre_fraction_along_col: " + pre_fraction_along_col); } else if (storedInColumn.equals(NetworkMLConstants.POST_CELL_ID_ATTR)) { post_cell_id_col = Integer.parseInt(attribute.getName().substring("column_".length())); } else if (storedInColumn.equals(NetworkMLConstants.POST_SEGMENT_ID_ATTR)) { post_segment_id_col = Integer.parseInt(attribute.getName().substring("column_".length())); } else if (storedInColumn.equals(NetworkMLConstants.POST_FRACT_ALONG_ATTR)) { post_fraction_along_col = Integer.parseInt(attribute.getName().substring("column_".length())); } else if (storedInColumn.startsWith(NetworkMLConstants.PROP_DELAY_ATTR)) { prop_delay_col = Integer.parseInt(attribute.getName().substring("column_".length())); } for (String synType : getConnectionSynTypes()) { if (storedInColumn.endsWith(synType)) { ConnSpecificProps cp = null; for (ConnSpecificProps currCp : localConnProps) { if (currCp.synapseType.equals(synType)) cp = currCp; } if (cp == null) { cp = new ConnSpecificProps(synType); cp.internalDelay = -1; cp.weight = -1; localConnProps.add(cp); } if (storedInColumn.startsWith(NetworkMLConstants.INTERNAL_DELAY_ATTR)) { cp.internalDelay = Integer.parseInt( attribute .getName() .substring("column_".length())); // store the col num temporarily.. } if (storedInColumn.startsWith(NetworkMLConstants.WEIGHT_ATTR)) { cp.weight = Integer.parseInt( attribute .getName() .substring("column_".length())); // store the col num temporarily.. 
} } } } for (int i = 0; i < data.length; i++) { int pre_seg_id = 0; float pre_fract_along = 0.5f; int post_seg_id = 0; float post_fract_along = 0.5f; int id = (int) data[i][id_col]; int pre_cell_id = (int) data[i][pre_cell_id_col]; int post_cell_id = (int) data[i][post_cell_id_col]; float prop_delay = 0; if (pre_segment_id_col >= 0) pre_seg_id = (int) data[i][pre_segment_id_col]; if (pre_fraction_along_col >= 0) pre_fract_along = data[i][pre_fraction_along_col]; if (post_segment_id_col >= 0) post_seg_id = (int) data[i][post_segment_id_col]; if (post_fraction_along_col >= 0) post_fract_along = data[i][post_fraction_along_col]; // (float)UnitConverter.getTime(XXXXXXXXX, UnitConverter.NEUROCONSTRUCT_UNITS, // unitSystem)+""; if (prop_delay_col >= 0) prop_delay = (float) UnitConverter.getTime( data[i][prop_delay_col], projUnitSystem, UnitConverter.NEUROCONSTRUCT_UNITS); ArrayList<ConnSpecificProps> props = new ArrayList<ConnSpecificProps>(); if (localConnProps.size() > 0) { for (ConnSpecificProps currCp : localConnProps) { logger.logComment("Pre cp: " + currCp); ConnSpecificProps cp2 = new ConnSpecificProps(currCp.synapseType); if (currCp.internalDelay > 0) // index was stored in this val... cp2.internalDelay = (float) UnitConverter.getTime( data[i][(int) currCp.internalDelay], projUnitSystem, UnitConverter.NEUROCONSTRUCT_UNITS); if (currCp.weight > 0) // index was stored in this val... 
cp2.weight = data[i][(int) currCp.weight]; logger.logComment("Filled cp: " + cp2); props.add(cp2); } } this.project.generatedNetworkConnections.addSynapticConnection( currentNetConn, GeneratedNetworkConnections.MORPH_NETWORK_CONNECTION, pre_cell_id, pre_seg_id, pre_fract_along, post_cell_id, post_seg_id, post_fract_along, prop_delay, props); } } if (inInputs && currentInput != null) { logger.logComment("Adding info for: " + currentInput); StimulationSettings nextStim = project.elecInputInfo.getStim(currentInput); ElectricalInput myElectricalInput = nextStim.getElectricalInput(); String electricalInputType = myElectricalInput.getType(); String cellGroup = nextStim.getCellGroup(); for (int i = 0; i < data.length; i++) { Float fileCellId = data[i][0]; Float fileSegmentId = data[i][1]; Float fractionAlong = data[i][2]; int cellId = fileCellId.intValue(); int segmentId = fileSegmentId.intValue(); SingleElectricalInput singleElectricalInputFromFile = new SingleElectricalInput( electricalInputType, cellGroup, cellId, segmentId, fractionAlong, null); this.project.generatedElecInputs.addSingleInput( currentInput, singleElectricalInputFromFile); } } }
static void dumpMatrix(String file, String chr1, String chr2, int binsize, String type) throws IOException { if (!file.endsWith("hic")) { System.err.println("Only 'hic' files are supported"); System.exit(-1); } // Load the expected density function, if it exists. Map<Integer, DensityFunction> zoomToDensityMap = null; if (type.equals("oe") || type.equals("pearson")) { String densityFile = file + ".densities"; if (FileUtils.resourceExists(densityFile)) { InputStream is = null; try { is = ParsingUtils.openInputStream(densityFile); zoomToDensityMap = DensityUtil.readDensities(is); } finally { if (is != null) is.close(); } } else { System.err.println("Densities file doesn't exist, cannot calculate O/E or Pearson's"); System.exit(-1); } } SeekableStream ss = IGVSeekableStreamFactory.getStreamFor(file); Dataset dataset = (new DatasetReader(ss)).read(); Chromosome[] tmp = dataset.getChromosomes(); Map<String, Chromosome> chromosomeMap = new HashMap<String, Chromosome>(); for (Chromosome c : tmp) { chromosomeMap.put(c.getName(), c); } if (!chromosomeMap.containsKey(chr1)) { System.err.println("Unknown chromosome: " + chr1); System.exit(-1); } else if (!chromosomeMap.containsKey(chr2)) { System.err.println("Unknown chromosome: " + chr2); System.exit(-1); } if (type.equals("oe") || type.equals("pearson")) { if (!chr1.equals(chr2)) { System.err.println("Chromosome " + chr1 + " not equal to Chromosome " + chr2); System.err.println("Currently only intrachromosomal O/E and Pearson's are supported."); System.exit(-1); } } int zoomIdx = 0; boolean found = false; for (; zoomIdx < HiCGlobals.zoomBinSizes.length; zoomIdx++) { if (HiCGlobals.zoomBinSizes[zoomIdx] == binsize) { found = true; break; } } if (!found) { System.err.println("Unknown bin size: " + binsize); } Matrix matrix = dataset.getMatrix(chromosomeMap.get(chr1), chromosomeMap.get(chr2)); MatrixZoomData zd = matrix.getObservedMatrix(zoomIdx); if (type.equals("oe") || type.equals("pearson")) { final DensityFunction df = 
zoomToDensityMap.get(zd.getZoom()); if (df == null) { System.err.println("Densities not calculated to this resolution."); System.exit(-1); } zd.dumpOE(df, type.equals("oe")); } else zd.dump(); }
/**
 * Delegates dataset creation to the superclass and then assigns {@code fmrcEnhanceMode} to the
 * {@code enhance} field of every entry in {@code datasets}.
 *
 * @param cancelTask passed through unchanged to the superclass implementation
 * @throws IOException propagated from the superclass implementation
 */
@Override
protected void makeDatasets(CancelTask cancelTask) throws IOException {
  super.makeDatasets(cancelTask);

  // Apply this object's enhance mode to each dataset.
  for (Dataset current : datasets) {
    current.enhance = fmrcEnhanceMode;
  }
}