/** * Parses the predicate definitions from the CSV file. * * @param csvFile The buffered reader containing the contents of the CSV file we are trying parse. * @param ds The destination datastore for the csv file. * @return The next line to be parsed from the file. * @throws IOException If unable to read from the csvFile. */ private String parseDefinitions(final BufferedReader csvFile, final Datastore db) throws IOException { // Keep parsing lines and putting them in the newly formed nominal // variable until we get to a line indicating the end of file or a new // variable section. String line = csvFile.readLine(); while ((line != null) && Character.isDigit(line.charAt(0))) { /* * TODO Parsing predicates. * // Parse arguments - for predicate vocab element. String[] token = line.split(":|(?<!\\\\)-"); PredicateVocabElement pve = new PredicateVocabElement(db, this.stripEscChars(token[1])); for (String arg : token[2].split(",")) { pve.appendFormalArg(parseFormalArgument(arg, db)); } db.addPredVE(pve); */ // Get the next line in the file for reading. line = csvFile.readLine(); } return line; }
/** * Method to invoke when we encounter a block of text in the CSV file that is the contents of a * predicate variable. * * @param csvFile The csvFile we are currently parsing. * @param var The variable that we will be adding cells too. * @param arg The matrix template we are using when parsing individual matrix elements to put in * the spreadsheet. * @return The next line in the file that is not part of the block of text in the CSV file. * @throws IOException If unable to read the file correctly. */ private String parseMatrixVariable( final BufferedReader csvFile, final Variable var, final Argument arg) throws IOException { String line = csvFile.readLine(); while ((line != null) && Character.isDigit(line.charAt(0))) { // Split the line into tokens using a comma delimiter. String[] tokens = line.split(","); Cell newCell = var.createCell(); // Set the onset and offset from tokens in the line. newCell.setOnset(tokens[DATA_ONSET]); newCell.setOffset(tokens[DATA_OFFSET]); // Strip the brackets from the first and last argument. tokens[DATA_INDEX] = tokens[DATA_INDEX].substring(1, tokens[DATA_INDEX].length()); int end = tokens.length - 1; tokens[end] = tokens[end].substring(0, tokens[end].length() - 1); parseFormalArgs(tokens, DATA_INDEX, var.getVariableType(), (MatrixValue) newCell.getValue()); // Get the next line in the file for reading. line = csvFile.readLine(); } return line; }
/** * Method to invoke when we encounter a block of text in the CSV file that is the contents of a * variable. * * @param csvFile The csvFile we are currently parsing. * @param var The variable that we will be adding cells too. * @param The populator to use when converting the contents of the cell into a datavalue that can * be inserted into the spreadsheet. * @return The next line in the file that is not part of the block of text in the CSV file. * @throws IOException If unable to read the file correctly. */ private String parseEntries( final BufferedReader csvFile, final Variable var, final EntryPopulator populator) throws IOException { // Keep parsing lines and putting them in the newly formed nominal // variable until we get to a line indicating the end of file or a new // variable section. String line = csvFile.readLine(); while ((line != null) && Character.isDigit(line.charAt(0))) { // Split the line into tokens using a comma delimiter. String[] tokens = line.split(","); // BugzID: 1075 - If the line ends with an escaped new line - add // the next line to the current text field. while ((line != null) && line.endsWith("\\") && !line.endsWith("\\\\")) { line = csvFile.readLine(); String content = tokens[tokens.length - 1]; content = content.substring(0, content.length() - 1); tokens[tokens.length - 1] = content + '\n' + line; } Cell newCell = var.createCell(); // Set the onset and offset from tokens in the line. newCell.setOnset(tokens[DATA_ONSET]); newCell.setOffset(tokens[DATA_OFFSET]); populator.populate(tokens, newCell.getValue()); // Get the next line in the file for reading. line = csvFile.readLine(); } return line; }
/** * Method to invoke when we encounter a block of text that is a variable. * * @param csvFile The CSV file we are currently reading. * @param line The line of the CSV file we are currently reading. * @param db The data store we are populating with data from the CSV file. * @return The next String that is not part of the currently variable that we are parsing. * @throws IOException When we are unable to read from the csvFile. * @throws UserWarningException When we are unable to create variables. */ private String parseVariable( final BufferedReader csvFile, final String line, final Datastore ds, final String version) throws IOException, UserWarningException { // Determine the variable name and type. String[] tokens = line.split("\\("); String varName = this.stripEscChars(tokens[0].trim()); String varType = null; String varComment = ""; boolean varVisible = true; if (version.equals("#4")) { String[] varArgs = tokens[1].split(","); varType = varArgs[0]; varVisible = Boolean.parseBoolean(varArgs[1]); varComment = varArgs[2].substring(0, varArgs[2].indexOf(")")); } else if (version.equals("#3")) { varType = tokens[1].substring(0, tokens[1].indexOf(",")); varVisible = Boolean.parseBoolean( tokens[1].substring(tokens[1].indexOf(",") + 1, tokens[1].indexOf(")"))); } else { varType = tokens[1].substring(0, tokens[1].indexOf(")")); } // BugzID:1703 - Ignore old macshapa query variables, we don't have a // reliable mechanisim for loading their predicates. Given problems // between the untyped nature of macshapa and the typed nature of // OpenSHAPA. if (varName.equals("###QueryVar###")) { String lineEater = csvFile.readLine(); while ((lineEater != null) && Character.isDigit(lineEater.charAt(0))) { lineEater = csvFile.readLine(); } return lineEater; } // Create variable to put cells within. Argument.Type variableType = getVarType(varType); Variable newVar = ds.createVariable(varName, variableType); newVar.setHidden(!varVisible); // Read text variable. if (variableType == Argument.Type.TEXT) { return parseEntries(csvFile, newVar, new PopulateText()); } else if (variableType == Argument.Type.NOMINAL) { // Read nominal variable. return parseEntries(csvFile, newVar, new PopulateNominal()); } /* else if (getVarType(varType) == MatrixVocabElement.MatrixType.INTEGER) { // Read integer variable. return parseEntries(csvFile, newVar.getLegacyVariable(), new PopulateInteger(legacyDb)); } else if (getVarType(varType) == MatrixVocabElement.MatrixType.FLOAT) { return parseEntries(csvFile, newVar.getLegacyVariable(), new PopulateFloat(legacyDb)); }*/ else if (variableType == Argument.Type.MATRIX) { // Read matrix variable - Build vocab for matrix. String[] vocabString = tokens[1].split("(?<!\\\\)-"); // Get the vocab element for the matrix and clean it up to be // populated with arguments from the CSV file. Argument newArg = newVar.getVariableType(); // MatrixVocabElement mve = legacyDb.getMatrixVE(varName); // mve.deleteFormalArg(0); // For each of the formal arguments in the file - parse it and // create a formal argument in the matrix vocab element. for (String arg : vocabString[1].split(",")) { newArg.childArguments.add(parseFormalArgument(arg)); } newVar.setVariableType(newArg); return parseMatrixVariable(csvFile, newVar, newArg); // Read predicate variable. } /*else if (getVarType(varType) == MatrixVocabElement.MatrixType.PREDICATE) { return parsePredicateVariable(csvFile, newVar.getLegacyVariable()); }*/ throw new IllegalStateException("Unknown variable type."); }
/** * This method parses a CSV input stream and populates the database (and spreadsheet) with data. * The caller is responsible for managing the stream. * * @param inStream The stream to deserialized when populating the database. * @return populated database on sucess, null otherwise. */ public Datastore openAsCSV(final InputStream inStream) { try { LOGGER.event("open csv database from stream"); Datastore db = DatastoreFactory.newDatastore(); db.setTitleNotifier(OpenSHAPA.getApplication()); InputStreamReader isr = new InputStreamReader(inStream); BufferedReader csvFile = new BufferedReader(isr); // Read each line of the CSV file. String line = csvFile.readLine(); // If we have a version identifier parse the file using the schema // that matches that identifier. if ("#4".equalsIgnoreCase(line)) { // Version 4 includes a comment for columns. line = parseDefinitions(csvFile, db); while (line != null) { line = parseVariable(csvFile, line, db, "#4"); } } else if ("#3".equalsIgnoreCase(line)) { // Version 3 includes column visible status after the column type // Parse predicate definitions first. line = parseDefinitions(csvFile, db); while (line != null) { line = parseVariable(csvFile, line, db, "#3"); } } else if ("#2".equalsIgnoreCase(line)) { // Parse predicate definitions first. line = parseDefinitions(csvFile, db); while (line != null) { line = parseVariable(csvFile, line, db); } } else { // Use the original schema to load the file - just variables, // and no escape characters. while (line != null) { line = parseVariable(csvFile, line, db); } } csvFile.close(); isr.close(); return db; } catch (IOException e) { LOGGER.error("Unable to read line from CSV file", e); } catch (UserWarningException e) { LOGGER.error("Unable to create new variable.", e); } // Error encountered - return null. return null; }