/**
 * Creates the SQL Server bulk-load writer, configured from the
 * {@code mssql.bulk.load.*} parameters.
 *
 * <p>Of the arguments, only {@code symmetricDialect} is read here (for its platform); the
 * other arguments are accepted but not used by this method. The row and field terminators are
 * read as Java-escaped text and unescaped before being handed to the writer.
 *
 * @param sourceNodeId not used by this method
 * @param symmetricDialect dialect whose platform is passed to the writer
 * @param transformWriter not used by this method
 * @param filters not used by this method
 * @param errorHandlers not used by this method
 * @param conflictSettings not used by this method
 * @param resolvedData not used by this method
 * @return a new {@code MsSqlBulkDatabaseWriter}
 */
public IDataWriter getDataWriter(
    String sourceNodeId,
    ISymmetricDialect symmetricDialect,
    TransformWriter transformWriter,
    List<IDatabaseWriterFilter> filters,
    List<IDatabaseWriterErrorHandler> errorHandlers,
    List<? extends Conflict> conflictSettings,
    List<ResolvedData> resolvedData) {
  final int flushThreshold =
      parameterService.getInt("mssql.bulk.load.max.rows.before.flush", 100000);
  final boolean triggersEnabled = parameterService.is("mssql.bulk.load.fire.triggers", false);
  final String sharePath = parameterService.getString("mssql.bulk.load.unc.path");

  // Terminators are configured in escaped form, so they are unescaped before use.
  final String rawRowTerminator =
      parameterService.getString("mssql.bulk.load.row.terminator", "\\r\\n");
  final String rowEnd = StringEscapeUtils.unescapeJava(rawRowTerminator);

  final String rawFieldTerminator =
      parameterService.getString("mssql.bulk.load.field.terminator", "||");
  final String fieldEnd = StringEscapeUtils.unescapeJava(rawFieldTerminator);

  return new MsSqlBulkDatabaseWriter(
      symmetricDialect.getPlatform(),
      stagingManager,
      jdbcExtractor,
      flushThreshold,
      triggersEnabled,
      sharePath,
      fieldEnd,
      rowEnd);
}
/**
 * Checks a file name against the configured sync blacklists.
 *
 * <p>A name is rejected when it is blank, contains any blacklisted character sequence, ends
 * with any blacklisted trailing sequence, or when its name without extension equals a
 * blacklisted name (ignoring case). Trailing-sequence entries that are written as a Java
 * Unicode escape are unescaped before the comparison.
 *
 * @param fileName the file name to check
 * @return {@code true} if none of the blacklist rules match, {@code false} otherwise
 */
public static boolean isValidFileName(String fileName) {
  if (StringUtils.isBlank(fileName)) {
    return false;
  }

  for (String forbidden : PropsValues.SYNC_FILE_BLACKLIST_CHARS) {
    if (fileName.contains(forbidden)) {
      return false;
    }
  }

  for (String configuredSuffix : PropsValues.SYNC_FILE_BLACKLIST_CHARS_LAST) {
    // Entries given as a Unicode escape are turned into the actual character first.
    String suffix =
        configuredSuffix.startsWith("\\u")
            ? StringEscapeUtils.unescapeJava(configuredSuffix)
            : configuredSuffix;

    if (fileName.endsWith(suffix)) {
      return false;
    }
  }

  String baseName = FilenameUtils.removeExtension(fileName);

  for (String reserved : PropsValues.SYNC_FILE_BLACKLIST_NAMES) {
    if (baseName.equalsIgnoreCase(reserved)) {
      return false;
    }
  }

  return true;
}
/**
 * Builds the label from the {@code product.selection.detected.card.button.label} entry of the
 * {@code bundles/nexu} resource bundle.
 *
 * <p>The bundle entry is used as a {@link MessageFormat} pattern with the terminal index,
 * terminal label and ATR as arguments; Java escape sequences in the formatted text are then
 * unescaped.
 *
 * @return the formatted, unescaped label
 */
@Override
public String getLabel() {
  final String pattern =
      ResourceBundle.getBundle("bundles/nexu")
          .getString("product.selection.detected.card.button.label");
  final String formatted =
      MessageFormat.format(
          pattern, this.getTerminalIndex(), this.getTerminalLabel(), this.getAtr());
  return StringEscapeUtils.unescapeJava(formatted);
}
/** * Unescapes any Java literals found in the <code>String</code>. For example, it will turn a * sequence of <code>'\'</code> and <code>'n'</code> into a newline character, unless the <code> * '\'</code> is preceded by another <code>'\'</code>. * * @param str the <code>String</code> to unescape, may be null * @return a new unescaped <code>String</code>, <code>null</code> if null string input */ public static String unescapeJava(String str) { if (str == null) { return null; } try { StringWriter writer = new StringWriter(str.length()); unescapeJava(writer, str); return writer.toString(); } catch (IOException ioe) { // this should never ever happen while writing to a StringWriter throw new UnhandledException(ioe); } }
/** * Unescapes any Java literals found in the <code>String</code>. For example, it will turn a * sequence of <code>'\'</code> and <code>'n'</code> into a newline character, unless the <code> * '\'</code> is preceded by another <code>'\'</code>. * * @param str the <code>String</code> to unescape, may be null * @return a new unescaped <code>String</code>, <code>null</code> if null string input */ public static String unescapeJava(String str) { if (str == null) { return null; } try { StringPrintWriter writer = new StringPrintWriter(str.length()); unescapeJava(writer, str); return writer.getString(); } catch (IOException ioe) { // this should never ever happen while writing to a StringWriter ioe.printStackTrace(); return null; } }
@Override public void process(JCas aJCas) throws AnalysisEngineProcessException { modelProvider.configure(aJCas.getCas()); modelProvider.getResource(); String documentText = aJCas.getDocumentText(); // Unescape text, if Chinese, Arabic and stuff are thrown in as UTF-8 escaped sequence it // will lead to an increased error rate String unescapedDocumentText = StringEscapeUtils.unescapeJava(documentText); String language = detectLanguage(unescapedDocumentText); aJCas.setDocumentLanguage(language); }
public static String recodeForDbp38(String uriString) { String ret; URI uri = null; if (uriString.startsWith("http://") || uriString.startsWith("https://")) { // First try to parse the string as an URI so that any superfluous // percent-encodings can get decoded later try { uri = new URI(uriString); } catch (Exception ex) { throw new GateRuntimeException("Could not parse URI " + uriString, ex); } // now use this constructor to-recode only the necessary parts try { String path = uri.getPath(); path = path.trim(); path = path.replaceAll(" +", "_"); uri = new URI( uri.getScheme(), null, uri.getHost(), -1, path, uri.getQuery(), uri.getFragment()); } catch (Exception ex) { throw new GateRuntimeException("Could not re-construct URI: " + uri); } ret = uri.toString(); } else { if (uriString.contains("\\u")) { uriString = StringEscapeUtils.unescapeJava(uriString); } uriString = uriString.trim(); uriString = uriString.replaceAll(" +", "_"); // We need to %-encode colons, otherwise the getPath() method will return // null ... uriString = uriString.replaceAll(":", "%3A"); try { uri = new URI(uriString); // decode and prepare for minimal percent encoding uriString = uri.getPath(); } catch (URISyntaxException ex) { // do nothing: the uriString must already be ready for percent-encoding } uriString = uriString.replaceAll(" +", "_"); try { uri = new URI(null, null, null, -1, "/" + uriString, null, null); } catch (Exception ex) { throw new GateRuntimeException("Could not re-construct URI part: " + uriString); } ret = uri.toString().substring(1); } return ret; }
/**
 * Replaces every key of the {@code specialCharacters} section of the main configuration that
 * occurs in the given text.
 *
 * <p>For each key found in the text, the configured value is appended to a Unicode escape
 * prefix, the resulting escape is unescaped, and the outcome replaces every occurrence of the
 * key. If the section is missing, the text is returned unchanged.
 *
 * @param s the text to process
 * @return the text with all configured keys replaced
 */
public static String replaceAll(String s) {
  YAMLConfig config = HoloAPI.getInstance().getConfig(HoloAPI.ConfigType.MAIN);
  ConfigurationSection section = config.getConfigurationSection("specialCharacters");
  if (section == null) {
    return s;
  }

  String result = s;
  for (String placeholder : section.getKeys(false)) {
    if (!result.contains(placeholder)) {
      continue;
    }
    String configuredValue = config.getString("specialCharacters." + placeholder);
    String replacement = StringEscapeUtils.unescapeJava("\\u" + configuredValue);
    result = result.replace(placeholder, replacement);
  }
  return result;
}
/**
 * Requests the CRM account resource and returns the response body parsed as a JSON object.
 *
 * <p>The request URL is built from the settings link, {@code GET_CRM_ACCOUNT_PATH} and a query
 * string carrying the login and the settings token.
 *
 * @param settings supplies the link and the token used to build the request URL
 * @return the response body parsed as a {@code JsonObject}
 */
public JsonObject getCrmAccount(CrmSettings settings) {
  // NOTE(review): the USER_LOGIN part of the expression below appears to have been redacted
  // ("******") and is not valid Java as written; restore the original login expression.
  WebResource resource =
      getAmoCrmWebResource(
          settings.getLink()
              + GET_CRM_ACCOUNT_PATH
              + "?USER_LOGIN="******"&USER_HASH="
              + settings.getToken());
  ClientResponse response = resource.get(ClientResponse.class);
  // NOTE(review): the whole body is Java-unescaped before JSON parsing. This presumably targets
  // escaped Unicode in the payload, but it would also rewrite escapes inside JSON string
  // values (for example an escaped quote) — confirm this cannot break parsing.
  String output = StringEscapeUtils.unescapeJava(response.getEntity(String.class));
  JsonParser parser = new JsonParser();
  return parser.parse(output).getAsJsonObject();
}
/**
 * Produces a file name that satisfies the configured sync blacklists and a 255-character
 * limit.
 *
 * <p>Steps, in order: every blacklisted character sequence is replaced by an underscore; for
 * each blacklisted trailing sequence the name ends with, one trailing character is removed;
 * the extension is appended unless the name already ends with it (ignoring case); and a name
 * longer than 255 characters is shortened by cutting characters just before the extension
 * (or just before the last character when no extension is given).
 *
 * @param fileName the original file name
 * @param extension the extension to ensure, without the dot; may be null or empty
 * @return the sanitized file name
 */
public static String getSanitizedFileName(String fileName, String extension) {
  for (String forbidden : PropsValues.SYNC_FILE_BLACKLIST_CHARS) {
    fileName = fileName.replace(forbidden, "_");
  }

  for (String configuredSuffix : PropsValues.SYNC_FILE_BLACKLIST_CHARS_LAST) {
    // Entries given as a Unicode escape are turned into the actual character first.
    String suffix =
        configuredSuffix.startsWith("\\u")
            ? StringEscapeUtils.unescapeJava(configuredSuffix)
            : configuredSuffix;

    if (fileName.endsWith(suffix)) {
      fileName = fileName.substring(0, fileName.length() - 1);
    }
  }

  final boolean hasExtension = (extension != null) && !extension.isEmpty();

  if (hasExtension) {
    int dot = fileName.lastIndexOf(".");
    boolean alreadyPresent =
        (dot != -1) && extension.equalsIgnoreCase(fileName.substring(dot + 1));
    if (!alreadyPresent) {
      fileName += "." + extension;
    }
  }

  if (fileName.length() > 255) {
    // Keep the tail (extension or last character) and trim the excess in front of it.
    int tailStart = hasExtension ? fileName.lastIndexOf(".") : fileName.length() - 1;
    int headEnd = tailStart - (fileName.length() - 255);
    fileName = fileName.substring(0, headEnd) + fileName.substring(tailStart);
  }

  return fileName;
}
/**
 * Creates the record writer of a {@code PigTextOutputFormat} using the configured field
 * separator.
 *
 * <p>When {@code CubertStrings.TEXT_OUTPUT_SEPARATOR} is not set, {@code defaultDelimiter} is
 * used. Otherwise the configured value is Java-unescaped and must encode to exactly one UTF-8
 * byte; an empty or multi-byte separator is rejected with a descriptive exception instead of
 * failing on the array access.
 *
 * @param context the task attempt context providing the configuration
 * @return the record writer of the configured text output format
 * @throws RuntimeException if the configured separator is not exactly one UTF-8 byte
 * @throws IOException if thrown while encoding the separator or by the delegate output format
 * @throws InterruptedException if thrown by the delegate output format
 */
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  String separator = conf.get(CubertStrings.TEXT_OUTPUT_SEPARATOR);

  PigTextOutputFormat outputFormat;
  if (separator == null) {
    outputFormat = new PigTextOutputFormat(defaultDelimiter);
  } else {
    String str = StringEscapeUtils.unescapeJava(separator);
    byte[] bytes = str.getBytes("UTF-8");
    // Exactly one byte is required: zero bytes would otherwise fail on bytes[0] below.
    if (bytes.length != 1) {
      throw new RuntimeException(
          String.format("Invalid separator in text output format %s", str));
    }
    outputFormat = new PigTextOutputFormat(bytes[0]);
  }
  return outputFormat.getRecordWriter(context);
}
/**
 * Unescapes any JavaScript literals found in the <code>String</code> to a <code>Writer</code>.
 *
 * <p>This method delegates directly to {@link #unescapeJava(Writer,String)}, so its behavior
 * is exactly that of the Java variant. For example, it will turn a sequence of
 * <code>'\'</code> and <code>'n'</code> into a newline character, unless the <code>'\'</code>
 * is preceded by another <code>'\'</code>.
 *
 * <p>A <code>null</code> string input has no effect.
 *
 * @see #unescapeJava(Writer,String)
 * @param out the <code>Writer</code> used to output unescaped characters
 * @param str the <code>String</code> to unescape, may be null
 * @throws IllegalArgumentException if the Writer is <code>null</code>
 * @throws IOException if error occurs on underlying Writer
 */
public static void unescapeJavaScript(Writer out, String str) throws IOException {
  unescapeJava(out, str);
}
/**
 * Creates a Hive metastore table from the given table description.
 *
 * <p>The method builds a Hive {@code Table} and {@code StorageDescriptor} from the description
 * (name, options, columns, partition keys, and store-type specific input/output formats and
 * SerDe settings) and submits it through a client taken from {@code clientPool}. The client is
 * released in the {@code finally} block.
 *
 * @param tableDescProto the serialized table description
 * @throws CatalogException declared by the overridden method
 * @throws TajoInternalError wrapping any {@code Throwable} raised while building or creating
 *     the table, including the {@code UnsupportedException} thrown for an unknown store type
 */
@Override
public final void createTable(final CatalogProtos.TableDescProto tableDescProto)
    throws CatalogException {
  HiveCatalogStoreClientPool.HiveCatalogStoreClient client = null;

  TableDesc tableDesc = new TableDesc(tableDescProto);
  // The fully qualified table name is split into database name and table name.
  String[] splitted = CatalogUtil.splitFQTableName(tableDesc.getName());
  String databaseName = splitted[0];
  String tableName = splitted[1];

  try {
    client = clientPool.getClient();

    org.apache.hadoop.hive.metastore.api.Table table =
        new org.apache.hadoop.hive.metastore.api.Table();
    table.setDbName(databaseName);
    table.setTableName(tableName);
    // Start from a copy of all table options; some are removed again below once they have
    // been translated into SerDe parameters.
    table.setParameters(
        new HashMap<String, String>(tableDesc.getMeta().getOptions().getAllKeyValus()));
    // TODO: set owner
    // table.setOwner();

    StorageDescriptor sd = new StorageDescriptor();
    sd.setSerdeInfo(new SerDeInfo());
    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
    sd.getSerdeInfo().setName(table.getTableName());

    // If Tajo sets the location, the thrift client fails as follows:
    // Caused by: MetaException(message:java.lang.NullPointerException)
    // To modify the table path, change it via the Hive CLI.
    // The location is therefore only set for external tables.
    if (tableDesc.isExternal()) {
      table.setTableType(TableType.EXTERNAL_TABLE.name());
      table.putToParameters("EXTERNAL", "TRUE");

      Path tablePath = new Path(tableDesc.getUri());
      FileSystem fs = tablePath.getFileSystem(conf);
      if (fs.isFile(tablePath)) {
        // A file path is replaced by its parent directory.
        LOG.warn("A table path is a file, but HiveCatalogStore does not allow a file path.");
        sd.setLocation(tablePath.getParent().toString());
      } else {
        sd.setLocation(tablePath.toString());
      }
    }

    // set column information
    List<Column> columns = tableDesc.getSchema().getRootColumns();
    ArrayList<FieldSchema> cols = new ArrayList<FieldSchema>(columns.size());

    for (Column eachField : columns) {
      cols.add(
          new FieldSchema(
              eachField.getSimpleName(),
              HiveCatalogUtil.getHiveFieldType(eachField.getDataType()),
              ""));
    }
    sd.setCols(cols);

    // set partition keys (only for COLUMN partitioning)
    if (tableDesc.hasPartition()
        && tableDesc.getPartitionMethod().getPartitionType().equals(PartitionType.COLUMN)) {
      List<FieldSchema> partitionKeys = new ArrayList<FieldSchema>();
      for (Column eachPartitionKey :
          tableDesc.getPartitionMethod().getExpressionSchema().getRootColumns()) {
        partitionKeys.add(
            new FieldSchema(
                eachPartitionKey.getSimpleName(),
                HiveCatalogUtil.getHiveFieldType(eachPartitionKey.getDataType()),
                ""));
      }
      table.setPartitionKeys(partitionKeys);
    }

    // Store-type specific formats and SerDe settings.
    if (tableDesc.getMeta().getStoreType().equalsIgnoreCase(BuiltinStorages.RCFILE)) {
      String serde = tableDesc.getMeta().getOption(StorageConstants.RCFILE_SERDE);
      sd.setInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat.class.getName());
      sd.setOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat.class.getName());
      if (StorageConstants.DEFAULT_TEXT_SERDE.equals(serde)) {
        sd.getSerdeInfo()
            .setSerializationLib(
                org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe.class.getName());
      } else {
        sd.getSerdeInfo()
            .setSerializationLib(
                org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe.class.getName());
      }

      // NOTE(review): unlike the TEXT and SEQUENCE_FILE branches, the RCFILE null option is
      // not removed from the table parameters afterwards — confirm whether that is intended.
      if (tableDesc.getMeta().getOptions().containsKey(StorageConstants.RCFILE_NULL)) {
        table.putToParameters(
            serdeConstants.SERIALIZATION_NULL_FORMAT,
            StringEscapeUtils.unescapeJava(
                tableDesc.getMeta().getOption(StorageConstants.RCFILE_NULL)));
      }
      // NOTE(review): this store-type comparison is case-sensitive, while the other branches
      // use equalsIgnoreCase — confirm whether that is intended.
    } else if (tableDesc.getMeta().getStoreType().equals(BuiltinStorages.TEXT)) {
      sd.getSerdeInfo()
          .setSerializationLib(
              org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
      sd.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class.getName());
      sd.setOutputFormat(
          org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat.class.getName());

      String fieldDelimiter =
          tableDesc
              .getMeta()
              .getOption(
                  StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);

      // User can use a unicode value for the field delimiter such as \u0001, \001.
      // In this case, java console will convert this value into "\\u001".
      // And hive will un-escape this value again.
      // As a result, user can use right field delimiter.
      // So, we have to un-escape this value.
      sd.getSerdeInfo()
          .putToParameters(
              serdeConstants.SERIALIZATION_FORMAT,
              StringEscapeUtils.unescapeJava(fieldDelimiter));
      sd.getSerdeInfo()
          .putToParameters(
              serdeConstants.FIELD_DELIM, StringEscapeUtils.unescapeJava(fieldDelimiter));
      table.getParameters().remove(StorageConstants.TEXT_DELIMITER);

      if (tableDesc.getMeta().containsOption(StorageConstants.TEXT_NULL)) {
        table.putToParameters(
            serdeConstants.SERIALIZATION_NULL_FORMAT,
            StringEscapeUtils.unescapeJava(
                tableDesc.getMeta().getOption(StorageConstants.TEXT_NULL)));
        table.getParameters().remove(StorageConstants.TEXT_NULL);
      }
    } else if (tableDesc
        .getMeta()
        .getStoreType()
        .equalsIgnoreCase(BuiltinStorages.SEQUENCE_FILE)) {
      String serde = tableDesc.getMeta().getOption(StorageConstants.SEQUENCEFILE_SERDE);
      sd.setInputFormat(org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName());
      sd.setOutputFormat(
          org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat.class.getName());

      if (StorageConstants.DEFAULT_TEXT_SERDE.equals(serde)) {
        sd.getSerdeInfo()
            .setSerializationLib(
                org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());

        String fieldDelimiter =
            tableDesc
                .getMeta()
                .getOption(
                    StorageConstants.SEQUENCEFILE_DELIMITER,
                    StorageConstants.DEFAULT_FIELD_DELIMITER);

        // User can use a unicode value for the field delimiter such as \u0001, \001.
        // In this case, java console will convert this value into "\\u001".
        // And hive will un-escape this value again.
        // As a result, user can use right field delimiter.
        // So, we have to un-escape this value.
        sd.getSerdeInfo()
            .putToParameters(
                serdeConstants.SERIALIZATION_FORMAT,
                StringEscapeUtils.unescapeJava(fieldDelimiter));
        sd.getSerdeInfo()
            .putToParameters(
                serdeConstants.FIELD_DELIM, StringEscapeUtils.unescapeJava(fieldDelimiter));
        table.getParameters().remove(StorageConstants.SEQUENCEFILE_DELIMITER);
      } else {
        sd.getSerdeInfo()
            .setSerializationLib(
                org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class.getName());
      }

      if (tableDesc.getMeta().containsOption(StorageConstants.SEQUENCEFILE_NULL)) {
        table.putToParameters(
            serdeConstants.SERIALIZATION_NULL_FORMAT,
            StringEscapeUtils.unescapeJava(
                tableDesc.getMeta().getOption(StorageConstants.SEQUENCEFILE_NULL)));
        table.getParameters().remove(StorageConstants.SEQUENCEFILE_NULL);
      }
    } else {
      if (tableDesc.getMeta().getStoreType().equalsIgnoreCase(BuiltinStorages.PARQUET)) {
        sd.setInputFormat(parquet.hive.DeprecatedParquetInputFormat.class.getName());
        sd.setOutputFormat(parquet.hive.DeprecatedParquetOutputFormat.class.getName());
        sd.getSerdeInfo()
            .setSerializationLib(parquet.hive.serde.ParquetHiveSerDe.class.getName());
      } else {
        // Any other store type is rejected; the catch block below wraps this exception.
        throw new UnsupportedException(
            tableDesc.getMeta().getStoreType() + " in HivecatalogStore");
      }
    }

    sd.setSortCols(new ArrayList<Order>());

    table.setSd(sd);
    client.getHiveClient().createTable(table);
  } catch (Throwable t) {
    // Every failure is rethrown as TajoInternalError with the original as its cause.
    throw new TajoInternalError(t);
  } finally {
    if (client != null) client.release();
  }
}
/**
 * Null-safe wrapper around {@code StringEscapeUtils.unescapeJava}.
 *
 * @param string the text to unescape, may be null
 * @return the unescaped text, or {@code null} when the input is {@code null}
 */
public static String unescapeJava(String string) {
  if (string == null) {
    return null;
  }
  return StringEscapeUtils.unescapeJava(string);
}