/** * Parse dataX job configuration file. * * @param filename Job configure filename. * @return a JobConf instance which describes this Job configuration file. */ @SuppressWarnings("unchecked") public static JobConf loadJobConfig(String filename) { JobConf job = new JobConf(); Document document; try { SAXReader reader = new SAXReader(); document = reader.read(new File(filename)); Element rootElement = document.getRootElement(); } catch (DocumentException e) { LOG.error("DataX Can not find " + filename + " ."); throw new DataExchangeException(e.getCause()); } catch (Exception e) { LOG.error(String.format("DataX read config file %s failed .", filename)); throw new DataExchangeException(e.getCause()); } String xpath = "/jobs/job"; Element jobE = (Element) document.selectSingleNode(xpath); job.setId( jobE.attributeValue("id") == null ? "DataX_is_still_a_virgin" : jobE.attributeValue("id").trim()); JobPluginConf readerJobConf = new JobPluginConf(); Element readerE = (Element) document.selectSingleNode(xpath + "/loader"); if (null == readerE) readerE = (Element) document.selectSingleNode(xpath + "/reader"); String readerId = readerE.attributeValue("id"); readerJobConf.setId(readerId == null ? "virgin-reader" : readerId.trim()); Element readerPluinE = (Element) readerE.selectSingleNode("plugin"); readerJobConf.setName( readerPluinE.getStringValue().trim().replace("loader", "reader").toLowerCase()); Map<String, String> readerParamMap = new HashMap<String, String>(); /* * for historic reason, we need to check concurrency node first add by * bazhen.csy */ if (readerE.selectSingleNode("concurrency") != null) { Element readerConcurrencyE = (Element) readerE.selectSingleNode("concurrency"); readerParamMap.put( "concurrency", StrUtils.replaceString(readerConcurrencyE.attributeValue("core"))); } List<Element> readerParamE = (List<Element>) readerE.selectNodes("param"); for (Element e : readerParamE) { readerParamMap.put( e.attributeValue("key").toLowerCase(), StrUtils.replaceString(e.attributeValue("value").trim())); } PluginParam readerPluginParam = new DefaultPluginParam(readerParamMap); // if (!readerPluginParam.hasValue("concurrency") // || readerPluginParam.getIntValue("concurrency", 1) < 0) { // throw new IllegalArgumentException( // "Reader option [concurrency] error !"); // } readerJobConf.setPluginParams(readerPluginParam); List<JobPluginConf> writerJobConfs = new ArrayList<JobPluginConf>(); List<Element> writerEs = (List<Element>) document.selectNodes(xpath + "/dumper"); if (null == writerEs || 0 == writerEs.size()) writerEs = (List<Element>) document.selectNodes(xpath + "/writer"); for (Element writerE : writerEs) { JobPluginConf writerPluginConf = new JobPluginConf(); String writerId = writerE.attributeValue("id"); writerPluginConf.setId(writerId == null ? "virgin-writer" : writerId.trim()); String destructLimit = writerE.attributeValue("destructlimit"); if (destructLimit != null) { writerPluginConf.setDestructLimit(Integer.valueOf(destructLimit)); } Element writerPluginE = (Element) writerE.selectSingleNode("plugin"); writerPluginConf.setName( writerPluginE.getStringValue().trim().replace("dumper", "writer").toLowerCase()); Map<String, String> writerParamMap = new HashMap<String, String>(); /* * for historic reason, we need to check concurrency node add by * bazhen.csy */ if (writerE.selectSingleNode("concurrency") != null) { Element writerConcurrencyE = (Element) writerE.selectSingleNode("concurrency"); writerParamMap.put( "concurrency", StrUtils.replaceString(writerConcurrencyE.attributeValue("core"))); } List<Element> writerParamE = (List<Element>) writerE.selectNodes("param"); for (Element e : writerParamE) { writerParamMap.put( e.attributeValue("key").toLowerCase(), StrUtils.replaceString(e.attributeValue("value").trim())); } PluginParam writerPluginParam = new DefaultPluginParam(writerParamMap); writerPluginConf.setPluginParams(writerPluginParam); writerJobConfs.add(writerPluginConf); } job.setReaderConf(readerJobConf); job.setWriterConfs(writerJobConfs); return job; }