/**
 * Writes the selected measurement data to {@code csvWriter} as a table with one row
 * per hour between {@code d1} and {@code d2} (inclusive) and three columns
 * (value, "obuhvat", "status") per key of {@code selektiraniPodaci}.
 *
 * <p>The header row is {@code "Vrijeme"} followed, for every program, by the component
 * formula, {@code "obuhvat"} and {@code "status"}. Each data row starts with the hour
 * formatted by {@code sdf}. Cells for which no measurement exists are written as three
 * empty fields so that every row has the same number of columns as the header.
 *
 * <p>I/O failures are logged at SEVERE level and are not propagated to the caller.
 *
 * @param csvWriter destination writer; it is flushed but not closed by this method
 */
private void pisiCsv(CsvWriter csvWriter) {
    try {
        // Hourly time grid: timestamp -> row index, in ascending order.
        SortedMap<Date, Integer> vremena = new TreeMap<>();
        Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT+1"));
        cal.setTime(d1);
        int nrow = 0;
        while (!cal.getTime().after(d2)) {
            vremena.put(cal.getTime(), nrow);
            cal.add(Calendar.HOUR, 1);
            nrow++;
        }

        // Header row; remember which column group belongs to which program.
        int size = selektiraniPodaci.keySet().size();
        csvWriter.write("Vrijeme");
        HashMap<ProgramMjerenja, Integer> komponente = new HashMap<>();
        int ncol = 0;
        for (ProgramMjerenja pm : selektiraniPodaci.keySet()) {
            try {
                komponente.put(pm, ncol);
                csvWriter.write(pm.getKomponentaId().getFormula());
                csvWriter.write("obuhvat");
                csvWriter.write("status");
                ncol++;
            } catch (IOException ex) {
                Logger.getLogger(PfTest.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        csvWriter.endRecord();

        // Place every fetched measurement into its (hour, program) cell.
        Podatak[][] tablica = new Podatak[nrow][size];
        for (ProgramMjerenja pm : selektiraniPodaci.keySet()) {
            Integer stupac = komponente.get(pm);
            List<Podatak> podaci = podatakFacade.getPodatak(pm, d1, d2, true, true);
            for (Podatak p : podaci) {
                Integer redak = vremena.get(p.getVrijeme());
                if (redak == null) {
                    // Timestamp is not on the hourly grid; there is no row for it.
                    // Previously this unboxed null and threw a NullPointerException.
                    continue;
                }
                tablica[redak][stupac] = p;
            }
        }

        // Data rows, one per hour.
        for (Date d : vremena.keySet()) {
            try {
                int redak = vremena.get(d);
                csvWriter.write(sdf.format(d));
                for (int i = 0; i < ncol; i++) {
                    Podatak p = tablica[redak][i];
                    if (p != null) {
                        csvWriter.write(p.getVrijednost().toString());
                        csvWriter.write(p.getObuhvat().toString());
                        csvWriter.write(Integer.toString(p.getStatus()));
                    } else {
                        // Keep the columns aligned with the header when data is missing.
                        csvWriter.write("");
                        csvWriter.write("");
                        csvWriter.write("");
                    }
                }
                csvWriter.endRecord();
            } catch (IOException ex) {
                Logger.getLogger(PfTest.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
        csvWriter.flush();
    } catch (IOException ex) {
        Logger.getLogger(PfTest.class.getName()).log(Level.SEVERE, null, ex);
    }
}
@Override public void map(LongWritable key, Text value, OutputCollector<Text, Text> oc, Reporter reporter) throws IOException { BufferedReader fin = null; InputStream is = null; try { String s3Path = value.toString(); URL url = new URL(s3Path); URLConnection conn = url.openConnection(); conn.setConnectTimeout(20000); conn.setReadTimeout(20000); is = conn.getInputStream(); is.read(); is.read(); fin = new BufferedReader(new InputStreamReader(new CBZip2InputStream(is), "UTF-8")); String currentTitle = ""; // int cnt = 0; String line = null; StringWriter merged = null; CsvWriter writer; while ((line = fin.readLine()) != null) { if ("<page>".equals(line.trim())) { String secondLine = fin.readLine(); currentTitle = new String( secondLine.substring( secondLine.indexOf(pre) + pre.length(), secondLine.indexOf(suf))); secondLine = null; } if (line.trim().startsWith("{{Infobox")) { sb = new StringBuilder(); merged = new StringWriter(); writer = new CsvWriter(merged, ','); sb.append(line); sb.append(sep); while (true) { line = fin.readLine().trim(); sb.append(line); sb.append(sep); if ("}}".equals(line)) { sb.append(line); sb.append(sep); break; } reporter.progress(); } writer.writeRecord(new String[] {currentTitle, sb.toString()}); writer.flush(); oc.collect(new Text(""), new Text(merged.toString())); reporter.progress(); reporter.setStatus(value.toString() + " processed"); sb = null; merged = null; writer = null; } line = null; } } catch (IOException ioe) { reporter.setStatus("This task didn't get fully passed"); } finally { try { fin.close(); } catch (Exception e) { e.printStackTrace(); } finally { } } }