public static String Extract(String sPattern, int iOffset, String sText) { String sResult = ""; try { Scanner document_scanner = new Scanner(sText); int iCountOcurr = 0; int iFirstLine = 0; String sLine = ""; Boolean bReading = false; Boolean bIsEmpty = true; while (document_scanner.hasNext()) { sLine = document_scanner.next(); /// Pattern detected if (sLine.indexOf(sPattern) > -1) { iCountOcurr++; } /// Number of pattern has been reached if (iCountOcurr == iOffset) { bReading = true; } if (bReading && bIsEmpty) { if (iFirstLine != 0) { /// Stop if (sLine.indexOf(sStop) > -1) { bReading = false; bIsEmpty = false; } else { sResult += " " + sLine; } } else { sResult += " " + sLine; } System.out.println("sResult:" + sResult + "\n"); iFirstLine++; } } document_scanner.close(); // Print number of times the search pattern was found // System.out.println("Found Input "+ iCountOcurr + " times"); } catch (Exception e) { sResult = "Error buscar parrafo: " + e.getMessage(); } return sResult; }
@POST @Consumes(MediaType.APPLICATION_JSON) public String GetDocumentParagraph(InputStream incomingData) { String output = ""; StringBuilder builder = new StringBuilder(); Calendar cal = Calendar.getInstance(); SimpleDateFormat sdf = new SimpleDateFormat("HH:mm:ss.SSS"); try { // output += "Web Service Document ..." + sdf.format(cal.getTime()) +"<br>"; /// Reeading vars from JSON /// ---------------------------------------------------------------------------------------- BufferedReader in = new BufferedReader(new InputStreamReader(incomingData)); String line = null; while ((line = in.readLine()) != null) { builder.append(line); } cal = Calendar.getInstance(); // output += "Json stream readed: " + sdf.format(cal.getTime()) + "<br>"; /// Reeading vars from JSON /// ---------------------------------------------------------------------------------------- JSONObject jsonObject = new JSONObject(builder.toString()); String sUrl = jsonObject.getString("url"); String sPattern = jsonObject.getString("pattern"); int iOffset = jsonObject.getInt("offset"); sUrl = sUrl.replace('^', '"'); // output += "sUrl: " + sUrl + " <br>"; // output += "sPattern: " + sPattern + " <br>"; // output += "iOffset: " + iOffset + " <br>"; output += GetDocumentParagraph(sPattern, iOffset, sUrl); } catch (Exception e) { cal = Calendar.getInstance(); output += "Error: " + e.toString() + sdf.format(cal.getTime()) + "<br>"; } // return HTTP response 200 in case of success return output; }
/**
 * Downloads a Word document from {@code sUrl} and returns its text.
 *
 * <p>The stream is parsed with {@link HWPFDocument} and the text is obtained through
 * {@link WordExtractor#getText()}. Progress messages are written to standard output
 * before and after the connection is opened.
 *
 * <p>NOTE(review): the URL is opened as given, with no host check and no timeouts;
 * confirm that callers only pass trusted locations.
 *
 * @param sUrl location of the document to read
 * @return the document text, or {@code "Error al leer el archivo" + message} if the
 *         download or the parsing throws
 */
public static String GetText(String sUrl) {
    String sRet = "";
    try {
        System.out.print(" Connecting to: " + sUrl + "... \n");

        // The network stream is released even when parsing fails.
        try (InputStream inputStream = new URL(sUrl).openStream()) {
            System.out.print(" Stream readed from: " + sUrl + "\n");

            HWPFDocument document = new HWPFDocument(inputStream);
            // The extractor is closed even when getText() throws.
            try (WordExtractor extractor = new WordExtractor(document)) {
                sRet = extractor.getText();
            }
        }
    } catch (Exception e) {
        sRet = "Error al leer el archivo" + e.getMessage();
    }
    return sRet;
}