/** * Main method drives all methods * * @param args * @throws IOException */ public static void main(String[] args) throws IOException { String usageError = "Please provide a valid option. Such as: " + "\n -add FILENAME *creates new HITs from the data provided in the given file(s)* " + "\n -delete FILENAME *deletes all of the HITs with IDs matching those given in the file(s)*" + "\n -approveAll FILENAME *approves all the assignments for all HITs with IDs in the given file(s)*"; if (args.length >= 1) { // Create an instance of this class. LexicalSubSurvey app = new LexicalSubSurvey(); File inputFile = null; try { if (args.length > 1) inputFile = new File(args[1]); if (args[0].equals("-add")) { // When -add tag is given in adds HITs to Mechanical turk depending on the URL in the // mturk.properties file String[] parts = { "NN", "NNS", "JJ", "JJR", "JJS", "RB", "RBR", "RBS", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ" }; ArrayList<String> pos = new ArrayList<String>(); for (int i = 0; i < parts.length; i++) { pos.add(parts[i]); } ExamplePairReader reader = new ExamplePairReader(PARSED, ALIGN); BufferedReader in = new BufferedReader( new InputStreamReader( new FileInputStream(inputFile))); // typical file name: "sub.simple.first100" DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-ddHH:mm:ss"); Date date = new Date(); // The three different experiments leave one uncommented at a time to do single groupings app.contextpr = new PrintWriter( new FileOutputStream( new File(inputFile.getName() + "ContextGivenIDs" + dateFormat.format(date)))); app.partialContextpr = new PrintWriter( new FileOutputStream( new File( inputFile.getName() + "partialContextIDs" + dateFormat.format(date)))); app.noContextpr = new PrintWriter( new FileOutputStream( new File( inputFile.getName() + "NoContextGivenIDs" + dateFormat.format(date)))); Map<String, String> codeToPOS = new HashMap<String, String>(14); codeToPOS.put("NN", "Noun"); codeToPOS.put("NNS", "Noun"); codeToPOS.put("JJ", "Adjective"); codeToPOS.put("JJR", "Adjective"); codeToPOS.put("JJS", "Adjective"); codeToPOS.put("RB", "Adverb"); codeToPOS.put("RBR", "Adverb"); codeToPOS.put("RBS", "Adverb"); codeToPOS.put("VB", "Verb"); codeToPOS.put("VBD", "Verb"); codeToPOS.put("VBG", "Verb"); codeToPOS.put("VBN", "Verb"); codeToPOS.put("VBP", "Verb"); codeToPOS.put("VBZ", "Verb"); String input = in.readLine(); Map<String, String[]> wordToSense = new HashMap<String, String[]>(25); String focusWord = ""; String sense = ""; String context = ""; String simpleWord; while (input != null) { StringTokenizer splitter = new StringTokenizer(input, "\t"); context = splitter.nextToken(); splitter.nextToken(); focusWord = splitter.nextToken(); simpleWord = splitter.nextToken(); sense = splitter.nextToken(); String[] wordAssociations = {context, sense, simpleWord}; wordToSense.put(focusWord, wordAssociations); input = in.readLine(); } for (int k = 0; k < 1000000 && reader.hasNext(); k++) { // for counted input goes through until reaches end or max number ExamplePair p = reader.next(); Alignment align = p.getAlignment(); ArrayList<Word> normalWords = p.getNormal().getWords(); ArrayList<Word> simpleWords = p.getSimple().getWords(); // creates object = list of simple words SimpleWordsList simpleWordsList = new SimpleWordsList(); for (AlignPair pair : align) { int n = pair.getNormalIndex(); int s = pair.getSimpleIndex(); Word normal = normalWords.get(n); Word simple = simpleWords.get(s); boolean diffWords = !normal.getWord().toLowerCase().equals(simple.getWord().toLowerCase()); boolean normWordSimplePOS = pos.contains(normal.getPos()); boolean posEqual = normal.getPos().equals(simple.getPos()); boolean normalIsAlreadySimple = simpleWordsList.contains(normal.getWord()); boolean doWeHaveSense = wordToSense.containsKey(normal.getWord()); if (doWeHaveSense) context = wordToSense.get(normal.getWord())[0]; boolean contextMatch = context.equals(p.getNormal().textString()); if (diffWords && normWordSimplePOS && posEqual && !normalIsAlreadySimple && doWeHaveSense && contextMatch) { String firstPart = ""; String partialFirst = ""; String wordAfterFocus = normalWords.get(n + 1).getWord(); String target = normal.getWord(); if (!(wordAfterFocus.length() == 1 && wordAfterFocus.compareTo("A") < 0)) { target += " "; } String secondPart = ""; String partialSecond = ""; sense = wordToSense.get(normal.getWord())[1]; String POS = codeToPOS.get(normal.getPos()); for (int i = 0; i < normalWords.size(); i++) { String currentWord = normalWords.get(i).getWord(); String nextWord = ""; if (i + 1 < normalWords.size()) { nextWord = normalWords.get(i + 1).getWord(); } if (i < n) { if (i > n - 3) partialFirst += currentWord; firstPart += currentWord; if (!(nextWord.length() == 1 && nextWord.compareTo("A") < 0)) { firstPart += " "; if (i > n - 3) partialFirst += " "; } } if (i > n) { if (i < n + 3) partialSecond += currentWord; secondPart += currentWord; if (!(nextWord.length() == 1 && nextWord.compareTo("A") < 0)) { secondPart += " "; if (i < n + 3) partialSecond += " "; } } } // comment out 2 out of the 3 for single grouping app.createContextGivenSurvey(firstPart, target, secondPart); app.createPartialContextGivenSurvey( partialFirst, target, partialSecond, sense, POS); app.createNoContextGivenSurvey(target, sense, POS); } } } // comment out 2 for single grouping app.contextpr.close(); app.partialContextpr.close(); app.noContextpr.close(); } else if (args[0].equals("-delete")) { // deletes the hits whose IDs are in the given file System.out.println("deleting"); // IDs are usually stored in these files: NoContextGivenIDs, NoTargetGivenIDs, // ContextGivenIDs BufferedReader fileReader = new BufferedReader(new InputStreamReader(new FileInputStream(inputFile))); String hitId = ""; for (hitId = fileReader.readLine(); hitId != null; hitId = fileReader.readLine()) { System.out.println(hitId); app.deleteHIT(hitId); } } else if (args[0].equals( "-approveAll")) { // approves all submissions for all hits whose IDs in the given file System.out.println("approving"); // IDs are usually stored in these files: NoContextGivenIDs, NoTargetGivenIDs, // ContextGivenIDs BufferedReader fileReader = new BufferedReader(new InputStreamReader(new FileInputStream(inputFile))); String hitId = ""; for (hitId = fileReader.readLine(); hitId != null; hitId = fileReader.readLine()) { System.out.println(hitId); app.approveHIT(hitId); } } else { System.err.println("No valid options were provided"); System.out.println(usageError); } } catch (IOException e) { System.err.println("Could not find the file: \"" + args[1] + "\""); System.err.println("Please provide a valid file name"); } } else System.out.println(usageError); }