-
Notifications
You must be signed in to change notification settings - Fork 0
/
Word.java
407 lines (380 loc) · 15.8 KB
/
Word.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
//file and internet io
import java.net.URL;
import java.net.HttpURLConnection;
import java.net.UnknownHostException;
import java.net.MalformedURLException;
import java.net.URLEncoder;
import java.io.IOException;
import java.io.InputStream;
import java.io.ByteArrayInputStream;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import java.util.Scanner;
//xml io
import javax.xml.XMLConstants; //secure-processing feature name
import javax.xml.parsers.DocumentBuilder; //document builder
import javax.xml.parsers.DocumentBuilderFactory; //...factory
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.*; //contains Document and Element etc
import org.xml.sax.SAXException;
import javax.xml.transform.*; //this and 3 below are xml to string
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import javax.xml.xpath.*; //xpath for removing empty text nodes
/**
 * Class representing a word and
 * its definition to find.
 * A word holds one "main" definition plus the array of every
 * {@link Definition} parsed from the dictionary service XML (or from a
 * previously saved file).
 * @author Matthew Gray
 */
public class Word
{
    /** The word itself */
    private final String word;
    /** The definition of the word */
    private String definition;
    /** Other definitions of the word */
    private Definition[] otherDefinitions;

    /** Service URL for requesting definition */
    private static final String SERVICE_URL =
        "http://services.aonaware.com//DictService/DictService.asmx/Define?word=";

    /** Parser features switched off so external entities/DTDs are never fetched. */
    private static final String[] EXTERNAL_ENTITY_FEATURES = {
        "http://xml.org/sax/features/external-general-entities",
        "http://xml.org/sax/features/external-parameter-entities",
        "http://apache.org/xml/features/nonvalidating/load-external-dtd"
    };

    //xml indices
    //definition indices
    /** Location of the word itself */
    private static final int XML_WORD_LOC = 0;
    /** Location of the dictionary branch node */
    private static final int XML_DICTIONARY_BRANCH_LOC = 1;
    /** Location of the textual definition */
    private static final int XML_DEFINITION_LOC = 2;
    //dictionary indices
    /** Location of the dictionary ID */
    private static final int XML_DICTIONARY_ID_LOC = 0;
    /** Location of the dictionary title */
    private static final int XML_DICTIONARY_NAME_LOC = 1;

    /**
     * Instantiates a word with no definition (in order to get one).
     * @param word The word to create an object of.
     */
    public Word(String word)
    {
        this.word = word;
    }

    /**
     * Instantiates a word with a definition (if we know it for some reason).
     * @param word The word to create an object of.
     * @param definition The definition of the word.
     */
    public Word(String word, String definition)
    {
        this(word);
        this.definition = definition;
    }

    /**
     * Gets the definitions of a word, sets the main one if necessary, and returns it.
     * The stream is only parsed when {@code overwrite} is true or no main
     * definition is set yet; on success both the main definition and the
     * array of all definitions are replaced. On failure neither is modified.
     * @param inputStream XML to parse the definitions from; may be null, in which
     * case an error string is returned if parsing would have been required.
     * @param overwrite if true, then function will parse the stream
     * even if the definition is already set.
     * @return String containing the main definition of the word, or a string
     * starting with "ERROR! " if the XML was missing or could not be parsed.
     */
    public String getDefinitions(ByteArrayInputStream inputStream, boolean overwrite)
    {
        if (!overwrite && hasMainDefinition()) {
            return this.definition;
        }
        if (inputStream == null) {
            //no XML to parse (e.g. the download failed); report it in the same
            //"ERROR! " form the parse failures use
            return "ERROR! No definition XML available for \"" + this.word + "\"";
        }
        try {
            Document doc = parseDocument(inputStream);
            //parse into a local first so a failure half way through does not
            //leave this object holding a partially filled array
            Definition[] parsed = readDefinitions(doc.getElementsByTagName("Definition"), this.word);
            this.otherDefinitions = parsed;
            this.setMainDefinition(parsed[0].getDefinition());
            return parsed[0].getDefinition();
        } catch (Exception ex) {
            ex.printStackTrace();
            return "ERROR! " + ex;
        }
    }

    /**
     * Reads every word stored in a saved XML document (the format produced by
     * {@link #getMultipleWordXML(Word[])}): one {@code WordDefinitions} element
     * per word, carrying {@code word} and {@code mainDefinition} attributes and
     * {@code Definition} child elements.
     * @param inputStream stream containing the saved XML.
     * @return the words found in the document, or null if the document could
     * not be parsed.
     */
    public static Word[] getWordsFromFile(ByteArrayInputStream inputStream)
    {
        try {
            Document doc = parseDocument(inputStream);
            NodeList wordList = doc.getElementsByTagName("WordDefinitions");
            int wordCount = wordList.getLength();
            Word[] returnWords = new Word[wordCount];
            for (int wordi = 0; wordi < wordCount; wordi++) {
                Element wordElement = (Element) wordList.item(wordi);
                String word = wordElement.getAttribute("word");
                String mainDefinition = wordElement.getAttribute("mainDefinition");
                Word currentWord = new Word(word, mainDefinition);
                currentWord.setOtherDefinitions(
                    readDefinitions(wordElement.getElementsByTagName("Definition"), word));
                returnWords[wordi] = currentWord;
            }
            return returnWords;
        } catch (Exception ex) {
            ex.printStackTrace();
            return null;
        }
    }

    /**
     * Gets a ByteArrayInputStream to parse a word from the definition API.
     * The response is read as UTF-8 (NOTE(review): assumes the service answers
     * in UTF-8 - confirm) and any non-word characters in front of the first
     * tag are stripped so the XML parser accepts it.
     * @return ByteArrayInputStream containing definition XML, null if trouble
     * parsing or getting the definition.
     */
    public ByteArrayInputStream getStreamFromOnlineXML()
    {
        try {
            //download the xml definition page of the word
            String urlSafeWord = URLEncoder.encode(this.word, "UTF-8");
            //try-with-resources: the connection is released on every path
            try (InputStream response = new URL(SERVICE_URL + urlSafeWord).openStream();
                 Scanner responseReader = new Scanner(response, "UTF-8").useDelimiter("\\A")) {
                if (!responseReader.hasNext()) {
                    return null;
                }
                //gets a clean version of xml the document can parse
                String xml = responseReader.next().trim().replaceFirst("^([\\W]+)<","<");
                return new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8));
            }
        } catch (Exception ex) {
            ex.printStackTrace();
            return null;
        }
    }

    /**
     * Gets the definitions of a word, sets the main one if necessary, and returns it.
     * The online service is only contacted when no main definition is set yet.
     * @return String containing the definition of the word.
     */
    public String getDefinitions()
    {
        if (hasMainDefinition()) {
            //already known: avoid a pointless network round trip
            return this.definition;
        }
        return this.getDefinitions(getStreamFromOnlineXML(), false);
    }

    /**
     * Gets all definitions possible for this term.
     * @return Definition array containing all definitions. Returns null if trouble getting definitions.
     */
    public Definition[] getAllDefinitions()
    {
        if (this.otherDefinitions == null) {
            getDefinitions();
        }
        return this.otherDefinitions;
    }

    /**
     * Builds the XML element for this word so that it may be saved.
     * Definitions are looked up first (if none are loaded yet) so that the
     * {@code mainDefinition} attribute reflects the result of that lookup.
     * A word without any loaded definitions is written without
     * {@code Definition} children.
     * @param doc The document used to create the element.
     * @return XML node that can be saved of this word, null if it could not be built.
     */
    public Element getWordXMLNode(Document doc)
    {
        try {
            if (this.otherDefinitions == null) {
                this.getDefinitions();
            }
            Element rootWordElement = doc.createElement("WordDefinitions");
            rootWordElement.setAttribute("word", this.word);
            String mainDefinition = this.getMainDefinition();
            //an unset definition is stored as empty, which is read back as "not set"
            rootWordElement.setAttribute("mainDefinition", mainDefinition == null ? "" : mainDefinition);
            if (this.otherDefinitions != null) {
                for (Definition entry : this.otherDefinitions) {
                    rootWordElement.appendChild(buildDefinitionElement(doc, entry));
                }
            }
            return rootWordElement;
        } catch(Exception ex) {
            ex.printStackTrace();
            return null;
        }
    }

    /**
     * Gets the string of an XML document containing multiple words for saving purposes.
     * @param words The words to save.
     * @return String containing XML containing all words, null if the
     * document could not be built.
     */
    public static String getMultipleWordXML(Word[] words)
    {
        try {
            Document doc = newDocumentBuilder().newDocument();
            Element root = doc.createElement("Root");
            for (Word current : words) {
                root.appendChild(current.getWordXMLNode(doc));
            }
            doc.appendChild(root);
            return getStringFromDocument(doc);
        } catch(Exception ex) {
            ex.printStackTrace();
            return null;
        }
    }

    /**
     * Sets the definition, overriding whatever may be there.
     * @param definition what to set the object's definition to.
     */
    public void setMainDefinition(String definition)
    {
        this.definition = definition;
    }

    /**
     * Mutator for other definitions
     * @param otherDefinitions What to set otherDefinitions to.
     */
    public void setOtherDefinitions(Definition[] otherDefinitions)
    {
        this.otherDefinitions = otherDefinitions;
    }

    /**
     * Gets the main definition.
     * @return The main definition.
     */
    public String getMainDefinition()
    {
        return this.definition;
    }

    /**
     * Gets the word itself.
     * @return the string representing the word of this object.
     */
    public String getWord()
    {
        return this.word;
    }

    /**
     * Returns a String representing this word and definition pair.
     * @return the string representing this word and its definition.
     */
    @Override
    public String toString()
    {
        return this.getWord() + " - " + this.getDefinitions();
    }

    /**
     * Comparison of this object with another object, potentially another word.
     * If the definition of either word isn't set, then the comparison will be false
     * (except when comparing an object with itself).
     * This was chosen to conserve resources/time when doing potential large batch
     * comparisons; no definition is ever looked up by this method.
     * @param o the other object to compare to.
     * @return whether this word is equal to another word, true if so, false if otherwise.
     */
    @Override
    public boolean equals(Object o)
    {
        if (this == o) {
            return true;
        }
        if (!(o instanceof Word)) {
            return false;
        }
        Word otherWord = (Word) o;
        if (!this.hasMainDefinition() || !otherWord.hasMainDefinition()) {
            return false;
        }
        return Objects.equals(this.word, otherWord.word)
            && this.definition.equals(otherWord.definition);
    }

    /**
     * Hash code consistent with {@link #equals(Object)}: equal words always
     * share the same word text. Only the word is hashed so the value does not
     * change when a definition is looked up later.
     * @return hash code of the word text.
     */
    @Override
    public int hashCode()
    {
        return Objects.hashCode(this.word);
    }

    /**
     * Tells whether a usable main definition is present.
     * @return true if the main definition is neither null nor empty.
     */
    private boolean hasMainDefinition()
    {
        return this.definition != null && !this.definition.isEmpty();
    }

    /**
     * Converts a list of Definition elements into Definition objects.
     * @param definitionNodes the Definition elements to read.
     * @param fallbackWord word to put in the placeholder entry when the list is empty.
     * @return one Definition per element, or a single "NO DEFINITION FOUND"
     * placeholder when there are no elements.
     */
    private static Definition[] readDefinitions(NodeList definitionNodes, String fallbackWord)
    {
        int numDefinitions = definitionNodes.getLength();
        if (numDefinitions == 0) {
            return new Definition[] {new Definition("nil", "nil", "NO DEFINITION FOUND", fallbackWord)};
        }
        Definition[] definitions = new Definition[numDefinitions];
        for (int i = 0; i < numDefinitions; i++) {
            NodeList children = definitionNodes.item(i).getChildNodes(); //each child node of the definition
            String definitionText = children.item(XML_DEFINITION_LOC).getTextContent();
            String definedWord = children.item(XML_WORD_LOC).getTextContent();
            NodeList dictionaryNode = children.item(XML_DICTIONARY_BRANCH_LOC).getChildNodes();
            String dictionaryId = dictionaryNode.item(XML_DICTIONARY_ID_LOC).getTextContent();
            String dictionaryName = dictionaryNode.item(XML_DICTIONARY_NAME_LOC).getTextContent();
            definitions[i] = new Definition(dictionaryName, dictionaryId, definitionText, definedWord);
        }
        return definitions;
    }

    /**
     * Builds the Definition element (Word, Dictionary/Id, Dictionary/Name,
     * WordDefinition) for a single definition.
     * @param doc The document used to create the elements.
     * @param entry The definition to serialize.
     * @return the populated Definition element.
     */
    private static Element buildDefinitionElement(Document doc, Definition entry)
    {
        //main definition node
        Element definitionElement = doc.createElement("Definition");
        //word
        Element wordElement = doc.createElement("Word");
        wordElement.setTextContent(entry.getWord());
        //dictionary
        Element dictionary = doc.createElement("Dictionary");
        Element id = doc.createElement("Id");
        id.setTextContent(entry.getId());
        Element source = doc.createElement("Name");
        source.setTextContent(entry.getSource());
        dictionary.appendChild(id);
        dictionary.appendChild(source);
        //definition
        Element wordDefinition = doc.createElement("WordDefinition");
        wordDefinition.setTextContent(entry.getDefinition());
        //append all the children; the order must match the XML_*_LOC indices
        definitionElement.appendChild(wordElement);
        definitionElement.appendChild(dictionary);
        definitionElement.appendChild(wordDefinition);
        return definitionElement;
    }

    /**
     * Creates a DocumentBuilder that does not resolve external entities or DTDs,
     * since the XML parsed by this class comes from the network or from files.
     * @return a hardened DocumentBuilder.
     * @throws ParserConfigurationException if the parser cannot be created.
     */
    private static DocumentBuilder newDocumentBuilder() throws ParserConfigurationException
    {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        for (String feature : EXTERNAL_ENTITY_FEATURES) {
            try {
                dbf.setFeature(feature, false);
            } catch (ParserConfigurationException ignored) {
                //feature names are parser specific; an unsupported one is skipped on purpose
            }
        }
        return dbf.newDocumentBuilder();
    }

    /**
     * Parses a stream into a document and strips its whitespace-only text nodes,
     * so that child nodes can be addressed by the fixed XML_*_LOC indices.
     * @param inputStream the XML to parse.
     * @return the parsed, cleaned document.
     * @throws ParserConfigurationException if the parser cannot be created.
     * @throws SAXException if the XML is malformed.
     * @throws IOException if the stream cannot be read.
     */
    private static Document parseDocument(InputStream inputStream)
        throws ParserConfigurationException, SAXException, IOException
    {
        Document doc = newDocumentBuilder().parse(inputStream);
        removeEmptyTextNodes(doc);
        return doc;
    }

    /**
     * Converts an XML document to a string.
     * From Zaz Gmy on this StackOverflow post:
     * http://stackoverflow.com/questions/10356258/how-do-i-convert-a-org-w3c-dom-document-object-to-a-string
     * @param doc The document to convert to a String.
     * @return the serialized document, null if the transformation failed.
     */
    private static String getStringFromDocument(Document doc)
    {
        try {
            StringWriter writer = new StringWriter();
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.transform(new DOMSource(doc), new StreamResult(writer));
            return writer.toString();
        } catch(TransformerException ex) {
            ex.printStackTrace();
            return null;
        }
    }

    /**
     * Removes empty #text nodes from a document.
     * From James Murty on this StackOverflow post:
     * http://stackoverflow.com/questions/978810/how-to-strip-whitespace-only-text-nodes-from-a-dom-before-serialization
     * Best effort: if the XPath evaluation fails the document is left as it is.
     * @param doc The document to remove empty text nodes from.
     */
    private static void removeEmptyTextNodes(Document doc)
    {
        try {
            XPathFactory xpathFactory = XPathFactory.newInstance();
            // XPath to find empty text nodes.
            XPathExpression xpathExp = xpathFactory.newXPath().compile(
                "//text()[normalize-space(.) = '']");
            NodeList emptyTextNodes = (NodeList)
                xpathExp.evaluate(doc, XPathConstants.NODESET);
            // Remove each empty text node from document.
            for (int i = 0; i < emptyTextNodes.getLength(); i++) {
                Node emptyTextNode = emptyTextNodes.item(i);
                emptyTextNode.getParentNode().removeChild(emptyTextNode);
            }
        } catch (XPathExpressionException ex) {
            ex.printStackTrace();
        }
    }
}