summary | refs | log | tree | commit | diff
path: root/src/com/ensemble/wordcount
diff options
context:
space:
mode:
Diffstat (limited to 'src/com/ensemble/wordcount')
-rw-r--r-- src/com/ensemble/wordcount/LengthComparator.java 17
-rw-r--r-- src/com/ensemble/wordcount/WordCount.java 103
-rw-r--r-- src/com/ensemble/wordcount/WordUtil.java 108
3 files changed, 228 insertions, 0 deletions
diff --git a/src/com/ensemble/wordcount/LengthComparator.java b/src/com/ensemble/wordcount/LengthComparator.java
new file mode 100644
index 0000000..9692097
--- /dev/null
+++ b/src/com/ensemble/wordcount/LengthComparator.java
@@ -0,0 +1,17 @@
+package com.ensemble.wordcount;
+
+import java.util.Comparator;
+
+ /**
+  * Orders strings by their length, longest first.
+  * Sorting a list of words with this comparator puts the longest words at the
+  * front. Only the lengths are examined, so two different strings of the same
+  * length compare as equal.
+  *
+  * @author Joe Robinson
+  */
+ public class LengthComparator implements Comparator<String> {
+
+     /**
+      * Compares two strings by length in descending order.
+      *
+      * @param string1 the first string to compare
+      * @param string2 the second string to compare
+      * @return the length of string2 minus the length of string1: negative when
+      *         string1 is the longer one, zero when both have the same length,
+      *         positive when string2 is the longer one
+      */
+     @Override
+     public int compare(String string1, String string2) {
+         final int secondLength = string2.length();
+         final int firstLength = string1.length();
+         return secondLength - firstLength;
+     }
+ }
\ No newline at end of file
diff --git a/src/com/ensemble/wordcount/WordCount.java b/src/com/ensemble/wordcount/WordCount.java
new file mode 100644
index 0000000..c6a63d0
--- /dev/null
+++ b/src/com/ensemble/wordcount/WordCount.java
@@ -0,0 +1,103 @@
+package com.ensemble.wordcount;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * This program may be used to count the number of occurrences of words in a given file.
+ * Words are not counted if they are contained within a larger word (eg "is" and "this")
+ *
+ * The file name is taken from the first command-line argument when one is given.
+ * If there is no argument, or that file cannot be used, the user is prompted for
+ * a file name on standard input until a usable file is entered.
+ *
+ * @author Joe Robinson
+ *
+ */
+public class WordCount {
+
+    //A single shared reader for standard input. Creating a new BufferedReader for
+    //every prompt can lose input that the previous reader had already buffered
+    private static final BufferedReader STDIN = new BufferedReader(new InputStreamReader(System.in));
+
+    /**
+     * Reads the words of a file, counts them and prints each counted word
+     * (first letter capitalised) with its count, longest word first.
+     *
+     * @param args Optional; args[0] is used as the name of the file to read
+     */
+    public static void main(String args[]) {
+
+        WordUtil wordUtil = new WordUtil();
+        List<String> words = new ArrayList<String>();
+
+        //The command-line argument is only tried once. Re-using it after a failure
+        //would retry the same unusable file forever
+        boolean useArgument = args.length > 0 && args[0] != null && !args[0].equals("");
+        boolean haveWords = false;
+
+        while (!haveWords) {
+
+            String filename;
+            if (useArgument) {
+                filename = args[0];
+                useArgument = false;
+            } else {
+                filename = readFilename();
+            }
+
+            try {
+                words = wordUtil.readWordsFromFile(filename);
+            } catch (IOException e) {
+                System.out.println("Could not open file. Please try another.");
+                //The file could not be read at all, so don't also report it as empty
+                continue;
+            }
+
+            //Check that the file actually contains some words, if not then prompt for another
+            haveWords = containsWord(words);
+            if (!haveWords) {
+                System.out.println("File does not contain any words. Please try another file.");
+            }
+        }
+
+        Map<String,Integer> wordCounts = wordUtil.countWords(words);
+        List<String> sortedWords = wordUtil.sortWords(wordCounts);
+
+        System.out.println();
+
+        for (String word : sortedWords) {
+            System.out.println(capitalise(word) + ": " + wordCounts.get(word));
+        }
+    }
+
+    /**
+     * Checks whether a list holds at least one non-empty word
+     * @param words The list to check
+     * @return true if any entry is a non-null, non-empty string
+     */
+    private static boolean containsWord(List<String> words) {
+        for (String word : words) {
+            if (word != null && word.length() > 0) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Capitalise the first letter of a word for output
+     * @param word The word to capitalise
+     * @return The word with its first character in upper case; null and empty
+     *         input is returned unchanged instead of failing on charAt(0)
+     */
+    private static String capitalise(String word) {
+        if (word == null || word.length() == 0) {
+            return word;
+        }
+        return Character.toUpperCase(word.charAt(0)) + word.substring(1);
+    }
+
+    /**
+     * Prompt the user for a filename to read until one is entered, and confirm that it exists.
+     * Exits the program with status 0 if the user types "q", and with status 1 if
+     * standard input cannot be read or has reached its end.
+     * @return The filename, which is confirmed to exist
+     */
+    private static String readFilename() {
+
+        while (true) {
+            String filename = null;
+            try {
+                System.out.println("Please enter a file name to read, or type \"q\" to quit");
+                filename = STDIN.readLine();
+            } catch (IOException e) {
+                System.out.println("Could not read input");
+                System.exit(1);
+            }
+
+            //readLine returns null once the end of the input is reached. Nothing more
+            //can be entered after that, so stop instead of failing on a null filename
+            if (filename == null) {
+                System.out.println("Could not read input");
+                System.exit(1);
+            }
+
+            if (filename.equals("q")) {
+                System.exit(0);
+            }
+
+            if (new File(filename).exists()) {
+                return filename;
+            }
+            System.out.println("File does not exist. Try again");
+        }
+    }
+}
diff --git a/src/com/ensemble/wordcount/WordUtil.java b/src/com/ensemble/wordcount/WordUtil.java
new file mode 100644
index 0000000..0344f4c
--- /dev/null
+++ b/src/com/ensemble/wordcount/WordUtil.java
@@ -0,0 +1,108 @@
+package com.ensemble.wordcount;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * This class contains various utility functions for counting words
+ * @author Joe Robinson
+ *
+ */
+public class WordUtil {
+
+    /**
+     * Reads all the words from a given file.
+     * Each line is split on whitespace, every character other than a-z and A-Z is
+     * removed from each piece, and the result is converted to lower case. Pieces
+     * that have no letters left (blank lines, numbers, stand-alone punctuation)
+     * are not returned.
+     *
+     * @param filename A String containing the filename or path to read from
+     * @return A List containing all words in the file, in the order they were read
+     * @throws IOException If the file cannot be opened or read
+     */
+    public List<String> readWordsFromFile(String filename) throws IOException {
+
+        List<String> words = new ArrayList<String>();
+        BufferedReader br = new BufferedReader(new FileReader(filename));
+
+        try {
+            String line = br.readLine();
+
+            while (line != null) {
+
+                //Split on any run of whitespace, so that tabs and repeated spaces
+                //separate words instead of being stripped and joining two words together
+                for (String tmpWord : line.split("\\s+")) {
+
+                    //Remove all punctuation and convert all words to lower case for processing.
+                    //Locale.ROOT keeps the lower-casing independent of the default locale
+                    String word = tmpWord.replaceAll("[^a-zA-Z]", "").toLowerCase(java.util.Locale.ROOT);
+
+                    if (word.length() > 0) {
+                        words.add(word);
+                    }
+                }
+
+                line = br.readLine();
+            }
+        } finally {
+            //Close the file even if reading fails part way through
+            br.close();
+        }
+
+        return words;
+    }
+
+    /**
+     * Counts the number of occurrences of a word in a given list of words
+     * Words will not be counted if they are a substring of another word in the list.
+     * When a longer word arrives that contains words counted so far, those shorter
+     * words are dropped from the result together with their counts.
+     * Null and empty entries in the list are ignored.
+     *
+     * @param words A list containing single word strings
+     * @return A map with the word as the key, and the number of occurrences as the value
+     */
+    public Map<String,Integer> countWords(List<String> words) {
+
+        //A ConcurrentHashMap is used because entries are removed from the map while its
+        //key set is being looped over, which its iterators are documented to tolerate
+        Map<String,Integer> wordCounts = new ConcurrentHashMap<String,Integer>();
+
+        if (words == null) {
+            return wordCounts;
+        }
+
+        for (String newWord : words) {
+
+            //Null keys are not allowed in the map, and the empty string is
+            //contained in every other word, so neither can be counted as a word
+            if (newWord == null || newWord.length() == 0) {
+                continue;
+            }
+
+            //If the exact word is already a key in the map, increment its count
+            Integer currentCount = wordCounts.get(newWord);
+            if (currentCount != null) {
+                wordCounts.put(newWord, currentCount + 1);
+                continue;
+            }
+
+            boolean addWord = true;
+
+            for (String countedWord : wordCounts.keySet()) {
+
+                //If an existing word in the map contains this word, don't count it
+                if (countedWord.contains(newWord)) {
+                    addWord = false;
+                    break;
+                }
+
+                //If this word contains any of the existing words, remove them from the map
+                if (newWord.contains(countedWord)) {
+                    wordCounts.remove(countedWord);
+                }
+            }
+
+            //Add the word to the map as long as it wasn't found in an existing word
+            if (addWord) {
+                wordCounts.put(newWord, 1);
+            }
+        }
+        return wordCounts;
+    }
+
+    /**
+     * Extract a sorted list of the words in the map based on their length
+     * @param wordCounts A map containing strings as the key. Value is not used
+     *
+     * @return A new list of the keys sorted into descending order of length
+     */
+    public List<String> sortWords(Map<String, ?> wordCounts) {
+
+        //Copy the keys so that sorting does not touch the map itself
+        List<String> sortedWords = new ArrayList<String>(wordCounts.keySet());
+
+        Collections.sort(sortedWords, new LengthComparator());
+
+        return sortedWords;
+    }
+
+}