From dd14976a3e13053b554e164c582084ddf8e0e833 Mon Sep 17 00:00:00 2001
From: Joe Robinson
Date: Mon, 26 Jan 2015 22:32:55 +0000
Subject: Initial commit

---
 src/com/ensemble/wordcount/LengthComparator.java |  18 ++++
 src/com/ensemble/wordcount/WordCount.java        | 103 +++++++++++++++++++
 src/com/ensemble/wordcount/WordUtil.java         | 124 +++++++++++++++++++++++
 3 files changed, 245 insertions(+)
 create mode 100644 src/com/ensemble/wordcount/LengthComparator.java
 create mode 100644 src/com/ensemble/wordcount/WordCount.java
 create mode 100644 src/com/ensemble/wordcount/WordUtil.java

(limited to 'src/com')

Note: the blob ids on the index lines below are those of the files as first
submitted; they were not recomputed after the review fixes.

diff --git a/src/com/ensemble/wordcount/LengthComparator.java b/src/com/ensemble/wordcount/LengthComparator.java
new file mode 100644
index 0000000..9692097
--- /dev/null
+++ b/src/com/ensemble/wordcount/LengthComparator.java
@@ -0,0 +1,18 @@
+package com.ensemble.wordcount;
+
+import java.util.Comparator;
+
+/**
+ * A Comparator class for comparing the lengths of strings
+ * This is used for sorting the list of words by length, longest first
+ *
+ * @author Joe Robinson
+ */
+public class LengthComparator implements Comparator<String> {
+
+    @Override
+    public int compare(String string1, String string2) {
+        //Operands are swapped so that longer strings sort before shorter ones
+        return Integer.compare(string2.length(), string1.length());
+    }
+}
diff --git a/src/com/ensemble/wordcount/WordCount.java b/src/com/ensemble/wordcount/WordCount.java
new file mode 100644
index 0000000..c6a63d0
--- /dev/null
+++ b/src/com/ensemble/wordcount/WordCount.java
@@ -0,0 +1,103 @@
+package com.ensemble.wordcount;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * This program may be used to count the number of occurrences of words in a given file.
+ * Words are not counted if they are contained within a larger word (eg "is" and "this")
+ *
+ * @author Joe Robinson
+ *
+ */
+public class WordCount {
+
+    //One reader is shared by every prompt; a new reader per prompt could swallow buffered input
+    private static final BufferedReader STDIN = new BufferedReader(new InputStreamReader(System.in));
+
+    /**
+     * Reads a file, counts its words and prints each counted word with its count, longest first
+     *
+     * @param args Optionally the name of the file to read; if absent or unusable the user is prompted
+     */
+    public static void main(String args[]) {
+
+        WordUtil wordUtil = new WordUtil();
+        List<String> words = new ArrayList<String>();
+
+        //Only the first attempt uses the argument, otherwise an unusable argument would be retried forever
+        String filename = (args.length > 0 && args[0] != null) ? args[0] : "";
+
+        while (words.isEmpty()) {
+
+            //If no filename is pending, prompt for one
+            if (filename.equals("")) {
+                filename = readFilename();
+            }
+
+            try {
+                words = wordUtil.readWordsFromFile(filename);
+
+                //Check that the file actually contains some words, if not then prompt for another
+                if (words.isEmpty()) {
+                    System.out.println("File does not contain any words. Please try another file.");
+                }
+            } catch (IOException e) {
+                System.out.println("Could not open file. Please try another.");
+            }
+
+            //If the loop repeats, this filename was unusable and a new one must be prompted for
+            filename = "";
+        }
+
+        Map<String, Integer> wordCounts = wordUtil.countWords(words);
+        List<String> sortedWords = wordUtil.sortWords(wordCounts);
+
+        System.out.println();
+
+        for (String word : sortedWords) {
+
+            //Capitalise the first letter of the word for output
+            char firstChar = Character.toUpperCase(word.charAt(0));
+            String outputWord = firstChar + word.substring(1);
+
+            System.out.println(outputWord + ": " + wordCounts.get(word));
+        }
+    }
+
+    /**
+     * Prompt the user for a filename to read until one is entered, and confirm that it exists.
+     * Exits the program if the user types "q" or if standard input is closed.
+     *
+     * @return The filename, which is confirmed to exist
+     */
+    private static String readFilename() {
+
+        while (true) {
+            String filename = null;
+            try {
+                System.out.println("Please enter a file name to read, or type \"q\" to quit");
+                filename = STDIN.readLine();
+            } catch (IOException e) {
+                System.out.println("Could not read input");
+                System.exit(1);
+            }
+
+            //readLine returns null once input is exhausted; quit instead of failing on a null filename
+            if (filename == null || filename.equals("q")) {
+                System.exit(0);
+            }
+
+            if (new File(filename).exists()) {
+                return filename;
+            }
+            System.out.println("File does not exist. Try again");
+        }
+    }
+}
diff --git a/src/com/ensemble/wordcount/WordUtil.java b/src/com/ensemble/wordcount/WordUtil.java
new file mode 100644
index 0000000..0344f4c
--- /dev/null
+++ b/src/com/ensemble/wordcount/WordUtil.java
@@ -0,0 +1,124 @@
+package com.ensemble.wordcount;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * This class contains various utility functions for counting words
+ * @author Joe Robinson
+ *
+ */
+public class WordUtil {
+
+    /**
+     * Reads all the words from a given file.
+     * Words are separated by whitespace, reduced to the letters a-z and converted to lower case.
+     * Tokens with no letters left (eg "--" or "42") are skipped.
+     *
+     * @param filename A String containing the filename or path to read from
+     * @return A List containing all words in the file
+     * @throws IOException If the file cannot be opened or read
+     */
+    public List<String> readWordsFromFile(String filename) throws IOException {
+
+        List<String> words = new ArrayList<String>();
+
+        //try-with-resources closes the file even if reading fails part way through
+        try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
+
+            String line = br.readLine();
+
+            while (line != null) {
+
+                //Split on any run of whitespace so tabs and repeated spaces are also separators
+                for (String tmpWord : line.split("\\s+")) {
+
+                    //Remove all punctuation and convert all words to lower case for processing
+                    String word = tmpWord.replaceAll("[^a-zA-Z]", "").toLowerCase(Locale.ROOT);
+
+                    if (!word.isEmpty()) {
+                        words.add(word);
+                    }
+                }
+
+                line = br.readLine();
+            }
+        }
+
+        return words;
+    }
+
+    /**
+     * Counts the number of occurrences of a word in a given list of words
+     * Words will not be counted if they are a substring of another word in the list
+     *
+     * @param words A list containing single word strings
+     * @return A map with the word as the key, and the number of occurrences as the value
+     */
+    public Map<String, Integer> countWords(List<String> words) {
+
+        //Need to use a concurrent hashmap as entries are removed while looping over its keys
+        Map<String, Integer> wordCounts = new ConcurrentHashMap<String, Integer>();
+
+        for (String newWord : words) {
+
+            //Every string contains the empty string, so it is never a word in its own right
+            if (newWord.isEmpty()) {
+                continue;
+            }
+
+            //If the exact word is already a key in the map, increment its count
+            if (wordCounts.containsKey(newWord)) {
+                wordCounts.put(newWord, wordCounts.get(newWord) + 1);
+            } else {
+
+                boolean addWord = true;
+
+                for (String countedWord : wordCounts.keySet()) {
+
+                    //If an existing word in the map contains this word, don't count it
+                    if (countedWord.contains(newWord)) {
+                        addWord = false;
+                        break;
+                    //If this word contains any of the existing words, remove them from the map
+                    } else if (newWord.contains(countedWord)) {
+                        wordCounts.remove(countedWord);
+                    }
+                }
+
+                //Add the word to the map as long as it wasn't found in an existing word
+                if (addWord) {
+                    wordCounts.put(newWord, 1);
+                }
+            }
+        }
+        return wordCounts;
+    }
+
+    /**
+     * Extract a sorted list of the words in the map based on their length
+     * @param wordCounts A map containing strings as the key. Value is not used
+     *
+     * @return A list of the keys sorted into descending order of length,
+     *         with words of equal length in alphabetical order
+     */
+    public List<String> sortWords(Map<String, Integer> wordCounts) {
+
+        LengthComparator lengthCompare = new LengthComparator();
+        List<String> sortedWords = new ArrayList<String>(wordCounts.keySet());
+
+        //The length sort is stable, so sorting alphabetically first fixes the order of equal lengths
+        Collections.sort(sortedWords);
+        Collections.sort(sortedWords, lengthCompare);
+
+        return sortedWords;
+    }
+
+}
-- 
cgit v1.2.3