diff options
| author | Joe Robinson <joe@lc8n.com> | 2015-01-26 22:32:55 +0000 | 
|---|---|---|
| committer | Joe Robinson <joe@lc8n.com> | 2015-01-26 22:32:55 +0000 | 
| commit | dd14976a3e13053b554e164c582084ddf8e0e833 (patch) | |
| tree | 9df5fe9853094e4612d6e502194a398b999a072e /src/com/ensemble | |
Diffstat (limited to 'src/com/ensemble')
| -rw-r--r-- | src/com/ensemble/wordcount/LengthComparator.java | 17 | ||||
| -rw-r--r-- | src/com/ensemble/wordcount/WordCount.java | 103 | ||||
| -rw-r--r-- | src/com/ensemble/wordcount/WordUtil.java | 108 | 
3 files changed, 228 insertions, 0 deletions
// File: src/com/ensemble/wordcount/LengthComparator.java (package com.ensemble.wordcount)
import java.util.Comparator;

/**
 * A Comparator that orders strings by length, longest first.
 * This is used for sorting the list of words by length.
 *
 * Strings of equal length compare as equal (the result is 0) even when their
 * contents differ, so this ordering is not consistent with equals.
 *
 * The comparator holds no state. It implements Serializable so that it can be
 * used as the ordering of serializable sorted collections.
 *
 * @author Joe Robinson
 */
public class LengthComparator implements Comparator<String>, java.io.Serializable {

	private static final long serialVersionUID = 1L;

	/**
	 * Compares two strings by length, in descending order.
	 *
	 * @param string1	The first string, must not be null
	 * @param string2	The second string, must not be null
	 * @return			A negative value if string1 is longer than string2, zero if both
	 * 					have the same length, a positive value if string1 is shorter
	 * @throws NullPointerException if either argument is null
	 */
	@Override
	public int compare(String string1, String string2) {
		// String lengths are never negative, so this subtraction cannot overflow
		return string2.length() - string1.length();
	}
}
\ No newline at end of file diff --git a/src/com/ensemble/wordcount/WordCount.java b/src/com/ensemble/wordcount/WordCount.java new file mode 100644 index 0000000..c6a63d0 --- /dev/null +++ b/src/com/ensemble/wordcount/WordCount.java @@ -0,0 +1,103 @@ +package com.ensemble.wordcount; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * This program may be used to count the number of occurences of words in a given file. + * Words are not counted if they are contained within a larger word (eg "is" and "this") + *  + * @author Joe Robinson + * + */ +public class WordCount { +	 +	public static void main(String args[]) { + +		//Need to use a concurrent hashmap as we will be modifying it as we loop through +		Map<String,Integer> wordCounts = new ConcurrentHashMap<String,Integer>(); +		List<String> words = new ArrayList<String>(); +		WordUtil wordUtil = new WordUtil(); +		String filename = ""; +		 +		while (filename.equals("")) {	 +	 +			//If an argument has been entered, use it as the filename, if not prompt for one +			if (args.length > 0 && args[0] != null) { +				filename = args[0]; +			} else { +				filename = readFilename(); +			} +			 +			try { +				words = wordUtil.readWordsFromFile(filename); +	 +			} catch (IOException e) { +				System.out.println("Could not open file. Please try another."); +				filename = ""; +				words = new ArrayList<String>(); +			} +			 +			//Check that the file actually contains some words, if not then prompt for another +			if (words.size() == 0) { +				System.out.println("File does not contain any words. 
Please try another file."); +				filename = ""; +				words = new ArrayList<String>(); +			} +			 +		} +		wordCounts = wordUtil.countWords(words); + +		List<String> sortedWords = wordUtil.sortWords(wordCounts); +		 +		System.out.println(); +		 +		for (String word : sortedWords) { +			 +			//Capitalise the first letter of the word for output +			char firstChar = Character.toUpperCase(word.charAt(0)); +			String outputWord = firstChar + word.substring(1); +			 +			System.out.println(outputWord + ": " + wordCounts.get(word)); +		} +	} +	 +	/** +	 * Prompt the user for a filename to read until one is entered, and confirm that it exists +	 * @return The filename, which is confirmed to exist +	 */ +	private static String readFilename() { +		 +		String filename = ""; +		boolean fileExists = false; +		while (filename == null || filename.equals("") || !fileExists) { +			BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); +			try { +				System.out.println("Please enter a file name to read, or type \"q\" to quit"); +				filename = br.readLine(); +			} catch (IOException e) { +				System.out.println("Could not read input"); +				System.exit(1); +			} +			 +			if (filename.equals("q")) { +				System.exit(0); +			} +			 +			File file = new File(filename); +			 +			if (file.exists()) { +				fileExists = true; +			} else { +				System.out.println("File does not exist. 
Try again"); +			} +		} +		return filename; +	} +} diff --git a/src/com/ensemble/wordcount/WordUtil.java b/src/com/ensemble/wordcount/WordUtil.java new file mode 100644 index 0000000..0344f4c --- /dev/null +++ b/src/com/ensemble/wordcount/WordUtil.java @@ -0,0 +1,108 @@ +package com.ensemble.wordcount; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * This class contains various utility functions for counting words + * @author Joe Robinson + * + */ +public class WordUtil { + +	/** +	 * Reads all the words from a given file +	 *  +	 * @param filename	A String containing the filename or path to read from +	 * @return			A List containing all words in the file +	 */ +	public List<String> readWordsFromFile(String filename) throws IOException { +		 +		FileReader fr = new FileReader(filename); +		BufferedReader br = new BufferedReader(fr); +		 +		String line = br.readLine(); +		String[] tmpWords; +		List<String> words = new ArrayList<String>(); +		 +		while (line != null) { +			 +			tmpWords = line.split(" "); +			 +			//Remove all punctuation and convert all words to lower case for processing +			for (String tmpWord : tmpWords) { +				words.add(tmpWord.replaceAll("[^a-zA-Z]", "").toLowerCase()); +			} +			 +			line = br.readLine(); +		} +		 +		br.close(); +	 +		return words; +	} +	 +	/** +	 * Counts the number of occurrences of a word in a given list of words +	 * Words will not be counted if they are a substring of another word in the list +	 *  +	 * @param words	A list containing single word strings +	 * @return		A map with the word as the key, and the number of occurrences as the value +	 */ +	public Map<String,Integer> countWords(List<String> words) { +		 +		Map<String,Integer> wordCounts = new ConcurrentHashMap<String,Integer>(); +		 +		for (String newWord : 
words) { +			 +			//If the exact word is already a key in the map, increment it's count +			if (wordCounts.containsKey(newWord)) { +				wordCounts.put(newWord, wordCounts.get(newWord) + 1); +			} else { +				 +				boolean addWord = true; +				 +				for (String countedWord : wordCounts.keySet()) { +					 +					//If an existing word in the map contains this word, don't count it +					if (countedWord.contains(newWord)) { +						addWord = false; +						break; +					//If this word contains any of the existing words, remove them from the map +					} else if (newWord.contains(countedWord)) { +						wordCounts.remove(countedWord); +					} +				} +				 +				//Add the word to the map as long as it wasn't found in an existing word +				if (addWord) { +					wordCounts.put(newWord, 1); +				} +			} +		} +		return wordCounts; +	} +	 +	/** +	 * Extract a sorted list of the words in the map based on their length +	 * @param wordCounts	A map containing strings as the key. Value is not used +	 *  +	 * @return				A list of the keys sorted into descending order of length +	 */ +	public List<String> sortWords(Map<String, ?> wordCounts) { +		 +		LengthComparator lengthCompare = new LengthComparator(); +		List<String> sortedWords = new ArrayList<String>(wordCounts.keySet()); +		 +		Collections.sort(sortedWords, lengthCompare); +		 +		return sortedWords; +	} + +}  | 
