summaryrefslogtreecommitdiff
path: root/org/apache/commons/net/nntp/Threader.java
diff options
context:
space:
mode:
Diffstat (limited to 'org/apache/commons/net/nntp/Threader.java')
-rw-r--r--org/apache/commons/net/nntp/Threader.java481
1 files changed, 481 insertions, 0 deletions
diff --git a/org/apache/commons/net/nntp/Threader.java b/org/apache/commons/net/nntp/Threader.java
new file mode 100644
index 0000000..d702c67
--- /dev/null
+++ b/org/apache/commons/net/nntp/Threader.java
@@ -0,0 +1,481 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.commons.net.nntp;
+
+/**
+ * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
+ * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
+ * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
+ *
+ * @author rwinston <rwinston@checkfree.com>
+ *
+ */
+
+import java.util.HashMap;
+import java.util.Iterator;
+
+public class Threader {
+ private ThreadContainer root;
+ private HashMap<String,ThreadContainer> idTable;
+ private int bogusIdCount = 0;
+
+ /**
+ * The main threader entry point - The client passes in an array of Threadable objects, and
+ * the Threader constructs a connected 'graph' of messages
+ * @param messages
+ * @return
+ */
+ public Threadable thread(Threadable[] messages) {
+ if (messages == null)
+ return null;
+
+ idTable = new HashMap<String,ThreadContainer>();
+
+ // walk through each Threadable element
+ for (int i = 0; i < messages.length; ++i) {
+ if (!messages[i].isDummy())
+ buildContainer(messages[i]);
+ }
+
+ root = findRootSet();
+ idTable.clear();
+ idTable = null;
+
+ pruneEmptyContainers(root);
+
+ root.reverseChildren();
+ gatherSubjects();
+
+ if (root.next != null)
+ throw new RuntimeException("root node has a next:" + root);
+
+ for (ThreadContainer r = root.child; r != null; r = r.next) {
+ if (r.threadable == null)
+ r.threadable = r.child.threadable.makeDummy();
+ }
+
+ Threadable result = (root.child == null ? null : root.child.threadable);
+ root.flush();
+ root = null;
+
+ return result;
+ }
+
+ /**
+ *
+ * @param threadable
+ */
+ private void buildContainer(Threadable threadable) {
+ String id = threadable.messageThreadId();
+ ThreadContainer container = idTable.get(id);
+
+ // A ThreadContainer exists for this id already. This should be a forward reference, but may
+ // be a duplicate id, in which case we will need to generate a bogus placeholder id
+ if (container != null) {
+ if (container.threadable != null) { // oops! duplicate ids...
+ id = "<Bogus-id:" + (bogusIdCount++) + ">";
+ container = null;
+ } else {
+ // The container just contained a forward reference to this message, so let's
+ // fill in the threadable field of the container with this message
+ container.threadable = threadable;
+ }
+ }
+
+ // No container exists for that message Id. Create one and insert it into the hash table.
+ if (container == null) {
+ container = new ThreadContainer();
+ container.threadable = threadable;
+ idTable.put(id, container);
+ }
+
+ // Iterate through all of the references and create ThreadContainers for any references that
+ // don't have them.
+ ThreadContainer parentRef = null;
+ {
+ String[] references = threadable.messageThreadReferences();
+ for (int i = 0; i < references.length; ++i) {
+ String refString = references[i];
+ ThreadContainer ref = idTable.get(refString);
+
+ // if this id doesnt have a container, create one
+ if (ref == null) {
+ ref = new ThreadContainer();
+ idTable.put(refString, ref);
+ }
+
+ // Link references together in the order they appear in the References: header,
+ // IF they dont have a have a parent already &&
+ // IF it will not cause a circular reference
+ if ((parentRef != null)
+ && (ref.parent == null)
+ && (parentRef != ref)
+ && !(parentRef.findChild(ref))) {
+ // Link ref into the parent's child list
+ ref.parent = parentRef;
+ ref.next = parentRef.child;
+ parentRef.child = ref;
+ }
+ parentRef = ref;
+ }
+ }
+
+ // parentRef is now set to the container of the last element in the references field. make that
+ // be the parent of this container, unless doing so causes a circular reference
+ if (parentRef != null
+ && (parentRef == container || container.findChild(parentRef)))
+ parentRef = null;
+
+ // if it has a parent already, its because we saw this message in a References: field, and presumed
+ // a parent based on the other entries in that field. Now that we have the actual message, we can
+ // throw away the old parent and use this new one
+ if (container.parent != null) {
+ ThreadContainer rest, prev;
+
+ for (prev = null, rest = container.parent.child;
+ rest != null;
+ prev = rest, rest = rest.next) {
+ if (rest == container)
+ break;
+ }
+
+ if (rest == null) {
+ throw new RuntimeException(
+ "Didnt find "
+ + container
+ + " in parent"
+ + container.parent);
+ }
+
+ // Unlink this container from the parent's child list
+ if (prev == null)
+ container.parent.child = container.next;
+ else
+ prev.next = container.next;
+
+ container.next = null;
+ container.parent = null;
+ }
+
+ // If we have a parent, link container into the parents child list
+ if (parentRef != null) {
+ container.parent = parentRef;
+ container.next = parentRef.child;
+ parentRef.child = container;
+ }
+ }
+
+ /**
+ * Find the root set of all existing ThreadContainers
+ * @return root the ThreadContainer representing the root node
+ */
+ private ThreadContainer findRootSet() {
+ ThreadContainer root = new ThreadContainer();
+ Iterator<String> iter = idTable.keySet().iterator();
+
+ while (iter.hasNext()) {
+ Object key = iter.next();
+ ThreadContainer c = idTable.get(key);
+ if (c.parent == null) {
+ if (c.next != null)
+ throw new RuntimeException(
+ "c.next is " + c.next.toString());
+ c.next = root.child;
+ root.child = c;
+ }
+ }
+ return root;
+ }
+
+ /**
+ * Delete any empty or dummy ThreadContainers
+ * @param parent
+ */
+ private void pruneEmptyContainers(ThreadContainer parent) {
+ ThreadContainer container, prev, next;
+ for (prev = null, container = parent.child, next = container.next;
+ container != null;
+ prev = container,
+ container = next,
+ next = (container == null ? null : container.next)) {
+
+ // Is it empty and without any children? If so,delete it
+ if (container.threadable == null && container.child == null) {
+ if (prev == null)
+ parent.child = container.next;
+ else
+ prev.next = container.next;
+
+ // Set container to prev so that prev keeps its same value the next time through the loop
+ container = prev;
+ }
+
+ // Else if empty, with kids, and (not at root or only one kid)
+ else if (
+ container.threadable == null
+ && container.child != null
+ && (container.parent != null
+ || container.child.next == null)) {
+ // We have an invalid/expired message with kids. Promote the kids to this level.
+ ThreadContainer tail;
+ ThreadContainer kids = container.child;
+
+ // Remove this container and replace with 'kids'.
+ if (prev == null)
+ parent.child = kids;
+ else
+ prev.next = kids;
+
+ // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next
+ // i.e. splice kids into the list in place of container
+ for (tail = kids; tail.next != null; tail = tail.next)
+ tail.parent = container.parent;
+
+ tail.parent = container.parent;
+ tail.next = container.next;
+
+ // next currently points to the item after the inserted items in the chain - reset that so we process the newly
+ // promoted items next time round
+ next = kids;
+
+ // Set container to prev so that prev keeps its same value the next time through the loop
+ container = prev;
+ } else if (container.child != null) {
+ // A real message , with kids
+ // Iterate over the children
+ pruneEmptyContainers(container);
+ }
+ }
+ }
+
+ /**
+ * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers.
+ */
+ private void gatherSubjects() {
+
+ int count = 0;
+
+ for (ThreadContainer c = root.child; c != null; c = c.next)
+ count++;
+
+ // TODO verify this will avoid rehashing
+ HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9);
+ count = 0;
+
+ for (ThreadContainer c = root.child; c != null; c = c.next) {
+ Threadable threadable = c.threadable;
+
+ // No threadable? If so, it is a dummy node in the root set.
+ // Only root set members may be dummies, and they alway have at least 2 kids
+ // Take the first kid as representative of the subject
+ if (threadable == null)
+ threadable = c.child.threadable;
+
+ String subj = threadable.simplifiedSubject();
+
+ if (subj == null || subj == "")
+ continue;
+
+ ThreadContainer old = subjectTable.get(subj);
+
+ // Add this container to the table iff:
+ // - There exists no container with this subject
+ // - or this is a dummy container and the old one is not - the dummy one is
+ // more interesting as a root, so put it in the table instead
+ // - The container in the table has a "Re:" version of this subject, and
+ // this container has a non-"Re:" version of this subject. The non-"Re:" version
+ // is the more interesting of the two.
+ if (old == null
+ || (c.threadable == null && old.threadable != null)
+ || (old.threadable != null
+ && old.threadable.subjectIsReply()
+ && c.threadable != null
+ && !c.threadable.subjectIsReply())) {
+ subjectTable.put(subj, c);
+ count++;
+ }
+ }
+
+ // If the table is empty, we're done
+ if (count == 0)
+ return;
+
+ // subjectTable is now populated with one entry for each subject which occurs in the
+ // root set. Iterate over the root set, and gather together the difference.
+ ThreadContainer prev, c, rest;
+ for (prev = null, c = root.child, rest = c.next;
+ c != null;
+ prev = c, c = rest, rest = (rest == null ? null : rest.next)) {
+ Threadable threadable = c.threadable;
+
+ // is it a dummy node?
+ if (threadable == null)
+ threadable = c.child.threadable;
+
+ String subj = threadable.simplifiedSubject();
+
+ // Dont thread together all subjectless messages
+ if (subj == null || subj == "")
+ continue;
+
+ ThreadContainer old = subjectTable.get(subj);
+
+ if (old == c) // That's us
+ continue;
+
+ // We have now found another container in the root set with the same subject
+ // Remove the "second" message from the root set
+ if (prev == null)
+ root.child = c.next;
+ else
+ prev.next = c.next;
+ c.next = null;
+
+ if (old.threadable == null && c.threadable == null) {
+ // both dummies - merge them
+ ThreadContainer tail;
+ for (tail = old.child;
+ tail != null && tail.next != null;
+ tail = tail.next);
+
+ tail.next = c.child;
+
+ for (tail = c.child; tail != null; tail = tail.next)
+ tail.parent = old;
+
+ c.child = null;
+ } else if (
+ old.threadable == null
+ || (c.threadable != null
+ && c.threadable.subjectIsReply()
+ && !old.threadable.subjectIsReply())) {
+ // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old
+ c.parent = old;
+ c.next = old.child;
+ old.child = c;
+ } else {
+ // else make the old and new messages be children of a new dummy container.
+ // We create a new container object for old.msg and empty the old container
+ ThreadContainer newc = new ThreadContainer();
+ newc.threadable = old.threadable;
+ newc.child = old.child;
+
+ for (ThreadContainer tail = newc.child;
+ tail != null;
+ tail = tail.next)
+ tail.parent = newc;
+
+ old.threadable = null;
+ old.child = null;
+
+ c.parent = old;
+ newc.parent = old;
+
+ // Old is now a dummy- give it 2 kids , c and newc
+ old.child = c;
+ c.next = newc;
+ }
+ // We've done a merge, so keep the same prev
+ c = prev;
+ }
+
+ subjectTable.clear();
+ subjectTable = null;
+
+ }
+}
+
+/**
+ * A placeholder utility class, used for constructing a tree of Threadables
+ * Originall implementation by Jamie Zawinski.
+ * See the Grendel source for more details <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java#511">here</a>
+ * Threadable objects
+ * @author Rory Winston <rwinston@checkfree.com>
+ */
+class ThreadContainer {
+ Threadable threadable;
+ ThreadContainer parent;
+ ThreadContainer prev;
+ ThreadContainer next;
+ ThreadContainer child;
+
+ /**
+ *
+ * @param container
+ * @return true if child is under self's tree. Detects circular references
+ */
+ boolean findChild(ThreadContainer target) {
+ if (child == null)
+ return false;
+
+ else if (child == target)
+ return true;
+ else
+ return child.findChild(target);
+ }
+
+ // Copy the ThreadContainer tree structure down into the underlying Threadable objects
+ // (Make the Threadable tree look like the ThreadContainer tree)
+ void flush() {
+ if (parent != null && threadable == null)
+ throw new RuntimeException("no threadable in " + this.toString());
+
+ parent = null;
+
+ if (threadable != null)
+ threadable.setChild(child == null ? null : child.threadable);
+
+ if (child != null) {
+ child.flush();
+ child = null;
+ }
+
+ if (threadable != null)
+ threadable.setNext(next == null ? null : next.threadable);
+
+ if (next != null) {
+ next.flush();
+ next = null;
+ }
+
+ threadable = null;
+ }
+
+ /**
+ * Reverse the entire set of children
+ *
+ */
+ void reverseChildren() {
+ if (child != null) {
+ ThreadContainer kid, prev, rest;
+ for (prev = null, kid = child, rest = kid.next;
+ kid != null;
+ prev = kid,
+ kid = rest,
+ rest = (rest == null ? null : rest.next))
+ kid.next = prev;
+
+ child = prev;
+
+ // Do it for the kids
+ for (kid = child; kid != null; kid = kid.next)
+ kid.reverseChildren();
+ }
+ }
+}