001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018
019 package org.apache.commons.net.nntp;
020
021 /**
022 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
023 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
024 * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
025 *
026 * @author rwinston <rwinston@checkfree.com>
027 *
028 */
029
030 import java.util.HashMap;
031 import java.util.Iterator;
032
033 public class Threader {
034 private ThreadContainer root;
035 private HashMap<String,ThreadContainer> idTable;
036 private int bogusIdCount = 0;
037
038 /**
039 * The main threader entry point - The client passes in an array of Threadable objects, and
040 * the Threader constructs a connected 'graph' of messages
041 * @param messages
042 * @return
043 */
044 public Threadable thread(Threadable[] messages) {
045 if (messages == null)
046 return null;
047
048 idTable = new HashMap<String,ThreadContainer>();
049
050 // walk through each Threadable element
051 for (int i = 0; i < messages.length; ++i) {
052 if (!messages[i].isDummy())
053 buildContainer(messages[i]);
054 }
055
056 root = findRootSet();
057 idTable.clear();
058 idTable = null;
059
060 pruneEmptyContainers(root);
061
062 root.reverseChildren();
063 gatherSubjects();
064
065 if (root.next != null)
066 throw new RuntimeException("root node has a next:" + root);
067
068 for (ThreadContainer r = root.child; r != null; r = r.next) {
069 if (r.threadable == null)
070 r.threadable = r.child.threadable.makeDummy();
071 }
072
073 Threadable result = (root.child == null ? null : root.child.threadable);
074 root.flush();
075 root = null;
076
077 return result;
078 }
079
080 /**
081 *
082 * @param threadable
083 */
084 private void buildContainer(Threadable threadable) {
085 String id = threadable.messageThreadId();
086 ThreadContainer container = idTable.get(id);
087
088 // A ThreadContainer exists for this id already. This should be a forward reference, but may
089 // be a duplicate id, in which case we will need to generate a bogus placeholder id
090 if (container != null) {
091 if (container.threadable != null) { // oops! duplicate ids...
092 id = "<Bogus-id:" + (bogusIdCount++) + ">";
093 container = null;
094 } else {
095 // The container just contained a forward reference to this message, so let's
096 // fill in the threadable field of the container with this message
097 container.threadable = threadable;
098 }
099 }
100
101 // No container exists for that message Id. Create one and insert it into the hash table.
102 if (container == null) {
103 container = new ThreadContainer();
104 container.threadable = threadable;
105 idTable.put(id, container);
106 }
107
108 // Iterate through all of the references and create ThreadContainers for any references that
109 // don't have them.
110 ThreadContainer parentRef = null;
111 {
112 String[] references = threadable.messageThreadReferences();
113 for (int i = 0; i < references.length; ++i) {
114 String refString = references[i];
115 ThreadContainer ref = idTable.get(refString);
116
117 // if this id doesnt have a container, create one
118 if (ref == null) {
119 ref = new ThreadContainer();
120 idTable.put(refString, ref);
121 }
122
123 // Link references together in the order they appear in the References: header,
124 // IF they dont have a have a parent already &&
125 // IF it will not cause a circular reference
126 if ((parentRef != null)
127 && (ref.parent == null)
128 && (parentRef != ref)
129 && !(parentRef.findChild(ref))) {
130 // Link ref into the parent's child list
131 ref.parent = parentRef;
132 ref.next = parentRef.child;
133 parentRef.child = ref;
134 }
135 parentRef = ref;
136 }
137 }
138
139 // parentRef is now set to the container of the last element in the references field. make that
140 // be the parent of this container, unless doing so causes a circular reference
141 if (parentRef != null
142 && (parentRef == container || container.findChild(parentRef)))
143 parentRef = null;
144
145 // if it has a parent already, its because we saw this message in a References: field, and presumed
146 // a parent based on the other entries in that field. Now that we have the actual message, we can
147 // throw away the old parent and use this new one
148 if (container.parent != null) {
149 ThreadContainer rest, prev;
150
151 for (prev = null, rest = container.parent.child;
152 rest != null;
153 prev = rest, rest = rest.next) {
154 if (rest == container)
155 break;
156 }
157
158 if (rest == null) {
159 throw new RuntimeException(
160 "Didnt find "
161 + container
162 + " in parent"
163 + container.parent);
164 }
165
166 // Unlink this container from the parent's child list
167 if (prev == null)
168 container.parent.child = container.next;
169 else
170 prev.next = container.next;
171
172 container.next = null;
173 container.parent = null;
174 }
175
176 // If we have a parent, link container into the parents child list
177 if (parentRef != null) {
178 container.parent = parentRef;
179 container.next = parentRef.child;
180 parentRef.child = container;
181 }
182 }
183
184 /**
185 * Find the root set of all existing ThreadContainers
186 * @return root the ThreadContainer representing the root node
187 */
188 private ThreadContainer findRootSet() {
189 ThreadContainer root = new ThreadContainer();
190 Iterator<String> iter = idTable.keySet().iterator();
191
192 while (iter.hasNext()) {
193 Object key = iter.next();
194 ThreadContainer c = idTable.get(key);
195 if (c.parent == null) {
196 if (c.next != null)
197 throw new RuntimeException(
198 "c.next is " + c.next.toString());
199 c.next = root.child;
200 root.child = c;
201 }
202 }
203 return root;
204 }
205
206 /**
207 * Delete any empty or dummy ThreadContainers
208 * @param parent
209 */
210 private void pruneEmptyContainers(ThreadContainer parent) {
211 ThreadContainer container, prev, next;
212 for (prev = null, container = parent.child, next = container.next;
213 container != null;
214 prev = container,
215 container = next,
216 next = (container == null ? null : container.next)) {
217
218 // Is it empty and without any children? If so,delete it
219 if (container.threadable == null && container.child == null) {
220 if (prev == null)
221 parent.child = container.next;
222 else
223 prev.next = container.next;
224
225 // Set container to prev so that prev keeps its same value the next time through the loop
226 container = prev;
227 }
228
229 // Else if empty, with kids, and (not at root or only one kid)
230 else if (
231 container.threadable == null
232 && container.child != null
233 && (container.parent != null
234 || container.child.next == null)) {
235 // We have an invalid/expired message with kids. Promote the kids to this level.
236 ThreadContainer tail;
237 ThreadContainer kids = container.child;
238
239 // Remove this container and replace with 'kids'.
240 if (prev == null)
241 parent.child = kids;
242 else
243 prev.next = kids;
244
245 // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next
246 // i.e. splice kids into the list in place of container
247 for (tail = kids; tail.next != null; tail = tail.next)
248 tail.parent = container.parent;
249
250 tail.parent = container.parent;
251 tail.next = container.next;
252
253 // next currently points to the item after the inserted items in the chain - reset that so we process the newly
254 // promoted items next time round
255 next = kids;
256
257 // Set container to prev so that prev keeps its same value the next time through the loop
258 container = prev;
259 } else if (container.child != null) {
260 // A real message , with kids
261 // Iterate over the children
262 pruneEmptyContainers(container);
263 }
264 }
265 }
266
267 /**
268 * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers.
269 */
270 private void gatherSubjects() {
271
272 int count = 0;
273
274 for (ThreadContainer c = root.child; c != null; c = c.next)
275 count++;
276
277 // TODO verify this will avoid rehashing
278 HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9);
279 count = 0;
280
281 for (ThreadContainer c = root.child; c != null; c = c.next) {
282 Threadable threadable = c.threadable;
283
284 // No threadable? If so, it is a dummy node in the root set.
285 // Only root set members may be dummies, and they alway have at least 2 kids
286 // Take the first kid as representative of the subject
287 if (threadable == null)
288 threadable = c.child.threadable;
289
290 String subj = threadable.simplifiedSubject();
291
292 if (subj == null || subj == "")
293 continue;
294
295 ThreadContainer old = subjectTable.get(subj);
296
297 // Add this container to the table iff:
298 // - There exists no container with this subject
299 // - or this is a dummy container and the old one is not - the dummy one is
300 // more interesting as a root, so put it in the table instead
301 // - The container in the table has a "Re:" version of this subject, and
302 // this container has a non-"Re:" version of this subject. The non-"Re:" version
303 // is the more interesting of the two.
304 if (old == null
305 || (c.threadable == null && old.threadable != null)
306 || (old.threadable != null
307 && old.threadable.subjectIsReply()
308 && c.threadable != null
309 && !c.threadable.subjectIsReply())) {
310 subjectTable.put(subj, c);
311 count++;
312 }
313 }
314
315 // If the table is empty, we're done
316 if (count == 0)
317 return;
318
319 // subjectTable is now populated with one entry for each subject which occurs in the
320 // root set. Iterate over the root set, and gather together the difference.
321 ThreadContainer prev, c, rest;
322 for (prev = null, c = root.child, rest = c.next;
323 c != null;
324 prev = c, c = rest, rest = (rest == null ? null : rest.next)) {
325 Threadable threadable = c.threadable;
326
327 // is it a dummy node?
328 if (threadable == null)
329 threadable = c.child.threadable;
330
331 String subj = threadable.simplifiedSubject();
332
333 // Dont thread together all subjectless messages
334 if (subj == null || subj == "")
335 continue;
336
337 ThreadContainer old = subjectTable.get(subj);
338
339 if (old == c) // That's us
340 continue;
341
342 // We have now found another container in the root set with the same subject
343 // Remove the "second" message from the root set
344 if (prev == null)
345 root.child = c.next;
346 else
347 prev.next = c.next;
348 c.next = null;
349
350 if (old.threadable == null && c.threadable == null) {
351 // both dummies - merge them
352 ThreadContainer tail;
353 for (tail = old.child;
354 tail != null && tail.next != null;
355 tail = tail.next);
356
357 tail.next = c.child;
358
359 for (tail = c.child; tail != null; tail = tail.next)
360 tail.parent = old;
361
362 c.child = null;
363 } else if (
364 old.threadable == null
365 || (c.threadable != null
366 && c.threadable.subjectIsReply()
367 && !old.threadable.subjectIsReply())) {
368 // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old
369 c.parent = old;
370 c.next = old.child;
371 old.child = c;
372 } else {
373 // else make the old and new messages be children of a new dummy container.
374 // We create a new container object for old.msg and empty the old container
375 ThreadContainer newc = new ThreadContainer();
376 newc.threadable = old.threadable;
377 newc.child = old.child;
378
379 for (ThreadContainer tail = newc.child;
380 tail != null;
381 tail = tail.next)
382 tail.parent = newc;
383
384 old.threadable = null;
385 old.child = null;
386
387 c.parent = old;
388 newc.parent = old;
389
390 // Old is now a dummy- give it 2 kids , c and newc
391 old.child = c;
392 c.next = newc;
393 }
394 // We've done a merge, so keep the same prev
395 c = prev;
396 }
397
398 subjectTable.clear();
399 subjectTable = null;
400
401 }
402 }
403
404 /**
405 * A placeholder utility class, used for constructing a tree of Threadables
406 * Originall implementation by Jamie Zawinski.
407 * See the Grendel source for more details <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java#511">here</a>
408 * Threadable objects
409 * @author Rory Winston <rwinston@checkfree.com>
410 */
411 class ThreadContainer {
412 Threadable threadable;
413 ThreadContainer parent;
414 ThreadContainer prev;
415 ThreadContainer next;
416 ThreadContainer child;
417
418 /**
419 *
420 * @param container
421 * @return true if child is under self's tree. Detects circular references
422 */
423 boolean findChild(ThreadContainer target) {
424 if (child == null)
425 return false;
426
427 else if (child == target)
428 return true;
429 else
430 return child.findChild(target);
431 }
432
433 // Copy the ThreadContainer tree structure down into the underlying Threadable objects
434 // (Make the Threadable tree look like the ThreadContainer tree)
435 void flush() {
436 if (parent != null && threadable == null)
437 throw new RuntimeException("no threadable in " + this.toString());
438
439 parent = null;
440
441 if (threadable != null)
442 threadable.setChild(child == null ? null : child.threadable);
443
444 if (child != null) {
445 child.flush();
446 child = null;
447 }
448
449 if (threadable != null)
450 threadable.setNext(next == null ? null : next.threadable);
451
452 if (next != null) {
453 next.flush();
454 next = null;
455 }
456
457 threadable = null;
458 }
459
460 /**
461 * Reverse the entire set of children
462 *
463 */
464 void reverseChildren() {
465 if (child != null) {
466 ThreadContainer kid, prev, rest;
467 for (prev = null, kid = child, rest = kid.next;
468 kid != null;
469 prev = kid,
470 kid = rest,
471 rest = (rest == null ? null : rest.next))
472 kid.next = prev;
473
474 child = prev;
475
476 // Do it for the kids
477 for (kid = child; kid != null; kid = kid.next)
478 kid.reverseChildren();
479 }
480 }
481 }