View Javadoc
1   /*
2    * Diff Match and Patch
3    *
4    * Copyright 2006 Google Inc.
5    * http://code.google.com/p/google-diff-match-patch/
6    *
7    * Licensed under the Apache License, Version 2.0 (the "License");
8    * you may not use this file except in compliance with the License.
9    * You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package name.fraser.neil.plaintext;
21  
22  import java.io.UnsupportedEncodingException;
23  import java.net.URLDecoder;
24  import java.net.URLEncoder;
25  import java.util.ArrayList;
26  import java.util.Arrays;
27  import java.util.HashMap;
28  import java.util.LinkedList;
29  import java.util.List;
30  import java.util.ListIterator;
31  import java.util.Map;
32  import java.util.Stack;
33  import java.util.regex.Matcher;
34  import java.util.regex.Pattern;
35  
36  /*
37   * Functions for diff, match and patch.
38   * Computes the difference between two texts to create a patch.
39   * Applies the patch onto another text, allowing for errors.
40   *
41   * @author fraser@google.com (Neil Fraser)
42   */
43  
44  /**
45   * Class containing the diff, match and patch methods.
46   * Also contains the behaviour settings.
47   */
48  public class diff_match_patch {
49  
50    // Defaults.
51    // Set these on your diff_match_patch instance to override the defaults.
52  
53    /**
54     * Number of seconds to map a diff before giving up (0 for infinity).
55     */
56    public float Diff_Timeout = 1.0f;
57    /**
58     * Cost of an empty edit operation in terms of edit characters.
59     */
60    public short Diff_EditCost = 4;
61    /**
62     * At what point is no match declared (0.0 = perfection, 1.0 = very loose).
63     */
64    public float Match_Threshold = 0.5f;
65    /**
66     * How far to search for a match (0 = exact location, 1000+ = broad match).
67     * A match this many characters away from the expected location will add
68     * 1.0 to the score (0.0 is a perfect match).
69     */
70    public int Match_Distance = 1000;
71    /**
72     * When deleting a large block of text (over ~64 characters), how close do
73     * the contents have to be to match the expected contents. (0.0 = perfection,
74     * 1.0 = very loose).  Note that Match_Threshold controls how closely the
75     * end points of a delete need to match.
76     */
77    public float Patch_DeleteThreshold = 0.5f;
78    /**
79     * Chunk size for context length.
80     */
81    public short Patch_Margin = 4;
82  
83    /**
84     * The number of bits in an int.
85     */
86    private short Match_MaxBits = 32;
87  
88    /**
89     * Internal class for returning results from diff_linesToChars().
90     * Other less paranoid languages just use a three-element array.
91     */
92    protected static class LinesToCharsResult {
93      protected String chars1;
94      protected String chars2;
95      protected List<String> lineArray;
96  
97      protected LinesToCharsResult(String chars1, String chars2,
98          List<String> lineArray) {
99        this.chars1 = chars1;
100       this.chars2 = chars2;
101       this.lineArray = lineArray;
102     }
103   }
104 
105 
106   //  DIFF FUNCTIONS
107 
108 
109   /**
110    * The data structure representing a diff is a Linked list of Diff objects:
111    * {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"),
112    *  Diff(Operation.EQUAL, " world.")}
113    * which means: delete "Hello", add "Goodbye" and keep " world."
114    */
115   public enum Operation {
116     DELETE, INSERT, EQUAL
117   }
118 
119   /**
120    * Find the differences between two texts.
121    * Run a faster, slightly less optimal diff.
122    * This method allows the 'checklines' of diff_main() to be optional.
123    * Most of the time checklines is wanted, so default to true.
124    * @param text1 Old string to be diffed.
125    * @param text2 New string to be diffed.
126    * @return Linked List of Diff objects.
127    */
128   public LinkedList<Diff> diff_main(String text1, String text2) {
129     return diff_main(text1, text2, true);
130   }
131 
132   /**
133    * Find the differences between two texts.
134    * @param text1 Old string to be diffed.
135    * @param text2 New string to be diffed.
136    * @param checklines Speedup flag.  If false, then don't run a
137    *     line-level diff first to identify the changed areas.
138    *     If true, then run a faster slightly less optimal diff.
139    * @return Linked List of Diff objects.
140    */
141   public LinkedList<Diff> diff_main(String text1, String text2,
142                                     boolean checklines) {
143     // Set a deadline by which time the diff must be complete.
144     long deadline;
145     if (Diff_Timeout <= 0) {
146       deadline = Long.MAX_VALUE;
147     } else {
148       deadline = System.currentTimeMillis() + (long) (Diff_Timeout * 1000);
149     }
150     return diff_main(text1, text2, checklines, deadline);
151   }
152 
153   /**
154    * Find the differences between two texts.  Simplifies the problem by
155    * stripping any common prefix or suffix off the texts before diffing.
156    * @param text1 Old string to be diffed.
157    * @param text2 New string to be diffed.
158    * @param checklines Speedup flag.  If false, then don't run a
159    *     line-level diff first to identify the changed areas.
160    *     If true, then run a faster slightly less optimal diff.
161    * @param deadline Time when the diff should be complete by.  Used
162    *     internally for recursive calls.  Users should set DiffTimeout instead.
163    * @return Linked List of Diff objects.
164    */
165   private LinkedList<Diff> diff_main(String text1, String text2,
166                                      boolean checklines, long deadline) {
167     // Check for null inputs.
168     if (text1 == null || text2 == null) {
169       throw new IllegalArgumentException("Null inputs. (diff_main)");
170     }
171 
172     // Check for equality (speedup).
173     LinkedList<Diff> diffs;
174     if (text1.equals(text2)) {
175       diffs = new LinkedList<Diff>();
176       if (text1.length() != 0) {
177         diffs.add(new Diff(Operation.EQUAL, text1));
178       }
179       return diffs;
180     }
181 
182     // Trim off common prefix (speedup).
183     int commonlength = diff_commonPrefix(text1, text2);
184     String commonprefix = text1.substring(0, commonlength);
185     text1 = text1.substring(commonlength);
186     text2 = text2.substring(commonlength);
187 
188     // Trim off common suffix (speedup).
189     commonlength = diff_commonSuffix(text1, text2);
190     String commonsuffix = text1.substring(text1.length() - commonlength);
191     text1 = text1.substring(0, text1.length() - commonlength);
192     text2 = text2.substring(0, text2.length() - commonlength);
193 
194     // Compute the diff on the middle block.
195     diffs = diff_compute(text1, text2, checklines, deadline);
196 
197     // Restore the prefix and suffix.
198     if (commonprefix.length() != 0) {
199       diffs.addFirst(new Diff(Operation.EQUAL, commonprefix));
200     }
201     if (commonsuffix.length() != 0) {
202       diffs.addLast(new Diff(Operation.EQUAL, commonsuffix));
203     }
204 
205     diff_cleanupMerge(diffs);
206     return diffs;
207   }
208 
209   /**
210    * Find the differences between two texts.  Assumes that the texts do not
211    * have any common prefix or suffix.
212    * @param text1 Old string to be diffed.
213    * @param text2 New string to be diffed.
214    * @param checklines Speedup flag.  If false, then don't run a
215    *     line-level diff first to identify the changed areas.
216    *     If true, then run a faster slightly less optimal diff.
217    * @param deadline Time when the diff should be complete by.
218    * @return Linked List of Diff objects.
219    */
220   private LinkedList<Diff> diff_compute(String text1, String text2,
221                                         boolean checklines, long deadline) {
222     LinkedList<Diff> diffs = new LinkedList<Diff>();
223 
224     if (text1.length() == 0) {
225       // Just add some text (speedup).
226       diffs.add(new Diff(Operation.INSERT, text2));
227       return diffs;
228     }
229 
230     if (text2.length() == 0) {
231       // Just delete some text (speedup).
232       diffs.add(new Diff(Operation.DELETE, text1));
233       return diffs;
234     }
235 
236     String longtext = text1.length() > text2.length() ? text1 : text2;
237     String shorttext = text1.length() > text2.length() ? text2 : text1;
238     int i = longtext.indexOf(shorttext);
239     if (i != -1) {
240       // Shorter text is inside the longer text (speedup).
241       Operation op = (text1.length() > text2.length()) ?
242                      Operation.DELETE : Operation.INSERT;
243       diffs.add(new Diff(op, longtext.substring(0, i)));
244       diffs.add(new Diff(Operation.EQUAL, shorttext));
245       diffs.add(new Diff(op, longtext.substring(i + shorttext.length())));
246       return diffs;
247     }
248 
249     if (shorttext.length() == 1) {
250       // Single character string.
251       // After the previous speedup, the character can't be an equality.
252       diffs.add(new Diff(Operation.DELETE, text1));
253       diffs.add(new Diff(Operation.INSERT, text2));
254       return diffs;
255     }
256 
257     // Check to see if the problem can be split in two.
258     String[] hm = diff_halfMatch(text1, text2);
259     if (hm != null) {
260       // A half-match was found, sort out the return data.
261       String text1_a = hm[0];
262       String text1_b = hm[1];
263       String text2_a = hm[2];
264       String text2_b = hm[3];
265       String mid_common = hm[4];
266       // Send both pairs off for separate processing.
267       LinkedList<Diff> diffs_a = diff_main(text1_a, text2_a,
268                                            checklines, deadline);
269       LinkedList<Diff> diffs_b = diff_main(text1_b, text2_b,
270                                            checklines, deadline);
271       // Merge the results.
272       diffs = diffs_a;
273       diffs.add(new Diff(Operation.EQUAL, mid_common));
274       diffs.addAll(diffs_b);
275       return diffs;
276     }
277 
278     if (checklines && text1.length() > 100 && text2.length() > 100) {
279       return diff_lineMode(text1, text2, deadline);
280     }
281 
282     return diff_bisect(text1, text2, deadline);
283   }
284 
285   /**
286    * Do a quick line-level diff on both strings, then rediff the parts for
287    * greater accuracy.
288    * This speedup can produce non-minimal diffs.
289    * @param text1 Old string to be diffed.
290    * @param text2 New string to be diffed.
291    * @param deadline Time when the diff should be complete by.
292    * @return Linked List of Diff objects.
293    */
294   private LinkedList<Diff> diff_lineMode(String text1, String text2,
295                                          long deadline) {
296     // Scan the text on a line-by-line basis first.
297     LinesToCharsResult b = diff_linesToChars(text1, text2);
298     text1 = b.chars1;
299     text2 = b.chars2;
300     List<String> linearray = b.lineArray;
301 
302     LinkedList<Diff> diffs = diff_main(text1, text2, false, deadline);
303 
304     // Convert the diff back to original text.
305     diff_charsToLines(diffs, linearray);
306     // Eliminate freak matches (e.g. blank lines)
307     diff_cleanupSemantic(diffs);
308 
309     // Rediff any replacement blocks, this time character-by-character.
310     // Add a dummy entry at the end.
311     diffs.add(new Diff(Operation.EQUAL, ""));
312     int count_delete = 0;
313     int count_insert = 0;
314     String text_delete = "";
315     String text_insert = "";
316     ListIterator<Diff> pointer = diffs.listIterator();
317     Diff thisDiff = pointer.next();
318     while (thisDiff != null) {
319       switch (thisDiff.operation) {
320       case INSERT:
321         count_insert++;
322         text_insert += thisDiff.text;
323         break;
324       case DELETE:
325         count_delete++;
326         text_delete += thisDiff.text;
327         break;
328       case EQUAL:
329         // Upon reaching an equality, check for prior redundancies.
330         if (count_delete >= 1 && count_insert >= 1) {
331           // Delete the offending records and add the merged ones.
332           pointer.previous();
333           for (int j = 0; j < count_delete + count_insert; j++) {
334             pointer.previous();
335             pointer.remove();
336           }
337           for (Diff newDiff : diff_main(text_delete, text_insert, false,
338               deadline)) {
339             pointer.add(newDiff);
340           }
341         }
342         count_insert = 0;
343         count_delete = 0;
344         text_delete = "";
345         text_insert = "";
346         break;
347       }
348       thisDiff = pointer.hasNext() ? pointer.next() : null;
349     }
350     diffs.removeLast();  // Remove the dummy entry at the end.
351 
352     return diffs;
353   }
354 
355   /**
356    * Find the 'middle snake' of a diff, split the problem in two
357    * and return the recursively constructed diff.
358    * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
359    * @param text1 Old string to be diffed.
360    * @param text2 New string to be diffed.
361    * @param deadline Time at which to bail if not yet complete.
362    * @return LinkedList of Diff objects.
363    */
364   protected LinkedList<Diff> diff_bisect(String text1, String text2,
365       long deadline) {
366     // Cache the text lengths to prevent multiple calls.
367     int text1_length = text1.length();
368     int text2_length = text2.length();
369     int max_d = (text1_length + text2_length + 1) / 2;
370     int v_offset = max_d;
371     int v_length = 2 * max_d;
372     int[] v1 = new int[v_length];
373     int[] v2 = new int[v_length];
374     for (int x = 0; x < v_length; x++) {
375       v1[x] = -1;
376       v2[x] = -1;
377     }
378     v1[v_offset + 1] = 0;
379     v2[v_offset + 1] = 0;
380     int delta = text1_length - text2_length;
381     // If the total number of characters is odd, then the front path will
382     // collide with the reverse path.
383     boolean front = (delta % 2 != 0);
384     // Offsets for start and end of k loop.
385     // Prevents mapping of space beyond the grid.
386     int k1start = 0;
387     int k1end = 0;
388     int k2start = 0;
389     int k2end = 0;
390     for (int d = 0; d < max_d; d++) {
391       // Bail out if deadline is reached.
392       if (System.currentTimeMillis() > deadline) {
393         break;
394       }
395 
396       // Walk the front path one step.
397       for (int k1 = -d + k1start; k1 <= d - k1end; k1 += 2) {
398         int k1_offset = v_offset + k1;
399         int x1;
400         if (k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1])) {
401           x1 = v1[k1_offset + 1];
402         } else {
403           x1 = v1[k1_offset - 1] + 1;
404         }
405         int y1 = x1 - k1;
406         while (x1 < text1_length && y1 < text2_length
407                && text1.charAt(x1) == text2.charAt(y1)) {
408           x1++;
409           y1++;
410         }
411         v1[k1_offset] = x1;
412         if (x1 > text1_length) {
413           // Ran off the right of the graph.
414           k1end += 2;
415         } else if (y1 > text2_length) {
416           // Ran off the bottom of the graph.
417           k1start += 2;
418         } else if (front) {
419           int k2_offset = v_offset + delta - k1;
420           if (k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1) {
421             // Mirror x2 onto top-left coordinate system.
422             int x2 = text1_length - v2[k2_offset];
423             if (x1 >= x2) {
424               // Overlap detected.
425               return diff_bisectSplit(text1, text2, x1, y1, deadline);
426             }
427           }
428         }
429       }
430 
431       // Walk the reverse path one step.
432       for (int k2 = -d + k2start; k2 <= d - k2end; k2 += 2) {
433         int k2_offset = v_offset + k2;
434         int x2;
435         if (k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1])) {
436           x2 = v2[k2_offset + 1];
437         } else {
438           x2 = v2[k2_offset - 1] + 1;
439         }
440         int y2 = x2 - k2;
441         while (x2 < text1_length && y2 < text2_length
442                && text1.charAt(text1_length - x2 - 1)
443                == text2.charAt(text2_length - y2 - 1)) {
444           x2++;
445           y2++;
446         }
447         v2[k2_offset] = x2;
448         if (x2 > text1_length) {
449           // Ran off the left of the graph.
450           k2end += 2;
451         } else if (y2 > text2_length) {
452           // Ran off the top of the graph.
453           k2start += 2;
454         } else if (!front) {
455           int k1_offset = v_offset + delta - k2;
456           if (k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) {
457             int x1 = v1[k1_offset];
458             int y1 = v_offset + x1 - k1_offset;
459             // Mirror x2 onto top-left coordinate system.
460             x2 = text1_length - x2;
461             if (x1 >= x2) {
462               // Overlap detected.
463               return diff_bisectSplit(text1, text2, x1, y1, deadline);
464             }
465           }
466         }
467       }
468     }
469     // Diff took too long and hit the deadline or
470     // number of diffs equals number of characters, no commonality at all.
471     LinkedList<Diff> diffs = new LinkedList<Diff>();
472     diffs.add(new Diff(Operation.DELETE, text1));
473     diffs.add(new Diff(Operation.INSERT, text2));
474     return diffs;
475   }
476 
477   /**
478    * Given the location of the 'middle snake', split the diff in two parts
479    * and recurse.
480    * @param text1 Old string to be diffed.
481    * @param text2 New string to be diffed.
482    * @param x Index of split point in text1.
483    * @param y Index of split point in text2.
484    * @param deadline Time at which to bail if not yet complete.
485    * @return LinkedList of Diff objects.
486    */
487   private LinkedList<Diff> diff_bisectSplit(String text1, String text2,
488                                             int x, int y, long deadline) {
489     String text1a = text1.substring(0, x);
490     String text2a = text2.substring(0, y);
491     String text1b = text1.substring(x);
492     String text2b = text2.substring(y);
493 
494     // Compute both diffs serially.
495     LinkedList<Diff> diffs = diff_main(text1a, text2a, false, deadline);
496     LinkedList<Diff> diffsb = diff_main(text1b, text2b, false, deadline);
497 
498     diffs.addAll(diffsb);
499     return diffs;
500   }
501 
502   /**
503    * Split two texts into a list of strings.  Reduce the texts to a string of
504    * hashes where each Unicode character represents one line.
505    * @param text1 First string.
506    * @param text2 Second string.
507    * @return An object containing the encoded text1, the encoded text2 and
508    *     the List of unique strings.  The zeroth element of the List of
509    *     unique strings is intentionally blank.
510    */
511   protected LinesToCharsResult diff_linesToChars(String text1, String text2) {
512     List<String> lineArray = new ArrayList<String>();
513     Map<String, Integer> lineHash = new HashMap<String, Integer>();
514     // e.g. linearray[4] == "Hello\n"
515     // e.g. linehash.get("Hello\n") == 4
516 
517     // "\x00" is a valid character, but various debuggers don't like it.
518     // So we'll insert a junk entry to avoid generating a null character.
519     lineArray.add("");
520 
521     String chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash);
522     String chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash);
523     return new LinesToCharsResult(chars1, chars2, lineArray);
524   }
525 
526   /**
527    * Split a text into a list of strings.  Reduce the texts to a string of
528    * hashes where each Unicode character represents one line.
529    * @param text String to encode.
530    * @param lineArray List of unique strings.
531    * @param lineHash Map of strings to indices.
532    * @return Encoded string.
533    */
534   private String diff_linesToCharsMunge(String text, List<String> lineArray,
535                                         Map<String, Integer> lineHash) {
536     int lineStart = 0;
537     int lineEnd = -1;
538     String line;
539     StringBuilder chars = new StringBuilder();
540     // Walk the text, pulling out a substring for each line.
541     // text.split('\n') would would temporarily double our memory footprint.
542     // Modifying text would create many large strings to garbage collect.
543     while (lineEnd < text.length() - 1) {
544       lineEnd = text.indexOf('\n', lineStart);
545       if (lineEnd == -1) {
546         lineEnd = text.length() - 1;
547       }
548       line = text.substring(lineStart, lineEnd + 1);
549       lineStart = lineEnd + 1;
550 
551       if (lineHash.containsKey(line)) {
552         chars.append(String.valueOf((char) (int) lineHash.get(line)));
553       } else {
554         lineArray.add(line);
555         lineHash.put(line, lineArray.size() - 1);
556         chars.append(String.valueOf((char) (lineArray.size() - 1)));
557       }
558     }
559     return chars.toString();
560   }
561 
562   /**
563    * Rehydrate the text in a diff from a string of line hashes to real lines of
564    * text.
565    * @param diffs LinkedList of Diff objects.
566    * @param lineArray List of unique strings.
567    */
568   protected void diff_charsToLines(LinkedList<Diff> diffs,
569                                   List<String> lineArray) {
570     StringBuilder text;
571     for (Diff diff : diffs) {
572       text = new StringBuilder();
573       for (int y = 0; y < diff.text.length(); y++) {
574         text.append(lineArray.get(diff.text.charAt(y)));
575       }
576       diff.text = text.toString();
577     }
578   }
579 
580   /**
581    * Determine the common prefix of two strings
582    * @param text1 First string.
583    * @param text2 Second string.
584    * @return The number of characters common to the start of each string.
585    */
586   public int diff_commonPrefix(String text1, String text2) {
587     // Performance analysis: http://neil.fraser.name/news/2007/10/09/
588     int n = Math.min(text1.length(), text2.length());
589     for (int i = 0; i < n; i++) {
590       if (text1.charAt(i) != text2.charAt(i)) {
591         return i;
592       }
593     }
594     return n;
595   }
596 
597   /**
598    * Determine the common suffix of two strings
599    * @param text1 First string.
600    * @param text2 Second string.
601    * @return The number of characters common to the end of each string.
602    */
603   public int diff_commonSuffix(String text1, String text2) {
604     // Performance analysis: http://neil.fraser.name/news/2007/10/09/
605     int text1_length = text1.length();
606     int text2_length = text2.length();
607     int n = Math.min(text1_length, text2_length);
608     for (int i = 1; i <= n; i++) {
609       if (text1.charAt(text1_length - i) != text2.charAt(text2_length - i)) {
610         return i - 1;
611       }
612     }
613     return n;
614   }
615 
616   /**
617    * Determine if the suffix of one string is the prefix of another.
618    * @param text1 First string.
619    * @param text2 Second string.
620    * @return The number of characters common to the end of the first
621    *     string and the start of the second string.
622    */
623   protected int diff_commonOverlap(String text1, String text2) {
624     // Cache the text lengths to prevent multiple calls.
625     int text1_length = text1.length();
626     int text2_length = text2.length();
627     // Eliminate the null case.
628     if (text1_length == 0 || text2_length == 0) {
629       return 0;
630     }
631     // Truncate the longer string.
632     if (text1_length > text2_length) {
633       text1 = text1.substring(text1_length - text2_length);
634     } else if (text1_length < text2_length) {
635       text2 = text2.substring(0, text1_length);
636     }
637     int text_length = Math.min(text1_length, text2_length);
638     // Quick check for the worst case.
639     if (text1.equals(text2)) {
640       return text_length;
641     }
642 
643     // Start by looking for a single character match
644     // and increase length until no match is found.
645     // Performance analysis: http://neil.fraser.name/news/2010/11/04/
646     int best = 0;
647     int length = 1;
648     while (true) {
649       String pattern = text1.substring(text_length - length);
650       int found = text2.indexOf(pattern);
651       if (found == -1) {
652         return best;
653       }
654       length += found;
655       if (found == 0 || text1.substring(text_length - length).equals(
656           text2.substring(0, length))) {
657         best = length;
658         length++;
659       }
660     }
661   }
662 
663   /**
664    * Do the two texts share a substring which is at least half the length of
665    * the longer text?
666    * This speedup can produce non-minimal diffs.
667    * @param text1 First string.
668    * @param text2 Second string.
669    * @return Five element String array, containing the prefix of text1, the
670    *     suffix of text1, the prefix of text2, the suffix of text2 and the
671    *     common middle.  Or null if there was no match.
672    */
673   protected String[] diff_halfMatch(String text1, String text2) {
674     if (Diff_Timeout <= 0) {
675       // Don't risk returning a non-optimal diff if we have unlimited time.
676       return null;
677     }
678     String longtext = text1.length() > text2.length() ? text1 : text2;
679     String shorttext = text1.length() > text2.length() ? text2 : text1;
680     if (longtext.length() < 4 || shorttext.length() * 2 < longtext.length()) {
681       return null;  // Pointless.
682     }
683 
684     // First check if the second quarter is the seed for a half-match.
685     String[] hm1 = diff_halfMatchI(longtext, shorttext,
686                                    (longtext.length() + 3) / 4);
687     // Check again based on the third quarter.
688     String[] hm2 = diff_halfMatchI(longtext, shorttext,
689                                    (longtext.length() + 1) / 2);
690     String[] hm;
691     if (hm1 == null && hm2 == null) {
692       return null;
693     } else if (hm2 == null) {
694       hm = hm1;
695     } else if (hm1 == null) {
696       hm = hm2;
697     } else {
698       // Both matched.  Select the longest.
699       hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2;
700     }
701 
702     // A half-match was found, sort out the return data.
703     if (text1.length() > text2.length()) {
704       return hm;
705       //return new String[]{hm[0], hm[1], hm[2], hm[3], hm[4]};
706     } else {
707       return new String[]{hm[2], hm[3], hm[0], hm[1], hm[4]};
708     }
709   }
710 
711   /**
712    * Does a substring of shorttext exist within longtext such that the
713    * substring is at least half the length of longtext?
714    * @param longtext Longer string.
715    * @param shorttext Shorter string.
716    * @param i Start index of quarter length substring within longtext.
717    * @return Five element String array, containing the prefix of longtext, the
718    *     suffix of longtext, the prefix of shorttext, the suffix of shorttext
719    *     and the common middle.  Or null if there was no match.
720    */
721   private String[] diff_halfMatchI(String longtext, String shorttext, int i) {
722     // Start with a 1/4 length substring at position i as a seed.
723     String seed = longtext.substring(i, i + longtext.length() / 4);
724     int j = -1;
725     String best_common = "";
726     String best_longtext_a = "", best_longtext_b = "";
727     String best_shorttext_a = "", best_shorttext_b = "";
728     while ((j = shorttext.indexOf(seed, j + 1)) != -1) {
729       int prefixLength = diff_commonPrefix(longtext.substring(i),
730                                            shorttext.substring(j));
731       int suffixLength = diff_commonSuffix(longtext.substring(0, i),
732                                            shorttext.substring(0, j));
733       if (best_common.length() < suffixLength + prefixLength) {
734         best_common = shorttext.substring(j - suffixLength, j)
735             + shorttext.substring(j, j + prefixLength);
736         best_longtext_a = longtext.substring(0, i - suffixLength);
737         best_longtext_b = longtext.substring(i + prefixLength);
738         best_shorttext_a = shorttext.substring(0, j - suffixLength);
739         best_shorttext_b = shorttext.substring(j + prefixLength);
740       }
741     }
742     if (best_common.length() * 2 >= longtext.length()) {
743       return new String[]{best_longtext_a, best_longtext_b,
744                           best_shorttext_a, best_shorttext_b, best_common};
745     } else {
746       return null;
747     }
748   }
749 
750   /**
751    * Reduce the number of edits by eliminating semantically trivial equalities.
752    * @param diffs LinkedList of Diff objects.
753    */
754   public void diff_cleanupSemantic(LinkedList<Diff> diffs) {
755     if (diffs.isEmpty()) {
756       return;
757     }
758     boolean changes = false;
759     Stack<Diff> equalities = new Stack<Diff>();  // Stack of qualities.
760     String lastequality = null; // Always equal to equalities.lastElement().text
761     ListIterator<Diff> pointer = diffs.listIterator();
762     // Number of characters that changed prior to the equality.
763     int length_insertions1 = 0;
764     int length_deletions1 = 0;
765     // Number of characters that changed after the equality.
766     int length_insertions2 = 0;
767     int length_deletions2 = 0;
768     Diff thisDiff = pointer.next();
769     while (thisDiff != null) {
770       if (thisDiff.operation == Operation.EQUAL) {
771         // Equality found.
772         equalities.push(thisDiff);
773         length_insertions1 = length_insertions2;
774         length_deletions1 = length_deletions2;
775         length_insertions2 = 0;
776         length_deletions2 = 0;
777         lastequality = thisDiff.text;
778       } else {
779         // An insertion or deletion.
780         if (thisDiff.operation == Operation.INSERT) {
781           length_insertions2 += thisDiff.text.length();
782         } else {
783           length_deletions2 += thisDiff.text.length();
784         }
785         // Eliminate an equality that is smaller or equal to the edits on both
786         // sides of it.
787         if (lastequality != null && (lastequality.length()
788             <= Math.max(length_insertions1, length_deletions1))
789             && (lastequality.length()
790                 <= Math.max(length_insertions2, length_deletions2))) {
791           //System.out.println("Splitting: '" + lastequality + "'");
792           // Walk back to offending equality.
793           while (thisDiff != equalities.lastElement()) {
794             thisDiff = pointer.previous();
795           }
796           pointer.next();
797 
798           // Replace equality with a delete.
799           pointer.set(new Diff(Operation.DELETE, lastequality));
800           // Insert a corresponding an insert.
801           pointer.add(new Diff(Operation.INSERT, lastequality));
802 
803           equalities.pop();  // Throw away the equality we just deleted.
804           if (!equalities.empty()) {
805             // Throw away the previous equality (it needs to be reevaluated).
806             equalities.pop();
807           }
808           if (equalities.empty()) {
809             // There are no previous equalities, walk back to the start.
810             while (pointer.hasPrevious()) {
811               pointer.previous();
812             }
813           } else {
814             // There is a safe equality we can fall back to.
815             thisDiff = equalities.lastElement();
816             while (thisDiff != pointer.previous()) {
817               // Intentionally empty loop.
818             }
819           }
820 
821           length_insertions1 = 0;  // Reset the counters.
822           length_insertions2 = 0;
823           length_deletions1 = 0;
824           length_deletions2 = 0;
825           lastequality = null;
826           changes = true;
827         }
828       }
829       thisDiff = pointer.hasNext() ? pointer.next() : null;
830     }
831 
832     // Normalize the diff.
833     if (changes) {
834       diff_cleanupMerge(diffs);
835     }
836     diff_cleanupSemanticLossless(diffs);
837 
838     // Find any overlaps between deletions and insertions.
839     // e.g: <del>abcxxx</del><ins>xxxdef</ins>
840     //   -> <del>abc</del>xxx<ins>def</ins>
841     // e.g: <del>xxxabc</del><ins>defxxx</ins>
842     //   -> <ins>def</ins>xxx<del>abc</del>
843     // Only extract an overlap if it is as big as the edit ahead or behind it.
844     pointer = diffs.listIterator();
845     Diff prevDiff = null;
846     thisDiff = null;
847     if (pointer.hasNext()) {
848       prevDiff = pointer.next();
849       if (pointer.hasNext()) {
850         thisDiff = pointer.next();
851       }
852     }
853     while (thisDiff != null) {
854       if (prevDiff.operation == Operation.DELETE &&
855           thisDiff.operation == Operation.INSERT) {
856         String deletion = prevDiff.text;
857         String insertion = thisDiff.text;
858         int overlap_length1 = this.diff_commonOverlap(deletion, insertion);
859         int overlap_length2 = this.diff_commonOverlap(insertion, deletion);
860         if (overlap_length1 >= overlap_length2) {
861           if (overlap_length1 >= deletion.length() / 2.0 ||
862               overlap_length1 >= insertion.length() / 2.0) {
863             // Overlap found. Insert an equality and trim the surrounding edits.
864             pointer.previous();
865             pointer.add(new Diff(Operation.EQUAL,
866                                  insertion.substring(0, overlap_length1)));
867             prevDiff.text =
868                 deletion.substring(0, deletion.length() - overlap_length1);
869             thisDiff.text = insertion.substring(overlap_length1);
870             // pointer.add inserts the element before the cursor, so there is
871             // no need to step past the new element.
872           }
873         } else {
874           if (overlap_length2 >= deletion.length() / 2.0 ||
875               overlap_length2 >= insertion.length() / 2.0) {
876             // Reverse overlap found.
877             // Insert an equality and swap and trim the surrounding edits.
878             pointer.previous();
879             pointer.add(new Diff(Operation.EQUAL,
880                                  deletion.substring(0, overlap_length2)));
881             prevDiff.operation = Operation.INSERT;
882             prevDiff.text =
883               insertion.substring(0, insertion.length() - overlap_length2);
884             thisDiff.operation = Operation.DELETE;
885             thisDiff.text = deletion.substring(overlap_length2);
886             // pointer.add inserts the element before the cursor, so there is
887             // no need to step past the new element.
888           }
889         }
890         thisDiff = pointer.hasNext() ? pointer.next() : null;
891       }
892       prevDiff = thisDiff;
893       thisDiff = pointer.hasNext() ? pointer.next() : null;
894     }
895   }
896 
897   /**
898    * Look for single edits surrounded on both sides by equalities
899    * which can be shifted sideways to align the edit to a word boundary.
900    * e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
901    * @param diffs LinkedList of Diff objects.
902    */
903   public void diff_cleanupSemanticLossless(LinkedList<Diff> diffs) {
904     String equality1, edit, equality2;
905     String commonString;
906     int commonOffset;
907     int score, bestScore;
908     String bestEquality1, bestEdit, bestEquality2;
909     // Create a new iterator at the start.
910     ListIterator<Diff> pointer = diffs.listIterator();
911     Diff prevDiff = pointer.hasNext() ? pointer.next() : null;
912     Diff thisDiff = pointer.hasNext() ? pointer.next() : null;
913     Diff nextDiff = pointer.hasNext() ? pointer.next() : null;
914     // Intentionally ignore the first and last element (don't need checking).
915     while (nextDiff != null) {
916       if (prevDiff.operation == Operation.EQUAL &&
917           nextDiff.operation == Operation.EQUAL) {
918         // This is a single edit surrounded by equalities.
919         equality1 = prevDiff.text;
920         edit = thisDiff.text;
921         equality2 = nextDiff.text;
922 
923         // First, shift the edit as far left as possible.
924         commonOffset = diff_commonSuffix(equality1, edit);
925         if (commonOffset != 0) {
926           commonString = edit.substring(edit.length() - commonOffset);
927           equality1 = equality1.substring(0, equality1.length() - commonOffset);
928           edit = commonString + edit.substring(0, edit.length() - commonOffset);
929           equality2 = commonString + equality2;
930         }
931 
932         // Second, step character by character right, looking for the best fit.
933         bestEquality1 = equality1;
934         bestEdit = edit;
935         bestEquality2 = equality2;
936         bestScore = diff_cleanupSemanticScore(equality1, edit)
937             + diff_cleanupSemanticScore(edit, equality2);
938         while (edit.length() != 0 && equality2.length() != 0
939             && edit.charAt(0) == equality2.charAt(0)) {
940           equality1 += edit.charAt(0);
941           edit = edit.substring(1) + equality2.charAt(0);
942           equality2 = equality2.substring(1);
943           score = diff_cleanupSemanticScore(equality1, edit)
944               + diff_cleanupSemanticScore(edit, equality2);
945           // The >= encourages trailing rather than leading whitespace on edits.
946           if (score >= bestScore) {
947             bestScore = score;
948             bestEquality1 = equality1;
949             bestEdit = edit;
950             bestEquality2 = equality2;
951           }
952         }
953 
954         if (!prevDiff.text.equals(bestEquality1)) {
955           // We have an improvement, save it back to the diff.
956           if (bestEquality1.length() != 0) {
957             prevDiff.text = bestEquality1;
958           } else {
959             pointer.previous(); // Walk past nextDiff.
960             pointer.previous(); // Walk past thisDiff.
961             pointer.previous(); // Walk past prevDiff.
962             pointer.remove(); // Delete prevDiff.
963             pointer.next(); // Walk past thisDiff.
964             pointer.next(); // Walk past nextDiff.
965           }
966           thisDiff.text = bestEdit;
967           if (bestEquality2.length() != 0) {
968             nextDiff.text = bestEquality2;
969           } else {
970             pointer.remove(); // Delete nextDiff.
971             nextDiff = thisDiff;
972             thisDiff = prevDiff;
973           }
974         }
975       }
976       prevDiff = thisDiff;
977       thisDiff = nextDiff;
978       nextDiff = pointer.hasNext() ? pointer.next() : null;
979     }
980   }
981 
982   /**
983    * Given two strings, compute a score representing whether the internal
984    * boundary falls on logical boundaries.
985    * Scores range from 6 (best) to 0 (worst).
986    * @param one First string.
987    * @param two Second string.
988    * @return The score.
989    */
990   private int diff_cleanupSemanticScore(String one, String two) {
991     if (one.length() == 0 || two.length() == 0) {
992       // Edges are the best.
993       return 6;
994     }
995 
996     // Each port of this function behaves slightly differently due to
997     // subtle differences in each language's definition of things like
998     // 'whitespace'.  Since this function's purpose is largely cosmetic,
999     // the choice has been made to use each language's native features
1000     // rather than force total conformity.
1001     char char1 = one.charAt(one.length() - 1);
1002     char char2 = two.charAt(0);
1003     boolean nonAlphaNumeric1 = !Character.isLetterOrDigit(char1);
1004     boolean nonAlphaNumeric2 = !Character.isLetterOrDigit(char2);
1005     boolean whitespace1 = nonAlphaNumeric1 && Character.isWhitespace(char1);
1006     boolean whitespace2 = nonAlphaNumeric2 && Character.isWhitespace(char2);
1007     boolean lineBreak1 = whitespace1
1008         && Character.getType(char1) == Character.CONTROL;
1009     boolean lineBreak2 = whitespace2
1010         && Character.getType(char2) == Character.CONTROL;
1011     boolean blankLine1 = lineBreak1 && BLANKLINEEND.matcher(one).find();
1012     boolean blankLine2 = lineBreak2 && BLANKLINESTART.matcher(two).find();
1013 
1014     if (blankLine1 || blankLine2) {
1015       // Five points for blank lines.
1016       return 5;
1017     } else if (lineBreak1 || lineBreak2) {
1018       // Four points for line breaks.
1019       return 4;
1020     } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) {
1021       // Three points for end of sentences.
1022       return 3;
1023     } else if (whitespace1 || whitespace2) {
1024       // Two points for whitespace.
1025       return 2;
1026     } else if (nonAlphaNumeric1 || nonAlphaNumeric2) {
1027       // One point for non-alphanumeric.
1028       return 1;
1029     }
1030     return 0;
1031   }
1032 
1033   // Define some regex patterns for matching boundaries.
1034   private Pattern BLANKLINEEND
1035       = Pattern.compile("\\n\\r?\\n\\Z", Pattern.DOTALL);
1036   private Pattern BLANKLINESTART
1037       = Pattern.compile("\\A\\r?\\n\\r?\\n", Pattern.DOTALL);
1038 
1039   /**
1040    * Reduce the number of edits by eliminating operationally trivial equalities.
1041    * @param diffs LinkedList of Diff objects.
1042    */
1043   public void diff_cleanupEfficiency(LinkedList<Diff> diffs) {
1044     if (diffs.isEmpty()) {
1045       return;
1046     }
1047     boolean changes = false;
1048     Stack<Diff> equalities = new Stack<Diff>();  // Stack of equalities.
1049     String lastequality = null; // Always equal to equalities.lastElement().text
1050     ListIterator<Diff> pointer = diffs.listIterator();
1051     // Is there an insertion operation before the last equality.
1052     boolean pre_ins = false;
1053     // Is there a deletion operation before the last equality.
1054     boolean pre_del = false;
1055     // Is there an insertion operation after the last equality.
1056     boolean post_ins = false;
1057     // Is there a deletion operation after the last equality.
1058     boolean post_del = false;
1059     Diff thisDiff = pointer.next();
1060     Diff safeDiff = thisDiff;  // The last Diff that is known to be unsplitable.
1061     while (thisDiff != null) {
1062       if (thisDiff.operation == Operation.EQUAL) {
1063         // Equality found.
1064         if (thisDiff.text.length() < Diff_EditCost && (post_ins || post_del)) {
1065           // Candidate found.
1066           equalities.push(thisDiff);
1067           pre_ins = post_ins;
1068           pre_del = post_del;
1069           lastequality = thisDiff.text;
1070         } else {
1071           // Not a candidate, and can never become one.
1072           equalities.clear();
1073           lastequality = null;
1074           safeDiff = thisDiff;
1075         }
1076         post_ins = post_del = false;
1077       } else {
1078         // An insertion or deletion.
1079         if (thisDiff.operation == Operation.DELETE) {
1080           post_del = true;
1081         } else {
1082           post_ins = true;
1083         }
1084         /*
1085          * Five types to be split:
1086          * <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
1087          * <ins>A</ins>X<ins>C</ins><del>D</del>
1088          * <ins>A</ins><del>B</del>X<ins>C</ins>
1089          * <ins>A</del>X<ins>C</ins><del>D</del>
1090          * <ins>A</ins><del>B</del>X<del>C</del>
1091          */
1092         if (lastequality != null
1093             && ((pre_ins && pre_del && post_ins && post_del)
1094                 || ((lastequality.length() < Diff_EditCost / 2)
1095                     && ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0)
1096                         + (post_ins ? 1 : 0) + (post_del ? 1 : 0)) == 3))) {
1097           //System.out.println("Splitting: '" + lastequality + "'");
1098           // Walk back to offending equality.
1099           while (thisDiff != equalities.lastElement()) {
1100             thisDiff = pointer.previous();
1101           }
1102           pointer.next();
1103 
1104           // Replace equality with a delete.
1105           pointer.set(new Diff(Operation.DELETE, lastequality));
1106           // Insert a corresponding an insert.
1107           pointer.add(thisDiff = new Diff(Operation.INSERT, lastequality));
1108 
1109           equalities.pop();  // Throw away the equality we just deleted.
1110           lastequality = null;
1111           if (pre_ins && pre_del) {
1112             // No changes made which could affect previous entry, keep going.
1113             post_ins = post_del = true;
1114             equalities.clear();
1115             safeDiff = thisDiff;
1116           } else {
1117             if (!equalities.empty()) {
1118               // Throw away the previous equality (it needs to be reevaluated).
1119               equalities.pop();
1120             }
1121             if (equalities.empty()) {
1122               // There are no previous questionable equalities,
1123               // walk back to the last known safe diff.
1124               thisDiff = safeDiff;
1125             } else {
1126               // There is an equality we can fall back to.
1127               thisDiff = equalities.lastElement();
1128             }
1129             while (thisDiff != pointer.previous()) {
1130               // Intentionally empty loop.
1131             }
1132             post_ins = post_del = false;
1133           }
1134 
1135           changes = true;
1136         }
1137       }
1138       thisDiff = pointer.hasNext() ? pointer.next() : null;
1139     }
1140 
1141     if (changes) {
1142       diff_cleanupMerge(diffs);
1143     }
1144   }
1145 
1146   /**
1147    * Reorder and merge like edit sections.  Merge equalities.
1148    * Any edit section can move as long as it doesn't cross an equality.
1149    * @param diffs LinkedList of Diff objects.
1150    */
1151   public void diff_cleanupMerge(LinkedList<Diff> diffs) {
1152     diffs.add(new Diff(Operation.EQUAL, ""));  // Add a dummy entry at the end.
1153     ListIterator<Diff> pointer = diffs.listIterator();
1154     int count_delete = 0;
1155     int count_insert = 0;
1156     String text_delete = "";
1157     String text_insert = "";
1158     Diff thisDiff = pointer.next();
1159     Diff prevEqual = null;
1160     int commonlength;
1161     while (thisDiff != null) {
1162       switch (thisDiff.operation) {
1163       case INSERT:
1164         count_insert++;
1165         text_insert += thisDiff.text;
1166         prevEqual = null;
1167         break;
1168       case DELETE:
1169         count_delete++;
1170         text_delete += thisDiff.text;
1171         prevEqual = null;
1172         break;
1173       case EQUAL:
1174         if (count_delete + count_insert > 1) {
1175           boolean both_types = count_delete != 0 && count_insert != 0;
1176           // Delete the offending records.
1177           pointer.previous();  // Reverse direction.
1178           while (count_delete-- > 0) {
1179             pointer.previous();
1180             pointer.remove();
1181           }
1182           while (count_insert-- > 0) {
1183             pointer.previous();
1184             pointer.remove();
1185           }
1186           if (both_types) {
1187             // Factor out any common prefixies.
1188             commonlength = diff_commonPrefix(text_insert, text_delete);
1189             if (commonlength != 0) {
1190               if (pointer.hasPrevious()) {
1191                 thisDiff = pointer.previous();
1192                 assert thisDiff.operation == Operation.EQUAL
1193                        : "Previous diff should have been an equality.";
1194                 thisDiff.text += text_insert.substring(0, commonlength);
1195                 pointer.next();
1196               } else {
1197                 pointer.add(new Diff(Operation.EQUAL,
1198                     text_insert.substring(0, commonlength)));
1199               }
1200               text_insert = text_insert.substring(commonlength);
1201               text_delete = text_delete.substring(commonlength);
1202             }
1203             // Factor out any common suffixies.
1204             commonlength = diff_commonSuffix(text_insert, text_delete);
1205             if (commonlength != 0) {
1206               thisDiff = pointer.next();
1207               thisDiff.text = text_insert.substring(text_insert.length()
1208                   - commonlength) + thisDiff.text;
1209               text_insert = text_insert.substring(0, text_insert.length()
1210                   - commonlength);
1211               text_delete = text_delete.substring(0, text_delete.length()
1212                   - commonlength);
1213               pointer.previous();
1214             }
1215           }
1216           // Insert the merged records.
1217           if (text_delete.length() != 0) {
1218             pointer.add(new Diff(Operation.DELETE, text_delete));
1219           }
1220           if (text_insert.length() != 0) {
1221             pointer.add(new Diff(Operation.INSERT, text_insert));
1222           }
1223           // Step forward to the equality.
1224           thisDiff = pointer.hasNext() ? pointer.next() : null;
1225         } else if (prevEqual != null) {
1226           // Merge this equality with the previous one.
1227           prevEqual.text += thisDiff.text;
1228           pointer.remove();
1229           thisDiff = pointer.previous();
1230           pointer.next();  // Forward direction
1231         }
1232         count_insert = 0;
1233         count_delete = 0;
1234         text_delete = "";
1235         text_insert = "";
1236         prevEqual = thisDiff;
1237         break;
1238       }
1239       thisDiff = pointer.hasNext() ? pointer.next() : null;
1240     }
1241     if (diffs.getLast().text.length() == 0) {
1242       diffs.removeLast();  // Remove the dummy entry at the end.
1243     }
1244 
1245     /*
1246      * Second pass: look for single edits surrounded on both sides by equalities
1247      * which can be shifted sideways to eliminate an equality.
1248      * e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
1249      */
1250     boolean changes = false;
1251     // Create a new iterator at the start.
1252     // (As opposed to walking the current one back.)
1253     pointer = diffs.listIterator();
1254     Diff prevDiff = pointer.hasNext() ? pointer.next() : null;
1255     thisDiff = pointer.hasNext() ? pointer.next() : null;
1256     Diff nextDiff = pointer.hasNext() ? pointer.next() : null;
1257     // Intentionally ignore the first and last element (don't need checking).
1258     while (nextDiff != null) {
1259       if (prevDiff.operation == Operation.EQUAL &&
1260           nextDiff.operation == Operation.EQUAL) {
1261         // This is a single edit surrounded by equalities.
1262         if (thisDiff.text.endsWith(prevDiff.text)) {
1263           // Shift the edit over the previous equality.
1264           thisDiff.text = prevDiff.text
1265               + thisDiff.text.substring(0, thisDiff.text.length()
1266                                            - prevDiff.text.length());
1267           nextDiff.text = prevDiff.text + nextDiff.text;
1268           pointer.previous(); // Walk past nextDiff.
1269           pointer.previous(); // Walk past thisDiff.
1270           pointer.previous(); // Walk past prevDiff.
1271           pointer.remove(); // Delete prevDiff.
1272           pointer.next(); // Walk past thisDiff.
1273           thisDiff = pointer.next(); // Walk past nextDiff.
1274           nextDiff = pointer.hasNext() ? pointer.next() : null;
1275           changes = true;
1276         } else if (thisDiff.text.startsWith(nextDiff.text)) {
1277           // Shift the edit over the next equality.
1278           prevDiff.text += nextDiff.text;
1279           thisDiff.text = thisDiff.text.substring(nextDiff.text.length())
1280               + nextDiff.text;
1281           pointer.remove(); // Delete nextDiff.
1282           nextDiff = pointer.hasNext() ? pointer.next() : null;
1283           changes = true;
1284         }
1285       }
1286       prevDiff = thisDiff;
1287       thisDiff = nextDiff;
1288       nextDiff = pointer.hasNext() ? pointer.next() : null;
1289     }
1290     // If shifts were made, the diff needs reordering and another shift sweep.
1291     if (changes) {
1292       diff_cleanupMerge(diffs);
1293     }
1294   }
1295 
1296   /**
1297    * loc is a location in text1, compute and return the equivalent location in
1298    * text2.
1299    * e.g. "The cat" vs "The big cat", 1->1, 5->8
1300    * @param diffs LinkedList of Diff objects.
1301    * @param loc Location within text1.
1302    * @return Location within text2.
1303    */
1304   public int diff_xIndex(LinkedList<Diff> diffs, int loc) {
1305     int chars1 = 0;
1306     int chars2 = 0;
1307     int last_chars1 = 0;
1308     int last_chars2 = 0;
1309     Diff lastDiff = null;
1310     for (Diff aDiff : diffs) {
1311       if (aDiff.operation != Operation.INSERT) {
1312         // Equality or deletion.
1313         chars1 += aDiff.text.length();
1314       }
1315       if (aDiff.operation != Operation.DELETE) {
1316         // Equality or insertion.
1317         chars2 += aDiff.text.length();
1318       }
1319       if (chars1 > loc) {
1320         // Overshot the location.
1321         lastDiff = aDiff;
1322         break;
1323       }
1324       last_chars1 = chars1;
1325       last_chars2 = chars2;
1326     }
1327     if (lastDiff != null && lastDiff.operation == Operation.DELETE) {
1328       // The location was deleted.
1329       return last_chars2;
1330     }
1331     // Add the remaining character length.
1332     return last_chars2 + (loc - last_chars1);
1333   }
1334 
1335   /**
1336    * Convert a Diff list into a pretty HTML report.
1337    * @param diffs LinkedList of Diff objects.
1338    * @return HTML representation.
1339    */
1340   public String diff_prettyHtml(LinkedList<Diff> diffs) {
1341     StringBuilder html = new StringBuilder();
1342     for (Diff aDiff : diffs) {
1343       String text = aDiff.text.replace("&", "&amp;").replace("<", "&lt;")
1344           .replace(">", "&gt;").replace("\n", "&para;<br>");
1345       switch (aDiff.operation) {
1346       case INSERT:
1347         html.append("<ins style=\"background:#e6ffe6;\">").append(text)
1348             .append("</ins>");
1349         break;
1350       case DELETE:
1351         html.append("<del style=\"background:#ffe6e6;\">").append(text)
1352             .append("</del>");
1353         break;
1354       case EQUAL:
1355         html.append("<span>").append(text).append("</span>");
1356         break;
1357       }
1358     }
1359     return html.toString();
1360   }
1361 
1362   /**
1363    * Compute and return the source text (all equalities and deletions).
1364    * @param diffs LinkedList of Diff objects.
1365    * @return Source text.
1366    */
1367   public String diff_text1(LinkedList<Diff> diffs) {
1368     StringBuilder text = new StringBuilder();
1369     for (Diff aDiff : diffs) {
1370       if (aDiff.operation != Operation.INSERT) {
1371         text.append(aDiff.text);
1372       }
1373     }
1374     return text.toString();
1375   }
1376 
1377   /**
1378    * Compute and return the destination text (all equalities and insertions).
1379    * @param diffs LinkedList of Diff objects.
1380    * @return Destination text.
1381    */
1382   public String diff_text2(LinkedList<Diff> diffs) {
1383     StringBuilder text = new StringBuilder();
1384     for (Diff aDiff : diffs) {
1385       if (aDiff.operation != Operation.DELETE) {
1386         text.append(aDiff.text);
1387       }
1388     }
1389     return text.toString();
1390   }
1391 
1392   /**
1393    * Compute the Levenshtein distance; the number of inserted, deleted or
1394    * substituted characters.
1395    * @param diffs LinkedList of Diff objects.
1396    * @return Number of changes.
1397    */
1398   public int diff_levenshtein(LinkedList<Diff> diffs) {
1399     int levenshtein = 0;
1400     int insertions = 0;
1401     int deletions = 0;
1402     for (Diff aDiff : diffs) {
1403       switch (aDiff.operation) {
1404       case INSERT:
1405         insertions += aDiff.text.length();
1406         break;
1407       case DELETE:
1408         deletions += aDiff.text.length();
1409         break;
1410       case EQUAL:
1411         // A deletion and an insertion is one substitution.
1412         levenshtein += Math.max(insertions, deletions);
1413         insertions = 0;
1414         deletions = 0;
1415         break;
1416       }
1417     }
1418     levenshtein += Math.max(insertions, deletions);
1419     return levenshtein;
1420   }
1421 
1422   /**
1423    * Crush the diff into an encoded string which describes the operations
1424    * required to transform text1 into text2.
1425    * E.g. =3\t-2\t+ing  -> Keep 3 chars, delete 2 chars, insert 'ing'.
1426    * Operations are tab-separated.  Inserted text is escaped using %xx notation.
1427    * @param diffs Array of Diff objects.
1428    * @return Delta text.
1429    */
1430   public String diff_toDelta(LinkedList<Diff> diffs) {
1431     StringBuilder text = new StringBuilder();
1432     for (Diff aDiff : diffs) {
1433       switch (aDiff.operation) {
1434       case INSERT:
1435         try {
1436           text.append("+").append(URLEncoder.encode(aDiff.text, "UTF-8")
1437                                             .replace('+', ' ')).append("\t");
1438         } catch (UnsupportedEncodingException e) {
1439           // Not likely on modern system.
1440           throw new Error("This system does not support UTF-8.", e);
1441         }
1442         break;
1443       case DELETE:
1444         text.append("-").append(aDiff.text.length()).append("\t");
1445         break;
1446       case EQUAL:
1447         text.append("=").append(aDiff.text.length()).append("\t");
1448         break;
1449       }
1450     }
1451     String delta = text.toString();
1452     if (delta.length() != 0) {
1453       // Strip off trailing tab character.
1454       delta = delta.substring(0, delta.length() - 1);
1455       delta = unescapeForEncodeUriCompatability(delta);
1456     }
1457     return delta;
1458   }
1459 
1460   /**
1461    * Given the original text1, and an encoded string which describes the
1462    * operations required to transform text1 into text2, compute the full diff.
1463    * @param text1 Source string for the diff.
1464    * @param delta Delta text.
1465    * @return Array of Diff objects or null if invalid.
1466    * @throws IllegalArgumentException If invalid input.
1467    */
1468   public LinkedList<Diff> diff_fromDelta(String text1, String delta)
1469       throws IllegalArgumentException {
1470     LinkedList<Diff> diffs = new LinkedList<Diff>();
1471     int pointer = 0;  // Cursor in text1
1472     String[] tokens = delta.split("\t");
1473     for (String token : tokens) {
1474       if (token.length() == 0) {
1475         // Blank tokens are ok (from a trailing \t).
1476         continue;
1477       }
1478       // Each token begins with a one character parameter which specifies the
1479       // operation of this token (delete, insert, equality).
1480       String param = token.substring(1);
1481       switch (token.charAt(0)) {
1482       case '+':
1483         // decode would change all "+" to " "
1484         param = param.replace("+", "%2B");
1485         try {
1486           param = URLDecoder.decode(param, "UTF-8");
1487         } catch (UnsupportedEncodingException e) {
1488           // Not likely on modern system.
1489           throw new Error("This system does not support UTF-8.", e);
1490         } catch (IllegalArgumentException e) {
1491           // Malformed URI sequence.
1492           throw new IllegalArgumentException(
1493               "Illegal escape in diff_fromDelta: " + param, e);
1494         }
1495         diffs.add(new Diff(Operation.INSERT, param));
1496         break;
1497       case '-':
1498         // Fall through.
1499       case '=':
1500         int n;
1501         try {
1502           n = Integer.parseInt(param);
1503         } catch (NumberFormatException e) {
1504           throw new IllegalArgumentException(
1505               "Invalid number in diff_fromDelta: " + param, e);
1506         }
1507         if (n < 0) {
1508           throw new IllegalArgumentException(
1509               "Negative number in diff_fromDelta: " + param);
1510         }
1511         String text;
1512         try {
1513           text = text1.substring(pointer, pointer += n);
1514         } catch (StringIndexOutOfBoundsException e) {
1515           throw new IllegalArgumentException("Delta length (" + pointer
1516               + ") larger than source text length (" + text1.length()
1517               + ").", e);
1518         }
1519         if (token.charAt(0) == '=') {
1520           diffs.add(new Diff(Operation.EQUAL, text));
1521         } else {
1522           diffs.add(new Diff(Operation.DELETE, text));
1523         }
1524         break;
1525       default:
1526         // Anything else is an error.
1527         throw new IllegalArgumentException(
1528             "Invalid diff operation in diff_fromDelta: " + token.charAt(0));
1529       }
1530     }
1531     if (pointer != text1.length()) {
1532       throw new IllegalArgumentException("Delta length (" + pointer
1533           + ") smaller than source text length (" + text1.length() + ").");
1534     }
1535     return diffs;
1536   }
1537 
1538 
1539   //  MATCH FUNCTIONS
1540 
1541 
1542   /**
1543    * Locate the best instance of 'pattern' in 'text' near 'loc'.
1544    * Returns -1 if no match found.
1545    * @param text The text to search.
1546    * @param pattern The pattern to search for.
1547    * @param loc The location to search around.
1548    * @return Best match index or -1.
1549    */
1550   public int match_main(String text, String pattern, int loc) {
1551     // Check for null inputs.
1552     if (text == null || pattern == null) {
1553       throw new IllegalArgumentException("Null inputs. (match_main)");
1554     }
1555 
1556     loc = Math.max(0, Math.min(loc, text.length()));
1557     if (text.equals(pattern)) {
1558       // Shortcut (potentially not guaranteed by the algorithm)
1559       return 0;
1560     } else if (text.length() == 0) {
1561       // Nothing to match.
1562       return -1;
1563     } else if (loc + pattern.length() <= text.length()
1564         && text.substring(loc, loc + pattern.length()).equals(pattern)) {
1565       // Perfect match at the perfect spot!  (Includes case of null pattern)
1566       return loc;
1567     } else {
1568       // Do a fuzzy compare.
1569       return match_bitap(text, pattern, loc);
1570     }
1571   }
1572 
1573   /**
1574    * Locate the best instance of 'pattern' in 'text' near 'loc' using the
1575    * Bitap algorithm.  Returns -1 if no match found.
1576    * @param text The text to search.
1577    * @param pattern The pattern to search for.
1578    * @param loc The location to search around.
1579    * @return Best match index or -1.
1580    */
1581   protected int match_bitap(String text, String pattern, int loc) {
1582     assert (Match_MaxBits == 0 || pattern.length() <= Match_MaxBits)
1583         : "Pattern too long for this application.";
1584 
1585     // Initialise the alphabet.
1586     Map<Character, Integer> s = match_alphabet(pattern);
1587 
1588     // Highest score beyond which we give up.
1589     double score_threshold = Match_Threshold;
1590     // Is there a nearby exact match? (speedup)
1591     int best_loc = text.indexOf(pattern, loc);
1592     if (best_loc != -1) {
1593       score_threshold = Math.min(match_bitapScore(0, best_loc, loc, pattern),
1594           score_threshold);
1595       // What about in the other direction? (speedup)
1596       best_loc = text.lastIndexOf(pattern, loc + pattern.length());
1597       if (best_loc != -1) {
1598         score_threshold = Math.min(match_bitapScore(0, best_loc, loc, pattern),
1599             score_threshold);
1600       }
1601     }
1602 
1603     // Initialise the bit arrays.
1604     int matchmask = 1 << (pattern.length() - 1);
1605     best_loc = -1;
1606 
1607     int bin_min, bin_mid;
1608     int bin_max = pattern.length() + text.length();
1609     // Empty initialization added to appease Java compiler.
1610     int[] last_rd = new int[0];
1611     for (int d = 0; d < pattern.length(); d++) {
1612       // Scan for the best match; each iteration allows for one more error.
1613       // Run a binary search to determine how far from 'loc' we can stray at
1614       // this error level.
1615       bin_min = 0;
1616       bin_mid = bin_max;
1617       while (bin_min < bin_mid) {
1618         if (match_bitapScore(d, loc + bin_mid, loc, pattern)
1619             <= score_threshold) {
1620           bin_min = bin_mid;
1621         } else {
1622           bin_max = bin_mid;
1623         }
1624         bin_mid = (bin_max - bin_min) / 2 + bin_min;
1625       }
1626       // Use the result from this iteration as the maximum for the next.
1627       bin_max = bin_mid;
1628       int start = Math.max(1, loc - bin_mid + 1);
1629       int finish = Math.min(loc + bin_mid, text.length()) + pattern.length();
1630 
1631       int[] rd = new int[finish + 2];
1632       rd[finish + 1] = (1 << d) - 1;
1633       for (int j = finish; j >= start; j--) {
1634         int charMatch;
1635         if (text.length() <= j - 1 || !s.containsKey(text.charAt(j - 1))) {
1636           // Out of range.
1637           charMatch = 0;
1638         } else {
1639           charMatch = s.get(text.charAt(j - 1));
1640         }
1641         if (d == 0) {
1642           // First pass: exact match.
1643           rd[j] = ((rd[j + 1] << 1) | 1) & charMatch;
1644         } else {
1645           // Subsequent passes: fuzzy match.
1646           rd[j] = (((rd[j + 1] << 1) | 1) & charMatch)
1647               | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1];
1648         }
1649         if ((rd[j] & matchmask) != 0) {
1650           double score = match_bitapScore(d, j - 1, loc, pattern);
1651           // This match will almost certainly be better than any existing
1652           // match.  But check anyway.
1653           if (score <= score_threshold) {
1654             // Told you so.
1655             score_threshold = score;
1656             best_loc = j - 1;
1657             if (best_loc > loc) {
1658               // When passing loc, don't exceed our current distance from loc.
1659               start = Math.max(1, 2 * loc - best_loc);
1660             } else {
1661               // Already passed loc, downhill from here on in.
1662               break;
1663             }
1664           }
1665         }
1666       }
1667       if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) {
1668         // No hope for a (better) match at greater error levels.
1669         break;
1670       }
1671       last_rd = rd;
1672     }
1673     return best_loc;
1674   }
1675 
1676   /**
1677    * Compute and return the score for a match with e errors and x location.
1678    * @param e Number of errors in match.
1679    * @param x Location of match.
1680    * @param loc Expected location of match.
1681    * @param pattern Pattern being sought.
1682    * @return Overall score for match (0.0 = good, 1.0 = bad).
1683    */
1684   private double match_bitapScore(int e, int x, int loc, String pattern) {
1685     float accuracy = (float) e / pattern.length();
1686     int proximity = Math.abs(loc - x);
1687     if (Match_Distance == 0) {
1688       // Dodge divide by zero error.
1689       return proximity == 0 ? accuracy : 1.0;
1690     }
1691     return accuracy + (proximity / (float) Match_Distance);
1692   }
1693 
1694   /**
1695    * Initialise the alphabet for the Bitap algorithm.
1696    * @param pattern The text to encode.
1697    * @return Hash of character locations.
1698    */
1699   protected Map<Character, Integer> match_alphabet(String pattern) {
1700     Map<Character, Integer> s = new HashMap<Character, Integer>();
1701     char[] char_pattern = pattern.toCharArray();
1702     for (char c : char_pattern) {
1703       s.put(c, 0);
1704     }
1705     int i = 0;
1706     for (char c : char_pattern) {
1707       s.put(c, s.get(c) | (1 << (pattern.length() - i - 1)));
1708       i++;
1709     }
1710     return s;
1711   }
1712 
1713 
1714   //  PATCH FUNCTIONS
1715 
1716 
1717   /**
1718    * Increase the context until it is unique,
1719    * but don't let the pattern expand beyond Match_MaxBits.
1720    * @param patch The patch to grow.
1721    * @param text Source text.
1722    */
1723   protected void patch_addContext(Patch patch, String text) {
1724     if (text.length() == 0) {
1725       return;
1726     }
1727     String pattern = text.substring(patch.start2, patch.start2 + patch.length1);
1728     int padding = 0;
1729 
1730     // Look for the first and last matches of pattern in text.  If two different
1731     // matches are found, increase the pattern length.
1732     while (text.indexOf(pattern) != text.lastIndexOf(pattern)
1733         && pattern.length() < Match_MaxBits - Patch_Margin - Patch_Margin) {
1734       padding += Patch_Margin;
1735       pattern = text.substring(Math.max(0, patch.start2 - padding),
1736           Math.min(text.length(), patch.start2 + patch.length1 + padding));
1737     }
1738     // Add one chunk for good luck.
1739     padding += Patch_Margin;
1740 
1741     // Add the prefix.
1742     String prefix = text.substring(Math.max(0, patch.start2 - padding),
1743         patch.start2);
1744     if (prefix.length() != 0) {
1745       patch.diffs.addFirst(new Diff(Operation.EQUAL, prefix));
1746     }
1747     // Add the suffix.
1748     String suffix = text.substring(patch.start2 + patch.length1,
1749         Math.min(text.length(), patch.start2 + patch.length1 + padding));
1750     if (suffix.length() != 0) {
1751       patch.diffs.addLast(new Diff(Operation.EQUAL, suffix));
1752     }
1753 
1754     // Roll back the start points.
1755     patch.start1 -= prefix.length();
1756     patch.start2 -= prefix.length();
1757     // Extend the lengths.
1758     patch.length1 += prefix.length() + suffix.length();
1759     patch.length2 += prefix.length() + suffix.length();
1760   }
1761 
1762   /**
1763    * Compute a list of patches to turn text1 into text2.
1764    * A set of diffs will be computed.
1765    * @param text1 Old text.
1766    * @param text2 New text.
1767    * @return LinkedList of Patch objects.
1768    */
1769   public LinkedList<Patch> patch_make(String text1, String text2) {
1770     if (text1 == null || text2 == null) {
1771       throw new IllegalArgumentException("Null inputs. (patch_make)");
1772     }
1773     // No diffs provided, compute our own.
1774     LinkedList<Diff> diffs = diff_main(text1, text2, true);
1775     if (diffs.size() > 2) {
1776       diff_cleanupSemantic(diffs);
1777       diff_cleanupEfficiency(diffs);
1778     }
1779     return patch_make(text1, diffs);
1780   }
1781 
1782   /**
1783    * Compute a list of patches to turn text1 into text2.
1784    * text1 will be derived from the provided diffs.
1785    * @param diffs Array of Diff objects for text1 to text2.
1786    * @return LinkedList of Patch objects.
1787    */
1788   public LinkedList<Patch> patch_make(LinkedList<Diff> diffs) {
1789     if (diffs == null) {
1790       throw new IllegalArgumentException("Null inputs. (patch_make)");
1791     }
1792     // No origin string provided, compute our own.
1793     String text1 = diff_text1(diffs);
1794     return patch_make(text1, diffs);
1795   }
1796 
1797   /**
1798    * Compute a list of patches to turn text1 into text2.
1799    * text2 is ignored, diffs are the delta between text1 and text2.
1800    * @param text1 Old text
1801    * @param text2 Ignored.
1802    * @param diffs Array of Diff objects for text1 to text2.
1803    * @return LinkedList of Patch objects.
1804    * @deprecated Prefer patch_make(String text1, LinkedList<Diff> diffs).
1805    */
1806   public LinkedList<Patch> patch_make(String text1, String text2,
1807       LinkedList<Diff> diffs) {
1808     return patch_make(text1, diffs);
1809   }
1810 
1811   /**
1812    * Compute a list of patches to turn text1 into text2.
1813    * text2 is not provided, diffs are the delta between text1 and text2.
1814    * @param text1 Old text.
1815    * @param diffs Array of Diff objects for text1 to text2.
1816    * @return LinkedList of Patch objects.
1817    */
1818   public LinkedList<Patch> patch_make(String text1, LinkedList<Diff> diffs) {
1819     if (text1 == null || diffs == null) {
1820       throw new IllegalArgumentException("Null inputs. (patch_make)");
1821     }
1822 
1823     LinkedList<Patch> patches = new LinkedList<Patch>();
1824     if (diffs.isEmpty()) {
1825       return patches;  // Get rid of the null case.
1826     }
1827     Patch patch = new Patch();
1828     int char_count1 = 0;  // Number of characters into the text1 string.
1829     int char_count2 = 0;  // Number of characters into the text2 string.
1830     // Start with text1 (prepatch_text) and apply the diffs until we arrive at
1831     // text2 (postpatch_text). We recreate the patches one by one to determine
1832     // context info.
1833     String prepatch_text = text1;
1834     String postpatch_text = text1;
1835     for (Diff aDiff : diffs) {
1836       if (patch.diffs.isEmpty() && aDiff.operation != Operation.EQUAL) {
1837         // A new patch starts here.
1838         patch.start1 = char_count1;
1839         patch.start2 = char_count2;
1840       }
1841 
1842       switch (aDiff.operation) {
1843       case INSERT:
1844         patch.diffs.add(aDiff);
1845         patch.length2 += aDiff.text.length();
1846         postpatch_text = postpatch_text.substring(0, char_count2)
1847             + aDiff.text + postpatch_text.substring(char_count2);
1848         break;
1849       case DELETE:
1850         patch.length1 += aDiff.text.length();
1851         patch.diffs.add(aDiff);
1852         postpatch_text = postpatch_text.substring(0, char_count2)
1853             + postpatch_text.substring(char_count2 + aDiff.text.length());
1854         break;
1855       case EQUAL:
1856         if (aDiff.text.length() <= 2 * Patch_Margin
1857             && !patch.diffs.isEmpty() && aDiff != diffs.getLast()) {
1858           // Small equality inside a patch.
1859           patch.diffs.add(aDiff);
1860           patch.length1 += aDiff.text.length();
1861           patch.length2 += aDiff.text.length();
1862         }
1863 
1864         if (aDiff.text.length() >= 2 * Patch_Margin) {
1865           // Time for a new patch.
1866           if (!patch.diffs.isEmpty()) {
1867             patch_addContext(patch, prepatch_text);
1868             patches.add(patch);
1869             patch = new Patch();
1870             // Unlike Unidiff, our patch lists have a rolling context.
1871             // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
1872             // Update prepatch text & pos to reflect the application of the
1873             // just completed patch.
1874             prepatch_text = postpatch_text;
1875             char_count1 = char_count2;
1876           }
1877         }
1878         break;
1879       }
1880 
1881       // Update the current character count.
1882       if (aDiff.operation != Operation.INSERT) {
1883         char_count1 += aDiff.text.length();
1884       }
1885       if (aDiff.operation != Operation.DELETE) {
1886         char_count2 += aDiff.text.length();
1887       }
1888     }
1889     // Pick up the leftover patch if not empty.
1890     if (!patch.diffs.isEmpty()) {
1891       patch_addContext(patch, prepatch_text);
1892       patches.add(patch);
1893     }
1894 
1895     return patches;
1896   }
1897 
1898   /**
1899    * Given an array of patches, return another array that is identical.
1900    * @param patches Array of Patch objects.
1901    * @return Array of Patch objects.
1902    */
1903   public LinkedList<Patch> patch_deepCopy(LinkedList<Patch> patches) {
1904     LinkedList<Patch> patchesCopy = new LinkedList<Patch>();
1905     for (Patch aPatch : patches) {
1906       Patch patchCopy = new Patch();
1907       for (Diff aDiff : aPatch.diffs) {
1908         Diff diffCopy = new Diff(aDiff.operation, aDiff.text);
1909         patchCopy.diffs.add(diffCopy);
1910       }
1911       patchCopy.start1 = aPatch.start1;
1912       patchCopy.start2 = aPatch.start2;
1913       patchCopy.length1 = aPatch.length1;
1914       patchCopy.length2 = aPatch.length2;
1915       patchesCopy.add(patchCopy);
1916     }
1917     return patchesCopy;
1918   }
1919 
1920   /**
1921    * Merge a set of patches onto the text.  Return a patched text, as well
1922    * as an array of true/false values indicating which patches were applied.
1923    * @param patches Array of Patch objects
1924    * @param text Old text.
1925    * @return Two element Object array, containing the new text and an array of
1926    *      boolean values.
1927    */
1928   public Object[] patch_apply(LinkedList<Patch> patches, String text) {
1929     if (patches.isEmpty()) {
1930       return new Object[]{text, new boolean[0]};
1931     }
1932 
1933     // Deep copy the patches so that no changes are made to originals.
1934     patches = patch_deepCopy(patches);
1935 
1936     String nullPadding = patch_addPadding(patches);
1937     text = nullPadding + text + nullPadding;
1938     patch_splitMax(patches);
1939 
1940     int x = 0;
1941     // delta keeps track of the offset between the expected and actual location
1942     // of the previous patch.  If there are patches expected at positions 10 and
1943     // 20, but the first patch was found at 12, delta is 2 and the second patch
1944     // has an effective expected position of 22.
1945     int delta = 0;
1946     boolean[] results = new boolean[patches.size()];
1947     for (Patch aPatch : patches) {
1948       int expected_loc = aPatch.start2 + delta;
1949       String text1 = diff_text1(aPatch.diffs);
1950       int start_loc;
1951       int end_loc = -1;
1952       if (text1.length() > this.Match_MaxBits) {
1953         // patch_splitMax will only provide an oversized pattern in the case of
1954         // a monster delete.
1955         start_loc = match_main(text,
1956             text1.substring(0, this.Match_MaxBits), expected_loc);
1957         if (start_loc != -1) {
1958           end_loc = match_main(text,
1959               text1.substring(text1.length() - this.Match_MaxBits),
1960               expected_loc + text1.length() - this.Match_MaxBits);
1961           if (end_loc == -1 || start_loc >= end_loc) {
1962             // Can't find valid trailing context.  Drop this patch.
1963             start_loc = -1;
1964           }
1965         }
1966       } else {
1967         start_loc = match_main(text, text1, expected_loc);
1968       }
1969       if (start_loc == -1) {
1970         // No match found.  :(
1971         results[x] = false;
1972         // Subtract the delta for this failed patch from subsequent patches.
1973         delta -= aPatch.length2 - aPatch.length1;
1974       } else {
1975         // Found a match.  :)
1976         results[x] = true;
1977         delta = start_loc - expected_loc;
1978         String text2;
1979         if (end_loc == -1) {
1980           text2 = text.substring(start_loc,
1981               Math.min(start_loc + text1.length(), text.length()));
1982         } else {
1983           text2 = text.substring(start_loc,
1984               Math.min(end_loc + this.Match_MaxBits, text.length()));
1985         }
1986         if (text1.equals(text2)) {
1987           // Perfect match, just shove the replacement text in.
1988           text = text.substring(0, start_loc) + diff_text2(aPatch.diffs)
1989               + text.substring(start_loc + text1.length());
1990         } else {
1991           // Imperfect match.  Run a diff to get a framework of equivalent
1992           // indices.
1993           LinkedList<Diff> diffs = diff_main(text1, text2, false);
1994           if (text1.length() > this.Match_MaxBits
1995               && diff_levenshtein(diffs) / (float) text1.length()
1996               > this.Patch_DeleteThreshold) {
1997             // The end points match, but the content is unacceptably bad.
1998             results[x] = false;
1999           } else {
2000             diff_cleanupSemanticLossless(diffs);
2001             int index1 = 0;
2002             for (Diff aDiff : aPatch.diffs) {
2003               if (aDiff.operation != Operation.EQUAL) {
2004                 int index2 = diff_xIndex(diffs, index1);
2005                 if (aDiff.operation == Operation.INSERT) {
2006                   // Insertion
2007                   text = text.substring(0, start_loc + index2) + aDiff.text
2008                       + text.substring(start_loc + index2);
2009                 } else if (aDiff.operation == Operation.DELETE) {
2010                   // Deletion
2011                   text = text.substring(0, start_loc + index2)
2012                       + text.substring(start_loc + diff_xIndex(diffs,
2013                       index1 + aDiff.text.length()));
2014                 }
2015               }
2016               if (aDiff.operation != Operation.DELETE) {
2017                 index1 += aDiff.text.length();
2018               }
2019             }
2020           }
2021         }
2022       }
2023       x++;
2024     }
2025     // Strip the padding off.
2026     text = text.substring(nullPadding.length(), text.length()
2027         - nullPadding.length());
2028     return new Object[]{text, results};
2029   }
2030 
2031   /**
2032    * Add some padding on text start and end so that edges can match something.
2033    * Intended to be called only from within patch_apply.
2034    * @param patches Array of Patch objects.
2035    * @return The padding string added to each side.
2036    */
2037   public String patch_addPadding(LinkedList<Patch> patches) {
2038     short paddingLength = this.Patch_Margin;
2039     String nullPadding = "";
2040     for (short x = 1; x <= paddingLength; x++) {
2041       nullPadding += String.valueOf((char) x);
2042     }
2043 
2044     // Bump all the patches forward.
2045     for (Patch aPatch : patches) {
2046       aPatch.start1 += paddingLength;
2047       aPatch.start2 += paddingLength;
2048     }
2049 
2050     // Add some padding on start of first diff.
2051     Patch patch = patches.getFirst();
2052     LinkedList<Diff> diffs = patch.diffs;
2053     if (diffs.isEmpty() || diffs.getFirst().operation != Operation.EQUAL) {
2054       // Add nullPadding equality.
2055       diffs.addFirst(new Diff(Operation.EQUAL, nullPadding));
2056       patch.start1 -= paddingLength;  // Should be 0.
2057       patch.start2 -= paddingLength;  // Should be 0.
2058       patch.length1 += paddingLength;
2059       patch.length2 += paddingLength;
2060     } else if (paddingLength > diffs.getFirst().text.length()) {
2061       // Grow first equality.
2062       Diff firstDiff = diffs.getFirst();
2063       int extraLength = paddingLength - firstDiff.text.length();
2064       firstDiff.text = nullPadding.substring(firstDiff.text.length())
2065           + firstDiff.text;
2066       patch.start1 -= extraLength;
2067       patch.start2 -= extraLength;
2068       patch.length1 += extraLength;
2069       patch.length2 += extraLength;
2070     }
2071 
2072     // Add some padding on end of last diff.
2073     patch = patches.getLast();
2074     diffs = patch.diffs;
2075     if (diffs.isEmpty() || diffs.getLast().operation != Operation.EQUAL) {
2076       // Add nullPadding equality.
2077       diffs.addLast(new Diff(Operation.EQUAL, nullPadding));
2078       patch.length1 += paddingLength;
2079       patch.length2 += paddingLength;
2080     } else if (paddingLength > diffs.getLast().text.length()) {
2081       // Grow last equality.
2082       Diff lastDiff = diffs.getLast();
2083       int extraLength = paddingLength - lastDiff.text.length();
2084       lastDiff.text += nullPadding.substring(0, extraLength);
2085       patch.length1 += extraLength;
2086       patch.length2 += extraLength;
2087     }
2088 
2089     return nullPadding;
2090   }
2091 
2092   /**
2093    * Look through the patches and break up any which are longer than the
2094    * maximum limit of the match algorithm.
2095    * Intended to be called only from within patch_apply.
2096    * @param patches LinkedList of Patch objects.
2097    */
2098   public void patch_splitMax(LinkedList<Patch> patches) {
2099     short patch_size = Match_MaxBits;
2100     String precontext, postcontext;
2101     Patch patch;
2102     int start1, start2;
2103     boolean empty;
2104     Operation diff_type;
2105     String diff_text;
2106     ListIterator<Patch> pointer = patches.listIterator();
2107     Patch bigpatch = pointer.hasNext() ? pointer.next() : null;
2108     while (bigpatch != null) {
2109       if (bigpatch.length1 <= Match_MaxBits) {
2110         bigpatch = pointer.hasNext() ? pointer.next() : null;
2111         continue;
2112       }
2113       // Remove the big old patch.
2114       pointer.remove();
2115       start1 = bigpatch.start1;
2116       start2 = bigpatch.start2;
2117       precontext = "";
2118       while (!bigpatch.diffs.isEmpty()) {
2119         // Create one of several smaller patches.
2120         patch = new Patch();
2121         empty = true;
2122         patch.start1 = start1 - precontext.length();
2123         patch.start2 = start2 - precontext.length();
2124         if (precontext.length() != 0) {
2125           patch.length1 = patch.length2 = precontext.length();
2126           patch.diffs.add(new Diff(Operation.EQUAL, precontext));
2127         }
2128         while (!bigpatch.diffs.isEmpty()
2129             && patch.length1 < patch_size - Patch_Margin) {
2130           diff_type = bigpatch.diffs.getFirst().operation;
2131           diff_text = bigpatch.diffs.getFirst().text;
2132           if (diff_type == Operation.INSERT) {
2133             // Insertions are harmless.
2134             patch.length2 += diff_text.length();
2135             start2 += diff_text.length();
2136             patch.diffs.addLast(bigpatch.diffs.removeFirst());
2137             empty = false;
2138           } else if (diff_type == Operation.DELETE && patch.diffs.size() == 1
2139               && patch.diffs.getFirst().operation == Operation.EQUAL
2140               && diff_text.length() > 2 * patch_size) {
2141             // This is a large deletion.  Let it pass in one chunk.
2142             patch.length1 += diff_text.length();
2143             start1 += diff_text.length();
2144             empty = false;
2145             patch.diffs.add(new Diff(diff_type, diff_text));
2146             bigpatch.diffs.removeFirst();
2147           } else {
2148             // Deletion or equality.  Only take as much as we can stomach.
2149             diff_text = diff_text.substring(0, Math.min(diff_text.length(),
2150                 patch_size - patch.length1 - Patch_Margin));
2151             patch.length1 += diff_text.length();
2152             start1 += diff_text.length();
2153             if (diff_type == Operation.EQUAL) {
2154               patch.length2 += diff_text.length();
2155               start2 += diff_text.length();
2156             } else {
2157               empty = false;
2158             }
2159             patch.diffs.add(new Diff(diff_type, diff_text));
2160             if (diff_text.equals(bigpatch.diffs.getFirst().text)) {
2161               bigpatch.diffs.removeFirst();
2162             } else {
2163               bigpatch.diffs.getFirst().text = bigpatch.diffs.getFirst().text
2164                   .substring(diff_text.length());
2165             }
2166           }
2167         }
2168         // Compute the head context for the next patch.
2169         precontext = diff_text2(patch.diffs);
2170         precontext = precontext.substring(Math.max(0, precontext.length()
2171             - Patch_Margin));
2172         // Append the end context for this patch.
2173         if (diff_text1(bigpatch.diffs).length() > Patch_Margin) {
2174           postcontext = diff_text1(bigpatch.diffs).substring(0, Patch_Margin);
2175         } else {
2176           postcontext = diff_text1(bigpatch.diffs);
2177         }
2178         if (postcontext.length() != 0) {
2179           patch.length1 += postcontext.length();
2180           patch.length2 += postcontext.length();
2181           if (!patch.diffs.isEmpty()
2182               && patch.diffs.getLast().operation == Operation.EQUAL) {
2183             patch.diffs.getLast().text += postcontext;
2184           } else {
2185             patch.diffs.add(new Diff(Operation.EQUAL, postcontext));
2186           }
2187         }
2188         if (!empty) {
2189           pointer.add(patch);
2190         }
2191       }
2192       bigpatch = pointer.hasNext() ? pointer.next() : null;
2193     }
2194   }
2195 
2196   /**
2197    * Take a list of patches and return a textual representation.
2198    * @param patches List of Patch objects.
2199    * @return Text representation of patches.
2200    */
2201   public String patch_toText(List<Patch> patches) {
2202     StringBuilder text = new StringBuilder();
2203     for (Patch aPatch : patches) {
2204       text.append(aPatch);
2205     }
2206     return text.toString();
2207   }
2208 
2209   /**
2210    * Parse a textual representation of patches and return a List of Patch
2211    * objects.
2212    * @param textline Text representation of patches.
2213    * @return List of Patch objects.
2214    * @throws IllegalArgumentException If invalid input.
2215    */
2216   public List<Patch> patch_fromText(String textline)
2217       throws IllegalArgumentException {
2218     List<Patch> patches = new LinkedList<Patch>();
2219     if (textline.length() == 0) {
2220       return patches;
2221     }
2222     List<String> textList = Arrays.asList(textline.split("\n"));
2223     LinkedList<String> text = new LinkedList<String>(textList);
2224     Patch patch;
2225     Pattern patchHeader
2226         = Pattern.compile("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$");
2227     Matcher m;
2228     char sign;
2229     String line;
2230     while (!text.isEmpty()) {
2231       m = patchHeader.matcher(text.getFirst());
2232       if (!m.matches()) {
2233         throw new IllegalArgumentException(
2234             "Invalid patch string: " + text.getFirst());
2235       }
2236       patch = new Patch();
2237       patches.add(patch);
2238       patch.start1 = Integer.parseInt(m.group(1));
2239       if (m.group(2).length() == 0) {
2240         patch.start1--;
2241         patch.length1 = 1;
2242       } else if (m.group(2).equals("0")) {
2243         patch.length1 = 0;
2244       } else {
2245         patch.start1--;
2246         patch.length1 = Integer.parseInt(m.group(2));
2247       }
2248 
2249       patch.start2 = Integer.parseInt(m.group(3));
2250       if (m.group(4).length() == 0) {
2251         patch.start2--;
2252         patch.length2 = 1;
2253       } else if (m.group(4).equals("0")) {
2254         patch.length2 = 0;
2255       } else {
2256         patch.start2--;
2257         patch.length2 = Integer.parseInt(m.group(4));
2258       }
2259       text.removeFirst();
2260 
2261       while (!text.isEmpty()) {
2262         try {
2263           sign = text.getFirst().charAt(0);
2264         } catch (IndexOutOfBoundsException e) {
2265           // Blank line?  Whatever.
2266           text.removeFirst();
2267           continue;
2268         }
2269         line = text.getFirst().substring(1);
2270         line = line.replace("+", "%2B");  // decode would change all "+" to " "
2271         try {
2272           line = URLDecoder.decode(line, "UTF-8");
2273         } catch (UnsupportedEncodingException e) {
2274           // Not likely on modern system.
2275           throw new Error("This system does not support UTF-8.", e);
2276         } catch (IllegalArgumentException e) {
2277           // Malformed URI sequence.
2278           throw new IllegalArgumentException(
2279               "Illegal escape in patch_fromText: " + line, e);
2280         }
2281         if (sign == '-') {
2282           // Deletion.
2283           patch.diffs.add(new Diff(Operation.DELETE, line));
2284         } else if (sign == '+') {
2285           // Insertion.
2286           patch.diffs.add(new Diff(Operation.INSERT, line));
2287         } else if (sign == ' ') {
2288           // Minor equality.
2289           patch.diffs.add(new Diff(Operation.EQUAL, line));
2290         } else if (sign == '@') {
2291           // Start of next patch.
2292           break;
2293         } else {
2294           // WTF?
2295           throw new IllegalArgumentException(
2296               "Invalid patch mode '" + sign + "' in: " + line);
2297         }
2298         text.removeFirst();
2299       }
2300     }
2301     return patches;
2302   }
2303 
2304 
2305   /**
2306    * Class representing one diff operation.
2307    */
2308   public static class Diff {
2309     /**
2310      * One of: INSERT, DELETE or EQUAL.
2311      */
2312     public Operation operation;
2313     /**
2314      * The text associated with this diff operation.
2315      */
2316     public String text;
2317 
2318     /**
2319      * Constructor.  Initializes the diff with the provided values.
2320      * @param operation One of INSERT, DELETE or EQUAL.
2321      * @param text The text being applied.
2322      */
2323     public Diff(Operation operation, String text) {
2324       // Construct a diff with the specified operation and text.
2325       this.operation = operation;
2326       this.text = text;
2327     }
2328 
2329     /**
2330      * Display a human-readable version of this Diff.
2331      * @return text version.
2332      */
2333     public String toString() {
2334       String prettyText = this.text.replace('\n', '\u00b6');
2335       return "Diff(" + this.operation + ",\"" + prettyText + "\")";
2336     }
2337 
2338     /**
2339      * Create a numeric hash value for a Diff.
2340      * This function is not used by DMP.
2341      * @return Hash value.
2342      */
2343     @Override
2344     public int hashCode() {
2345       final int prime = 31;
2346       int result = (operation == null) ? 0 : operation.hashCode();
2347       result += prime * ((text == null) ? 0 : text.hashCode());
2348       return result;
2349     }
2350 
2351     /**
2352      * Is this Diff equivalent to another Diff?
2353      * @param obj Another Diff to compare against.
2354      * @return true or false.
2355      */
2356     @Override
2357     public boolean equals(Object obj) {
2358       if (this == obj) {
2359         return true;
2360       }
2361       if (obj == null) {
2362         return false;
2363       }
2364       if (getClass() != obj.getClass()) {
2365         return false;
2366       }
2367       Diff other = (Diff) obj;
2368       if (operation != other.operation) {
2369         return false;
2370       }
2371       if (text == null) {
2372         if (other.text != null) {
2373           return false;
2374         }
2375       } else if (!text.equals(other.text)) {
2376         return false;
2377       }
2378       return true;
2379     }
2380   }
2381 
2382 
2383   /**
2384    * Class representing one patch operation.
2385    */
2386   public static class Patch {
2387     public LinkedList<Diff> diffs;
2388     public int start1;
2389     public int start2;
2390     public int length1;
2391     public int length2;
2392 
2393     /**
2394      * Constructor.  Initializes with an empty list of diffs.
2395      */
2396     public Patch() {
2397       this.diffs = new LinkedList<Diff>();
2398     }
2399 
2400     /**
2401      * Emmulate GNU diff's format.
2402      * Header: @@ -382,8 +481,9 @@
2403      * Indicies are printed as 1-based, not 0-based.
2404      * @return The GNU diff string.
2405      */
2406     public String toString() {
2407       String coords1, coords2;
2408       if (this.length1 == 0) {
2409         coords1 = this.start1 + ",0";
2410       } else if (this.length1 == 1) {
2411         coords1 = Integer.toString(this.start1 + 1);
2412       } else {
2413         coords1 = (this.start1 + 1) + "," + this.length1;
2414       }
2415       if (this.length2 == 0) {
2416         coords2 = this.start2 + ",0";
2417       } else if (this.length2 == 1) {
2418         coords2 = Integer.toString(this.start2 + 1);
2419       } else {
2420         coords2 = (this.start2 + 1) + "," + this.length2;
2421       }
2422       StringBuilder text = new StringBuilder();
2423       text.append("@@ -").append(coords1).append(" +").append(coords2)
2424           .append(" @@\n");
2425       // Escape the body of the patch with %xx notation.
2426       for (Diff aDiff : this.diffs) {
2427         switch (aDiff.operation) {
2428         case INSERT:
2429           text.append('+');
2430           break;
2431         case DELETE:
2432           text.append('-');
2433           break;
2434         case EQUAL:
2435           text.append(' ');
2436           break;
2437         }
2438         try {
2439           text.append(URLEncoder.encode(aDiff.text, "UTF-8").replace('+', ' '))
2440               .append("\n");
2441         } catch (UnsupportedEncodingException e) {
2442           // Not likely on modern system.
2443           throw new Error("This system does not support UTF-8.", e);
2444         }
2445       }
2446       return unescapeForEncodeUriCompatability(text.toString());
2447     }
2448   }
2449 
2450   /**
2451    * Unescape selected chars for compatability with JavaScript's encodeURI.
2452    * In speed critical applications this could be dropped since the
2453    * receiving application will certainly decode these fine.
2454    * Note that this function is case-sensitive.  Thus "%3f" would not be
2455    * unescaped.  But this is ok because it is only called with the output of
2456    * URLEncoder.encode which returns uppercase hex.
2457    *
2458    * Example: "%3F" -> "?", "%24" -> "$", etc.
2459    *
2460    * @param str The string to escape.
2461    * @return The escaped string.
2462    */
2463   private static String unescapeForEncodeUriCompatability(String str) {
2464     return str.replace("%21", "!").replace("%7E", "~")
2465         .replace("%27", "'").replace("%28", "(").replace("%29", ")")
2466         .replace("%3B", ";").replace("%2F", "/").replace("%3F", "?")
2467         .replace("%3A", ":").replace("%40", "@").replace("%26", "&")
2468         .replace("%3D", "=").replace("%2B", "+").replace("%24", "$")
2469         .replace("%2C", ",").replace("%23", "#");
2470   }
2471 }