1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package name.fraser.neil.plaintext;
21
22 import java.io.UnsupportedEncodingException;
23 import java.net.URLDecoder;
24 import java.net.URLEncoder;
25 import java.util.ArrayList;
26 import java.util.Arrays;
27 import java.util.HashMap;
28 import java.util.LinkedList;
29 import java.util.List;
30 import java.util.ListIterator;
31 import java.util.Map;
32 import java.util.Stack;
33 import java.util.regex.Matcher;
34 import java.util.regex.Pattern;
35
36
37
38
39
40
41
42
43
44
45
46
47
48 public class diff_match_patch {
49
50
51
52
53
54
55
56 public float Diff_Timeout = 1.0f;
57
58
59
60 public short Diff_EditCost = 4;
61
62
63
64 public float Match_Threshold = 0.5f;
65
66
67
68
69
70 public int Match_Distance = 1000;
71
72
73
74
75
76
77 public float Patch_DeleteThreshold = 0.5f;
78
79
80
81 public short Patch_Margin = 4;
82
83
84
85
86 private short Match_MaxBits = 32;
87
88
89
90
91
92 protected static class LinesToCharsResult {
93 protected String chars1;
94 protected String chars2;
95 protected List<String> lineArray;
96
97 protected LinesToCharsResult(String chars1, String chars2,
98 List<String> lineArray) {
99 this.chars1 = chars1;
100 this.chars2 = chars2;
101 this.lineArray = lineArray;
102 }
103 }
104
105
106
107
108
109
110
111
112
113
114
115 public enum Operation {
116 DELETE, INSERT, EQUAL
117 }
118
119
120
121
122
123
124
125
126
127
128 public LinkedList<Diff> diff_main(String text1, String text2) {
129 return diff_main(text1, text2, true);
130 }
131
132
133
134
135
136
137
138
139
140
141 public LinkedList<Diff> diff_main(String text1, String text2,
142 boolean checklines) {
143
144 long deadline;
145 if (Diff_Timeout <= 0) {
146 deadline = Long.MAX_VALUE;
147 } else {
148 deadline = System.currentTimeMillis() + (long) (Diff_Timeout * 1000);
149 }
150 return diff_main(text1, text2, checklines, deadline);
151 }
152
153
154
155
156
157
158
159
160
161
162
163
164
165 private LinkedList<Diff> diff_main(String text1, String text2,
166 boolean checklines, long deadline) {
167
168 if (text1 == null || text2 == null) {
169 throw new IllegalArgumentException("Null inputs. (diff_main)");
170 }
171
172
173 LinkedList<Diff> diffs;
174 if (text1.equals(text2)) {
175 diffs = new LinkedList<Diff>();
176 if (text1.length() != 0) {
177 diffs.add(new Diff(Operation.EQUAL, text1));
178 }
179 return diffs;
180 }
181
182
183 int commonlength = diff_commonPrefix(text1, text2);
184 String commonprefix = text1.substring(0, commonlength);
185 text1 = text1.substring(commonlength);
186 text2 = text2.substring(commonlength);
187
188
189 commonlength = diff_commonSuffix(text1, text2);
190 String commonsuffix = text1.substring(text1.length() - commonlength);
191 text1 = text1.substring(0, text1.length() - commonlength);
192 text2 = text2.substring(0, text2.length() - commonlength);
193
194
195 diffs = diff_compute(text1, text2, checklines, deadline);
196
197
198 if (commonprefix.length() != 0) {
199 diffs.addFirst(new Diff(Operation.EQUAL, commonprefix));
200 }
201 if (commonsuffix.length() != 0) {
202 diffs.addLast(new Diff(Operation.EQUAL, commonsuffix));
203 }
204
205 diff_cleanupMerge(diffs);
206 return diffs;
207 }
208
209
210
211
212
213
214
215
216
217
218
219
220 private LinkedList<Diff> diff_compute(String text1, String text2,
221 boolean checklines, long deadline) {
222 LinkedList<Diff> diffs = new LinkedList<Diff>();
223
224 if (text1.length() == 0) {
225
226 diffs.add(new Diff(Operation.INSERT, text2));
227 return diffs;
228 }
229
230 if (text2.length() == 0) {
231
232 diffs.add(new Diff(Operation.DELETE, text1));
233 return diffs;
234 }
235
236 String longtext = text1.length() > text2.length() ? text1 : text2;
237 String shorttext = text1.length() > text2.length() ? text2 : text1;
238 int i = longtext.indexOf(shorttext);
239 if (i != -1) {
240
241 Operation op = (text1.length() > text2.length()) ?
242 Operation.DELETE : Operation.INSERT;
243 diffs.add(new Diff(op, longtext.substring(0, i)));
244 diffs.add(new Diff(Operation.EQUAL, shorttext));
245 diffs.add(new Diff(op, longtext.substring(i + shorttext.length())));
246 return diffs;
247 }
248
249 if (shorttext.length() == 1) {
250
251
252 diffs.add(new Diff(Operation.DELETE, text1));
253 diffs.add(new Diff(Operation.INSERT, text2));
254 return diffs;
255 }
256
257
258 String[] hm = diff_halfMatch(text1, text2);
259 if (hm != null) {
260
261 String text1_a = hm[0];
262 String text1_b = hm[1];
263 String text2_a = hm[2];
264 String text2_b = hm[3];
265 String mid_common = hm[4];
266
267 LinkedList<Diff> diffs_a = diff_main(text1_a, text2_a,
268 checklines, deadline);
269 LinkedList<Diff> diffs_b = diff_main(text1_b, text2_b,
270 checklines, deadline);
271
272 diffs = diffs_a;
273 diffs.add(new Diff(Operation.EQUAL, mid_common));
274 diffs.addAll(diffs_b);
275 return diffs;
276 }
277
278 if (checklines && text1.length() > 100 && text2.length() > 100) {
279 return diff_lineMode(text1, text2, deadline);
280 }
281
282 return diff_bisect(text1, text2, deadline);
283 }
284
285
286
287
288
289
290
291
292
293
294 private LinkedList<Diff> diff_lineMode(String text1, String text2,
295 long deadline) {
296
297 LinesToCharsResult b = diff_linesToChars(text1, text2);
298 text1 = b.chars1;
299 text2 = b.chars2;
300 List<String> linearray = b.lineArray;
301
302 LinkedList<Diff> diffs = diff_main(text1, text2, false, deadline);
303
304
305 diff_charsToLines(diffs, linearray);
306
307 diff_cleanupSemantic(diffs);
308
309
310
311 diffs.add(new Diff(Operation.EQUAL, ""));
312 int count_delete = 0;
313 int count_insert = 0;
314 String text_delete = "";
315 String text_insert = "";
316 ListIterator<Diff> pointer = diffs.listIterator();
317 Diff thisDiff = pointer.next();
318 while (thisDiff != null) {
319 switch (thisDiff.operation) {
320 case INSERT:
321 count_insert++;
322 text_insert += thisDiff.text;
323 break;
324 case DELETE:
325 count_delete++;
326 text_delete += thisDiff.text;
327 break;
328 case EQUAL:
329
330 if (count_delete >= 1 && count_insert >= 1) {
331
332 pointer.previous();
333 for (int j = 0; j < count_delete + count_insert; j++) {
334 pointer.previous();
335 pointer.remove();
336 }
337 for (Diff newDiff : diff_main(text_delete, text_insert, false,
338 deadline)) {
339 pointer.add(newDiff);
340 }
341 }
342 count_insert = 0;
343 count_delete = 0;
344 text_delete = "";
345 text_insert = "";
346 break;
347 }
348 thisDiff = pointer.hasNext() ? pointer.next() : null;
349 }
350 diffs.removeLast();
351
352 return diffs;
353 }
354
355
356
357
358
359
360
361
362
363
364 protected LinkedList<Diff> diff_bisect(String text1, String text2,
365 long deadline) {
366
367 int text1_length = text1.length();
368 int text2_length = text2.length();
369 int max_d = (text1_length + text2_length + 1) / 2;
370 int v_offset = max_d;
371 int v_length = 2 * max_d;
372 int[] v1 = new int[v_length];
373 int[] v2 = new int[v_length];
374 for (int x = 0; x < v_length; x++) {
375 v1[x] = -1;
376 v2[x] = -1;
377 }
378 v1[v_offset + 1] = 0;
379 v2[v_offset + 1] = 0;
380 int delta = text1_length - text2_length;
381
382
383 boolean front = (delta % 2 != 0);
384
385
386 int k1start = 0;
387 int k1end = 0;
388 int k2start = 0;
389 int k2end = 0;
390 for (int d = 0; d < max_d; d++) {
391
392 if (System.currentTimeMillis() > deadline) {
393 break;
394 }
395
396
397 for (int k1 = -d + k1start; k1 <= d - k1end; k1 += 2) {
398 int k1_offset = v_offset + k1;
399 int x1;
400 if (k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1])) {
401 x1 = v1[k1_offset + 1];
402 } else {
403 x1 = v1[k1_offset - 1] + 1;
404 }
405 int y1 = x1 - k1;
406 while (x1 < text1_length && y1 < text2_length
407 && text1.charAt(x1) == text2.charAt(y1)) {
408 x1++;
409 y1++;
410 }
411 v1[k1_offset] = x1;
412 if (x1 > text1_length) {
413
414 k1end += 2;
415 } else if (y1 > text2_length) {
416
417 k1start += 2;
418 } else if (front) {
419 int k2_offset = v_offset + delta - k1;
420 if (k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1) {
421
422 int x2 = text1_length - v2[k2_offset];
423 if (x1 >= x2) {
424
425 return diff_bisectSplit(text1, text2, x1, y1, deadline);
426 }
427 }
428 }
429 }
430
431
432 for (int k2 = -d + k2start; k2 <= d - k2end; k2 += 2) {
433 int k2_offset = v_offset + k2;
434 int x2;
435 if (k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1])) {
436 x2 = v2[k2_offset + 1];
437 } else {
438 x2 = v2[k2_offset - 1] + 1;
439 }
440 int y2 = x2 - k2;
441 while (x2 < text1_length && y2 < text2_length
442 && text1.charAt(text1_length - x2 - 1)
443 == text2.charAt(text2_length - y2 - 1)) {
444 x2++;
445 y2++;
446 }
447 v2[k2_offset] = x2;
448 if (x2 > text1_length) {
449
450 k2end += 2;
451 } else if (y2 > text2_length) {
452
453 k2start += 2;
454 } else if (!front) {
455 int k1_offset = v_offset + delta - k2;
456 if (k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) {
457 int x1 = v1[k1_offset];
458 int y1 = v_offset + x1 - k1_offset;
459
460 x2 = text1_length - x2;
461 if (x1 >= x2) {
462
463 return diff_bisectSplit(text1, text2, x1, y1, deadline);
464 }
465 }
466 }
467 }
468 }
469
470
471 LinkedList<Diff> diffs = new LinkedList<Diff>();
472 diffs.add(new Diff(Operation.DELETE, text1));
473 diffs.add(new Diff(Operation.INSERT, text2));
474 return diffs;
475 }
476
477
478
479
480
481
482
483
484
485
486
487 private LinkedList<Diff> diff_bisectSplit(String text1, String text2,
488 int x, int y, long deadline) {
489 String text1a = text1.substring(0, x);
490 String text2a = text2.substring(0, y);
491 String text1b = text1.substring(x);
492 String text2b = text2.substring(y);
493
494
495 LinkedList<Diff> diffs = diff_main(text1a, text2a, false, deadline);
496 LinkedList<Diff> diffsb = diff_main(text1b, text2b, false, deadline);
497
498 diffs.addAll(diffsb);
499 return diffs;
500 }
501
502
503
504
505
506
507
508
509
510
511 protected LinesToCharsResult diff_linesToChars(String text1, String text2) {
512 List<String> lineArray = new ArrayList<String>();
513 Map<String, Integer> lineHash = new HashMap<String, Integer>();
514
515
516
517
518
519 lineArray.add("");
520
521 String chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash);
522 String chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash);
523 return new LinesToCharsResult(chars1, chars2, lineArray);
524 }
525
526
527
528
529
530
531
532
533
534 private String diff_linesToCharsMunge(String text, List<String> lineArray,
535 Map<String, Integer> lineHash) {
536 int lineStart = 0;
537 int lineEnd = -1;
538 String line;
539 StringBuilder chars = new StringBuilder();
540
541
542
543 while (lineEnd < text.length() - 1) {
544 lineEnd = text.indexOf('\n', lineStart);
545 if (lineEnd == -1) {
546 lineEnd = text.length() - 1;
547 }
548 line = text.substring(lineStart, lineEnd + 1);
549 lineStart = lineEnd + 1;
550
551 if (lineHash.containsKey(line)) {
552 chars.append(String.valueOf((char) (int) lineHash.get(line)));
553 } else {
554 lineArray.add(line);
555 lineHash.put(line, lineArray.size() - 1);
556 chars.append(String.valueOf((char) (lineArray.size() - 1)));
557 }
558 }
559 return chars.toString();
560 }
561
562
563
564
565
566
567
568 protected void diff_charsToLines(LinkedList<Diff> diffs,
569 List<String> lineArray) {
570 StringBuilder text;
571 for (Diff diff : diffs) {
572 text = new StringBuilder();
573 for (int y = 0; y < diff.text.length(); y++) {
574 text.append(lineArray.get(diff.text.charAt(y)));
575 }
576 diff.text = text.toString();
577 }
578 }
579
580
581
582
583
584
585
586 public int diff_commonPrefix(String text1, String text2) {
587
588 int n = Math.min(text1.length(), text2.length());
589 for (int i = 0; i < n; i++) {
590 if (text1.charAt(i) != text2.charAt(i)) {
591 return i;
592 }
593 }
594 return n;
595 }
596
597
598
599
600
601
602
603 public int diff_commonSuffix(String text1, String text2) {
604
605 int text1_length = text1.length();
606 int text2_length = text2.length();
607 int n = Math.min(text1_length, text2_length);
608 for (int i = 1; i <= n; i++) {
609 if (text1.charAt(text1_length - i) != text2.charAt(text2_length - i)) {
610 return i - 1;
611 }
612 }
613 return n;
614 }
615
616
617
618
619
620
621
622
623 protected int diff_commonOverlap(String text1, String text2) {
624
625 int text1_length = text1.length();
626 int text2_length = text2.length();
627
628 if (text1_length == 0 || text2_length == 0) {
629 return 0;
630 }
631
632 if (text1_length > text2_length) {
633 text1 = text1.substring(text1_length - text2_length);
634 } else if (text1_length < text2_length) {
635 text2 = text2.substring(0, text1_length);
636 }
637 int text_length = Math.min(text1_length, text2_length);
638
639 if (text1.equals(text2)) {
640 return text_length;
641 }
642
643
644
645
646 int best = 0;
647 int length = 1;
648 while (true) {
649 String pattern = text1.substring(text_length - length);
650 int found = text2.indexOf(pattern);
651 if (found == -1) {
652 return best;
653 }
654 length += found;
655 if (found == 0 || text1.substring(text_length - length).equals(
656 text2.substring(0, length))) {
657 best = length;
658 length++;
659 }
660 }
661 }
662
663
664
665
666
667
668
669
670
671
672
673 protected String[] diff_halfMatch(String text1, String text2) {
674 if (Diff_Timeout <= 0) {
675
676 return null;
677 }
678 String longtext = text1.length() > text2.length() ? text1 : text2;
679 String shorttext = text1.length() > text2.length() ? text2 : text1;
680 if (longtext.length() < 4 || shorttext.length() * 2 < longtext.length()) {
681 return null;
682 }
683
684
685 String[] hm1 = diff_halfMatchI(longtext, shorttext,
686 (longtext.length() + 3) / 4);
687
688 String[] hm2 = diff_halfMatchI(longtext, shorttext,
689 (longtext.length() + 1) / 2);
690 String[] hm;
691 if (hm1 == null && hm2 == null) {
692 return null;
693 } else if (hm2 == null) {
694 hm = hm1;
695 } else if (hm1 == null) {
696 hm = hm2;
697 } else {
698
699 hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2;
700 }
701
702
703 if (text1.length() > text2.length()) {
704 return hm;
705
706 } else {
707 return new String[]{hm[2], hm[3], hm[0], hm[1], hm[4]};
708 }
709 }
710
711
712
713
714
715
716
717
718
719
720
721 private String[] diff_halfMatchI(String longtext, String shorttext, int i) {
722
723 String seed = longtext.substring(i, i + longtext.length() / 4);
724 int j = -1;
725 String best_common = "";
726 String best_longtext_a = "", best_longtext_b = "";
727 String best_shorttext_a = "", best_shorttext_b = "";
728 while ((j = shorttext.indexOf(seed, j + 1)) != -1) {
729 int prefixLength = diff_commonPrefix(longtext.substring(i),
730 shorttext.substring(j));
731 int suffixLength = diff_commonSuffix(longtext.substring(0, i),
732 shorttext.substring(0, j));
733 if (best_common.length() < suffixLength + prefixLength) {
734 best_common = shorttext.substring(j - suffixLength, j)
735 + shorttext.substring(j, j + prefixLength);
736 best_longtext_a = longtext.substring(0, i - suffixLength);
737 best_longtext_b = longtext.substring(i + prefixLength);
738 best_shorttext_a = shorttext.substring(0, j - suffixLength);
739 best_shorttext_b = shorttext.substring(j + prefixLength);
740 }
741 }
742 if (best_common.length() * 2 >= longtext.length()) {
743 return new String[]{best_longtext_a, best_longtext_b,
744 best_shorttext_a, best_shorttext_b, best_common};
745 } else {
746 return null;
747 }
748 }
749
750
751
752
753
754 public void diff_cleanupSemantic(LinkedList<Diff> diffs) {
755 if (diffs.isEmpty()) {
756 return;
757 }
758 boolean changes = false;
759 Stack<Diff> equalities = new Stack<Diff>();
760 String lastequality = null;
761 ListIterator<Diff> pointer = diffs.listIterator();
762
763 int length_insertions1 = 0;
764 int length_deletions1 = 0;
765
766 int length_insertions2 = 0;
767 int length_deletions2 = 0;
768 Diff thisDiff = pointer.next();
769 while (thisDiff != null) {
770 if (thisDiff.operation == Operation.EQUAL) {
771
772 equalities.push(thisDiff);
773 length_insertions1 = length_insertions2;
774 length_deletions1 = length_deletions2;
775 length_insertions2 = 0;
776 length_deletions2 = 0;
777 lastequality = thisDiff.text;
778 } else {
779
780 if (thisDiff.operation == Operation.INSERT) {
781 length_insertions2 += thisDiff.text.length();
782 } else {
783 length_deletions2 += thisDiff.text.length();
784 }
785
786
787 if (lastequality != null && (lastequality.length()
788 <= Math.max(length_insertions1, length_deletions1))
789 && (lastequality.length()
790 <= Math.max(length_insertions2, length_deletions2))) {
791
792
793 while (thisDiff != equalities.lastElement()) {
794 thisDiff = pointer.previous();
795 }
796 pointer.next();
797
798
799 pointer.set(new Diff(Operation.DELETE, lastequality));
800
801 pointer.add(new Diff(Operation.INSERT, lastequality));
802
803 equalities.pop();
804 if (!equalities.empty()) {
805
806 equalities.pop();
807 }
808 if (equalities.empty()) {
809
810 while (pointer.hasPrevious()) {
811 pointer.previous();
812 }
813 } else {
814
815 thisDiff = equalities.lastElement();
816 while (thisDiff != pointer.previous()) {
817
818 }
819 }
820
821 length_insertions1 = 0;
822 length_insertions2 = 0;
823 length_deletions1 = 0;
824 length_deletions2 = 0;
825 lastequality = null;
826 changes = true;
827 }
828 }
829 thisDiff = pointer.hasNext() ? pointer.next() : null;
830 }
831
832
833 if (changes) {
834 diff_cleanupMerge(diffs);
835 }
836 diff_cleanupSemanticLossless(diffs);
837
838
839
840
841
842
843
844 pointer = diffs.listIterator();
845 Diff prevDiff = null;
846 thisDiff = null;
847 if (pointer.hasNext()) {
848 prevDiff = pointer.next();
849 if (pointer.hasNext()) {
850 thisDiff = pointer.next();
851 }
852 }
853 while (thisDiff != null) {
854 if (prevDiff.operation == Operation.DELETE &&
855 thisDiff.operation == Operation.INSERT) {
856 String deletion = prevDiff.text;
857 String insertion = thisDiff.text;
858 int overlap_length1 = this.diff_commonOverlap(deletion, insertion);
859 int overlap_length2 = this.diff_commonOverlap(insertion, deletion);
860 if (overlap_length1 >= overlap_length2) {
861 if (overlap_length1 >= deletion.length() / 2.0 ||
862 overlap_length1 >= insertion.length() / 2.0) {
863
864 pointer.previous();
865 pointer.add(new Diff(Operation.EQUAL,
866 insertion.substring(0, overlap_length1)));
867 prevDiff.text =
868 deletion.substring(0, deletion.length() - overlap_length1);
869 thisDiff.text = insertion.substring(overlap_length1);
870
871
872 }
873 } else {
874 if (overlap_length2 >= deletion.length() / 2.0 ||
875 overlap_length2 >= insertion.length() / 2.0) {
876
877
878 pointer.previous();
879 pointer.add(new Diff(Operation.EQUAL,
880 deletion.substring(0, overlap_length2)));
881 prevDiff.operation = Operation.INSERT;
882 prevDiff.text =
883 insertion.substring(0, insertion.length() - overlap_length2);
884 thisDiff.operation = Operation.DELETE;
885 thisDiff.text = deletion.substring(overlap_length2);
886
887
888 }
889 }
890 thisDiff = pointer.hasNext() ? pointer.next() : null;
891 }
892 prevDiff = thisDiff;
893 thisDiff = pointer.hasNext() ? pointer.next() : null;
894 }
895 }
896
897
898
899
900
901
902
903 public void diff_cleanupSemanticLossless(LinkedList<Diff> diffs) {
904 String equality1, edit, equality2;
905 String commonString;
906 int commonOffset;
907 int score, bestScore;
908 String bestEquality1, bestEdit, bestEquality2;
909
910 ListIterator<Diff> pointer = diffs.listIterator();
911 Diff prevDiff = pointer.hasNext() ? pointer.next() : null;
912 Diff thisDiff = pointer.hasNext() ? pointer.next() : null;
913 Diff nextDiff = pointer.hasNext() ? pointer.next() : null;
914
915 while (nextDiff != null) {
916 if (prevDiff.operation == Operation.EQUAL &&
917 nextDiff.operation == Operation.EQUAL) {
918
919 equality1 = prevDiff.text;
920 edit = thisDiff.text;
921 equality2 = nextDiff.text;
922
923
924 commonOffset = diff_commonSuffix(equality1, edit);
925 if (commonOffset != 0) {
926 commonString = edit.substring(edit.length() - commonOffset);
927 equality1 = equality1.substring(0, equality1.length() - commonOffset);
928 edit = commonString + edit.substring(0, edit.length() - commonOffset);
929 equality2 = commonString + equality2;
930 }
931
932
933 bestEquality1 = equality1;
934 bestEdit = edit;
935 bestEquality2 = equality2;
936 bestScore = diff_cleanupSemanticScore(equality1, edit)
937 + diff_cleanupSemanticScore(edit, equality2);
938 while (edit.length() != 0 && equality2.length() != 0
939 && edit.charAt(0) == equality2.charAt(0)) {
940 equality1 += edit.charAt(0);
941 edit = edit.substring(1) + equality2.charAt(0);
942 equality2 = equality2.substring(1);
943 score = diff_cleanupSemanticScore(equality1, edit)
944 + diff_cleanupSemanticScore(edit, equality2);
945
946 if (score >= bestScore) {
947 bestScore = score;
948 bestEquality1 = equality1;
949 bestEdit = edit;
950 bestEquality2 = equality2;
951 }
952 }
953
954 if (!prevDiff.text.equals(bestEquality1)) {
955
956 if (bestEquality1.length() != 0) {
957 prevDiff.text = bestEquality1;
958 } else {
959 pointer.previous();
960 pointer.previous();
961 pointer.previous();
962 pointer.remove();
963 pointer.next();
964 pointer.next();
965 }
966 thisDiff.text = bestEdit;
967 if (bestEquality2.length() != 0) {
968 nextDiff.text = bestEquality2;
969 } else {
970 pointer.remove();
971 nextDiff = thisDiff;
972 thisDiff = prevDiff;
973 }
974 }
975 }
976 prevDiff = thisDiff;
977 thisDiff = nextDiff;
978 nextDiff = pointer.hasNext() ? pointer.next() : null;
979 }
980 }
981
982
983
984
985
986
987
988
989
990 private int diff_cleanupSemanticScore(String one, String two) {
991 if (one.length() == 0 || two.length() == 0) {
992
993 return 6;
994 }
995
996
997
998
999
1000
1001 char char1 = one.charAt(one.length() - 1);
1002 char char2 = two.charAt(0);
1003 boolean nonAlphaNumeric1 = !Character.isLetterOrDigit(char1);
1004 boolean nonAlphaNumeric2 = !Character.isLetterOrDigit(char2);
1005 boolean whitespace1 = nonAlphaNumeric1 && Character.isWhitespace(char1);
1006 boolean whitespace2 = nonAlphaNumeric2 && Character.isWhitespace(char2);
1007 boolean lineBreak1 = whitespace1
1008 && Character.getType(char1) == Character.CONTROL;
1009 boolean lineBreak2 = whitespace2
1010 && Character.getType(char2) == Character.CONTROL;
1011 boolean blankLine1 = lineBreak1 && BLANKLINEEND.matcher(one).find();
1012 boolean blankLine2 = lineBreak2 && BLANKLINESTART.matcher(two).find();
1013
1014 if (blankLine1 || blankLine2) {
1015
1016 return 5;
1017 } else if (lineBreak1 || lineBreak2) {
1018
1019 return 4;
1020 } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) {
1021
1022 return 3;
1023 } else if (whitespace1 || whitespace2) {
1024
1025 return 2;
1026 } else if (nonAlphaNumeric1 || nonAlphaNumeric2) {
1027
1028 return 1;
1029 }
1030 return 0;
1031 }
1032
1033
1034 private Pattern BLANKLINEEND
1035 = Pattern.compile("\\n\\r?\\n\\Z", Pattern.DOTALL);
1036 private Pattern BLANKLINESTART
1037 = Pattern.compile("\\A\\r?\\n\\r?\\n", Pattern.DOTALL);
1038
1039
1040
1041
1042
1043 public void diff_cleanupEfficiency(LinkedList<Diff> diffs) {
1044 if (diffs.isEmpty()) {
1045 return;
1046 }
1047 boolean changes = false;
1048 Stack<Diff> equalities = new Stack<Diff>();
1049 String lastequality = null;
1050 ListIterator<Diff> pointer = diffs.listIterator();
1051
1052 boolean pre_ins = false;
1053
1054 boolean pre_del = false;
1055
1056 boolean post_ins = false;
1057
1058 boolean post_del = false;
1059 Diff thisDiff = pointer.next();
1060 Diff safeDiff = thisDiff;
1061 while (thisDiff != null) {
1062 if (thisDiff.operation == Operation.EQUAL) {
1063
1064 if (thisDiff.text.length() < Diff_EditCost && (post_ins || post_del)) {
1065
1066 equalities.push(thisDiff);
1067 pre_ins = post_ins;
1068 pre_del = post_del;
1069 lastequality = thisDiff.text;
1070 } else {
1071
1072 equalities.clear();
1073 lastequality = null;
1074 safeDiff = thisDiff;
1075 }
1076 post_ins = post_del = false;
1077 } else {
1078
1079 if (thisDiff.operation == Operation.DELETE) {
1080 post_del = true;
1081 } else {
1082 post_ins = true;
1083 }
1084
1085
1086
1087
1088
1089
1090
1091
1092 if (lastequality != null
1093 && ((pre_ins && pre_del && post_ins && post_del)
1094 || ((lastequality.length() < Diff_EditCost / 2)
1095 && ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0)
1096 + (post_ins ? 1 : 0) + (post_del ? 1 : 0)) == 3))) {
1097
1098
1099 while (thisDiff != equalities.lastElement()) {
1100 thisDiff = pointer.previous();
1101 }
1102 pointer.next();
1103
1104
1105 pointer.set(new Diff(Operation.DELETE, lastequality));
1106
1107 pointer.add(thisDiff = new Diff(Operation.INSERT, lastequality));
1108
1109 equalities.pop();
1110 lastequality = null;
1111 if (pre_ins && pre_del) {
1112
1113 post_ins = post_del = true;
1114 equalities.clear();
1115 safeDiff = thisDiff;
1116 } else {
1117 if (!equalities.empty()) {
1118
1119 equalities.pop();
1120 }
1121 if (equalities.empty()) {
1122
1123
1124 thisDiff = safeDiff;
1125 } else {
1126
1127 thisDiff = equalities.lastElement();
1128 }
1129 while (thisDiff != pointer.previous()) {
1130
1131 }
1132 post_ins = post_del = false;
1133 }
1134
1135 changes = true;
1136 }
1137 }
1138 thisDiff = pointer.hasNext() ? pointer.next() : null;
1139 }
1140
1141 if (changes) {
1142 diff_cleanupMerge(diffs);
1143 }
1144 }
1145
1146
1147
1148
1149
1150
1151 public void diff_cleanupMerge(LinkedList<Diff> diffs) {
1152 diffs.add(new Diff(Operation.EQUAL, ""));
1153 ListIterator<Diff> pointer = diffs.listIterator();
1154 int count_delete = 0;
1155 int count_insert = 0;
1156 String text_delete = "";
1157 String text_insert = "";
1158 Diff thisDiff = pointer.next();
1159 Diff prevEqual = null;
1160 int commonlength;
1161 while (thisDiff != null) {
1162 switch (thisDiff.operation) {
1163 case INSERT:
1164 count_insert++;
1165 text_insert += thisDiff.text;
1166 prevEqual = null;
1167 break;
1168 case DELETE:
1169 count_delete++;
1170 text_delete += thisDiff.text;
1171 prevEqual = null;
1172 break;
1173 case EQUAL:
1174 if (count_delete + count_insert > 1) {
1175 boolean both_types = count_delete != 0 && count_insert != 0;
1176
1177 pointer.previous();
1178 while (count_delete-- > 0) {
1179 pointer.previous();
1180 pointer.remove();
1181 }
1182 while (count_insert-- > 0) {
1183 pointer.previous();
1184 pointer.remove();
1185 }
1186 if (both_types) {
1187
1188 commonlength = diff_commonPrefix(text_insert, text_delete);
1189 if (commonlength != 0) {
1190 if (pointer.hasPrevious()) {
1191 thisDiff = pointer.previous();
1192 assert thisDiff.operation == Operation.EQUAL
1193 : "Previous diff should have been an equality.";
1194 thisDiff.text += text_insert.substring(0, commonlength);
1195 pointer.next();
1196 } else {
1197 pointer.add(new Diff(Operation.EQUAL,
1198 text_insert.substring(0, commonlength)));
1199 }
1200 text_insert = text_insert.substring(commonlength);
1201 text_delete = text_delete.substring(commonlength);
1202 }
1203
1204 commonlength = diff_commonSuffix(text_insert, text_delete);
1205 if (commonlength != 0) {
1206 thisDiff = pointer.next();
1207 thisDiff.text = text_insert.substring(text_insert.length()
1208 - commonlength) + thisDiff.text;
1209 text_insert = text_insert.substring(0, text_insert.length()
1210 - commonlength);
1211 text_delete = text_delete.substring(0, text_delete.length()
1212 - commonlength);
1213 pointer.previous();
1214 }
1215 }
1216
1217 if (text_delete.length() != 0) {
1218 pointer.add(new Diff(Operation.DELETE, text_delete));
1219 }
1220 if (text_insert.length() != 0) {
1221 pointer.add(new Diff(Operation.INSERT, text_insert));
1222 }
1223
1224 thisDiff = pointer.hasNext() ? pointer.next() : null;
1225 } else if (prevEqual != null) {
1226
1227 prevEqual.text += thisDiff.text;
1228 pointer.remove();
1229 thisDiff = pointer.previous();
1230 pointer.next();
1231 }
1232 count_insert = 0;
1233 count_delete = 0;
1234 text_delete = "";
1235 text_insert = "";
1236 prevEqual = thisDiff;
1237 break;
1238 }
1239 thisDiff = pointer.hasNext() ? pointer.next() : null;
1240 }
1241 if (diffs.getLast().text.length() == 0) {
1242 diffs.removeLast();
1243 }
1244
1245
1246
1247
1248
1249
1250 boolean changes = false;
1251
1252
1253 pointer = diffs.listIterator();
1254 Diff prevDiff = pointer.hasNext() ? pointer.next() : null;
1255 thisDiff = pointer.hasNext() ? pointer.next() : null;
1256 Diff nextDiff = pointer.hasNext() ? pointer.next() : null;
1257
1258 while (nextDiff != null) {
1259 if (prevDiff.operation == Operation.EQUAL &&
1260 nextDiff.operation == Operation.EQUAL) {
1261
1262 if (thisDiff.text.endsWith(prevDiff.text)) {
1263
1264 thisDiff.text = prevDiff.text
1265 + thisDiff.text.substring(0, thisDiff.text.length()
1266 - prevDiff.text.length());
1267 nextDiff.text = prevDiff.text + nextDiff.text;
1268 pointer.previous();
1269 pointer.previous();
1270 pointer.previous();
1271 pointer.remove();
1272 pointer.next();
1273 thisDiff = pointer.next();
1274 nextDiff = pointer.hasNext() ? pointer.next() : null;
1275 changes = true;
1276 } else if (thisDiff.text.startsWith(nextDiff.text)) {
1277
1278 prevDiff.text += nextDiff.text;
1279 thisDiff.text = thisDiff.text.substring(nextDiff.text.length())
1280 + nextDiff.text;
1281 pointer.remove();
1282 nextDiff = pointer.hasNext() ? pointer.next() : null;
1283 changes = true;
1284 }
1285 }
1286 prevDiff = thisDiff;
1287 thisDiff = nextDiff;
1288 nextDiff = pointer.hasNext() ? pointer.next() : null;
1289 }
1290
1291 if (changes) {
1292 diff_cleanupMerge(diffs);
1293 }
1294 }
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304 public int diff_xIndex(LinkedList<Diff> diffs, int loc) {
1305 int chars1 = 0;
1306 int chars2 = 0;
1307 int last_chars1 = 0;
1308 int last_chars2 = 0;
1309 Diff lastDiff = null;
1310 for (Diff aDiff : diffs) {
1311 if (aDiff.operation != Operation.INSERT) {
1312
1313 chars1 += aDiff.text.length();
1314 }
1315 if (aDiff.operation != Operation.DELETE) {
1316
1317 chars2 += aDiff.text.length();
1318 }
1319 if (chars1 > loc) {
1320
1321 lastDiff = aDiff;
1322 break;
1323 }
1324 last_chars1 = chars1;
1325 last_chars2 = chars2;
1326 }
1327 if (lastDiff != null && lastDiff.operation == Operation.DELETE) {
1328
1329 return last_chars2;
1330 }
1331
1332 return last_chars2 + (loc - last_chars1);
1333 }
1334
1335
1336
1337
1338
1339
1340 public String diff_prettyHtml(LinkedList<Diff> diffs) {
1341 StringBuilder html = new StringBuilder();
1342 for (Diff aDiff : diffs) {
1343 String text = aDiff.text.replace("&", "&").replace("<", "<")
1344 .replace(">", ">").replace("\n", "¶<br>");
1345 switch (aDiff.operation) {
1346 case INSERT:
1347 html.append("<ins style=\"background:#e6ffe6;\">").append(text)
1348 .append("</ins>");
1349 break;
1350 case DELETE:
1351 html.append("<del style=\"background:#ffe6e6;\">").append(text)
1352 .append("</del>");
1353 break;
1354 case EQUAL:
1355 html.append("<span>").append(text).append("</span>");
1356 break;
1357 }
1358 }
1359 return html.toString();
1360 }
1361
1362
1363
1364
1365
1366
1367 public String diff_text1(LinkedList<Diff> diffs) {
1368 StringBuilder text = new StringBuilder();
1369 for (Diff aDiff : diffs) {
1370 if (aDiff.operation != Operation.INSERT) {
1371 text.append(aDiff.text);
1372 }
1373 }
1374 return text.toString();
1375 }
1376
1377
1378
1379
1380
1381
1382 public String diff_text2(LinkedList<Diff> diffs) {
1383 StringBuilder text = new StringBuilder();
1384 for (Diff aDiff : diffs) {
1385 if (aDiff.operation != Operation.DELETE) {
1386 text.append(aDiff.text);
1387 }
1388 }
1389 return text.toString();
1390 }
1391
1392
1393
1394
1395
1396
1397
1398 public int diff_levenshtein(LinkedList<Diff> diffs) {
1399 int levenshtein = 0;
1400 int insertions = 0;
1401 int deletions = 0;
1402 for (Diff aDiff : diffs) {
1403 switch (aDiff.operation) {
1404 case INSERT:
1405 insertions += aDiff.text.length();
1406 break;
1407 case DELETE:
1408 deletions += aDiff.text.length();
1409 break;
1410 case EQUAL:
1411
1412 levenshtein += Math.max(insertions, deletions);
1413 insertions = 0;
1414 deletions = 0;
1415 break;
1416 }
1417 }
1418 levenshtein += Math.max(insertions, deletions);
1419 return levenshtein;
1420 }
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430 public String diff_toDelta(LinkedList<Diff> diffs) {
1431 StringBuilder text = new StringBuilder();
1432 for (Diff aDiff : diffs) {
1433 switch (aDiff.operation) {
1434 case INSERT:
1435 try {
1436 text.append("+").append(URLEncoder.encode(aDiff.text, "UTF-8")
1437 .replace('+', ' ')).append("\t");
1438 } catch (UnsupportedEncodingException e) {
1439
1440 throw new Error("This system does not support UTF-8.", e);
1441 }
1442 break;
1443 case DELETE:
1444 text.append("-").append(aDiff.text.length()).append("\t");
1445 break;
1446 case EQUAL:
1447 text.append("=").append(aDiff.text.length()).append("\t");
1448 break;
1449 }
1450 }
1451 String delta = text.toString();
1452 if (delta.length() != 0) {
1453
1454 delta = delta.substring(0, delta.length() - 1);
1455 delta = unescapeForEncodeUriCompatability(delta);
1456 }
1457 return delta;
1458 }
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468 public LinkedList<Diff> diff_fromDelta(String text1, String delta)
1469 throws IllegalArgumentException {
1470 LinkedList<Diff> diffs = new LinkedList<Diff>();
1471 int pointer = 0;
1472 String[] tokens = delta.split("\t");
1473 for (String token : tokens) {
1474 if (token.length() == 0) {
1475
1476 continue;
1477 }
1478
1479
1480 String param = token.substring(1);
1481 switch (token.charAt(0)) {
1482 case '+':
1483
1484 param = param.replace("+", "%2B");
1485 try {
1486 param = URLDecoder.decode(param, "UTF-8");
1487 } catch (UnsupportedEncodingException e) {
1488
1489 throw new Error("This system does not support UTF-8.", e);
1490 } catch (IllegalArgumentException e) {
1491
1492 throw new IllegalArgumentException(
1493 "Illegal escape in diff_fromDelta: " + param, e);
1494 }
1495 diffs.add(new Diff(Operation.INSERT, param));
1496 break;
1497 case '-':
1498
1499 case '=':
1500 int n;
1501 try {
1502 n = Integer.parseInt(param);
1503 } catch (NumberFormatException e) {
1504 throw new IllegalArgumentException(
1505 "Invalid number in diff_fromDelta: " + param, e);
1506 }
1507 if (n < 0) {
1508 throw new IllegalArgumentException(
1509 "Negative number in diff_fromDelta: " + param);
1510 }
1511 String text;
1512 try {
1513 text = text1.substring(pointer, pointer += n);
1514 } catch (StringIndexOutOfBoundsException e) {
1515 throw new IllegalArgumentException("Delta length (" + pointer
1516 + ") larger than source text length (" + text1.length()
1517 + ").", e);
1518 }
1519 if (token.charAt(0) == '=') {
1520 diffs.add(new Diff(Operation.EQUAL, text));
1521 } else {
1522 diffs.add(new Diff(Operation.DELETE, text));
1523 }
1524 break;
1525 default:
1526
1527 throw new IllegalArgumentException(
1528 "Invalid diff operation in diff_fromDelta: " + token.charAt(0));
1529 }
1530 }
1531 if (pointer != text1.length()) {
1532 throw new IllegalArgumentException("Delta length (" + pointer
1533 + ") smaller than source text length (" + text1.length() + ").");
1534 }
1535 return diffs;
1536 }
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550 public int match_main(String text, String pattern, int loc) {
1551
1552 if (text == null || pattern == null) {
1553 throw new IllegalArgumentException("Null inputs. (match_main)");
1554 }
1555
1556 loc = Math.max(0, Math.min(loc, text.length()));
1557 if (text.equals(pattern)) {
1558
1559 return 0;
1560 } else if (text.length() == 0) {
1561
1562 return -1;
1563 } else if (loc + pattern.length() <= text.length()
1564 && text.substring(loc, loc + pattern.length()).equals(pattern)) {
1565
1566 return loc;
1567 } else {
1568
1569 return match_bitap(text, pattern, loc);
1570 }
1571 }
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581 protected int match_bitap(String text, String pattern, int loc) {
1582 assert (Match_MaxBits == 0 || pattern.length() <= Match_MaxBits)
1583 : "Pattern too long for this application.";
1584
1585
1586 Map<Character, Integer> s = match_alphabet(pattern);
1587
1588
1589 double score_threshold = Match_Threshold;
1590
1591 int best_loc = text.indexOf(pattern, loc);
1592 if (best_loc != -1) {
1593 score_threshold = Math.min(match_bitapScore(0, best_loc, loc, pattern),
1594 score_threshold);
1595
1596 best_loc = text.lastIndexOf(pattern, loc + pattern.length());
1597 if (best_loc != -1) {
1598 score_threshold = Math.min(match_bitapScore(0, best_loc, loc, pattern),
1599 score_threshold);
1600 }
1601 }
1602
1603
1604 int matchmask = 1 << (pattern.length() - 1);
1605 best_loc = -1;
1606
1607 int bin_min, bin_mid;
1608 int bin_max = pattern.length() + text.length();
1609
1610 int[] last_rd = new int[0];
1611 for (int d = 0; d < pattern.length(); d++) {
1612
1613
1614
1615 bin_min = 0;
1616 bin_mid = bin_max;
1617 while (bin_min < bin_mid) {
1618 if (match_bitapScore(d, loc + bin_mid, loc, pattern)
1619 <= score_threshold) {
1620 bin_min = bin_mid;
1621 } else {
1622 bin_max = bin_mid;
1623 }
1624 bin_mid = (bin_max - bin_min) / 2 + bin_min;
1625 }
1626
1627 bin_max = bin_mid;
1628 int start = Math.max(1, loc - bin_mid + 1);
1629 int finish = Math.min(loc + bin_mid, text.length()) + pattern.length();
1630
1631 int[] rd = new int[finish + 2];
1632 rd[finish + 1] = (1 << d) - 1;
1633 for (int j = finish; j >= start; j--) {
1634 int charMatch;
1635 if (text.length() <= j - 1 || !s.containsKey(text.charAt(j - 1))) {
1636
1637 charMatch = 0;
1638 } else {
1639 charMatch = s.get(text.charAt(j - 1));
1640 }
1641 if (d == 0) {
1642
1643 rd[j] = ((rd[j + 1] << 1) | 1) & charMatch;
1644 } else {
1645
1646 rd[j] = (((rd[j + 1] << 1) | 1) & charMatch)
1647 | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1];
1648 }
1649 if ((rd[j] & matchmask) != 0) {
1650 double score = match_bitapScore(d, j - 1, loc, pattern);
1651
1652
1653 if (score <= score_threshold) {
1654
1655 score_threshold = score;
1656 best_loc = j - 1;
1657 if (best_loc > loc) {
1658
1659 start = Math.max(1, 2 * loc - best_loc);
1660 } else {
1661
1662 break;
1663 }
1664 }
1665 }
1666 }
1667 if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) {
1668
1669 break;
1670 }
1671 last_rd = rd;
1672 }
1673 return best_loc;
1674 }
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684 private double match_bitapScore(int e, int x, int loc, String pattern) {
1685 float accuracy = (float) e / pattern.length();
1686 int proximity = Math.abs(loc - x);
1687 if (Match_Distance == 0) {
1688
1689 return proximity == 0 ? accuracy : 1.0;
1690 }
1691 return accuracy + (proximity / (float) Match_Distance);
1692 }
1693
1694
1695
1696
1697
1698
1699 protected Map<Character, Integer> match_alphabet(String pattern) {
1700 Map<Character, Integer> s = new HashMap<Character, Integer>();
1701 char[] char_pattern = pattern.toCharArray();
1702 for (char c : char_pattern) {
1703 s.put(c, 0);
1704 }
1705 int i = 0;
1706 for (char c : char_pattern) {
1707 s.put(c, s.get(c) | (1 << (pattern.length() - i - 1)));
1708 i++;
1709 }
1710 return s;
1711 }
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723 protected void patch_addContext(Patch patch, String text) {
1724 if (text.length() == 0) {
1725 return;
1726 }
1727 String pattern = text.substring(patch.start2, patch.start2 + patch.length1);
1728 int padding = 0;
1729
1730
1731
1732 while (text.indexOf(pattern) != text.lastIndexOf(pattern)
1733 && pattern.length() < Match_MaxBits - Patch_Margin - Patch_Margin) {
1734 padding += Patch_Margin;
1735 pattern = text.substring(Math.max(0, patch.start2 - padding),
1736 Math.min(text.length(), patch.start2 + patch.length1 + padding));
1737 }
1738
1739 padding += Patch_Margin;
1740
1741
1742 String prefix = text.substring(Math.max(0, patch.start2 - padding),
1743 patch.start2);
1744 if (prefix.length() != 0) {
1745 patch.diffs.addFirst(new Diff(Operation.EQUAL, prefix));
1746 }
1747
1748 String suffix = text.substring(patch.start2 + patch.length1,
1749 Math.min(text.length(), patch.start2 + patch.length1 + padding));
1750 if (suffix.length() != 0) {
1751 patch.diffs.addLast(new Diff(Operation.EQUAL, suffix));
1752 }
1753
1754
1755 patch.start1 -= prefix.length();
1756 patch.start2 -= prefix.length();
1757
1758 patch.length1 += prefix.length() + suffix.length();
1759 patch.length2 += prefix.length() + suffix.length();
1760 }
1761
1762
1763
1764
1765
1766
1767
1768
1769 public LinkedList<Patch> patch_make(String text1, String text2) {
1770 if (text1 == null || text2 == null) {
1771 throw new IllegalArgumentException("Null inputs. (patch_make)");
1772 }
1773
1774 LinkedList<Diff> diffs = diff_main(text1, text2, true);
1775 if (diffs.size() > 2) {
1776 diff_cleanupSemantic(diffs);
1777 diff_cleanupEfficiency(diffs);
1778 }
1779 return patch_make(text1, diffs);
1780 }
1781
1782
1783
1784
1785
1786
1787
1788 public LinkedList<Patch> patch_make(LinkedList<Diff> diffs) {
1789 if (diffs == null) {
1790 throw new IllegalArgumentException("Null inputs. (patch_make)");
1791 }
1792
1793 String text1 = diff_text1(diffs);
1794 return patch_make(text1, diffs);
1795 }
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806 public LinkedList<Patch> patch_make(String text1, String text2,
1807 LinkedList<Diff> diffs) {
1808 return patch_make(text1, diffs);
1809 }
1810
1811
1812
1813
1814
1815
1816
1817
1818 public LinkedList<Patch> patch_make(String text1, LinkedList<Diff> diffs) {
1819 if (text1 == null || diffs == null) {
1820 throw new IllegalArgumentException("Null inputs. (patch_make)");
1821 }
1822
1823 LinkedList<Patch> patches = new LinkedList<Patch>();
1824 if (diffs.isEmpty()) {
1825 return patches;
1826 }
1827 Patch patch = new Patch();
1828 int char_count1 = 0;
1829 int char_count2 = 0;
1830
1831
1832
1833 String prepatch_text = text1;
1834 String postpatch_text = text1;
1835 for (Diff aDiff : diffs) {
1836 if (patch.diffs.isEmpty() && aDiff.operation != Operation.EQUAL) {
1837
1838 patch.start1 = char_count1;
1839 patch.start2 = char_count2;
1840 }
1841
1842 switch (aDiff.operation) {
1843 case INSERT:
1844 patch.diffs.add(aDiff);
1845 patch.length2 += aDiff.text.length();
1846 postpatch_text = postpatch_text.substring(0, char_count2)
1847 + aDiff.text + postpatch_text.substring(char_count2);
1848 break;
1849 case DELETE:
1850 patch.length1 += aDiff.text.length();
1851 patch.diffs.add(aDiff);
1852 postpatch_text = postpatch_text.substring(0, char_count2)
1853 + postpatch_text.substring(char_count2 + aDiff.text.length());
1854 break;
1855 case EQUAL:
1856 if (aDiff.text.length() <= 2 * Patch_Margin
1857 && !patch.diffs.isEmpty() && aDiff != diffs.getLast()) {
1858
1859 patch.diffs.add(aDiff);
1860 patch.length1 += aDiff.text.length();
1861 patch.length2 += aDiff.text.length();
1862 }
1863
1864 if (aDiff.text.length() >= 2 * Patch_Margin) {
1865
1866 if (!patch.diffs.isEmpty()) {
1867 patch_addContext(patch, prepatch_text);
1868 patches.add(patch);
1869 patch = new Patch();
1870
1871
1872
1873
1874 prepatch_text = postpatch_text;
1875 char_count1 = char_count2;
1876 }
1877 }
1878 break;
1879 }
1880
1881
1882 if (aDiff.operation != Operation.INSERT) {
1883 char_count1 += aDiff.text.length();
1884 }
1885 if (aDiff.operation != Operation.DELETE) {
1886 char_count2 += aDiff.text.length();
1887 }
1888 }
1889
1890 if (!patch.diffs.isEmpty()) {
1891 patch_addContext(patch, prepatch_text);
1892 patches.add(patch);
1893 }
1894
1895 return patches;
1896 }
1897
1898
1899
1900
1901
1902
1903 public LinkedList<Patch> patch_deepCopy(LinkedList<Patch> patches) {
1904 LinkedList<Patch> patchesCopy = new LinkedList<Patch>();
1905 for (Patch aPatch : patches) {
1906 Patch patchCopy = new Patch();
1907 for (Diff aDiff : aPatch.diffs) {
1908 Diff diffCopy = new Diff(aDiff.operation, aDiff.text);
1909 patchCopy.diffs.add(diffCopy);
1910 }
1911 patchCopy.start1 = aPatch.start1;
1912 patchCopy.start2 = aPatch.start2;
1913 patchCopy.length1 = aPatch.length1;
1914 patchCopy.length2 = aPatch.length2;
1915 patchesCopy.add(patchCopy);
1916 }
1917 return patchesCopy;
1918 }
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928 public Object[] patch_apply(LinkedList<Patch> patches, String text) {
1929 if (patches.isEmpty()) {
1930 return new Object[]{text, new boolean[0]};
1931 }
1932
1933
1934 patches = patch_deepCopy(patches);
1935
1936 String nullPadding = patch_addPadding(patches);
1937 text = nullPadding + text + nullPadding;
1938 patch_splitMax(patches);
1939
1940 int x = 0;
1941
1942
1943
1944
1945 int delta = 0;
1946 boolean[] results = new boolean[patches.size()];
1947 for (Patch aPatch : patches) {
1948 int expected_loc = aPatch.start2 + delta;
1949 String text1 = diff_text1(aPatch.diffs);
1950 int start_loc;
1951 int end_loc = -1;
1952 if (text1.length() > this.Match_MaxBits) {
1953
1954
1955 start_loc = match_main(text,
1956 text1.substring(0, this.Match_MaxBits), expected_loc);
1957 if (start_loc != -1) {
1958 end_loc = match_main(text,
1959 text1.substring(text1.length() - this.Match_MaxBits),
1960 expected_loc + text1.length() - this.Match_MaxBits);
1961 if (end_loc == -1 || start_loc >= end_loc) {
1962
1963 start_loc = -1;
1964 }
1965 }
1966 } else {
1967 start_loc = match_main(text, text1, expected_loc);
1968 }
1969 if (start_loc == -1) {
1970
1971 results[x] = false;
1972
1973 delta -= aPatch.length2 - aPatch.length1;
1974 } else {
1975
1976 results[x] = true;
1977 delta = start_loc - expected_loc;
1978 String text2;
1979 if (end_loc == -1) {
1980 text2 = text.substring(start_loc,
1981 Math.min(start_loc + text1.length(), text.length()));
1982 } else {
1983 text2 = text.substring(start_loc,
1984 Math.min(end_loc + this.Match_MaxBits, text.length()));
1985 }
1986 if (text1.equals(text2)) {
1987
1988 text = text.substring(0, start_loc) + diff_text2(aPatch.diffs)
1989 + text.substring(start_loc + text1.length());
1990 } else {
1991
1992
1993 LinkedList<Diff> diffs = diff_main(text1, text2, false);
1994 if (text1.length() > this.Match_MaxBits
1995 && diff_levenshtein(diffs) / (float) text1.length()
1996 > this.Patch_DeleteThreshold) {
1997
1998 results[x] = false;
1999 } else {
2000 diff_cleanupSemanticLossless(diffs);
2001 int index1 = 0;
2002 for (Diff aDiff : aPatch.diffs) {
2003 if (aDiff.operation != Operation.EQUAL) {
2004 int index2 = diff_xIndex(diffs, index1);
2005 if (aDiff.operation == Operation.INSERT) {
2006
2007 text = text.substring(0, start_loc + index2) + aDiff.text
2008 + text.substring(start_loc + index2);
2009 } else if (aDiff.operation == Operation.DELETE) {
2010
2011 text = text.substring(0, start_loc + index2)
2012 + text.substring(start_loc + diff_xIndex(diffs,
2013 index1 + aDiff.text.length()));
2014 }
2015 }
2016 if (aDiff.operation != Operation.DELETE) {
2017 index1 += aDiff.text.length();
2018 }
2019 }
2020 }
2021 }
2022 }
2023 x++;
2024 }
2025
2026 text = text.substring(nullPadding.length(), text.length()
2027 - nullPadding.length());
2028 return new Object[]{text, results};
2029 }
2030
2031
2032
2033
2034
2035
2036
2037 public String patch_addPadding(LinkedList<Patch> patches) {
2038 short paddingLength = this.Patch_Margin;
2039 String nullPadding = "";
2040 for (short x = 1; x <= paddingLength; x++) {
2041 nullPadding += String.valueOf((char) x);
2042 }
2043
2044
2045 for (Patch aPatch : patches) {
2046 aPatch.start1 += paddingLength;
2047 aPatch.start2 += paddingLength;
2048 }
2049
2050
2051 Patch patch = patches.getFirst();
2052 LinkedList<Diff> diffs = patch.diffs;
2053 if (diffs.isEmpty() || diffs.getFirst().operation != Operation.EQUAL) {
2054
2055 diffs.addFirst(new Diff(Operation.EQUAL, nullPadding));
2056 patch.start1 -= paddingLength;
2057 patch.start2 -= paddingLength;
2058 patch.length1 += paddingLength;
2059 patch.length2 += paddingLength;
2060 } else if (paddingLength > diffs.getFirst().text.length()) {
2061
2062 Diff firstDiff = diffs.getFirst();
2063 int extraLength = paddingLength - firstDiff.text.length();
2064 firstDiff.text = nullPadding.substring(firstDiff.text.length())
2065 + firstDiff.text;
2066 patch.start1 -= extraLength;
2067 patch.start2 -= extraLength;
2068 patch.length1 += extraLength;
2069 patch.length2 += extraLength;
2070 }
2071
2072
2073 patch = patches.getLast();
2074 diffs = patch.diffs;
2075 if (diffs.isEmpty() || diffs.getLast().operation != Operation.EQUAL) {
2076
2077 diffs.addLast(new Diff(Operation.EQUAL, nullPadding));
2078 patch.length1 += paddingLength;
2079 patch.length2 += paddingLength;
2080 } else if (paddingLength > diffs.getLast().text.length()) {
2081
2082 Diff lastDiff = diffs.getLast();
2083 int extraLength = paddingLength - lastDiff.text.length();
2084 lastDiff.text += nullPadding.substring(0, extraLength);
2085 patch.length1 += extraLength;
2086 patch.length2 += extraLength;
2087 }
2088
2089 return nullPadding;
2090 }
2091
2092
2093
2094
2095
2096
2097
2098 public void patch_splitMax(LinkedList<Patch> patches) {
2099 short patch_size = Match_MaxBits;
2100 String precontext, postcontext;
2101 Patch patch;
2102 int start1, start2;
2103 boolean empty;
2104 Operation diff_type;
2105 String diff_text;
2106 ListIterator<Patch> pointer = patches.listIterator();
2107 Patch bigpatch = pointer.hasNext() ? pointer.next() : null;
2108 while (bigpatch != null) {
2109 if (bigpatch.length1 <= Match_MaxBits) {
2110 bigpatch = pointer.hasNext() ? pointer.next() : null;
2111 continue;
2112 }
2113
2114 pointer.remove();
2115 start1 = bigpatch.start1;
2116 start2 = bigpatch.start2;
2117 precontext = "";
2118 while (!bigpatch.diffs.isEmpty()) {
2119
2120 patch = new Patch();
2121 empty = true;
2122 patch.start1 = start1 - precontext.length();
2123 patch.start2 = start2 - precontext.length();
2124 if (precontext.length() != 0) {
2125 patch.length1 = patch.length2 = precontext.length();
2126 patch.diffs.add(new Diff(Operation.EQUAL, precontext));
2127 }
2128 while (!bigpatch.diffs.isEmpty()
2129 && patch.length1 < patch_size - Patch_Margin) {
2130 diff_type = bigpatch.diffs.getFirst().operation;
2131 diff_text = bigpatch.diffs.getFirst().text;
2132 if (diff_type == Operation.INSERT) {
2133
2134 patch.length2 += diff_text.length();
2135 start2 += diff_text.length();
2136 patch.diffs.addLast(bigpatch.diffs.removeFirst());
2137 empty = false;
2138 } else if (diff_type == Operation.DELETE && patch.diffs.size() == 1
2139 && patch.diffs.getFirst().operation == Operation.EQUAL
2140 && diff_text.length() > 2 * patch_size) {
2141
2142 patch.length1 += diff_text.length();
2143 start1 += diff_text.length();
2144 empty = false;
2145 patch.diffs.add(new Diff(diff_type, diff_text));
2146 bigpatch.diffs.removeFirst();
2147 } else {
2148
2149 diff_text = diff_text.substring(0, Math.min(diff_text.length(),
2150 patch_size - patch.length1 - Patch_Margin));
2151 patch.length1 += diff_text.length();
2152 start1 += diff_text.length();
2153 if (diff_type == Operation.EQUAL) {
2154 patch.length2 += diff_text.length();
2155 start2 += diff_text.length();
2156 } else {
2157 empty = false;
2158 }
2159 patch.diffs.add(new Diff(diff_type, diff_text));
2160 if (diff_text.equals(bigpatch.diffs.getFirst().text)) {
2161 bigpatch.diffs.removeFirst();
2162 } else {
2163 bigpatch.diffs.getFirst().text = bigpatch.diffs.getFirst().text
2164 .substring(diff_text.length());
2165 }
2166 }
2167 }
2168
2169 precontext = diff_text2(patch.diffs);
2170 precontext = precontext.substring(Math.max(0, precontext.length()
2171 - Patch_Margin));
2172
2173 if (diff_text1(bigpatch.diffs).length() > Patch_Margin) {
2174 postcontext = diff_text1(bigpatch.diffs).substring(0, Patch_Margin);
2175 } else {
2176 postcontext = diff_text1(bigpatch.diffs);
2177 }
2178 if (postcontext.length() != 0) {
2179 patch.length1 += postcontext.length();
2180 patch.length2 += postcontext.length();
2181 if (!patch.diffs.isEmpty()
2182 && patch.diffs.getLast().operation == Operation.EQUAL) {
2183 patch.diffs.getLast().text += postcontext;
2184 } else {
2185 patch.diffs.add(new Diff(Operation.EQUAL, postcontext));
2186 }
2187 }
2188 if (!empty) {
2189 pointer.add(patch);
2190 }
2191 }
2192 bigpatch = pointer.hasNext() ? pointer.next() : null;
2193 }
2194 }
2195
2196
2197
2198
2199
2200
2201 public String patch_toText(List<Patch> patches) {
2202 StringBuilder text = new StringBuilder();
2203 for (Patch aPatch : patches) {
2204 text.append(aPatch);
2205 }
2206 return text.toString();
2207 }
2208
2209
2210
2211
2212
2213
2214
2215
2216 public List<Patch> patch_fromText(String textline)
2217 throws IllegalArgumentException {
2218 List<Patch> patches = new LinkedList<Patch>();
2219 if (textline.length() == 0) {
2220 return patches;
2221 }
2222 List<String> textList = Arrays.asList(textline.split("\n"));
2223 LinkedList<String> text = new LinkedList<String>(textList);
2224 Patch patch;
2225 Pattern patchHeader
2226 = Pattern.compile("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$");
2227 Matcher m;
2228 char sign;
2229 String line;
2230 while (!text.isEmpty()) {
2231 m = patchHeader.matcher(text.getFirst());
2232 if (!m.matches()) {
2233 throw new IllegalArgumentException(
2234 "Invalid patch string: " + text.getFirst());
2235 }
2236 patch = new Patch();
2237 patches.add(patch);
2238 patch.start1 = Integer.parseInt(m.group(1));
2239 if (m.group(2).length() == 0) {
2240 patch.start1--;
2241 patch.length1 = 1;
2242 } else if (m.group(2).equals("0")) {
2243 patch.length1 = 0;
2244 } else {
2245 patch.start1--;
2246 patch.length1 = Integer.parseInt(m.group(2));
2247 }
2248
2249 patch.start2 = Integer.parseInt(m.group(3));
2250 if (m.group(4).length() == 0) {
2251 patch.start2--;
2252 patch.length2 = 1;
2253 } else if (m.group(4).equals("0")) {
2254 patch.length2 = 0;
2255 } else {
2256 patch.start2--;
2257 patch.length2 = Integer.parseInt(m.group(4));
2258 }
2259 text.removeFirst();
2260
2261 while (!text.isEmpty()) {
2262 try {
2263 sign = text.getFirst().charAt(0);
2264 } catch (IndexOutOfBoundsException e) {
2265
2266 text.removeFirst();
2267 continue;
2268 }
2269 line = text.getFirst().substring(1);
2270 line = line.replace("+", "%2B");
2271 try {
2272 line = URLDecoder.decode(line, "UTF-8");
2273 } catch (UnsupportedEncodingException e) {
2274
2275 throw new Error("This system does not support UTF-8.", e);
2276 } catch (IllegalArgumentException e) {
2277
2278 throw new IllegalArgumentException(
2279 "Illegal escape in patch_fromText: " + line, e);
2280 }
2281 if (sign == '-') {
2282
2283 patch.diffs.add(new Diff(Operation.DELETE, line));
2284 } else if (sign == '+') {
2285
2286 patch.diffs.add(new Diff(Operation.INSERT, line));
2287 } else if (sign == ' ') {
2288
2289 patch.diffs.add(new Diff(Operation.EQUAL, line));
2290 } else if (sign == '@') {
2291
2292 break;
2293 } else {
2294
2295 throw new IllegalArgumentException(
2296 "Invalid patch mode '" + sign + "' in: " + line);
2297 }
2298 text.removeFirst();
2299 }
2300 }
2301 return patches;
2302 }
2303
2304
2305
2306
2307
2308 public static class Diff {
2309
2310
2311
2312 public Operation operation;
2313
2314
2315
2316 public String text;
2317
2318
2319
2320
2321
2322
2323 public Diff(Operation operation, String text) {
2324
2325 this.operation = operation;
2326 this.text = text;
2327 }
2328
2329
2330
2331
2332
2333 public String toString() {
2334 String prettyText = this.text.replace('\n', '\u00b6');
2335 return "Diff(" + this.operation + ",\"" + prettyText + "\")";
2336 }
2337
2338
2339
2340
2341
2342
2343 @Override
2344 public int hashCode() {
2345 final int prime = 31;
2346 int result = (operation == null) ? 0 : operation.hashCode();
2347 result += prime * ((text == null) ? 0 : text.hashCode());
2348 return result;
2349 }
2350
2351
2352
2353
2354
2355
2356 @Override
2357 public boolean equals(Object obj) {
2358 if (this == obj) {
2359 return true;
2360 }
2361 if (obj == null) {
2362 return false;
2363 }
2364 if (getClass() != obj.getClass()) {
2365 return false;
2366 }
2367 Diff other = (Diff) obj;
2368 if (operation != other.operation) {
2369 return false;
2370 }
2371 if (text == null) {
2372 if (other.text != null) {
2373 return false;
2374 }
2375 } else if (!text.equals(other.text)) {
2376 return false;
2377 }
2378 return true;
2379 }
2380 }
2381
2382
2383
2384
2385
2386 public static class Patch {
2387 public LinkedList<Diff> diffs;
2388 public int start1;
2389 public int start2;
2390 public int length1;
2391 public int length2;
2392
2393
2394
2395
2396 public Patch() {
2397 this.diffs = new LinkedList<Diff>();
2398 }
2399
2400
2401
2402
2403
2404
2405
2406 public String toString() {
2407 String coords1, coords2;
2408 if (this.length1 == 0) {
2409 coords1 = this.start1 + ",0";
2410 } else if (this.length1 == 1) {
2411 coords1 = Integer.toString(this.start1 + 1);
2412 } else {
2413 coords1 = (this.start1 + 1) + "," + this.length1;
2414 }
2415 if (this.length2 == 0) {
2416 coords2 = this.start2 + ",0";
2417 } else if (this.length2 == 1) {
2418 coords2 = Integer.toString(this.start2 + 1);
2419 } else {
2420 coords2 = (this.start2 + 1) + "," + this.length2;
2421 }
2422 StringBuilder text = new StringBuilder();
2423 text.append("@@ -").append(coords1).append(" +").append(coords2)
2424 .append(" @@\n");
2425
2426 for (Diff aDiff : this.diffs) {
2427 switch (aDiff.operation) {
2428 case INSERT:
2429 text.append('+');
2430 break;
2431 case DELETE:
2432 text.append('-');
2433 break;
2434 case EQUAL:
2435 text.append(' ');
2436 break;
2437 }
2438 try {
2439 text.append(URLEncoder.encode(aDiff.text, "UTF-8").replace('+', ' '))
2440 .append("\n");
2441 } catch (UnsupportedEncodingException e) {
2442
2443 throw new Error("This system does not support UTF-8.", e);
2444 }
2445 }
2446 return unescapeForEncodeUriCompatability(text.toString());
2447 }
2448 }
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463 private static String unescapeForEncodeUriCompatability(String str) {
2464 return str.replace("%21", "!").replace("%7E", "~")
2465 .replace("%27", "'").replace("%28", "(").replace("%29", ")")
2466 .replace("%3B", ";").replace("%2F", "/").replace("%3F", "?")
2467 .replace("%3A", ":").replace("%40", "@").replace("%26", "&")
2468 .replace("%3D", "=").replace("%2B", "+").replace("%24", "$")
2469 .replace("%2C", ",").replace("%23", "#");
2470 }
2471 }