addfile ./LICENSE.txt hunk ./LICENSE.txt 1 +Copyright (c) 2001, 2002 I/NET Inc. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. addfile ./cl-difflib-tests.asd hunk ./cl-difflib-tests.asd 1 +;;; ------------------------------------------------- -*- Mode: LISP -*- +;;; CL-DIFFLIB -- A Lisp library for computing differences between +;;; sequences. +;;; +;;; Copyright 2005 +;;; John Wiseman (jjwiseman@yahoo.com) +;;; 2005-02-02 +;;; +;;; Licensed under the MIT license--see the accompanying LICENSE.txt +;;; file. +;;; +;;; This is the ASDF system definition for the unit tests. + +(in-package :asdf) + +(asdf:operate 'asdf:load-op :cl-difflib) + + +(asdf:defsystem :cl-difflib-tests + :depends-on (:cl-difflib) + :components ((:file "unit-tests"))) + +(defmethod asdf:perform ((o asdf:test-op) (c (eql (find-system :cl-difflib-tests)))) + (or (funcall (intern (symbol-name '#:run-tests) + (find-package '#:difflib-test))) + (error "test-op failed"))) addfile ./cl-difflib.asd hunk ./cl-difflib.asd 1 - +;;; ------------------------------------------------- -*- Mode: LISP -*- +;;; CL-DIFFLIB -- A Lisp library for computing differences between +;;; sequences. +;;; +;;; Copyright 2005 +;;; John Wiseman (jjwiseman@yahoo.com) +;;; 2005-02-02 +;;; +;;; Licensed under the MIT license--see the accompanying LICENSE.txt +;;; file. +;;; +;;; This is the ASDF system definition. + +(in-package :asdf) + +(defsystem :cl-difflib + :name "CL-DIFFLIB" + :author "John Wiseman " + :version "0.1" + :maintainer "John Wiseman " + :licence "MIT" + :description "A Lisp library for computing differences between sequences." + :long-description "A Lisp library for computing differences between sequences. Based on Python's difflib module." + + :components ((:file "package") + (:file "difflib" :depends-on ("package")))) + +(defmethod perform ((o test-op) (c (eql (find-system 'cl-difflib)))) + (oos 'load-op 'cl-difflib-tests) + (oos 'test-op 'cl-difflib-tests :force t)) addfile ./difflib.lisp hunk ./difflib.lisp 1 +;;; -------------------------------------------------------------------- +;;; CL-DIFFLIB -- A Lisp library for computing differences between +;;; sequences. +;;; +;;; Copyright 2005 +;;; John Wiseman (jjwiseman@yahoo.com) +;;; 2005-02-02 +;;; +;;; Licensed under the MIT license--see the accompanying LICENSE.txt +;;; file. +;;; +;;; This is nearly a transcription of Python's difflib module, which +;;; contains the following description of its algorithm: +;;; +;;; The basic algorithm predates, and is a little fancier than, an +;;; algorithm published in the late 1980's by Ratcliff and +;;; Obershelp under the hyperbolic name "gestalt pattern +;;; matching". The basic idea is to find the longest contiguous +;;; matching subsequence that contains no "junk" elements (R-O +;;; doesn't address junk). The same idea is then applied +;;; recursively to the pieces of the sequences to the left and to +;;; the right of the matching subsequence. This does not yield +;;; minimal edit sequences, but does tend to yield matches that +;;; "look right" to people. + +(in-package :difflib) + +;; A couple utility macros + +;; Hey, it's just like Python's enumerate function! +(defmacro enumerate ((index-var elt-var sequence &optional result-form) &body body) + "Iterates over a sequence while keeping track of an index. + + (enumerate (i e '(a b c)) + (format T \"~&~S ~S\" i e)) + => + 1 a + 2 b + 3 c" + (let ((sequence-var (gensym "SEQUENCE"))) + `(let ((,sequence-var ,sequence)) + (dotimes (,index-var (length ,sequence-var) ,result-form) + (let ((,elt-var (elt ,sequence-var ,index-var))) + ,@body))))) + +;; Hey, it's just like Python's range function! +(defmacro do-range ((var start-form end-form &optional result-form) &body body) + "Iterates VAR through the range of integers in [START-FORM, + END-FORM). Returns the value of END-FORM (at the time END-FORM is + evaluated, VAR is bound to the value of END-FORM. + + (do-range (i 10 (length s)) + (print (elt s i)))" + (let ((start-var (gensym)) + (end-var (gensym))) + `(let ((,start-var ,start-form) + (,end-var ,end-form)) + (do ((,var ,start-var (1+ ,var))) + ((>= ,var ,end-var) ,result-form) + ,@body)))) + + + +(defstruct opcode + "A single instruction for modifying sequence A into sequence B, + where TAG has the following possible values and meanings: + + :REPLACE a[i1:i2] should be replaced by b[j1:j2] + :DELETE a[i1:i2] should be deleted + :INSERT b[j2:j2] should be inserted + :EQUAL a[i1:i2] = b[j1:j2]" + tag + i1 + i2 + j1 + j2) + +(defmethod print-object ((self opcode) stream) + (print-unreadable-object (self stream :type T) + (format stream "~S ~S ~S ~S ~S" + (opcode-tag self) + (opcode-i1 self) + (opcode-i2 self) + (opcode-j1 self) + (opcode-j2 self)))) + +(defun opcode-range (opcode) + (with-slots (i1 i2 j1 j2) opcode + (ecase (opcode-tag opcode) + ((:replace :equal) + (values i1 i2 j1 j2)) + ((:delete) + (values i1 i2)) + ((:insert) + (values j1 j2))))) + +(defun opcode= (op1 op2) + "Tests two opcodes for equality." + (and (eq (opcode-tag op1) (opcode-tag op2)) + (multiple-value-bind (op1-lo1 op1-hi1 op1-lo2 op1-hi2) + (opcode-range op1) + (multiple-value-bind (op2-lo1 op2-hi1 op2-lo2 op2-hi2) + (opcode-range op2) + (and (eql op1-lo1 op2-lo1) + (eql op1-hi1 op2-hi1) + (eql op1-lo2 op2-lo2) + (eql op1-hi2 op2-hi2)))))) + + +(defclass sequence-matcher () + (;; User-supplied slots + (a :initform nil :initarg :a :accessor sequence-a) + (b :initform nil :initarg :b :accessor sequence-b) + (junk-function :initform nil :initarg :junk-function :accessor junk-function) + (test-function :initform #'eql :initarg :test-function :accessor test-function) + ;; Intermediate data + (b-junk-function :initform nil :accessor b-junk-function) + (b2j :initform nil :accessor b2j) + (b-popular-function :initform nil :accessor b-popular-function) + ;; Cache slots + (opcodes :initform nil :accessor opcodes) + (matching-blocks :initform nil :accessor matching-blocks) + (full-b-count :initform nil :accessor full-b-count)) + (:documentation "The sequence-matcher class compares pairs of + sequences. The main restriction is that sequence elements must + be hashable (use :test-function to specify the type of + hashtable).")) + + +(defmethod initialize-instance :after ((self sequence-matcher) &key) + (set-sequences self (sequence-a self) (sequence-b self) T)) + + +(defmethod set-sequences ((self sequence-matcher) a b &optional force-p) + (set-sequence-a self a force-p) + (set-sequence-b self b force-p)) + + +(defmethod set-sequence-b ((self sequence-matcher) b &optional force-p) + (unless (and (not force-p) (eq b (sequence-b self))) + (setf (sequence-b self) b) + (setf (matching-blocks self) '()) + (setf (opcodes self) '()) + (setf (full-b-count self) nil) + (chain-b self))) + +(defmethod set-sequence-a ((self sequence-matcher) a &optional force-p) + (unless (and (not force-p) (eq a (sequence-a self))) + (setf (sequence-a self) a) + (setf (matching-blocks self) '()) + (setf (opcodes self) '()))) + + +(defmethod chain-b ((self sequence-matcher)) + (let* ((b (sequence-b self)) + (n (length b)) + (b2j (setf (b2j self) (make-hash-table :test (test-function self)))) + (popular (make-hash-table :test (test-function self)))) + (enumerate (i elt b) + (if (has-key elt b2j) + (let ((indices (gethash elt b2j))) + (if (and (>= n 200) (> (* (length indices) 100) n)) + (progn + (setf (gethash elt popular) T) + ;; Clear indices + (setf (gethash elt b2j) '())) + (setf (gethash elt b2j) (append indices (list i))))) + (setf (gethash elt b2j) (list i)))) + ;; Purge leftover indices for popular elements. + (maphash #'(lambda (elt v) + (declare (ignore v)) + (setf (gethash elt b2j) '())) + popular) + ;; Now b2j.keys() contains elements uniquely, and especially when + ;; the sequence is a string, that's usually a good deal smaller + ;; than len(string). The difference is the number of isjunk calls + ;; saved. + (let ((junk-function (junk-function self)) + (junk (make-hash-table :test (test-function self)))) + (when junk-function + (mapc #'(lambda (hash) + (maphash #'(lambda (elt v) + (declare (ignore v)) + (when (funcall junk-function elt) + (setf (gethash elt junk) T) + (setf (gethash elt hash) '()))) + hash)) + (list popular b2j))) + ;; Now for x in b, isjunk(x) == x in junkdict, but the latter is + ;; much faster. Note too that while there may be a lot of junk in + ;; the sequence, the number of *unique* junk elements is probably + ;; small. So the memory burden of keeping this dict alive is + ;; likely trivial compared to the size of b2j. + (setf (b-junk-function self) + #'(lambda (elt) (has-key elt junk))) + (setf (b-popular-function self) + #'(lambda (elt) (has-key elt popular)))))) + + +(defmethod find-longest-match ((self sequence-matcher) alo ahi blo bhi) + (let ((test-function (test-function self))) + (let ((a (sequence-a self)) + (b (sequence-b self)) + (b2j (b2j self)) + (b-junk-function (b-junk-function self)) + (best-i alo) + (best-j blo) + (best-size 0) + ;; find longest junk-free match + ;; during an iteration of the loop, j2len[j] = length of longest + ;; junk-free match ending with a[i-1] and b[j] + (j2len (make-hash-table :test test-function))) + (do-range (i alo ahi) + ;; look at all instances of a[i] in b; note that because + ;; b2j has no junk keys, the loop is skipped if a[i] is junk + (let ((newj2len (make-hash-table :test test-function))) + (tagbody + (dolist (j (gethash (elt a i) b2j '())) + ;; a[i] matches b[j] + (unless (< j blo) + (when (>= j bhi) + (go continue)) + (let ((k (setf (gethash j newj2len) (+ (gethash (- j 1) j2len 0) 1)))) + (when (> k best-size) + (setf best-i (+ (- i k) 1) + best-j (+ (- j k) 1) + best-size k))))) + continue + (setf j2len newj2len)) + + ;; Extend the best by non-junk elements on each end. In particular, + ;; "popular" non-junk elements aren't in b2j, which greatly speeds + ;; the inner loop above, but also means "the best" match so far + ;; doesn't contain any junk *or* popular non-junk elements. + (loop while (and (> best-i alo) + (> best-j blo) + (not (funcall b-junk-function (elt b (- best-j 1)))) + (funcall test-function (elt a (- best-i 1)) (elt b (- best-j 1)))) + do (decf best-i) + (decf best-j) + (incf best-size)) + (loop while (and (< (+ best-i best-size) ahi) + (< (+ best-j best-size) bhi) + (not (funcall b-junk-function (elt b (+ best-j best-size)))) + (funcall test-function (elt a (+ best-i best-size)) (elt b (+ best-j best-size)))) + do (incf best-size)) + + ;; Now that we have a wholly interesting match (albeit + ;; possibly empty!), we may as well suck up the matching junk + ;; on each side of it too. Can't think of a good reason not + ;; to, and it saves post-processing the (possibly + ;; considerable) expense of figuring out what to do with it. + ;; In the case of an empty interesting match, this is clearly + ;; the right thing to do, because no other kind of match is + ;; possible in the regions. + (loop while (and (> best-i alo) + (> best-j blo) + (funcall b-junk-function (elt b (- best-j 1))) + (funcall test-function (elt a (- best-i 1)) (elt b (- best-j 1)))) + do (decf best-i) + (decf best-j) + (incf best-size)) + (loop while (and (< (+ best-i best-size) ahi) + (< (+ best-j best-size) bhi) + (funcall b-junk-function (elt b (+ best-j best-size))) + (funcall test-function (elt a (+ best-i best-size)) (elt b (+ best-j best-size)))) + do (incf best-size)))) + (values best-i best-j best-size)))) + +(defmethod get-matching-blocks ((self sequence-matcher)) + (if (matching-blocks self) + (matching-blocks self) + (let ((matching-blocks '()) + (la (length (sequence-a self))) + (lb (length (sequence-b self)))) + (setf matching-blocks (helper self 0 la 0 lb matching-blocks)) + (setf matching-blocks (append matching-blocks (list + (list la lb 0)))) + (setf (matching-blocks self) matching-blocks)))) + + +(defmethod helper ((self sequence-matcher) alo ahi blo bhi answer) + (multiple-value-bind (i j k) + (find-longest-match self alo ahi blo bhi) + (let ((x (list i j k))) + ;; a[alo:i] vs b[blo:j] unknown + ;; a[i:i+k] same as b[j:j+k] + ;; a[i+k:ahi] vs b[j+k:bhi] unknown + (when (not (= k 0)) + (when (and (< alo i) (< blo j)) + (setf answer (helper self alo i blo j answer))) + (setf answer (append answer (list x))) + (when (and (< (+ i k) ahi) (< (+ j k) bhi)) + (setf answer (helper self (+ i k) ahi (+ j k) bhi answer)))) + answer))) + +(defmethod get-opcodes ((self sequence-matcher)) + (if (opcodes self) + (opcodes self) + (let ((i 0) + (j 0) + (opcodes '())) + (dolist (block (get-matching-blocks self)) + (destructuring-bind (ai bj size) block + ;; invariant: we've pumped out correct diffs to change + ;; a[:i] into b[:j], and the next matching block is + ;; a[ai:ai+size] == b[bj:bj+size]. So we need to pump out + ;; a diff to change a[i:ai] into b[j:bj], pump out the + ;; matching block, and move (i,j) beyond the match + (let ((tag nil)) + (cond ((and (< i ai) (< j bj)) + (setf tag :replace)) + ((< i ai) + (setf tag :delete)) + ((< j bj) + (setf tag :insert))) + (when tag + (let ((opcode (make-opcode :tag tag :i1 i :i2 ai :j1 j :j2 bj))) + (push opcode opcodes))) + (setf i (+ ai size)) + (setf j (+ bj size)) + ;; the list of matching blocks is terminated by a + ;; sentinel with size 0 + (when (not (= size 0)) + (push (make-opcode :tag :equal :i1 ai :i2 i :j1 bj :j2 j) opcodes))))) + (setf (opcodes self) (reverse opcodes))))) + + + +(defun group-opcodes (opcodes n) + (let ((o (first opcodes))) + (when (eq (opcode-tag o) :equal) + (setf opcodes + (append (list (make-opcode :tag (opcode-tag o) + :i1 (max (opcode-i1 o) (- (opcode-i2 o) n)) + :i2 (opcode-i2 o) + :j1 (max (opcode-j1 o) (- (opcode-j2 o) n)) + :j2 (opcode-j2 o))) + (rest opcodes))))) + (let ((o (car (last opcodes)))) + (when (eq (opcode-tag o) :equal) + (setf opcodes + (append + (butlast opcodes) + (list (make-opcode :tag (opcode-tag o) + :i1 (opcode-i1 o) + :i2 (min (opcode-i2 o) + (+ (opcode-i1 o) n)) + :j1 (opcode-j1 o) + :j2 (min (opcode-j2 o) + (+ (opcode-j1 o) n)))))))) + (let ((nn (* n 2)) + (group '()) + (groups '())) + (dolist (o opcodes) + (with-slots (tag i1 i2 j1 j2) o + (if (and (eq tag :equal) (> (- i2 i1) nn)) + (progn + (push (make-opcode :tag tag :i1 i1 :i2 (min i2 (+ i1 n)) :j1 j1 :j2 (min j2 (+ j1 n))) + group) + (push (reverse group) groups) + (setf group '()) + (push (make-opcode :tag tag :i1 (max i1 (- i2 n)) :i2 i2 + :j1 (max j1 (- j2 n)) :j2 j2) + group)) + (push o group)))) + (when (and group + (not (and (= (length group) 1) + (eq (opcode-tag (first group)) :equal)))) + (push (reverse group) groups)) + (reverse groups))) + +(defmethod similarity-ratio ((self sequence-matcher)) + "Returns a measure of the sequences' similarity (a value in [0, + 1])." + (let ((matches (reduce #'(lambda (sum trip) + (+ sum trip)) + (get-matching-blocks self) + :key #'(lambda (triple) (elt triple 2))))) + (calculate-similarity-ratio matches (+ (length (sequence-a self)) + (length (sequence-b self)))))) + + +(defun calculate-similarity-ratio (matches length) + (if length + (/ (* 2 matches) length) + 1)) + +(defmethod quick-similarity-ratio ((self sequence-matcher)) + (unless (full-b-count self) + (setf (full-b-count self) + (make-hash-table :test (test-function self)))) + (let ((full-b-count (full-b-count self)) + (a (sequence-a self)) + (b (sequence-b self))) + (dotimes (i (length b)) + (let ((e (elt b i))) + (incf (gethash e full-b-count 0)))) + (let ((avail (make-hash-table :test (test-function self)))) + (let ((matches 0)) + (dotimes (i (length a)) + (let ((e (elt a i))) + (let ((numb (multiple-value-bind (count has-key) + (gethash e avail) + (if has-key + count + (gethash e full-b-count 0))))) + (setf (gethash e avail) (- numb 1)) + (when (> numb 0) + (incf matches))))) + (calculate-similarity-ratio matches (+ (length a) (length b))))))) + +(defmethod very-quick-similarity-ratio ((self sequence-matcher)) + (let ((la (length (sequence-a self))) + (lb (length (sequence-b self)))) + (calculate-similarity-ratio (min la lb) (+ la lb)))) + +(defun get-close-matches (word possibilities &key (max 3) (cutoff 0.6)) + (let ((matcher (make-instance 'sequence-matcher)) + (matches '())) + (set-sequence-b matcher word) + (dolist (p possibilities) + (set-sequence-a matcher p) + ;; Just as an example of the benefit of the approximate ratio + ;; functions, when comparing the string "RUN" to the symbol + ;; names of all symbols external to the COMMON-LISP package, the + ;; performance is as follows: + ;; + ;; Optimization Time Memory + ;; ---------------------------------------------------------------- + ;; similarity-ratio 1269 15,291,880 + ;; quick-similarity-ratio 165 (0.13) 1,074,712 (0.07) + ;; very-quick-similarity-ratio 54 (0.04) 336,456 (0.02) + (when (and (>= (very-quick-similarity-ratio matcher) cutoff) + (>= (quick-similarity-ratio matcher) cutoff) + (>= (similarity-ratio matcher) cutoff)) + (push (cons p (similarity-ratio matcher)) matches))) + (setf matches (sort matches #'> :key #'cdr)) + (mapcar #'car + (subseq matches 0 (min (length matches) max))))) + + +(defun has-key (key hash) + "Checks whether a key value is present in a hash table." + (multiple-value-bind (val in-p) + (gethash key hash) + (declare (ignore val)) + in-p)) + + + +#| +(defclass differ () + ((line-junk-function :initform nil :initarg :line-junk-function :accessor line-junk-function) + (char-junk-function :initform nil :initarg :char-junk-function :accessor char-junk-function))) + +(defmethod compare ((self differ) a b stream) + (let ((matcher (make-instance 'sequence-matcher + :a a + :b b + :junk-function (line-junk-function self)))) + (let ((opcodes (get-opcodes matcher))) + (dolist (op opcodes) + (ecase (opcode-tag op) + ((:replace) + (fancy-replace stream a b op)) + ((:delete) + (dump stream #\- a op)) + ((:insert) + (dump stream #\+ b op)) + ((:equal) + (dump stream #\space a op))))))) + +(defun dump (stream char seq op) + (multiple-value-bind (lo hi) + (opcode-range op) + (do-range (i lo hi) + (format stream "~&~A ~A" char (elt seq i))))) +|# + + +(defun unified-diff (stream a b &key from-file to-file from-file-date to-file-date (n 3) + (test-function #'eql)) + (let ((started NIL) + (matcher (make-instance 'sequence-matcher + :a a + :b b + :test-function test-function))) + (dolist (group (group-opcodes (get-opcodes matcher) n)) + (unless started + (format stream "~&--- ~A ~A" (or from-file "") (or from-file-date "")) + (format stream "~&+++ ~A ~A" (or to-file "") (or to-file-date "")) + (setf started T)) + (let ((i1 (opcode-i1 (elt group 0))) + (i2 (opcode-i2 (elt group (- (length group) 1)))) + (j1 (opcode-j1 (elt group 0))) + (j2 (opcode-j2 (elt group (- (length group) 1))))) + (format stream "~&@@ -~D,~D +~D,~D @@" (+ i1 1) (- i2 i1) (+ j1 1) (- j2 j1)) + (dolist (op group) + (with-slots (tag i1 i2 j1 j2) op + (when (eq tag :equal) + (map 'nil + #'(lambda (line) + (format stream "~& ~A" line)) + (subseq a i1 i2))) + (when (member tag '(:replace :delete)) + (map 'nil + #'(lambda (line) + (format stream "~&-~A" line)) + (subseq a i1 i2))) + (when (member tag '(:replace :insert)) + (map 'nil + #'(lambda (line) + (format stream "~&+~A" line)) + (subseq b j1 j2)))))))) + (values)) + +(defun context-diff (stream a b &key from-file to-file from-file-date to-file-date (n 3) + (test-function #'eql)) + (flet ((some-opcodes-contain (ops tags) + (find-if #'(lambda (op) + (member (opcode-tag op) tags)) + ops))) + (let ((started NIL) + (prefix-map '((:insert . "+") + (:delete . "-") + (:replace . "!") + (:equal . " "))) + (matcher (make-instance 'sequence-matcher + :a a + :b b + :test-function test-function))) + (dolist (group (group-opcodes (get-opcodes matcher) n)) + (when (not started) + (format stream "~&*** ~A ~A" (or from-file "") (or from-file-date "")) + (format stream "~&--- ~A ~A" (or to-file "") (or to-file-date "")) + (setf started T)) + (format stream "~&***************" ) + (let ((first-op (first group)) + (last-op (car (last group)))) + (if (>= (- (opcode-i2 last-op) + (opcode-i1 first-op)) + 2) + (format stream "~&*** ~D,~D ***" + (+ (opcode-i1 first-op) 1) + (opcode-i2 last-op)) + (format stream "~&*** ~D ***" (opcode-i2 last-op))) + (when (some-opcodes-contain group '(:replace :delete)) + (dolist (opcode group) + (unless (eq (opcode-tag opcode) :insert) + (map 'nil + #'(lambda (line) + (format stream "~&~A ~A" + (cdr (assoc (opcode-tag opcode) prefix-map)) + line)) + (subseq a (opcode-i1 opcode) (opcode-i2 opcode)))))) + (if (>= (- (opcode-j2 last-op) + (opcode-j1 first-op)) + 2) + (format stream "~&--- ~D,~D ----" + (+ (opcode-j1 first-op) 1) + (opcode-j2 last-op)) + (format stream "~&--- ~D ----" (opcode-j2 last-op))) + (when (some-opcodes-contain group '(:replace :insert)) + (dolist (opcode group) + (unless (eq (opcode-tag opcode) :delete) + (map 'nil + #'(lambda (line) + (format stream "~&~A ~A" + (cdr (assoc (opcode-tag opcode) prefix-map)) + line)) + (subseq b (opcode-j1 opcode) (opcode-j2 opcode)))))))))) + (values)) addfile ./package.lisp hunk ./package.lisp 1 - +;;; CL-DIFFLIB -- A Lisp library for computing differences between +;;; sequences. +;;; +;;; Copyright 2005 +;;; John Wiseman (jjwiseman@yahoo.com) +;;; 2005-02-02 +;;; +;;; Licensed under the MIT license--see the accompanying LICENSE.txt +;;; file. +;;; +;;; This is the package definition. + +(cl:defpackage "DIFFLIB" + (:use #:common-lisp) + (:export #:opcode + #:make-opcode + #:opcode-p + #:opcode-tag + #:opcode-i1 + #:opcode-i2 + #:opcode-j1 + #:opcode-j2 + #:opcode-range + #:opcode= + + #:sequence-matcher + + #:sequence-a + #:sequence-b + #:junk-function + #:test-function + #:set-sequences + #:set-sequence-a + #:set-sequence-b + + #:get-opcodes + #:group-opcodes + #:similarity-ratio + #:quick-similarity-ratio + #:very-quick-similarity-ratio + + #:get-close-matches + #:unified-diff + #:context-diff)) + addfile ./unit-tests.lisp hunk ./unit-tests.lisp 1 +(cl:defpackage "DIFFLIB-TEST" + (:use #:common-lisp) + (:export #:run-tests)) + +(in-package #:difflib-test) + + +;; Some simple unit test utilities + +(defvar *passed-tests* '()) +(defvar *failed-tests* '()) + +(defmacro test (name expr expected-value &optional (comparator '(function equal)) + failure-code) + `(unless (test-aux ',name ',expr ,expr ,expected-value ,comparator) + ,failure-code)) + +(defmacro condition-test (name expr expected-condition &optional (comparator '(function typep)) + failure-code) + (let ((completed-var (gensym "COMPLETED")) + (condition-var (gensym "CONDITION")) + (value-var (gensym "VALUE"))) + `(let ((,completed-var NIL)) + (multiple-value-bind (,value-var ,condition-var) + (ignore-errors + ,expr + (setf ,completed-var T)) + (unless (condition-test-aux ',name ',expr ,value-var (not ,completed-var) + ,condition-var ,expected-condition ,comparator) + ,failure-code))))) + +(defun condition-test-aux (name expr value error-p error expected-error comparator) + (if error-p + (let ((got-expected-p (funcall comparator error expected-error))) + (if got-expected-p + (test-success name expr error expected-error) + (test-failure name expr error expected-error)) + got-expected-p) + (test-failure name expr value expected-error))) + +(defun test-aux (name expr value expected-value comparator) + (let ((got-expected-p (funcall comparator value expected-value))) + (if got-expected-p + (test-success name expr value expected-value) + (test-failure name expr value expected-value)) + got-expected-p)) + +(defun test-failure (name expr value expected-value) + (assert (not (assoc name *failed-tests*))) + (assert (not (assoc name *passed-tests*))) + (push (cons name (list expr value expected-value)) *failed-tests*) + (warn "FAILURE: Test ~S: ~S evaluated to ~S instead of ~S." + name expr value expected-value) + nil) + +(defun test-success (name expr value expected-value) + (assert (not (assoc name *failed-tests*))) + (assert (not (assoc name *passed-tests*))) + (push (cons name (list expr value expected-value)) *passed-tests*) + (format T "~&Test ~S passed.~%" name)) + +(defun begin-tests () + (setf *passed-tests* '()) + (setf *failed-tests* '())) + +(defun end-tests () + (let ((num-failed (length *failed-tests*)) + (num-passed (length *passed-tests*))) + (format T "~&-----~&Testing complete, ~S of ~S tests failed (~,2F%)" + num-failed + (+ num-failed num-passed) + (* 100.0 (/ num-failed (+ num-failed num-passed)))) + (when (= num-failed 0) + (format T "~&ALL TESTS PASSED.")))) + + +;; Top level driver + +(defun run-tests () + (begin-tests) + (unwind-protect + (progn + (test-opcode-equality) + (test-get-opcodes) + (test-get-grouped-opcodes) + (test-similarity-ratio) + (test-close-matches) + (test-unified-diff) + (test-context-diff)) + (end-tests))) + + +;; Utility functions + +(defun diff-opcodes (a b &key (test-function #'eql) matcher) + (if matcher + (difflib:set-sequences matcher a b) + (setf matcher (make-instance 'difflib:sequence-matcher + :a a + :b b + :test-function test-function))) + (difflib:get-opcodes matcher)) + +(defun grouped-opcodes (a b n &key (test-function #'eql) matcher) + (if matcher + (difflib:set-sequences matcher a b) + (setf matcher (make-instance 'difflib:sequence-matcher + :a a + :b b + :test-function test-function))) + (difflib:group-opcodes (difflib:get-opcodes matcher) n)) + +(defun diff-similarity-ratio (a b &key (test-function #'eql) junk matcher) + (if matcher + (difflib:set-sequences matcher a b) + (setf matcher (make-instance 'difflib:sequence-matcher + :a a + :b b + :test-function test-function + :junk-function junk))) + (difflib:similarity-ratio matcher)) + +(defun diff-quick-similarity-ratio (a b &key (test-function #'eql) junk matcher) + (if matcher + (difflib:set-sequences matcher a b) + (setf matcher (make-instance 'difflib:sequence-matcher + :a a + :b b + :test-function test-function + :junk-function junk))) + (difflib:quick-similarity-ratio matcher)) + +(defun diff-very-quick-similarity-ratio (a b &key (test-function #'eql) junk matcher) + (if matcher + (difflib:set-sequences matcher a b) + (setf matcher (make-instance 'difflib:sequence-matcher + :a a + :b b + :test-function test-function + :junk-function junk))) + (difflib:very-quick-similarity-ratio matcher)) + +(defun opcode (tag i1 i2 j1 j2) + (difflib:make-opcode :tag tag :i1 i1 :i2 i2 :j1 j1 :j2 j2)) + +(defun opcodes-from-list (list) + (mapcar #'(lambda (spec) (apply #'opcode spec)) + list)) + +(defun opcode-groups-from-lists (lists) + (mapcar #'opcodes-from-list lists)) + +(defun opcodes-equal (l1 l2) + (and (= (length l1) (length l2)) + (every #'difflib:opcode= l1 l2))) + +(defun opcode-groups-equal (l1 l2) + (every #'opcodes-equal l1 l2)) + +(defun approx= (a b) + (< (abs (- a b)) .001)) + + + +;; The tests themselves + +(defun test-opcode-equality () + (test opcode-equality-1 + (difflib:make-opcode :tag :replace :i1 1 :i2 2 :j1 3 :j2 4) + (difflib:make-opcode :tag :replace :i1 1 :i2 2 :j1 3 :j2 4) + #'difflib:opcode=) + (test opcode-equality-2 + (difflib:make-opcode :tag :delete :i1 1 :i2 2 :j1 3 :j2 4) + (difflib:make-opcode :tag :delete :i1 1 :i2 2 :j1 5 :j2 6) + #'difflib:opcode=) + (test opcode-equality-3 + (difflib:make-opcode :tag :insert :i1 1 :i2 2 :j1 3 :j2 4) + (difflib:make-opcode :tag :insert :i1 5 :i2 6 :j1 3 :j2 4) + #'difflib:opcode=) + (test opcode-equality-4 + (difflib:make-opcode :tag :equal :i1 1 :i2 2 :j1 3 :j2 4) + (difflib:make-opcode :tag :equal :i1 1 :i2 2 :j1 3 :j2 4) + #'difflib:opcode=) + (test opcode-equality-5 + (difflib:opcode= (difflib:make-opcode :tag :replace :i1 1 :i2 2 :j1 3 :j2 4) + (difflib:make-opcode :tag :replace :i1 5 :i2 2 :j1 3 :j2 4)) + NIL) + (test opcode-equality-6 + (difflib:opcode= (difflib:make-opcode :tag :replace :i1 1 :i2 2 :j1 3 :j2 4) + (difflib:make-opcode :tag :replace :i1 5 :i2 2 :j1 5 :j2 4)) + NIL) + (test opcode-equality-7 + (difflib:opcode= (difflib:make-opcode :tag :replace :i1 1 :i2 2 :j1 3 :j2 4) + (difflib:make-opcode :tag :replace :i1 5 :i2 2 :j1 3 :j2 4)) + NIL) + (test opcode-equality-9 + (difflib:opcode= (difflib:make-opcode :tag :delete :i1 1 :i2 2 :j1 3 :j2 4) + (difflib:make-opcode :tag :delete :i1 5 :i2 2 :j1 3 :j2 4)) + NIL) + (test opcode-equality-10 + (difflib:opcode= (difflib:make-opcode :tag :delete :i1 1 :i2 2 :j1 3 :j2 4) + (difflib:make-opcode :tag :delete :i1 1 :i2 5 :j1 3 :j2 4)) + NIL) + (test opcode-equality-11 + (difflib:opcode= (difflib:make-opcode :tag :insert :i1 1 :i2 2 :j1 3 :j2 4) + (difflib:make-opcode :tag :insert :i1 1 :i2 2 :j1 5 :j2 4)) + NIL) + (test opcode-equality-12 + (difflib:opcode= (difflib:make-opcode :tag :insert :i1 1 :i2 2 :j1 3 :j2 4) + (difflib:make-opcode :tag :insert :i1 1 :i2 2 :j1 3 :j2 5)) + NIL) + (test opcode-equality-13 + (difflib:opcode= (difflib:make-opcode :tag :equal :i1 5 :i2 2 :j1 3 :j2 4) + (difflib:make-opcode :tag :equal :i1 1 :i2 2 :j1 3 :j2 4)) + NIL) + (test opcode-equality-14 + (difflib:opcode= (difflib:make-opcode :tag :equal :i1 1 :i2 6 :j1 3 :j2 4) + (difflib:make-opcode :tag :equal :i1 1 :i2 2 :j1 3 :j2 4)) + NIL) + (test opcode-equality-15 + (difflib:opcode= (difflib:make-opcode :tag :equal :i1 1 :i2 2 :j1 5 :j2 4) + (difflib:make-opcode :tag :equal :i1 1 :i2 2 :j1 3 :j2 4)) + NIL) + (test opcode-equality-16 + (difflib:opcode= (difflib:make-opcode :tag :equal :i1 1 :i2 2 :j1 3 :j2 5) + (difflib:make-opcode :tag :equal :i1 1 :i2 2 :j1 3 :j2 4)) + NIL) + (test opcode-equality-17 + (difflib:opcode= (difflib:make-opcode :tag :replace :i1 1 :i2 2 :j1 3 :j2 4) + (difflib:make-opcode :tag :equal :i1 1 :i2 2 :j1 3 :j2 4)) + NIL)) + + +(defun test-get-opcodes () + (test opcodes-1 + (diff-opcodes "123456789" + "12346789") + (opcodes-from-list '((:equal 0 4 0 4) + (:delete 4 5 4 4) + (:equal 5 9 4 8))) + #'opcodes-equal) + (test opcodes-2 + (diff-opcodes "how i enjoy the music" + "how i like the music") + (opcodes-from-list '((:equal 0 6 0 6) + (:insert 6 6 6 9) + (:equal 6 7 9 10) + (:delete 7 11 10 10) + (:equal 11 21 10 20))) + #'opcodes-equal) + (test opcodes-3 + (diff-opcodes "living insuide a tomato" + "living inside a tomato") + (opcodes-from-list '((:equal 0 10 0 10) + (:delete 10 11 10 10) + (:equal 11 23 10 22))) + #'opcodes-equal) + (test opcodes-4 + (diff-opcodes "123456789" + "987654321") + (opcodes-from-list '((:insert 0 0 0 8) + (:equal 0 1 8 9) + (:delete 1 9 9 9))) + #'opcodes-equal) + (test opcodes-5 + (diff-opcodes "running into a mountain" + "ruining my mountain boy") + (opcodes-from-list '((:equal 0 2 0 2) + (:replace 2 3 2 3) + (:equal 3 8 3 8) + (:replace 8 14 8 10) + (:equal 14 23 10 19) + (:insert 23 23 19 23))) + #'opcodes-equal) + (test opcodes-6 + (diff-opcodes "one two three four five six seven" + "big three two four five apricot seven") + (opcodes-from-list '((:replace 0 7 0 3) + (:equal 7 13 3 9) + (:insert 13 13 9 13) + (:equal 13 24 13 24) + (:replace 24 25 24 27) + (:equal 25 26 27 28) + (:replace 26 27 28 31) + (:equal 27 33 31 37))) + #'opcodes-equal) + (test opcodes-7 + (diff-opcodes '(1 2 3 4 5 6 7 8 9) + '(1 2 3 4 6 7 8 9)) + (diff-opcodes "123456789" + "12346789") + #'opcodes-equal) + (test opcodes-8 + (diff-opcodes '(1 2 3 4 5 6 7 8 9) + '(1 2 3 4 6 7 8 9)) + (diff-opcodes #(1 2 3 4 5 6 7 8 9) + #(1 2 3 4 6 7 8 9)) + #'opcodes-equal) + (let ((s1 (make-array (list 1000)))) + (dotimes (i (length s1)) + (setf (elt s1 i) i)) + (let ((s2 (copy-seq s1))) + (setf (elt s2 500) 5) + (test opcodes-9 + (diff-opcodes s1 s2) + (opcodes-from-list '((:EQUAL 0 500 0 500) + (:REPLACE 500 501 500 501) + (:EQUAL 501 1000 501 1000))) + #'opcodes-equal))) + (let ((s1 (make-array (list 1000) :initial-element 1))) + (let ((s2 (copy-seq s1))) + (setf (elt s2 500) 5) + (test opcodes-10 + (diff-opcodes s1 s2) + (opcodes-from-list '((:EQUAL 0 500 0 500) (:REPLACE 500 1000 500 1000))) + #'opcodes-equal))) + (let ((matcher (make-instance 'difflib:sequence-matcher))) + (test opcodes-11 + (diff-opcodes "123456789" + "12346789" + :matcher matcher) + (opcodes-from-list '((:equal 0 4 0 4) + (:delete 4 5 4 4) + (:equal 5 9 4 8))) + #'opcodes-equal) + (test opcodes-12 + (diff-opcodes "how i enjoy the music" + "how i like the music" + :matcher matcher) + (opcodes-from-list '((:equal 0 6 0 6) + (:insert 6 6 6 9) + (:equal 6 7 9 10) + (:delete 7 11 10 10) + (:equal 11 21 10 20))) + #'opcodes-equal))) + + +(defun test-get-grouped-opcodes () + (test grouped-opcodes-1 + (grouped-opcodes '("1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" + "13" "14" "15" "16" "17" "18" "19" "20" "21" "22" + "23" "24" "25" "26" "27" "28" "29" "30" "31" "32" + "33" "34" "35" "36" "37" "38" "39") + '("1" "2" "3" "4" "5" "6" "7" "8" "i" "9" "10" "11" + "12" "13" "14" "15" "16" "17" "18" "19" "20x" "21" + "22" "28" "29" "30" "31" "32" "33" "34" "35y" "36" + "37" "38" "39") + 3 + :test-function #'equal) + (opcode-groups-from-lists '(((:equal 5 8 5 8) (:insert 8 8 8 9) (:equal 8 11 9 12)) + ((:equal 16 19 17 20) (:replace 19 20 20 21) + (:equal 20 22 21 23) (:delete 22 27 23 23) (:equal 27 30 23 26)) + ((:equal 31 34 27 30) (:replace 34 35 30 31) (:equal 35 38 31 34)))) + #'opcode-groups-equal)) + + +(defun test-similarity-ratio () + (test ratio-1 + (diff-similarity-ratio "abcd" "bcde") + 0.75 + #'approx=) + (test ratio-2 + (diff-similarity-ratio "private Thread currentThread;" + "private volatile Thread currentThread;" + :junk #'(lambda (c) (eql c #\space))) + 0.866 + #'approx=) + (test ratio-3 + (diff-quick-similarity-ratio "abcd" "bcde") + 0.75 + #'approx=) + (test ratio-4 + (diff-very-quick-similarity-ratio "abcd" "bcde") + 1 + #'approx=)) + + +(defun test-close-matches () + (test close-matches-1 + (difflib:get-close-matches "appel" '("ape" "apple" "peach" "puppy")) + '("apple" "ape") + #'equal)) + + +(defun test-unified-diff () + (test unified-diff-1 + (with-output-to-string (s) + (difflib:unified-diff s + '("1" "2" "3" "4" "5") + '("1" "2" "8" "7" "5" "6") + :test-function #'equal + :from-file "Original.txt" + :from-file-date "Wed Feb 02 21:28:00 2005" + :to-file "Modified.txt" + :to-file-date "Wed Feb 02 21:28:01 2005")) + (with-output-to-string (s) + (dolist (line '("--- Original.txt Wed Feb 02 21:28:00 2005" + "+++ Modified.txt Wed Feb 02 21:28:01 2005" + "@@ -1,5 +1,6 @@" + " 1" + " 2" + "-3" + "-4" + "+8" + "+7" + " 5" + "+6")) + (format s "~&~A" line))) + #'string=)) + + +(defun test-context-diff () + (test context-diff-1 + (with-output-to-string (s) + (difflib:context-diff s + '("one" "two" "three" "four") + '("zero" "one" "tree" "four") + :test-function #'equal + :from-file "Original.txt" + :from-file-date "Wed Feb 02 21:28:00 2005" + :to-file "Modified.txt" + :to-file-date "Wed Feb 02 21:28:01 2005")) + (with-output-to-string (s) + (dolist (line '("*** Original.txt Wed Feb 02 21:28:00 2005" + "--- Modified.txt Wed Feb 02 21:28:01 2005" + "***************" + "*** 1,4 ***" + " one" + "! two" + "! three" + " four" + "--- 1,4 ----" + "+ zero" + " one" + "! tree" + " four")) + (format s "~&~A" line))) + #'string=))