1 /* radare - LGPL - Copyright 2009-2010 pancake<nopcode.org> */
4 bdiff.c - efficient binary diff extension for Mercurial
6 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
8 This software may be used and distributed according to the terms of
9 the GNU General Public License, incorporated herein by reference.
11 Based roughly on Python difflib
35 struct hunk *base, *head;
38 static int splitlines(const char *a, int len, struct line **lr)
41 const char *p, *b = a;
42 const char * const plast = a + len - 1;
46 eprintf ("null pointer received\n");
51 i = 1; /* extra line for sentinel */
52 for (p = a; p < a + len; p++)
53 if (*p == '\n' || p == plast)
56 *lr = l = (struct line *)malloc(sizeof(struct line) * i);
60 /* build the line array and calculate hashes */
62 for (p = a; p < a + len; p++) {
63 /* Leonid Yuriev's hash */
64 h = (h * 1664525) + *p + 1013904223;
66 if (*p == '\n' || p == plast) {
77 /* set up a sentinel */
83 static int inline cmp(struct line *a, struct line *b)
85 return a->h != b->h || a->len != b->len || memcmp(a->l, b->l, a->len);
88 static int equatelines(struct line *a, int an, struct line *b, int bn)
90 int i, j, buckets = 1, t, scale;
93 /* build a hash table of the next highest power of 2 */
94 while (buckets < bn + 1)
97 /* try to allocate a large hash table to avoid collisions */
98 for (scale = 4; scale; scale /= 2) {
99 h = (struct pos *)malloc(scale * buckets * sizeof(struct pos));
107 buckets = buckets * scale - 1;
109 /* clear the hash table */
110 for (i = 0; i <= buckets; i++) {
115 /* add lines to the hash table chains */
116 for (i = bn - 1; i >= 0; i--) {
117 /* find the equivalence class */
118 for (j = b[i].h & buckets; h[j].pos != INT_MAX;
119 j = (j + 1) & buckets)
120 if (!cmp(b + i, b + h[j].pos))
123 /* add to the head of the equivalence class */
127 h[j].len++; /* keep track of popularity */
130 /* compute popularity threshold */
131 t = (bn >= 4000) ? bn / 1000 : bn + 1;
133 /* match items in a to their equivalence class in b */
134 for (i = 0; i < an; i++) {
135 /* find the equivalence class */
136 for (j = a[i].h & buckets; h[j].pos != INT_MAX;
137 j = (j + 1) & buckets)
138 if (!cmp(a + i, b + h[j].pos))
141 a[i].e = j; /* use equivalence class for quick compare */
143 a[i].n = h[j].pos; /* point to head of match list */
145 a[i].n = INT_MAX; /* too popular */
148 /* discard hash tables */
153 static int longest_match(struct line *a, struct line *b, struct pos *pos,
154 int a1, int a2, int b1, int b2, int *omi, int *omj)
156 int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;
158 for (i = a1; i < a2; i++) {
159 /* skip things before the current block */
160 for (j = a[i].n; j < b1; j = b[j].n)
163 /* loop through all lines match a[i] in b */
164 for (; j < b2; j = b[j].n) {
165 /* does this extend an earlier match? */
166 if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
167 k = pos[j - 1].len + 1;
173 /* best match so far? */
187 /* expand match to include neighboring popular lines */
188 while (mi - mb > a1 && mj - mb > b1 &&
189 a[mi - mb - 1].e == b[mj - mb - 1].e)
191 while (mi + mk < a2 && mj + mk < b2 &&
192 a[mi + mk].e == b[mj + mk].e)
201 static void recurse(struct line *a, struct line *b, struct pos *pos,
202 int a1, int a2, int b1, int b2, struct hunklist *l)
206 /* find the longest match in this chunk */
207 k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
211 /* and recurse on the remaining chunks on either side */
212 recurse(a, b, pos, a1, i, b1, j, l);
218 recurse(a, b, pos, i + k, a2, j + k, b2, l);
221 static struct hunklist diff(struct line *a, int an, struct line *b, int bn)
228 /* allocate and fill arrays */
229 t = equatelines(a, an, b, bn);
230 pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));
231 /* we can't have more matches than lines in the shorter file */
232 l.head = l.base = (struct hunk *)malloc(sizeof(struct hunk) *
233 ((an<bn ? an:bn) + 1));
235 if (pos && l.base && t) {
236 /* generate the matching block list */
237 recurse(a, b, pos, 0, an, 0, bn, &l);
238 l.head->a1 = l.head->a2 = an;
239 l.head->b1 = l.head->b2 = bn;
245 /* normalize the hunk list, try to push each hunk towards the end */
246 for (curr = l.base; curr != l.head; curr++) {
247 struct hunk *next = curr+1;
253 if (curr->a2 == next->a1)
254 while (curr->a2+shift < an && curr->b2+shift < bn
255 && !cmp(a+curr->a2+shift, b+curr->b2+shift))
257 else if (curr->b2 == next->b1)
258 while (curr->b2+shift < bn && curr->a2+shift < an
259 && !cmp(b+curr->b2+shift, a+curr->a2+shift))
273 // TODO: implement the r_diff_lines // we need to implement r_file_line_at (file, off);
274 R_API int r_diff_buffers_delta(RDiff *d, const ut8 *sa, int la, const ut8 *sb, int lb) {
276 struct line *al, *bl;
277 struct hunklist l = { NULL, NULL };
279 int an, bn, offa, rlen, offb, len = 0;
282 an = splitlines((const char *)sa, la, &al);
283 bn = splitlines((const char*)sb, lb, &bl);
285 eprintf ("bindiff_buffers: Out of memory.\n");
289 l = diff (al, an, bl, bn);
291 eprintf ("bindiff_buffers: Out of memory.\n");
296 for (h = l.base; h != l.head; h++) {
297 if (h->a1 != la || h->b1 != lb) {
298 len = bl[h->b1].l - bl[lb].l;
299 offa = al[la].l - al->l;
300 offb = al[h->a1].l - al->l;
306 dop.a_buf = (ut8 *)al[la].l;
309 /* destination file */
310 dop.b_off = offa; // XXX offb not used??
311 dop.b_buf = (ut8 *)bl[lb].l;
313 if (!d->callback (d, d->user, &dop))
318 //printf ("Remove %d bytes at %d\n", rlen, offa);
319 printf ("r-%d @ 0x%"PFMT64x"\n", rlen, (ut64)offa);
321 printf ("e file.write=true\n"); // XXX
324 printf ("%02x", bl[lb].l[i]);
325 printf (" @ 0x%"PFMT64x"\n", (ut64)offa);