summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--examples/solv.c2
-rw-r--r--ext/pool_fileconflicts.c723
-rw-r--r--ext/pool_fileconflicts.h4
-rw-r--r--tools/findfileconflicts.c18
4 files changed, 633 insertions, 114 deletions
diff --git a/examples/solv.c b/examples/solv.c
index 3f4156d..5d8e31e 100644
--- a/examples/solv.c
+++ b/examples/solv.c
@@ -3175,7 +3175,7 @@ rerunsolver:
fcstate.newpkgscnt = newpkgs;
fcstate.checkq = &checkq;
fcstate.newpkgsfps = newpkgsfps;
- pool_findfileconflicts(pool, &checkq, newpkgs, &conflicts, &fileconflict_cb, &fcstate);
+ pool_findfileconflicts(pool, &checkq, newpkgs, &conflicts, FINDFILECONFLICTS_USE_SOLVABLEFILELIST | FINDFILECONFLICTS_CHECK_DIRALIASING | FINDFILECONFLICTS_USE_ROOTDIR, &fileconflict_cb, &fcstate);
fcstate.rpmstate = rpm_state_free(fcstate.rpmstate);
if (conflicts.count)
{
diff --git a/ext/pool_fileconflicts.c b/ext/pool_fileconflicts.c
index 2a102a7..63847b5 100644
--- a/ext/pool_fileconflicts.c
+++ b/ext/pool_fileconflicts.c
@@ -7,6 +7,9 @@
#include <stdio.h>
#include <sys/stat.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
#include "pool.h"
#include "repo.h"
@@ -17,6 +20,7 @@
struct cbdata {
Pool *pool;
int create;
+ int aliases;
Queue lookat; /* conflict candidates */
Queue lookat_dir; /* not yet conflicting directories */
@@ -38,9 +42,30 @@ struct cbdata {
Id idx; /* index of package we're looking at */
Id hx; /* used in findfileconflicts2_cb, limit to files matching hx */
+ Id dirid; /* used in findfileconflicts2_cb, limit to dirs matching dirid */
+ Id dirhash; /* used in findfileconflicts2_cb, limit to dirs matching dirhash */
+
Queue files;
unsigned char *filesspace;
unsigned int filesspacen;
+
+ Hashtable normap;
+ Hashval normapn;
+ unsigned int normapused;
+ Queue norq;
+
+ Hashtable statmap;
+ Hashval statmapn;
+ unsigned int statmapused;
+
+ int usestat;
+ int statsmade;
+
+ const char *rootdir;
+ int rootdirl;
+
+ char *canonspace;
+ int canonspacen;
};
#define FILESSPACE_BLOCK 255
@@ -106,8 +131,7 @@ finddirs_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
return;
cbdata->dirmap[2 * h] = hx;
cbdata->dirmap[2 * h + 1] = idx;
- cbdata->dirmapused++;
- if (cbdata->dirmapused * 2 > cbdata->dirmapn)
+ if (++cbdata->dirmapused * 2 > cbdata->dirmapn)
cbdata->dirmap = growhash(cbdata->dirmap, &cbdata->dirmapn);
return;
}
@@ -164,11 +188,9 @@ findfileconflicts_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
cbdata->lastdirhash = strnhash(fn, dp - fn);
}
dhx = cbdata->lastdirhash;
-#if 1
/* this mirrors the "if (!hx) hx = strlen(fn) + 1" in finddirs_cb */
if (!isindirmap(cbdata, dhx ? dhx : dp - fn + 1))
return;
-#endif
hx = strhash_cont(dp, dhx);
if (!hx)
hx = strlen(fn) + 1;
@@ -191,8 +213,7 @@ findfileconflicts_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
return;
cbdata->cflmap[2 * h] = hx;
cbdata->cflmap[2 * h + 1] = (isdir ? ~idx : idx);
- cbdata->cflmapused++;
- if (cbdata->cflmapused * 2 > cbdata->cflmapn)
+ if (++cbdata->cflmapused * 2 > cbdata->cflmapn)
cbdata->cflmap = growhash(cbdata->cflmap, &cbdata->cflmapn);
return;
}
@@ -212,20 +233,367 @@ findfileconflicts_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
/* dump all delayed directory hits for hx */
for (i = 0; i < cbdata->lookat_dir.count; i += 2)
if (cbdata->lookat_dir.elements[i] == hx)
- queue_push2(&cbdata->lookat, hx, cbdata->lookat_dir.elements[i + 1]);
+ {
+ queue_push2(&cbdata->lookat, hx, cbdata->lookat_dir.elements[i + 1]);
+ queue_push2(&cbdata->lookat, 0, 0);
+ }
}
else if (oidx == idx)
return; /* no conflicts with ourself, please */
queue_push2(&cbdata->lookat, hx, oidx);
+ queue_push2(&cbdata->lookat, 0, 0);
queue_push2(&cbdata->lookat, hx, idx);
+ queue_push2(&cbdata->lookat, 0, 0);
}
-static inline void
-addfilesspace(struct cbdata *cbdata, unsigned char *data, int len)
+/* same as findfileconflicts_cb, but
+ * - hashes with just the basename
+ * - sets idx in a map instead of pushing to lookat
+ * - sets the hash element to -1 if there may be a conflict
+ */
+static void
+findfileconflicts_basename_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
{
+ struct cbdata *cbdata = cbdatav;
+ int isdir = S_ISDIR(info->mode);
+ const char *dp;
+ Id idx, oidx;
+ Id hx, qx;
+ Hashval h, hh;
+
+ idx = cbdata->idx;
+
+ if (!info->dirlen)
+ return;
+ dp = fn + info->dirlen;
+ hx = strhash(dp);
+ if (!hx)
+ hx = strlen(fn) + 1;
+
+ h = hx & cbdata->cflmapn;
+ hh = HASHCHAIN_START;
+ for (;;)
+ {
+ qx = cbdata->cflmap[2 * h];
+ if (!qx)
+ break;
+ if (qx == hx)
+ break;
+ h = HASHCHAIN_NEXT(h, hh, cbdata->cflmapn);
+ }
+ if (!qx)
+ {
+ /* a miss */
+ if (!cbdata->create)
+ return;
+ cbdata->cflmap[2 * h] = hx;
+ cbdata->cflmap[2 * h + 1] = (isdir ? -idx - 2 : idx);
+ if (++cbdata->cflmapused * 2 > cbdata->cflmapn)
+ cbdata->cflmap = growhash(cbdata->cflmap, &cbdata->cflmapn);
+ return;
+ }
+ oidx = cbdata->cflmap[2 * h + 1];
+ if (oidx < -1)
+ {
+ int i;
+ if (isdir)
+ {
+ /* both are directories. delay the conflict, keep oidx in slot */
+ queue_push2(&cbdata->lookat_dir, hx, idx);
+ return;
+ }
+ oidx = -idx - 2;
+ /* now have file, had directories before. */
+ cbdata->cflmap[2 * h + 1] = oidx; /* make it a file */
+ /* dump all delayed directory hits for hx */
+ for (i = 0; i < cbdata->lookat_dir.count; i += 2)
+ if (cbdata->lookat_dir.elements[i] == hx)
+ MAPSET(&cbdata->idxmap, cbdata->lookat_dir.elements[i + 1]);
+ }
+ else if (oidx == idx)
+ return; /* no conflicts with ourself, please */
+ if (oidx >= 0)
+ MAPSET(&cbdata->idxmap, oidx);
+ MAPSET(&cbdata->idxmap, idx);
+ if (oidx != -1)
+ cbdata->cflmap[2 * h + 1] = -1;
+}
+
+static inline Id
+addfilesspace(struct cbdata *cbdata, int len)
+{
+ unsigned int off = cbdata->filesspacen;
cbdata->filesspace = solv_extend(cbdata->filesspace, cbdata->filesspacen, len, 1, FILESSPACE_BLOCK);
- memcpy(cbdata->filesspace + cbdata->filesspacen, data, len);
cbdata->filesspacen += len;
+ return off;
+}
+
+static Id
+unifywithstat(struct cbdata *cbdata, Id diroff, int dirl)
+{
+ struct stat stb;
+ int i;
+ Hashval h, hh;
+ Id hx, qx;
+ Id nspaceoff;
+ unsigned char statdata[16 + sizeof(stb.st_dev) + sizeof(stb.st_ino)];
+
+ if (dirl > 1 && cbdata->filesspace[diroff + dirl - 1] == '/')
+ cbdata->filesspace[diroff + dirl - 1] = 0;
+ cbdata->statsmade++;
+ i = stat((char *)cbdata->filesspace + diroff, &stb);
+ if (dirl > 1 && cbdata->filesspace[diroff + dirl - 1] == 0)
+ cbdata->filesspace[diroff + dirl - 1] = '/';
+ if (i)
+ return diroff;
+ memset(statdata, 0, 16);
+ memcpy(statdata + 8, &stb.st_dev, sizeof(stb.st_dev));
+ memcpy(statdata, &stb.st_ino, sizeof(stb.st_ino));
+ hx = 0;
+ for (i = 15; i >= 0; i--)
+ hx = (unsigned int)hx * 13 + statdata[i];
+ h = hx & cbdata->statmapn;
+ hh = HASHCHAIN_START;
+ for (;;)
+ {
+ qx = cbdata->statmap[2 * h];
+ if (!qx)
+ break;
+ if (qx == hx)
+ {
+ Id off = cbdata->statmap[2 * h + 1];
+ const char *dp = (const char *)cbdata->filesspace + cbdata->norq.elements[off];
+ if (!memcmp(dp, (const char *)statdata, 16))
+ return cbdata->norq.elements[off + 1];
+ }
+ h = HASHCHAIN_NEXT(h, hh, cbdata->statmapn);
+ }
+ /* new stat result. work. */
+ nspaceoff = addfilesspace(cbdata, 16);
+ memcpy(cbdata->filesspace + nspaceoff, statdata, 16);
+ queue_push2(&cbdata->norq, nspaceoff, nspaceoff);
+ cbdata->statmap[2 * h] = hx;
+ cbdata->statmap[2 * h + 1] = cbdata->norq.count - 2;
+ if (++cbdata->statmapused * 2 > cbdata->statmapn)
+ cbdata->statmap = growhash(cbdata->statmap, &cbdata->statmapn);
+ return nspaceoff;
+}
+
+/* forward declaration */
+static Id normalizedir(struct cbdata *cbdata, const char *dir, int dirl, Id hx, int create);
+
+static Id
+unifywithcanon(struct cbdata *cbdata, Id diroff, int dirl)
+{
+ Id dirnameid;
+ int i, l, ll, lo;
+ struct stat stb;
+
+#if 0
+ printf("UNIFY %.*s\n", dirl, (char *)cbdata->filesspace + diroff);
+#endif
+ if (!dirl || cbdata->filesspace[diroff] != '/')
+ return diroff;
+ /* strip / at end*/
+ while (dirl && cbdata->filesspace[diroff + dirl - 1] == '/')
+ dirl--;
+ if (!dirl)
+ return diroff;
+ /* find dirname */
+ for (i = dirl - 1; i > 0; i--)
+ if (cbdata->filesspace[diroff + i] == '/')
+ break;
+ i++; /* include trailing / */
+ dirnameid = normalizedir(cbdata, (char *)cbdata->filesspace + diroff, i, strnhash((char *)cbdata->filesspace + diroff, i), 1);
+ if (dirnameid == -1)
+ return diroff; /* some cyclic link */
+ if (cbdata->filesspace[dirnameid] != '/')
+ return diroff; /* hmm */
+ l = strlen((char *)cbdata->filesspace + dirnameid);
+ if (l && cbdata->filesspace[dirnameid + l - 1] != '/')
+ return diroff;
+ /* special handling for '.', '..', '' */
+ if (cbdata->filesspace[diroff + i] == '.')
+ {
+ if (dirl - i == 1)
+ return dirnameid;
+ if (dirl - i == 2 && cbdata->filesspace[diroff + i + 1] == '.')
+ {
+ dirl = strlen((char *)cbdata->filesspace + dirnameid);
+ if (dirl <= 2)
+ return dirnameid;
+ for (i = dirl - 2; i > 0; i--)
+ if (cbdata->filesspace[diroff + i] == '/')
+ break;
+ dirnameid = normalizedir(cbdata, (char *)cbdata->filesspace + dirnameid, i + 1, strnhash((char *)cbdata->filesspace + dirnameid, i + 1), 1);
+ return dirnameid == -1 ? diroff : dirnameid;
+ }
+ }
+ if (cbdata->rootdirl + l + dirl - i + 1 > cbdata->canonspacen)
+ {
+ cbdata->canonspacen = cbdata->rootdirl + l + dirl - i + 20;
+ cbdata->canonspace = solv_realloc(cbdata->canonspace, cbdata->canonspacen);
+ strcpy(cbdata->canonspace, cbdata->rootdir);
+ }
+ strcpy(cbdata->canonspace + cbdata->rootdirl, (char *)cbdata->filesspace + dirnameid);
+ strncpy(cbdata->canonspace + cbdata->rootdirl + l, (char *)cbdata->filesspace + diroff + i, dirl - i);
+ cbdata->canonspace[cbdata->rootdirl + l + dirl - i] = 0;
+ cbdata->statsmade++;
+#if 0
+ printf("stat()ing %s\n", cbdata->canonspace);
+#endif
+ if (lstat(cbdata->canonspace, &stb))
+ return diroff; /* hmm */
+ if (!S_ISLNK(stb.st_mode))
+ {
+ /* not a symlink, have canon entry */
+ diroff = addfilesspace(cbdata, l + dirl - i + 2);
+ strcpy((char *)cbdata->filesspace + diroff, cbdata->canonspace + cbdata->rootdirl);
+ l += dirl - i;
+ /* add trailing / */
+ if (cbdata->filesspace[diroff + l - 1] != '/')
+ {
+ cbdata->filesspace[diroff + l++] = '/';
+ cbdata->filesspace[diroff + l] = 0;
+ }
+ dirnameid = normalizedir(cbdata, (char *)cbdata->filesspace + diroff, l, strnhash((char *)cbdata->filesspace + diroff, l), 1);
+ return dirnameid == -1 ? diroff : dirnameid;
+ }
+ /* oh no, a symlink! follow */
+ if (cbdata->rootdirl + l + dirl - i + stb.st_size + 2 > cbdata->canonspacen)
+ {
+ cbdata->canonspacen = cbdata->rootdirl + l + dirl - i + stb.st_size + 20;
+ cbdata->canonspace = solv_realloc(cbdata->canonspace, cbdata->canonspacen);
+ }
+ lo = cbdata->rootdirl + l + dirl - i + 1;
+ ll = readlink(cbdata->canonspace, cbdata->canonspace + lo, stb.st_size);
+ if (ll < 0 || ll > stb.st_size)
+ return diroff; /* hmm */
+ if (ll == 0)
+ return dirnameid;
+ if (cbdata->canonspace[lo + ll - 1] != '/')
+ cbdata->canonspace[lo + ll++] = '/'; /* add trailing / */
+ cbdata->canonspace[lo + ll] = 0;
+ if (cbdata->canonspace[lo] != '/')
+ {
+ /* relative link, concatenate */
+ memmove(cbdata->canonspace + cbdata->rootdirl + l, cbdata->canonspace + lo, ll + 1);
+ lo = cbdata->rootdirl;
+ ll += l;
+ }
+ dirnameid = normalizedir(cbdata, (char *)cbdata->canonspace + lo, ll, strnhash((char *)cbdata->canonspace + lo, ll), 1);
+ return dirnameid == -1 ? diroff : dirnameid;
+}
+
+/*
+ * map a directory (containing a trailing /) into a number.
+ * for unifywithstat this is the offset to the 16 byte stat result.
+ * for unifywithcanon this is the offset to the normailzed dir.
+ */
+static Id
+normalizedir(struct cbdata *cbdata, const char *dir, int dirl, Id hx, int create)
+{
+ Hashval h, hh;
+ Id qx;
+ Id nspaceoff;
+ int mycnt;
+
+ if (!hx)
+ hx = dirl + 1;
+ h = hx & cbdata->normapn;
+ hh = HASHCHAIN_START;
+ for (;;)
+ {
+ qx = cbdata->normap[2 * h];
+ if (!qx)
+ break;
+ if (qx == hx)
+ {
+ Id off = cbdata->normap[2 * h + 1];
+ const char *dp = (const char *)cbdata->filesspace + cbdata->norq.elements[off];
+ if (!strncmp(dp, dir, dirl) && dp[dirl] == 0)
+ return cbdata->norq.elements[off + 1];
+ }
+ h = HASHCHAIN_NEXT(h, hh, cbdata->normapn);
+ }
+ if (!create)
+ return 0;
+ /* new dir. work. */
+ if (dir >= (const char *)cbdata->filesspace && dir < (const char *)cbdata->filesspace + cbdata->filesspacen)
+ {
+ /* can happen when called from unifywithcanon */
+ Id off = dir - (const char *)cbdata->filesspace;
+ nspaceoff = addfilesspace(cbdata, dirl + 1);
+ dir = (const char *)cbdata->filesspace + off;
+ }
+ else
+ nspaceoff = addfilesspace(cbdata, dirl + 1);
+ if (dirl)
+ memcpy(cbdata->filesspace + nspaceoff, dir, dirl);
+ cbdata->filesspace[nspaceoff + dirl] = 0;
+ mycnt = cbdata->norq.count;
+ queue_push2(&cbdata->norq, nspaceoff, -1);
+ cbdata->normap[2 * h] = hx;
+ cbdata->normap[2 * h + 1] = cbdata->norq.count - 2;
+ /* unify */
+ if (cbdata->usestat)
+ nspaceoff = unifywithstat(cbdata, nspaceoff, dirl);
+ else
+ nspaceoff = unifywithcanon(cbdata, nspaceoff, dirl);
+ /* update */
+ cbdata->norq.elements[mycnt + 1] = nspaceoff;
+#if 0
+ if (!cbdata->usestat)
+ printf("%s normalized to %d: %s\n", cbdata->filesspace + cbdata->norq.elements[mycnt], nspaceoff, cbdata->filesspace + nspaceoff);
+#endif
+ if (++cbdata->normapused * 2 > cbdata->normapn)
+ cbdata->normap = growhash(cbdata->normap, &cbdata->normapn);
+ return nspaceoff;
+}
+
+static void
+findfileconflicts_alias_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
+{
+ int isdir = S_ISDIR(info->mode);
+ struct cbdata *cbdata = cbdatav;
+ const char *dp;
+ Id idx, dirid;
+ Id hx, qx;
+ Hashval h, hh;
+
+ idx = cbdata->idx;
+
+ if (!info->dirlen)
+ return;
+ dp = fn + info->dirlen;
+ if (info->diridx != cbdata->lastdiridx)
+ {
+ cbdata->lastdiridx = info->diridx;
+ cbdata->lastdirhash = 0;
+ }
+ dp = fn + info->dirlen;
+ hx = strhash(dp);
+ if (!hx)
+ hx = strlen(fn) + 1;
+
+ h = hx & cbdata->cflmapn;
+ hh = HASHCHAIN_START;
+ for (;;)
+ {
+ qx = cbdata->cflmap[2 * h];
+ if (!qx)
+ break;
+ if (qx == hx)
+ break;
+ h = HASHCHAIN_NEXT(h, hh, cbdata->cflmapn);
+ }
+ if (!qx || cbdata->cflmap[2 * h + 1] != -1)
+ return;
+ if (!cbdata->lastdirhash)
+ cbdata->lastdirhash = strnhash(fn, dp - fn);
+ dirid = normalizedir(cbdata, fn, dp - fn, cbdata->lastdirhash, 1);
+ queue_push2(&cbdata->lookat, hx, idx);
+ queue_push2(&cbdata->lookat, cbdata->lastdirhash, isdir ? -dirid : dirid);
}
static void
@@ -235,6 +603,7 @@ findfileconflicts2_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
Hashval hx;
const char *dp;
char md5padded[34];
+ Id off;
if (!info->dirlen)
return;
@@ -244,19 +613,31 @@ findfileconflicts2_cb(void *cbdatav, const char *fn, struct filelistinfo *info)
cbdata->lastdiridx = info->diridx;
cbdata->lastdirhash = strnhash(fn, dp - fn);
}
- hx = cbdata->lastdirhash;
- hx = strhash_cont(dp, hx);
+ if (cbdata->aliases)
+ {
+ if (cbdata->lastdirhash != cbdata->dirhash)
+ return;
+ hx = strhash(dp);
+ }
+ else
+ {
+ hx = cbdata->lastdirhash;
+ hx = strhash_cont(dp, hx);
+ }
if (!hx)
hx = strlen(fn) + 1;
if ((Id)hx != cbdata->hx)
return;
+ if (cbdata->dirid && cbdata->dirid != normalizedir(cbdata, fn, dp - fn, cbdata->dirhash, 0))
+ return;
strncpy(md5padded, info->digest, 32);
md5padded[32] = 0;
md5padded[33] = info->color;
- /* printf("%d, hx %x -> %s %d %s\n", cbdata->idx, hx, fn, fmode, md5); */
- queue_push(&cbdata->files, cbdata->filesspacen);
- addfilesspace(cbdata, (unsigned char *)md5padded, 34);
- addfilesspace(cbdata, (unsigned char *)fn, strlen(fn) + 1);
+ /* printf("%d, hx %x -> %s %d %s\n", cbdata->idx, hx, fn, info->mode, info->digest); */
+ off = addfilesspace(cbdata, strlen(fn) + (34 + 1));
+ memcpy(cbdata->filesspace + off, (unsigned char *)md5padded, 34);
+ strcpy((char *)cbdata->filesspace + off + 34, fn);
+ queue_push(&cbdata->files, off);
}
static int
@@ -264,20 +645,44 @@ lookat_idx_cmp(const void *ap, const void *bp, void *dp)
{
const Id *a = ap, *b = bp;
unsigned int ahx, bhx;
- if (a[1] - b[1] != 0)
+ if (a[1] - b[1] != 0) /* idx */
return a[1] - b[1];
- ahx = (unsigned int)a[0]; /* a[0] can be < 0 */
+ if (a[3] - b[3] != 0) /* dirid */
+ return a[3] - b[3];
+ ahx = (unsigned int)a[0]; /* can be < 0 */
bhx = (unsigned int)b[0];
- return ahx < bhx ? -1 : ahx > bhx ? 1 : 0;
+ if (ahx != bhx)
+ return ahx < bhx ? -1 : 1;
+ ahx = (unsigned int)a[2]; /* dhx */
+ bhx = (unsigned int)b[2];
+ if (ahx != bhx)
+ return ahx < bhx ? -1 : 1;
+ return 0;
}
static int
lookat_hx_cmp(const void *ap, const void *bp, void *dp)
{
const Id *a = ap, *b = bp;
- unsigned int ahx = (unsigned int)a[0]; /* a[0] can be < 0 */
- unsigned int bhx = (unsigned int)b[0];
- return ahx < bhx ? -1 : ahx > bhx ? 1 : a[1] - b[1];
+ unsigned int ahx, bhx;
+ Id adirid, bdirid;
+ ahx = (unsigned int)a[0]; /* can be < 0 */
+ bhx = (unsigned int)b[0];
+ if (ahx != bhx)
+ return ahx < bhx ? -1 : 1;
+ adirid = a[3] < 0 ? -a[3] : a[3];
+ bdirid = b[3] < 0 ? -b[3] : b[3];
+ if (adirid - bdirid != 0) /* dirid */
+ return adirid - bdirid;
+ if (a[3] != b[3])
+ return a[3] > 0 ? -1 : 1; /* bring positive dirids to front */
+ if (a[1] - b[1] != 0) /* idx */
+ return a[1] - b[1];
+ ahx = (unsigned int)a[2]; /* dhx */
+ bhx = (unsigned int)b[2];
+ if (ahx != bhx)
+ return ahx < bhx ? -1 : 1;
+ return 0;
}
static int
@@ -316,51 +721,44 @@ iterate_solvable_dirs(Pool *pool, Id p, void (*cb)(void *, const char *, struct
dataiterator_free(&di);
}
-#if 0
-static void
-iterate_solvable_files(Pool *pool, Id p, void (*cb)(void *, const char *, struct filelistinfo *), void *cbdata)
+/* before calling the expensive findfileconflicts_basename_cb we check if any of
+ * the basenames match. This only makes sense when cbdata->create is off.
+ */
+static int
+precheck_solvable_files(struct cbdata *cbdata, Pool *pool, Id p)
{
Dataiterator di;
- char *space = 0;
- int spacen = 0;
- Repodata *lastdata = 0;
- Id lastdirid = -1;
- int dirl = 0, l;
- struct filelistinfo info;
- const char *tmpdir = 0;
- unsigned int diridx;
+ Id hx, qx;
+ Hashval h, hh;
+ int found = 0;
dataiterator_init(&di, pool, 0, p, SOLVABLE_FILELIST, 0, SEARCH_COMPLETE_FILELIST);
- memset(&info, 0, sizeof(info));
while (dataiterator_step(&di))
{
- if (di.data != lastdata || di.kv.id != lastdirid)
- {
- lastdata = di.data;
- lastdirid = di.kv.id;
- tmpdir = repodata_dir2str(di.data, di.kv.id, "");
- dirl = strlen(tmpdir);
- info.diridx++;
- info.dirlen = dirl;
- }
- l = dirl + strlen(di.kv.str) + 1;
- if (l > spacen)
- {
- spacen = l + 16;
- space = solv_realloc(space, spacen);
- }
- if (tmpdir)
+ hx = strhash(di.kv.str);
+ if (!hx)
+ hx = strlen(di.kv.str) + 1;
+ h = hx & cbdata->cflmapn;
+ hh = HASHCHAIN_START;
+ for (;;)
{
- strcpy(space, tmpdir);
- tmpdir = 0;
+ qx = cbdata->cflmap[2 * h];
+ if (!qx)
+ break;
+ if (qx == hx)
+ {
+ found = 1;
+ break;
+ }
+ h = HASHCHAIN_NEXT(h, hh, cbdata->cflmapn);
}
- strcpy(space + dirl, di.kv.str);
- cb(cbdata, space, &info);
+ if (found)
+ break;
}
dataiterator_free(&di);
- solv_free(space);
+ return found;
}
-#endif
+
int
pool_findfileconflicts(Pool *pool, Queue *pkgs, int cutoff, Queue *conflicts, int flags, void *(*handle_cb)(Pool *, Id, void *) , void *handle_cbdata)
@@ -382,7 +780,18 @@ pool_findfileconflicts(Pool *pool, Queue *pkgs, int cutoff, Queue *conflicts, in
POOL_DEBUG(SOLV_DEBUG_STATS, "packages: %d, cutoff %d\n", pkgs->count, cutoff);
memset(&cbdata, 0, sizeof(cbdata));
+ cbdata.aliases = flags & FINDFILECONFLICTS_CHECK_DIRALIASING;
cbdata.pool = pool;
+ if (cbdata.aliases && (flags & FINDFILECONFLICTS_USE_ROOTDIR) != 0)
+ {
+ cbdata.rootdir = pool_get_rootdir(pool);
+ if (cbdata.rootdir && !strcmp(cbdata.rootdir, "/"))
+ cbdata.rootdir = 0;
+ if (cbdata.rootdir)
+ cbdata.rootdirl = strlen(cbdata.rootdir);
+ if (!cbdata.rootdir)
+ cbdata.usestat = 1;
+ }
queue_init(&cbdata.lookat);
queue_init(&cbdata.lookat_dir);
map_init(&cbdata.idxmap, pkgs->count);
@@ -394,42 +803,44 @@ pool_findfileconflicts(Pool *pool, Queue *pkgs, int cutoff, Queue *conflicts, in
/* avarage dir count: 20 dirs per package */
/* first pass: scan dirs */
- cflmapn = (cutoff + 3) * 64;
- while ((cflmapn & (cflmapn - 1)) != 0)
- cflmapn = cflmapn & (cflmapn - 1);
- cbdata.dirmap = solv_calloc(cflmapn, 2 * sizeof(Id));
- cbdata.dirmapn = cflmapn - 1; /* make it a mask */
- cbdata.create = 1;
- idxmapset = 0;
- for (i = 0; i < pkgs->count; i++)
+ if (!cbdata.aliases)
{
- p = pkgs->elements[i];
- cbdata.idx = i;
- if (i == cutoff)
- cbdata.create = 0;
- if ((flags & FINDFILECONFLICTS_USESOLVABLEFILELIST) != 0 && installed)
+ cflmapn = (cutoff + 3) * 64;
+ while ((cflmapn & (cflmapn - 1)) != 0)
+ cflmapn = cflmapn & (cflmapn - 1);
+ cbdata.dirmap = solv_calloc(cflmapn, 2 * sizeof(Id));
+ cbdata.dirmapn = cflmapn - 1; /* make it a mask */
+ cbdata.create = 1;
+ idxmapset = 0;
+ for (i = 0; i < pkgs->count; i++)
{
- if (p >= installed->start && p < installed->end && pool->solvables[p].repo == installed)
+ p = pkgs->elements[i];
+ cbdata.idx = i;
+ if (i == cutoff)
+ cbdata.create = 0;
+ if ((flags & FINDFILECONFLICTS_USE_SOLVABLEFILELIST) != 0 && installed)
{
- iterate_solvable_dirs(pool, p, finddirs_cb, &cbdata);
- if (MAPTST(&cbdata.idxmap, i))
- idxmapset++;
- continue;
+ if (p >= installed->start && p < installed->end && pool->solvables[p].repo == installed)
+ {
+ iterate_solvable_dirs(pool, p, finddirs_cb, &cbdata);
+ if (MAPTST(&cbdata.idxmap, i))
+ idxmapset++;
+ continue;
+ }
}
+ handle = (*handle_cb)(pool, p, handle_cbdata);
+ if (!handle)
+ continue;
+ rpm_iterate_filelist(handle, RPM_ITERATE_FILELIST_ONLYDIRS, finddirs_cb, &cbdata);
+ if (MAPTST(&cbdata.idxmap, i))
+ idxmapset++;
}
- handle = (*handle_cb)(pool, p, handle_cbdata);
- if (!handle)
- continue;
- rpm_iterate_filelist(handle, RPM_ITERATE_FILELIST_ONLYDIRS, finddirs_cb, &cbdata);
- if (MAPTST(&cbdata.idxmap, i))
- idxmapset++;
+ POOL_DEBUG(SOLV_DEBUG_STATS, "dirmap size: %d, used %d\n", cbdata.dirmapn + 1, cbdata.dirmapused);
+ POOL_DEBUG(SOLV_DEBUG_STATS, "dirmap memory usage: %d K\n", (cbdata.dirmapn + 1) * 2 * (int)sizeof(Id) / 1024);
+ POOL_DEBUG(SOLV_DEBUG_STATS, "dirmap creation took %d ms\n", solv_timems(now));
+ POOL_DEBUG(SOLV_DEBUG_STATS, "dir conflicts found: %d, idxmap %d of %d\n", cbdata.dirconflicts, idxmapset, pkgs->count);
}
- POOL_DEBUG(SOLV_DEBUG_STATS, "dirmap size: %d, used %d\n", cbdata.dirmapn + 1, cbdata.dirmapused);
- POOL_DEBUG(SOLV_DEBUG_STATS, "dirmap memory usage: %d K\n", (cbdata.dirmapn + 1) * 2 * (int)sizeof(Id) / 1024);
- POOL_DEBUG(SOLV_DEBUG_STATS, "dirmap creation took %d ms\n", solv_timems(now));
- POOL_DEBUG(SOLV_DEBUG_STATS, "dir conflicts found: %d, idxmap %d of %d\n", cbdata.dirconflicts, idxmapset, pkgs->count);
-
/* second pass: scan files */
now = solv_timems(0);
cflmapn = (cutoff + 3) * 128;
@@ -440,24 +851,78 @@ pool_findfileconflicts(Pool *pool, Queue *pkgs, int cutoff, Queue *conflicts, in
cbdata.create = 1;
for (i = 0; i < pkgs->count; i++)
{
- if (!MAPTST(&cbdata.idxmap, i))
+ if (!cbdata.aliases && !MAPTST(&cbdata.idxmap, i))
continue;
p = pkgs->elements[i];
cbdata.idx = i;
if (i == cutoff)
cbdata.create = 0;
- /* can't use FINDFILECONFLICTS_USESOLVABLEFILELIST because we have to know if
+ if (cbdata.aliases && !cbdata.create && FINDFILECONFLICTS_USE_SOLVABLEFILELIST)
+ {
+ if (p >= installed->start && p < installed->end && pool->solvables[p].repo == installed)
+ if (!precheck_solvable_files(&cbdata, pool, p))
+ continue;
+ }
+ /* can't use FINDFILECONFLICTS_USE_SOLVABLEFILELIST because we have to know if
* the file is a directory or not */
handle = (*handle_cb)(pool, p, handle_cbdata);
if (!handle)
continue;
cbdata.lastdiridx = -1;
- rpm_iterate_filelist(handle, RPM_ITERATE_FILELIST_NOGHOSTS, findfileconflicts_cb, &cbdata);
+ rpm_iterate_filelist(handle, RPM_ITERATE_FILELIST_NOGHOSTS, cbdata.aliases ? findfileconflicts_basename_cb : findfileconflicts_cb, &cbdata);
}
POOL_DEBUG(SOLV_DEBUG_STATS, "filemap size: %d, used %d\n", cbdata.cflmapn + 1, cbdata.cflmapused);
POOL_DEBUG(SOLV_DEBUG_STATS, "filemap memory usage: %d K\n", (cbdata.cflmapn + 1) * 2 * (int)sizeof(Id) / 1024);
POOL_DEBUG(SOLV_DEBUG_STATS, "filemap creation took %d ms\n", solv_timems(now));
+ POOL_DEBUG(SOLV_DEBUG_STATS, "lookat_dir size: %d\n", cbdata.lookat_dir.count);
+ queue_free(&cbdata.lookat_dir);
+
+ /* we need another pass for aliases */
+ if (cbdata.aliases)
+ {
+ now = solv_timems(0);
+ /* make sure the first offset is not zero */
+ addfilesspace(&cbdata, 1);
+ cflmapn = (cutoff + 3) * 16;
+ while ((cflmapn & (cflmapn - 1)) != 0)
+ cflmapn = cflmapn & (cflmapn - 1);
+ cbdata.normap = solv_calloc(cflmapn, 2 * sizeof(Id));
+ cbdata.normapn = cflmapn - 1; /* make it a mask */
+ if (cbdata.usestat)
+ {
+ cbdata.statmap = solv_calloc(cflmapn, 2 * sizeof(Id));
+ cbdata.statmapn = cflmapn - 1; /* make it a mask */
+ }
+ cbdata.create = 0;
+ for (i = 0; i < pkgs->count; i++)
+ {
+ if (!MAPTST(&cbdata.idxmap, i))
+ continue;
+ p = pkgs->elements[i];
+ cbdata.idx = i;
+ /* can't use FINDFILECONFLICTS_USE_SOLVABLEFILELIST because we have to know if
+ * the file is a directory or not */
+ handle = (*handle_cb)(pool, p, handle_cbdata);
+ if (!handle)
+ continue;
+ cbdata.lastdiridx = -1;
+ rpm_iterate_filelist(handle, RPM_ITERATE_FILELIST_NOGHOSTS, findfileconflicts_alias_cb, &cbdata);
+ }
+ POOL_DEBUG(SOLV_DEBUG_STATS, "normap size: %d, used %d\n", cbdata.normapn + 1, cbdata.normapused);
+ POOL_DEBUG(SOLV_DEBUG_STATS, "normap memory usage: %d K\n", (cbdata.normapn + 1) * 2 * (int)sizeof(Id) / 1024);
+ POOL_DEBUG(SOLV_DEBUG_STATS, "stats made: %d\n", cbdata.statsmade);
+ if (cbdata.usestat)
+ {
+ POOL_DEBUG(SOLV_DEBUG_STATS, "statmap size: %d, used %d\n", cbdata.statmapn + 1, cbdata.statmapused);
+ POOL_DEBUG(SOLV_DEBUG_STATS, "statmap memory usage: %d K\n", (cbdata.statmapn + 1) * 2 * (int)sizeof(Id) / 1024);
+ }
+ cbdata.statmap = solv_free(cbdata.statmap);
+ cbdata.statmapn = 0;
+ cbdata.canonspace = solv_free(cbdata.canonspace);
+ cbdata.canonspacen = 0;
+ POOL_DEBUG(SOLV_DEBUG_STATS, "alias processing took %d ms\n", solv_timems(now));
+ }
cbdata.dirmap = solv_free(cbdata.dirmap);
cbdata.dirmapn = 0;
@@ -465,29 +930,51 @@ pool_findfileconflicts(Pool *pool, Queue *pkgs, int cutoff, Queue *conflicts, in
cbdata.cflmap = solv_free(cbdata.cflmap);
cbdata.cflmapn = 0;
cbdata.cflmapused = 0;
- map_free(&cbdata.idxmap);
now = solv_timems(0);
- POOL_DEBUG(SOLV_DEBUG_STATS, "lookat_dir size: %d\n", cbdata.lookat_dir.count);
- queue_free(&cbdata.lookat_dir);
- /* sort and unify */
- solv_sort(cbdata.lookat.elements, cbdata.lookat.count / 2, sizeof(Id) * 2, &lookat_idx_cmp, pool);
- for (i = j = 0; i < cbdata.lookat.count; i += 2)
+ map_free(&cbdata.idxmap);
+
+ /* sort and unify/prune */
+ POOL_DEBUG(SOLV_DEBUG_STATS, "raw candidates: %d, pruning\n", cbdata.lookat.count / 4);
+ solv_sort(cbdata.lookat.elements, cbdata.lookat.count / 4, sizeof(Id) * 4, &lookat_hx_cmp, pool);
+ for (i = j = 0; i < cbdata.lookat.count; )
{
+ int first = 1;
Id hx = cbdata.lookat.elements[i];
Id idx = cbdata.lookat.elements[i + 1];
- if (j && hx == cbdata.lookat.elements[j - 2] && idx == cbdata.lookat.elements[j - 1])
- continue;
- cbdata.lookat.elements[j++] = hx;
- cbdata.lookat.elements[j++] = idx;
+ Id dhx = cbdata.lookat.elements[i + 2];
+ Id dirid = cbdata.lookat.elements[i + 3];
+ i += 4;
+ for (; i < cbdata.lookat.count && hx == cbdata.lookat.elements[i] && (dirid == cbdata.lookat.elements[i + 3] || dirid == -cbdata.lookat.elements[i + 3]); i += 4)
+ {
+ if (idx == cbdata.lookat.elements[i + 1] && dhx == cbdata.lookat.elements[i + 2])
+ continue; /* ignore duplicates */
+ if (first)
+ {
+ if (dirid < 0)
+ continue; /* all have a neg dirid */
+ cbdata.lookat.elements[j++] = hx;
+ cbdata.lookat.elements[j++] = idx;
+ cbdata.lookat.elements[j++] = dhx;
+ cbdata.lookat.elements[j++] = dirid;
+ first = 0;
+ }
+ idx = cbdata.lookat.elements[i + 1];
+ dhx = cbdata.lookat.elements[i + 2];
+ cbdata.lookat.elements[j++] = hx;
+ cbdata.lookat.elements[j++] = idx;
+ cbdata.lookat.elements[j++] = dhx;
+ cbdata.lookat.elements[j++] = dirid;
+ }
}
queue_truncate(&cbdata.lookat, j);
- POOL_DEBUG(SOLV_DEBUG_STATS, "candidates: %d\n", cbdata.lookat.count / 2);
+ POOL_DEBUG(SOLV_DEBUG_STATS, "candidates now: %d\n", cbdata.lookat.count / 4);
/* third pass: collect file info for all files that match a hx */
+ solv_sort(cbdata.lookat.elements, cbdata.lookat.count / 4, sizeof(Id) * 4, &lookat_idx_cmp, pool);
queue_init(&cbdata.files);
- for (i = 0; i < cbdata.lookat.count; i += 2)
+ for (i = 0; i < cbdata.lookat.count; i += 4)
{
Id idx = cbdata.lookat.elements[i + 1];
int iterflags = RPM_ITERATE_FILELIST_WITHMD5 | RPM_ITERATE_FILELIST_NOGHOSTS;
@@ -495,34 +982,40 @@ pool_findfileconflicts(Pool *pool, Queue *pkgs, int cutoff, Queue *conflicts, in
iterflags |= RPM_ITERATE_FILELIST_WITHCOL;
p = pkgs->elements[idx];
handle = (*handle_cb)(pool, p, handle_cbdata);
- for (;; i += 2)
+ for (;; i += 4)
{
int fstart = cbdata.files.count;
queue_push(&cbdata.files, idx);
queue_push(&cbdata.files, 0);
cbdata.idx = idx;
cbdata.hx = cbdata.lookat.elements[i];
+ cbdata.dirhash = cbdata.lookat.elements[i + 2];
+ cbdata.dirid = cbdata.lookat.elements[i + 3];
cbdata.lastdiridx = -1;
if (handle)
rpm_iterate_filelist(handle, iterflags, findfileconflicts2_cb, &cbdata);
cbdata.files.elements[fstart + 1] = cbdata.files.count;
cbdata.lookat.elements[i + 1] = fstart;
- if (i + 2 >= cbdata.lookat.count || cbdata.lookat.elements[i + 3] != idx)
+ if (i + 4 >= cbdata.lookat.count || cbdata.lookat.elements[i + 4 + 1] != idx)
break;
}
}
+ cbdata.normap = solv_free(cbdata.normap);
+ cbdata.normapn = 0;
+
/* forth pass: for each hx we have, compare all matching files against all other matching files */
- solv_sort(cbdata.lookat.elements, cbdata.lookat.count / 2, sizeof(Id) * 2, &lookat_hx_cmp, pool);
- for (i = 0; i < cbdata.lookat.count - 2; i += 2)
+ solv_sort(cbdata.lookat.elements, cbdata.lookat.count / 4, sizeof(Id) * 4, &lookat_hx_cmp, pool);
+ for (i = 0; i < cbdata.lookat.count - 4; i += 4)
{
Id hx = cbdata.lookat.elements[i];
Id pstart = cbdata.lookat.elements[i + 1];
+ Id dirid = cbdata.lookat.elements[i + 3];
Id pidx = cbdata.files.elements[pstart];
Id pend = cbdata.files.elements[pstart + 1];
- if (cbdata.lookat.elements[i + 2] != hx)
+ if (cbdata.lookat.elements[i + 4] != hx)
continue; /* no package left with that hx */
- for (j = i + 2; j < cbdata.lookat.count && cbdata.lookat.elements[j] == hx; j += 2)
+ for (j = i + 4; j < cbdata.lookat.count && cbdata.lookat.elements[j] == hx && cbdata.lookat.elements[j + 3] == dirid; j += 4)
{
Id qstart = cbdata.lookat.elements[j + 1];
Id qidx = cbdata.files.elements[qstart];
@@ -535,8 +1028,21 @@ pool_findfileconflicts(Pool *pool, Queue *pkgs, int cutoff, Queue *conflicts, in
{
char *fsi = (char *)cbdata.filesspace + cbdata.files.elements[ii];
char *fsj = (char *)cbdata.filesspace + cbdata.files.elements[jj];
- if (strcmp(fsi + 34, fsj + 34))
- continue; /* different file names */
+ if (cbdata.aliases)
+ {
+ /* compare just the basenames, the dirs match */
+ char *bsi = strrchr(fsi + 34, '/');
+ char *bsj = strrchr(fsj + 34, '/');
+ if ((!bsi || !bsj) && bsi != bsj)
+ continue;
+ if (strcmp(bsi, bsj))
+ continue; /* different file names */
+ }
+ else
+ {
+ if (strcmp(fsi + 34, fsj + 34))
+ continue; /* different file names */
+ }
if (!strcmp(fsi, fsj))
continue; /* md5 sum matches */
if (obsoleteusescolors && fsi[33] && fsj[33] && (fsi[33] & fsj[33]) == 0)
@@ -560,6 +1066,7 @@ pool_findfileconflicts(Pool *pool, Queue *pkgs, int cutoff, Queue *conflicts, in
solv_sort(conflicts->elements, conflicts->count / 6, 6 * sizeof(Id), conflicts_cmp, pool);
POOL_DEBUG(SOLV_DEBUG_STATS, "found %d file conflicts\n", conflicts->count / 6);
POOL_DEBUG(SOLV_DEBUG_STATS, "file conflict detection took %d ms\n", solv_timems(start));
+
return conflicts->count;
}
diff --git a/ext/pool_fileconflicts.h b/ext/pool_fileconflicts.h
index 8666fc6..13abdc7 100644
--- a/ext/pool_fileconflicts.h
+++ b/ext/pool_fileconflicts.h
@@ -12,6 +12,8 @@
extern int pool_findfileconflicts(Pool *pool, Queue *pkgs, int cutoff, Queue *conflicts, int flags, void *(*handle_cb)(Pool *, Id, void *) , void *handle_cbdata);
-#define FINDFILECONFLICTS_USESOLVABLEFILELIST (1 << 0)
+#define FINDFILECONFLICTS_USE_SOLVABLEFILELIST (1 << 0)
+#define FINDFILECONFLICTS_CHECK_DIRALIASING (1 << 1)
+#define FINDFILECONFLICTS_USE_ROOTDIR (1 << 2)
#endif
diff --git a/tools/findfileconflicts.c b/tools/findfileconflicts.c
index 64f55dd..cb90f54 100644
--- a/tools/findfileconflicts.c
+++ b/tools/findfileconflicts.c
@@ -35,12 +35,17 @@ int main(int argc, char **argv)
int i;
Queue todo, conflicts;
void *state = 0;
+ char *rootdir = 0;
+ if (argc == 3 && !strcmp(argv[1], "--root"))
+ rootdir = argv[2];
pool = pool_create();
+ if (rootdir)
+ pool_set_rootdir(pool, rootdir);
pool_setdebuglevel(pool, 1);
installed = repo_create(pool, "@System");
pool_set_installed(pool, installed);
- if (repo_add_rpmdb(installed, 0, 0))
+ if (repo_add_rpmdb(installed, 0, REPO_USE_ROOTDIR))
{
fprintf(stderr, "findfileconflicts: %s\n", pool_errstr(pool));
exit(1);
@@ -49,12 +54,17 @@ int main(int argc, char **argv)
queue_init(&conflicts);
FOR_REPO_SOLVABLES(installed, p, s)
queue_push(&todo, p);
- state = rpm_state_create(0);
- pool_findfileconflicts(pool, &todo, 0, &conflicts, FINDFILECONFLICTS_USESOLVABLEFILELIST, &iterate_handle, state);
+ state = rpm_state_create(pool_get_rootdir(pool));
+ pool_findfileconflicts(pool, &todo, 0, &conflicts, FINDFILECONFLICTS_USE_SOLVABLEFILELIST | FINDFILECONFLICTS_CHECK_DIRALIASING | FINDFILECONFLICTS_USE_ROOTDIR, &iterate_handle, state);
rpm_state_free(state);
queue_free(&todo);
for (i = 0; i < conflicts.count; i += 6)
- printf("%s: %s[%s] %s[%s]\n", pool_id2str(pool, conflicts.elements[i]), pool_solvid2str(pool, conflicts.elements[i + 1]), pool_id2str(pool, conflicts.elements[i + 2]), pool_solvid2str(pool, conflicts.elements[i + 4]), pool_id2str(pool, conflicts.elements[i + 5]));
+ {
+ if (conflicts.elements[i] != conflicts.elements[i + 3])
+ printf("%s - %s: %s[%s] %s[%s]\n", pool_id2str(pool, conflicts.elements[i]), pool_id2str(pool, conflicts.elements[i + 3]), pool_solvid2str(pool, conflicts.elements[i + 1]), pool_id2str(pool, conflicts.elements[i + 2]), pool_solvid2str(pool, conflicts.elements[i + 4]), pool_id2str(pool, conflicts.elements[i + 5]));
+ else
+ printf("%s: %s[%s] %s[%s]\n", pool_id2str(pool, conflicts.elements[i]), pool_solvid2str(pool, conflicts.elements[i + 1]), pool_id2str(pool, conflicts.elements[i + 2]), pool_solvid2str(pool, conflicts.elements[i + 4]), pool_id2str(pool, conflicts.elements[i + 5]));
+ }
if (conflicts.count)
{
Queue job;