summaryrefslogtreecommitdiff
path: root/src/convert.c
diff options
context:
space:
mode:
authorDongHun Kwak <dh0128.kwak@samsung.com>2021-03-05 10:08:15 +0900
committerDongHun Kwak <dh0128.kwak@samsung.com>2021-03-05 10:08:15 +0900
commit24d4e855d95e02a5324c2f3d88cfd5cd19830c2c (patch)
tree371d954e80394a8e72ef95d6ee7d45312c3f87a9 /src/convert.c
parent0b86d50828d05a27de3ff840d6a06407310393c2 (diff)
downloadwget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.tar.gz
wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.tar.bz2
wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.zip
Imported Upstream version 1.17upstream/1.17
Diffstat (limited to 'src/convert.c')
-rw-r--r--src/convert.c124
1 files changed, 113 insertions, 11 deletions
diff --git a/src/convert.c b/src/convert.c
index d6ab10b..df8d58d 100644
--- a/src/convert.c
+++ b/src/convert.c
@@ -1,6 +1,6 @@
/* Conversion of links to local files.
- Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2014
- Free Software Foundation, Inc.
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
+ 2014, 2015 Free Software Foundation, Inc.
This file is part of GNU Wget.
@@ -46,6 +46,7 @@ as that of the covered work. */
#include "html-url.h"
#include "css-url.h"
#include "iri.h"
+#include "xstrndup.h"
static struct hash_table *dl_file_url_map;
struct hash_table *dl_url_file_map;
@@ -136,8 +137,9 @@ convert_links_in_hashtable (struct hash_table *downloaded_set,
form. We do this even if the URL already is in
relative form, because our directory structure may
not be identical to that on the server (think `-nd',
- `--cut-dirs', etc.) */
- cur_url->convert = CO_CONVERT_TO_RELATIVE;
+ `--cut-dirs', etc.). If --convert-file-only was passed,
+ we only convert the basename portion of the URL. */
+ cur_url->convert = (opt.convert_file_only ? CO_CONVERT_BASENAME_ONLY : CO_CONVERT_TO_RELATIVE);
cur_url->local_name = xstrdup (local_name);
DEBUGP (("will convert url %s to local %s\n", u->url, local_name));
}
@@ -193,7 +195,7 @@ convert_all_links (void)
convert_links_in_hashtable (downloaded_css_set, 1, &file_count);
secs = ptimer_measure (timer);
- logprintf (LOG_VERBOSE, _("Converted %d files in %s seconds.\n"),
+ logprintf (LOG_VERBOSE, _("Converted links in %d files in %s seconds.\n"),
file_count, print_decimal (secs));
ptimer_destroy (timer);
@@ -206,6 +208,7 @@ static const char *replace_attr_refresh_hack (const char *, int, FILE *,
const char *, int);
static char *local_quote_string (const char *, bool);
static char *construct_relative (const char *, const char *);
+static char *convert_basename (const char *, const struct urlpos *);
/* Change the links in one file. LINKS is a list of links in the
document, along with their positions and the desired direction of
@@ -221,7 +224,7 @@ convert_links (const char *file, struct urlpos *links)
struct urlpos *link;
int to_url_count = 0, to_file_count = 0;
- logprintf (LOG_VERBOSE, _("Converting %s... "), file);
+ logprintf (LOG_VERBOSE, _("Converting links in %s... "), file);
{
/* First we do a "dry run": go through the list L and see whether
@@ -315,9 +318,32 @@ convert_links (const char *file, struct urlpos *links)
DEBUGP (("TO_RELATIVE: %s to %s at position %d in %s.\n",
link->url->url, newname, link->pos, file));
+
+ xfree (newname);
+ xfree (quoted_newname);
+ ++to_file_count;
+ break;
+ }
+ case CO_CONVERT_BASENAME_ONLY:
+ {
+ char *newname = convert_basename (p, link);
+ char *quoted_newname = local_quote_string (newname, link->link_css_p);
+
+ if (link->link_css_p)
+ p = replace_plain (p, link->size, fp, quoted_newname);
+ else if (!link->link_refresh_p)
+ p = replace_attr (p, link->size, fp, quoted_newname);
+ else
+ p = replace_attr_refresh_hack (p, link->size, fp, quoted_newname,
+ link->refresh_timeout);
+
+ DEBUGP (("Converted file part only: %s to %s at position %d in %s.\n",
+ link->url->url, newname, link->pos, file));
+
xfree (newname);
xfree (quoted_newname);
++to_file_count;
+
break;
}
case CO_CONVERT_TO_COMPLETE:
@@ -336,6 +362,7 @@ convert_links (const char *file, struct urlpos *links)
DEBUGP (("TO_COMPLETE: <something> to %s at position %d in %s.\n",
newlink, link->pos, file));
+
xfree (quoted_newlink);
++to_url_count;
break;
@@ -414,14 +441,89 @@ construct_relative (const char *basefile, const char *linkfile)
++basedirs;
}
- /* Construct LINK as explained above. */
- link = xmalloc (3 * basedirs + strlen (linkfile) + 1);
- for (i = 0; i < basedirs; i++)
- memcpy (link + 3 * i, "../", 3);
- strcpy (link + 3 * i, linkfile);
+ if (!basedirs && (b = strpbrk (linkfile, "/:")) && *b == ':')
+ {
+ link = xmalloc (2 + strlen (linkfile) + 1);
+ memcpy (link, "./", 2);
+ strcpy (link + 2, linkfile);
+ }
+ else
+ {
+ /* Construct LINK as explained above. */
+ link = xmalloc (3 * basedirs + strlen (linkfile) + 1);
+ for (i = 0; i < basedirs; i++)
+ memcpy (link + 3 * i, "../", 3);
+ strcpy (link + 3 * i, linkfile);
+ }
+
return link;
}
+/* Construct and return a "transparent proxy" URL
+ reflecting changes made by --adjust-extension to the file component
+ (i.e., "basename") of the original URL, but leaving the "dirname"
+ of the URL (protocol://hostname... portion) untouched.
+
+ Think: populating a squid cache via a recursive wget scrape, where
+ changing URLs to work locally with "file://..." is NOT desirable.
+
+ Example:
+
+ if
+ p = "//foo.com/bar.cgi?xyz"
+ and
+ link->local_name = "docroot/foo.com/bar.cgi?xyz.css"
+ then
+
+ new_construct_func(p, link);
+ will return
+ "//foo.com/bar.cgi?xyz.css"
+
+ Essentially, we do s/$(basename orig_url)/$(basename link->local_name)/
+*/
+static char *
+convert_basename (const char *p, const struct urlpos *link)
+{
+ int len = link->size;
+ char *url = NULL;
+ char *org_basename = NULL, *local_basename = NULL;
+ char *result = NULL;
+
+ if (*p == '"' || *p == '\'')
+ {
+ len -= 2;
+ p++;
+ }
+
+ url = xstrndup (p, len);
+
+ org_basename = strrchr (url, '/');
+ if (org_basename)
+ org_basename++;
+ else
+ org_basename = url;
+
+ local_basename = strrchr (link->local_name, '/');
+ if (local_basename)
+ local_basename++;
+ else
+ local_basename = url;
+
+ /*
+ * If the basenames differ, graft the adjusted basename (local_basename)
+ * onto the original URL.
+ */
+ if (strcmp (org_basename, local_basename) == 0)
+ result = url;
+ else
+ {
+ result = uri_merge (url, local_basename);
+ xfree (url);
+ }
+
+ return result;
+}
+
/* Used by write_backup_file to remember which files have been
written. */
static struct hash_table *converted_files;