diff options
author | DongHun Kwak <dh0128.kwak@samsung.com> | 2021-03-05 10:08:15 +0900 |
---|---|---|
committer | DongHun Kwak <dh0128.kwak@samsung.com> | 2021-03-05 10:08:15 +0900 |
commit | 24d4e855d95e02a5324c2f3d88cfd5cd19830c2c (patch) | |
tree | 371d954e80394a8e72ef95d6ee7d45312c3f87a9 /src/convert.c | |
parent | 0b86d50828d05a27de3ff840d6a06407310393c2 (diff) | |
download | wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.tar.gz wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.tar.bz2 wget-24d4e855d95e02a5324c2f3d88cfd5cd19830c2c.zip |
Imported Upstream version 1.17upstream/1.17
Diffstat (limited to 'src/convert.c')
-rw-r--r-- | src/convert.c | 124 |
1 files changed, 113 insertions, 11 deletions
diff --git a/src/convert.c b/src/convert.c index d6ab10b..df8d58d 100644 --- a/src/convert.c +++ b/src/convert.c @@ -1,6 +1,6 @@ /* Conversion of links to local files. - Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2014 - Free Software Foundation, Inc. + Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, + 2014, 2015 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -46,6 +46,7 @@ as that of the covered work. */ #include "html-url.h" #include "css-url.h" #include "iri.h" +#include "xstrndup.h" static struct hash_table *dl_file_url_map; struct hash_table *dl_url_file_map; @@ -136,8 +137,9 @@ convert_links_in_hashtable (struct hash_table *downloaded_set, form. We do this even if the URL already is in relative form, because our directory structure may not be identical to that on the server (think `-nd', - `--cut-dirs', etc.) */ - cur_url->convert = CO_CONVERT_TO_RELATIVE; + `--cut-dirs', etc.). If --convert-file-only was passed, + we only convert the basename portion of the URL. */ + cur_url->convert = (opt.convert_file_only ? CO_CONVERT_BASENAME_ONLY : CO_CONVERT_TO_RELATIVE); cur_url->local_name = xstrdup (local_name); DEBUGP (("will convert url %s to local %s\n", u->url, local_name)); } @@ -193,7 +195,7 @@ convert_all_links (void) convert_links_in_hashtable (downloaded_css_set, 1, &file_count); secs = ptimer_measure (timer); - logprintf (LOG_VERBOSE, _("Converted %d files in %s seconds.\n"), + logprintf (LOG_VERBOSE, _("Converted links in %d files in %s seconds.\n"), file_count, print_decimal (secs)); ptimer_destroy (timer); @@ -206,6 +208,7 @@ static const char *replace_attr_refresh_hack (const char *, int, FILE *, const char *, int); static char *local_quote_string (const char *, bool); static char *construct_relative (const char *, const char *); +static char *convert_basename (const char *, const struct urlpos *); /* Change the links in one file. LINKS is a list of links in the document, along with their positions and the desired direction of @@ -221,7 +224,7 @@ convert_links (const char *file, struct urlpos *links) struct urlpos *link; int to_url_count = 0, to_file_count = 0; - logprintf (LOG_VERBOSE, _("Converting %s... "), file); + logprintf (LOG_VERBOSE, _("Converting links in %s... "), file); { /* First we do a "dry run": go through the list L and see whether @@ -315,9 +318,32 @@ convert_links (const char *file, struct urlpos *links) DEBUGP (("TO_RELATIVE: %s to %s at position %d in %s.\n", link->url->url, newname, link->pos, file)); + + xfree (newname); + xfree (quoted_newname); + ++to_file_count; + break; + } + case CO_CONVERT_BASENAME_ONLY: + { + char *newname = convert_basename (p, link); + char *quoted_newname = local_quote_string (newname, link->link_css_p); + + if (link->link_css_p) + p = replace_plain (p, link->size, fp, quoted_newname); + else if (!link->link_refresh_p) + p = replace_attr (p, link->size, fp, quoted_newname); + else + p = replace_attr_refresh_hack (p, link->size, fp, quoted_newname, + link->refresh_timeout); + + DEBUGP (("Converted file part only: %s to %s at position %d in %s.\n", + link->url->url, newname, link->pos, file)); + xfree (newname); xfree (quoted_newname); ++to_file_count; + break; } case CO_CONVERT_TO_COMPLETE: @@ -336,6 +362,7 @@ convert_links (const char *file, struct urlpos *links) DEBUGP (("TO_COMPLETE: <something> to %s at position %d in %s.\n", newlink, link->pos, file)); + xfree (quoted_newlink); ++to_url_count; break; @@ -414,14 +441,89 @@ construct_relative (const char *basefile, const char *linkfile) ++basedirs; } - /* Construct LINK as explained above. */ - link = xmalloc (3 * basedirs + strlen (linkfile) + 1); - for (i = 0; i < basedirs; i++) - memcpy (link + 3 * i, "../", 3); - strcpy (link + 3 * i, linkfile); + if (!basedirs && (b = strpbrk (linkfile, "/:")) && *b == ':') + { + link = xmalloc (2 + strlen (linkfile) + 1); + memcpy (link, "./", 2); + strcpy (link + 2, linkfile); + } + else + { + /* Construct LINK as explained above. */ + link = xmalloc (3 * basedirs + strlen (linkfile) + 1); + for (i = 0; i < basedirs; i++) + memcpy (link + 3 * i, "../", 3); + strcpy (link + 3 * i, linkfile); + } + return link; } +/* Construct and return a "transparent proxy" URL + reflecting changes made by --adjust-extension to the file component + (i.e., "basename") of the original URL, but leaving the "dirname" + of the URL (protocol://hostname... portion) untouched. + + Think: populating a squid cache via a recursive wget scrape, where + changing URLs to work locally with "file://..." is NOT desirable. + + Example: + + if + p = "//foo.com/bar.cgi?xyz" + and + link->local_name = "docroot/foo.com/bar.cgi?xyz.css" + then + + new_construct_func(p, link); + will return + "//foo.com/bar.cgi?xyz.css" + + Essentially, we do s/$(basename orig_url)/$(basename link->local_name)/ +*/ +static char * +convert_basename (const char *p, const struct urlpos *link) +{ + int len = link->size; + char *url = NULL; + char *org_basename = NULL, *local_basename = NULL; + char *result = NULL; + + if (*p == '"' || *p == '\'') + { + len -= 2; + p++; + } + + url = xstrndup (p, len); + + org_basename = strrchr (url, '/'); + if (org_basename) + org_basename++; + else + org_basename = url; + + local_basename = strrchr (link->local_name, '/'); + if (local_basename) + local_basename++; + else + local_basename = url; + + /* + * If the basenames differ, graft the adjusted basename (local_basename) + * onto the original URL. + */ + if (strcmp (org_basename, local_basename) == 0) + result = url; + else + { + result = uri_merge (url, local_basename); + xfree (url); + } + + return result; +} + /* Used by write_backup_file to remember which files have been written. */ static struct hash_table *converted_files; |