. We also set the
permissions (guessed as 0644 for plain files and 0755 for
directories as the listing does not give us a clue) and filetype
here. */
tok = strtok(NULL, " ");
if (tok == NULL) continue;
while ((tok != NULL) && (*tok == '\0')) tok = strtok(NULL, " ");
if (tok == NULL) continue;
if (*tok == '<')
{
cur.type = FT_DIRECTORY;
cur.size = 0;
cur.perms = 0755;
DEBUGP (("Directory\n"));
}
else
{
wgint size;
cur.type = FT_PLAINFILE;
errno = 0;
size = str_to_wgint (tok, NULL, 10);
if (size == WGINT_MAX && errno == ERANGE)
cur.size = 0; /* overflow */
else
cur.size = size;
cur.perms = 0644;
DEBUGP (("File, size %s bytes\n", number_to_static_string (cur.size)));
}
cur.linkto = NULL;
/* And put everything into the linked list */
if (!dir)
{
l = dir = xnew (struct fileinfo);
memcpy (l, &cur, sizeof (cur));
l->prev = l->next = NULL;
}
else
{
cur.prev = l;
l->next = xnew (struct fileinfo);
l = l->next;
memcpy (l, &cur, sizeof (cur));
l->next = NULL;
}
cur.name = NULL;
}
xfree (cur.name);
xfree (line);
return dir;
}
/* Convert the VMS-style directory listing read from the stream FP to a
linked list of fileinfo (system-independent) entries. The contents
of FP are considered to be produced by the standard VMS
"DIRECTORY [/SIZE [= ALL]] /DATE [/OWNER] [/PROTECTION]" command,
more or less. (Different VMS FTP servers may have different headers,
and may not supply the same data, but all should be subsets of this.)
VMS normally provides local (server) time and date information.
Define the logical name or environment variable
"WGET_TIMEZONE_DIFFERENTIAL" (seconds) to adjust the receiving local
times if different from the remote local times.
2005-02-23 SMS.
Added code to eliminate "^" escape characters from ODS5 extended file
names. The TCPIP FTP server (V5.4) seems to prefer requests which do
not use the escaped names which it provides.
*/
/* Default permission bits (octal) given to VMS plain files and
   directories by ftp_parse_vms_ls.  They stay in effect unless the
   listing line carries a protection token, which then replaces them. */
#define VMS_DEFAULT_PROT_FILE 0644
#define VMS_DEFAULT_PROT_DIR 0755
/* Return the value (0 - 15) of the hex digit C, or -1 if C is not a
   hex digit.  Both upper- and lower-case letters are accepted. */
static int vms_hex_value( unsigned char c)
{
  if ((c >= '0') && (c <= '9'))
    return c - '0';
  if ((c >= 'A') && (c <= 'F'))
    return c - 'A' + 10;
  if ((c >= 'a') && (c <= 'f'))
    return c - 'a' + 10;
  return -1;
}

/* 2005-02-23 SMS.
   eat_carets().
   Delete ODS5 extended file name escape characters ("^") in the
   original buffer.  STR is rewritten in place; the result is never
   longer than the input.

   Escape forms handled:
     "^hh" (two hex digits)  -> the single byte with code 0xhh
     "^_"                    -> " "
     "^/"                    -> "?"
     "^c"  (any other char)  -> "c"
   A "^" which is the last character of the string has nothing left to
   escape; it is kept literally, and the terminating NUL is never
   stepped over.

   Hex digits are classified by vms_hex_value(), so this routine does
   not consult the char_prop table.

   Note that the current scheme does not handle all EFN cases (Unicode
   escapes, for example), but it could be made more complicated.
*/
static void eat_carets( char *str)
/* char *str; Source pointer. */
{
  char *strd; /* Destination pointer. */
  unsigned char uchr;
  int hi, lo;

  /* Skip ahead to the first "^", if any. */
  while ((*str != '\0') && (*str != '^'))
    str++;

  /* If no caret was found, quit early: nothing needs to move. */
  if (*str == '\0')
    return;

  /* Shift characters leftward as carets are found. */
  strd = str;
  while (*str != '\0')
    {
      uchr = (unsigned char) *str;
      if ((uchr == '^') && (str[1] != '\0'))
        {
          /* Found a caret with something after it.  str[2] is looked
             at only when str[1] is a hex digit (hence not NUL), so the
             read stays inside the string. */
          hi = vms_hex_value ((unsigned char) str[1]);
          lo = (hi >= 0) ? vms_hex_value ((unsigned char) str[2]) : -1;
          if (lo >= 0)
            {
              /* "^hh": char code from the two hex digits. */
              uchr = (unsigned char) ((hi << 4) | lo);
              str += 2;
            }
          else
            {
              /* Simple escaped character: drop the caret and examine
                 the character it escapes. */
              uchr = (unsigned char) *(++str);
              if (uchr == '_')
                {
                  /* Convert escaped "_" to " ". */
                  uchr = ' ';
                }
              else if (uchr == '/')
                {
                  /* Convert escaped "/" (invalid Zip) to "?" (invalid
                     VMS).  A left-over from Info-ZIP code; it at least
                     avoids the directory confusion which an
                     unconverted slash might cause. */
                  uchr = '?';
                }
            }
        }
      /* Else, not a caret (or a trailing one).  Use as-is. */
      *strd++ = (char) uchr;
      str++;
    }

  /* Terminate the destination string. */
  *strd = '\0';
}
/* Parse the VMS-style directory listing read from FP.

   Returns the head of a doubly linked list with one fileinfo per
   listed file (nodes come from xmalloc, names from xstrdup), or NULL
   when the listing holds no entries.  For every entry:
     name    first token of the line, with the version number stripped
             (unless __VMS or PRESERVE_VMS_VERSIONS is defined), "^"
             escapes removed by eat_carets, and a trailing ".DIR" or
             ".DIR;1" dropped;
     type    FT_DIRECTORY for ".DIR" names, FT_PLAINFILE otherwise;
     perms   Owner/Group/World bits taken from a "(S,O,G,W)" protection
             token, else VMS_DEFAULT_PROT_DIR / VMS_DEFAULT_PROT_FILE;
     size    always 0 (VMS reports disk blocks, not bytes);
     linkto  always NULL;
     tstamp  the listed date and time interpreted as local time, plus
             the optional WGET_TIMEZONE_DIFFERENTIAL offset (seconds);
             the current time is the starting point, so fields missing
             from the listing keep their current-time values.  */
static struct fileinfo *
ftp_parse_vms_ls (FILE *fp)
{
  int i, j, len;
  long dt;
  size_t bufsize = 0;
  time_t timenow;
  struct tm *timestruct;
  char date_str[32];
  char *line = NULL, *tok; /* tokenizer */
  struct fileinfo *dir, *l, cur; /* list creation */

  dir = l = NULL;

  /* Skip blank lines, Directory heading, and more blank lines. */
  for (j = 0; (i = getline (&line, &bufsize, fp)) > 0; )
    {
      i = clean_line (line, i);
      if (i <= 0)
        continue; /* Ignore blank line. */

      if ((j == 0) && (line[i - 1] == ']'))
        {
          /* Found Directory heading line.  Next non-blank line
             is significant. */
          j = 1;
        }
      else if (!strncmp (line, "Total of ", 9))
        {
          /* Found "Total of ..." footing line.  No valid data
             will follow (empty directory). */
          i = 0; /* Arrange for early exit. */
          break;
        }
      else
        {
          break; /* Must be significant data. */
        }
    }

  /* Read remainder of file until the next blank line or EOF.  I holds
     the length of the line currently in LINE and drives the loop. */
  cur.name = NULL;
  while (i > 0)
    {
      char *p;

      /* The first token is the file name.  After a long name, other
         data may be on the following line.  A valid directory name ends
         in ".DIR;1" (any case), although some VMS FTP servers may omit
         the version number (";1").
      */
      tok = strtok (line, " ");
      if (tok == NULL) tok = line;
      DEBUGP (("file name: '%s'\n", tok));

      /* Stripping the version number on a VMS system would be wrong.
         It may be foolish on a non-VMS system, too, but that's someone
         else's problem.  (Define PRESERVE_VMS_VERSIONS for proper
         operation on other operating systems.)
         2005-02-23 SMS.
         ODS5 extended file names may contain escaped semi-colons, so
         the version number is identified as right-side decimal digits
         led by a non-escaped semi-colon.  It may be absent.
      */
#if (!defined( __VMS) && !defined( PRESERVE_VMS_VERSIONS))
      for (p = tok + strlen (tok); (--p > tok) && c_isdigit(*p); );
      if (p > tok && (*p == ';') && (*(p - 1) != '^'))
        {
          *p = '\0';
        }
#endif /* (!defined( __VMS) && !defined( PRESERVE_VMS_VERSIONS)) */

      /* 2005-02-23 SMS.
         Eliminate "^" escape characters from ODS5 extended file name.
         (A caret is invalid in an ODS2 name, so this is always safe.)
      */
      eat_carets (tok);
      DEBUGP (("file name-^: '%s'\n", tok));

      /* Differentiate between a directory and any other file.  A VMS
         listing may not include file protections (permissions).  Set a
         default permissions value (according to the file type), which
         may be overwritten later.  Store directory names without the
         ".DIR;1" file type and version number, as the plain name is
         what will work in a CWD command.
      */
      len = strlen (tok);
      if (len >= 4 && !c_strncasecmp(tok + (len - 4), ".DIR", 4))
        {
          *(tok + (len - 4)) = '\0'; /* Discard ".DIR". */
          cur.type = FT_DIRECTORY;
          cur.perms = VMS_DEFAULT_PROT_DIR;
          DEBUGP (("Directory (nv)\n"));
        }
      else if (len >= 6 && !c_strncasecmp (tok + len - 6, ".DIR;1", 6))
        {
          *(tok + (len - 6)) = '\0'; /* Discard ".DIR;1". */
          cur.type = FT_DIRECTORY;
          cur.perms = VMS_DEFAULT_PROT_DIR;
          DEBUGP (("Directory (v)\n"));
        }
      else
        {
          cur.type = FT_PLAINFILE;
          cur.perms = VMS_DEFAULT_PROT_FILE;
          DEBUGP (("File\n"));
        }

      /* A name left over from an entry abandoned via "continue" below
         is released here before it is replaced. */
      xfree (cur.name);
      cur.name = xstrdup (tok);
      DEBUGP (("Name: '%s'\n", cur.name));

      /* Null the date and time string. */
      *date_str = '\0';

      /* VMS lacks symbolic links. */
      cur.linkto = NULL;

      /* VMS reports file sizes in (512-byte) disk blocks, not bytes,
         hence useless for an integrity check based on byte-count.
         Set size to unknown.
      */
      cur.size = 0;

      /* Get token 2, if any.  A long name may force all other data onto
         a second line.  If needed, read the second line.
      */
      tok = strtok (NULL, " ");
      if (tok == NULL)
        {
          DEBUGP (("Getting additional line.\n"));
          i = getline (&line, &bufsize, fp);
          if (i <= 0)
            {
              DEBUGP (("EOF. Leaving listing parser.\n"));
              break;
            }

          /* Second line must begin with " ".  Otherwise, it's a first
             line (and we may be confused).
          */
          i = clean_line (line, i);
          if (i <= 0)
            {
              /* Blank line.  End of significant file listing. */
              DEBUGP (("Blank line. Leaving listing parser.\n"));
              break;
            }
          else if (line[0] != ' ')
            {
              /* Restart the loop with LINE as a new first line; the
                 name-only entry read so far is not added to the list. */
              DEBUGP (("Non-blank in column 1. Must be a new file name?\n"));
              continue;
            }
          else
            {
              tok = strtok (line, " ");
              if (tok == NULL)
                {
                  /* Unexpected non-empty but apparently blank line. */
                  DEBUGP (("Null token. Leaving listing parser.\n"));
                  break;
                }
            }
        }

      /* Analyze tokens.  (Order is not significant, except date must
         precede time.)

         Size: ddd or ddd/ddd (where "ddd" is a decimal number)
         Date: DD-MMM-YYYY
         Time: HH:MM or HH:MM:SS or HH:MM:SS.CC
         Owner: [user] or [user,group]
         Protection: (ppp,ppp,ppp,ppp) (where "ppp" is "RWED" or some
         subset thereof, for System, Owner, Group, World.

         If permission is lacking, info may be replaced by the string:
         "No privilege for attempted operation".
      */
      while (tok != NULL)
        {
          DEBUGP (("Token: >%s<: ", tok));

          if ((strlen (tok) < 12) && (strchr( tok, '-') != NULL))
            {
              /* Date. */
              DEBUGP (("Date.\n"));
              snprintf(date_str, sizeof(date_str), "%s ", tok);
            }
          else if ((strlen (tok) < 12) && (strchr( tok, ':') != NULL))
            {
              /* Time. */
              DEBUGP (("Time. "));
              strncat( date_str,
                       tok,
                       (sizeof( date_str)- strlen (date_str) - 1));
              DEBUGP (("Date time: >%s<\n", date_str));
            }
          else if (strchr (tok, '[') != NULL)
            {
              /* Owner. (Ignore.) */
              DEBUGP (("Owner.\n"));
            }
          else if (strchr (tok, '(') != NULL)
            {
              /* Protections (permissions).  The scan uses its own
                 locals, so the outer loop's line length I is left
                 alone, and the token is walked once instead of calling
                 strlen() on every iteration. */
              const char *pc;
              int perms = 0;
              int commas = 0;

              for (pc = tok; *pc != '\0'; pc++)
                {
                  switch (*pc)
                    {
                    case ',':
                      if (commas == 0)
                        {
                          /* End of the System field: discard it. */
                          perms = 0;
                        }
                      else if (commas < 4)
                        {
                          /* Make room for the next 3-bit field. */
                          perms <<= 3;
                        }
                      commas++;
                      break;
                    case 'R':
                      perms |= 4;
                      break;
                    case 'W':
                      perms |= 2;
                      break;
                    case 'E':
                      perms |= 1;
                      break;
                    case 'D':
                      /* Delete is mapped onto the write bit. */
                      perms |= 2;
                      break;
                    default:
                      /* "(", ")" and anything else: no effect. */
                      break;
                    }
                }
              cur.perms = perms;
              DEBUGP (("Prot. perms = %0o.\n", (unsigned) cur.perms));
            }
          else
            {
              /* Nondescript.  Probably size(s), probably in blocks.
                 Could be "No privilege ..." message.  (Ignore.)
              */
              DEBUGP (("Ignored (size?).\n"));
            }

          tok = strtok (NULL, " ");
        }

      /* Tokens exhausted.  Interpret the data, and fill in the
         structure.
      */
      /* Fill tm timestruct according to date-time string.  Fractional
         seconds are ignored.  Default to current time, if conversion
         fails.
      */
      timenow = time( NULL);
      timestruct = localtime( &timenow );
      if (timestruct != NULL)
        {
          strptime( date_str, "%d-%b-%Y %H:%M:%S", timestruct);
          /* Convert struct tm local time to time_t local time. */
          timenow = mktime (timestruct);
        }
      /* Else: localtime() failed; keep the current time as it is. */

      /* Offset local time according to environment variable (seconds). */
      dt = 0;
      {
        const char *tzdiff = getenv ( "WGET_TIMEZONE_DIFFERENTIAL");

        if (tzdiff != NULL)
          {
            errno = 0;
            dt = strtol (tzdiff, NULL, 10);
            if (errno == ERANGE)
              dt = 0; /* Out of range: ignore instead of applying a clamped value. */
            DEBUGP (("Time differential = %ld.\n", dt));
          }
      }

      if (dt >= 0)
        timenow += dt;
      else
        timenow -= (-dt);

      cur.tstamp = timenow; /* Store the time-stamp. */
      DEBUGP (("Timestamp: %ld\n", (long) cur.tstamp));
      cur.ptype = TT_HOUR_MIN;

      /* Add the data for this item to the linked list, */
      if (!dir)
        {
          l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
          cur.prev = cur.next = NULL;
          memcpy (l, &cur, sizeof (cur));
        }
      else
        {
          cur.prev = l;
          cur.next = NULL;
          l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
          l = l->next;
          memcpy (l, &cur, sizeof (cur));
        }
      /* The list node now owns the name. */
      cur.name = NULL;

      i = getline (&line, &bufsize, fp);
      if (i > 0)
        {
          i = clean_line (line, i);
          if (i <= 0)
            {
              /* Blank line.  End of significant file listing. */
              break;
            }
        }
    }

  xfree (cur.name);
  xfree (line);
  return dir;
}
/* Open the listing stored in FILE and hand it to ftp_parse_ls_fp, which
   picks the parsing routine according to SYSTEM_TYPE.  The system type
   should be based on the result of the "SYST" response of the FTP
   server.

   Returns whatever ftp_parse_ls_fp returns, or NULL (after logging the
   reason) when FILE cannot be opened.  The stream is closed before
   returning. */
struct fileinfo *
ftp_parse_ls (const char *file, const enum stype system_type)
{
  struct fileinfo *entries = NULL;
  FILE *listing = fopen (file, "rb");

  if (listing != NULL)
    {
      entries = ftp_parse_ls_fp (listing, system_type);
      fclose (listing);
    }
  else
    logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));

  return entries;
}
/* Parse the listing available on FP with the routine that matches
   SYSTEM_TYPE and return that routine's result.

   ST_UNIX   -> ftp_parse_unix_ls (fp, 0)
   ST_MACOS  -> ftp_parse_unix_ls (fp, 1)
   ST_VMS    -> ftp_parse_vms_ls (fp)
   ST_WINNT  -> the first character is read and the stream rewound; a
                decimal digit selects ftp_parse_winnt_ls, anything else
                means the server is simulating the UNIX format and
                ftp_parse_unix_ls (fp, 1) is used.
   other     -> a notice is logged and ftp_parse_unix_ls (fp, 0) is
                tried.  */
struct fileinfo *
ftp_parse_ls_fp (FILE *fp, const enum stype system_type)
{
  if (system_type == ST_UNIX)
    return ftp_parse_unix_ls (fp, 0);

  if (system_type == ST_MACOS)
    return ftp_parse_unix_ls (fp, 1);

  if (system_type == ST_VMS)
    return ftp_parse_vms_ls (fp);

  if (system_type == ST_WINNT)
    {
      /* Peek at the first character, then go back to the start so the
         chosen parser sees the whole listing. */
      int first = fgetc (fp);
      rewind (fp);

      if (first < '0' || first > '9')
        return ftp_parse_unix_ls (fp, 1);
      return ftp_parse_winnt_ls (fp);
    }

  logprintf (LOG_NOTQUIET, _("\
Unsupported listing type, trying Unix listing parser.\n"));
  return ftp_parse_unix_ls (fp, 0);
}
/* Stuff for creating FTP index. */
/* The function creates an HTML index containing references to given
directories and files on the appropriate host. The references are
FTP. */
uerr_t
ftp_index (const char *file, struct url *u, struct fileinfo *f)
{
FILE *fp;
char *upwd;
char *htcldir; /* HTML-clean dir name */
char *htclfile; /* HTML-clean file name */
char *urlclfile; /* URL-clean file name */
if (!output_stream)
{
fp = fopen (file, "wb");
if (!fp)
{
logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
return FOPENERR;
}
}
else
fp = output_stream;
if (u->user)
{
char *tmpu, *tmpp; /* temporary, clean user and passwd */
tmpu = url_escape (u->user);
tmpp = u->passwd ? url_escape (u->passwd) : NULL;
if (tmpp)
upwd = concat_strings (tmpu, ":", tmpp, "@", (char *) 0);
else
upwd = concat_strings (tmpu, "@", (char *) 0);
xfree (tmpu);
xfree (tmpp);
}
else
upwd = xstrdup ("");
htcldir = html_quote_string (u->dir);
fprintf (fp, "\n");
fprintf (fp, "\n\n");
fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port);
fprintf (fp, "\n\n\n");
fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port);
fprintf (fp, "
\n
\n\n");
while (f)
{
fprintf (fp, " ");
if (f->tstamp != -1)
{
/* #### Should we translate the months? Or, even better, use
ISO 8601 dates? */
static const char *months[] = {
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
time_t tstamp = f->tstamp;
struct tm *ptm = localtime (&tstamp);
fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
ptm->tm_mday);
if (f->ptype == TT_HOUR_MIN)
fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
else
fprintf (fp, " ");
}
else
fprintf (fp, _("time unknown "));
switch (f->type)
{
case FT_PLAINFILE:
fprintf (fp, _("File "));
break;
case FT_DIRECTORY:
fprintf (fp, _("Directory "));
break;
case FT_SYMLINK:
fprintf (fp, _("Link "));
break;
default:
fprintf (fp, _("Not sure "));
break;
}
htclfile = html_quote_string (f->name);
urlclfile = url_escape_unsafe_and_reserved (f->name);
fprintf (fp, "host, u->port);
if (*u->dir != '/')
putc ('/', fp);
/* XXX: Should probably URL-escape dir components here, rather
* than just HTML-escape, for consistency with the next bit where
* we use urlclfile for the file component. Anyway, this is safer
* than what we had... */
fprintf (fp, "%s", htcldir);
if (*u->dir)
putc ('/', fp);
fprintf (fp, "%s", urlclfile);
if (f->type == FT_DIRECTORY)
putc ('/', fp);
fprintf (fp, "\">%s", htclfile);
if (f->type == FT_DIRECTORY)
putc ('/', fp);
fprintf (fp, " ");
if (f->type == FT_PLAINFILE)
fprintf (fp, _(" (%s bytes)"), number_to_static_string (f->size));
else if (f->type == FT_SYMLINK)
fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
putc ('\n', fp);
xfree (htclfile);
xfree (urlclfile);
f = f->next;
}
fprintf (fp, "
\n\n\n");
xfree (htcldir);
xfree (upwd);
if (!output_stream)
fclose (fp);
else
fflush (fp);
return FTPOK;
}