diff options
author | Yann Collet <Cyan4973@users.noreply.github.com> | 2019-04-22 17:43:01 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-04-22 17:43:01 -0700 |
commit | 291b3d8b7e61efc260639c731a3b429419cbc471 (patch) | |
tree | 786ee1fb73fe6ada8a3b7f4ec6a612c771d87b72 /programs | |
parent | c4fe7a20476c53f9814ea990300481c2fcaaa94e (diff) | |
parent | 35b83a921f8030ee9b71cbb7324dc7aafaeb9878 (diff) | |
download | lz4-291b3d8b7e61efc260639c731a3b429419cbc471.tar.gz lz4-291b3d8b7e61efc260639c731a3b429419cbc471.tar.bz2 lz4-291b3d8b7e61efc260639c731a3b429419cbc471.zip |
Merge pull request #691 from lz4/list
--list
Diffstat (limited to 'programs')
-rw-r--r-- | programs/lz4.1 | 24 | ||||
-rw-r--r-- | programs/lz4.1.md | 5 | ||||
-rw-r--r-- | programs/lz4cli.c | 12 | ||||
-rw-r--r-- | programs/lz4io.c | 147 | ||||
-rw-r--r-- | programs/lz4io.h | 5 |
5 files changed, 175 insertions, 18 deletions
diff --git a/programs/lz4.1 b/programs/lz4.1 index 1576e45..ad0c12c 100644 --- a/programs/lz4.1 +++ b/programs/lz4.1 @@ -1,5 +1,5 @@ . -.TH "LZ4" "1" "April 2019" "lz4 1.9.0" "User Commands" +.TH "LZ4" "1" "April 2019" "lz4 1.9.1" "User Commands" . .SH "NAME" \fBlz4\fR \- lz4, unlz4, lz4cat \- Compress or decompress \.lz4 files @@ -23,9 +23,6 @@ When writing scripts that need to decompress files, it is recommended to always \fBlz4\fR supports a command line syntax similar \fIbut not identical\fR to \fBgzip(1)\fR\. Differences are : . .IP "\(bu" 4 -\fBlz4\fR preserves original files -. -.IP "\(bu" 4 \fBlz4\fR compresses a single file by default (see \fB\-m\fR for multiple files) . .IP "\(bu" 4 @@ -35,19 +32,16 @@ When writing scripts that need to decompress files, it is recommended to always \fBlz4 file\.lz4\fR will default to decompression (use \fB\-z\fR to force compression) . .IP "\(bu" 4 -\fBlz4\fR shows real\-time notification statistics during compression or decompression of a single file (use \fB\-q\fR to silence them) -. -.IP "\(bu" 4 -If no destination name is provided, result is sent to \fBstdout\fR \fIexcept if stdout is the console\fR\. +\fBlz4\fR preserves original files . .IP "\(bu" 4 -If no destination name is provided, \fBand\fR if \fBstdout\fR is the console, \fBfile\fR is compressed into \fBfile\.lz4\fR\. +\fBlz4\fR shows real\-time notification statistics during compression or decompression of a single file (use \fB\-q\fR to silence them) . .IP "\(bu" 4 -As a consequence of previous rules, note the following example : \fBlz4 file | consumer\fR sends compressed data to \fBconsumer\fR through \fBstdout\fR, hence it does \fInot\fR create \fBfile\.lz4\fR\. +When no destination is specified, result is sent on implicit output, which depends on \fBstdout\fR status\. When \fBstdout\fR \fIis Not the console\fR, it becomes the implicit output\. Otherwise, if \fBstdout\fR is the console, the implicit output is \fBfilename\.lz4\fR\. . .IP "\(bu" 4 -Another consequence of those rules is that to run \fBlz4\fR under \fBnohup\fR, you should provide a destination file: \fBnohup lz4 file file\.lz4\fR, because \fBnohup\fR writes the specified command\'s output to a file\. +It is considered bad practice to rely on implicit output in scripts\. because the script\'s environment may change\. Always use explicit output in scripts\. \fB\-c\fR ensures that output will be \fBstdout\fR\. Conversely, providing a destination name, or using \fB\-m\fR ensures that the output will be either the specified name, or \fBfilename\.lz4\fR respectively\. . .IP "" 0 . @@ -55,7 +49,7 @@ Another consequence of those rules is that to run \fBlz4\fR under \fBnohup\fR, y Default behaviors can be modified by opt\-in commands, detailed below\. . .IP "\(bu" 4 -\fBlz4 \-m\fR makes it possible to provide multiple input filenames, which will be compressed into files using suffix \fB\.lz4\fR\. Progress notifications are also disabled by default (use \fB\-v\fR to enable them)\. This mode has a behavior which more closely mimics \fBgzip\fR command line, with the main remaining difference being that source files are preserved by default\. +\fBlz4 \-m\fR makes it possible to provide multiple input filenames, which will be compressed into files using suffix \fB\.lz4\fR\. Progress notifications become disabled by default (use \fB\-v\fR to enable them)\. This mode has a behavior which more closely mimics \fBgzip\fR command line, with the main remaining difference being that source files are preserved by default\. . .IP "\(bu" 4 Similarly, \fBlz4 \-m \-d\fR can decompress multiple \fB*\.lz4\fR files\. @@ -111,6 +105,10 @@ Test the integrity of compressed \fB\.lz4\fR files\. The decompressed data is di \fB\-b#\fR Benchmark mode, using \fB#\fR compression level\. . +.TP +\fB\-\-list\fR +List information about \.lz4 files\. note : current implementation is limited to single\-frame \.lz4 files\. +. .SS "Operation modifiers" . .TP @@ -145,7 +143,7 @@ Force write to standard output, even if it is the console\. . .TP \fB\-m\fR \fB\-\-multiple\fR -Multiple input files\. Compressed file names will be appended a \fB\.lz4\fR suffix\. This mode also reduces notification level\. \fBlz4 \-m\fR has a behavior equivalent to \fBgzip \-k\fR (it preserves source files by default)\. +Multiple input files\. Compressed file names will be appended a \fB\.lz4\fR suffix\. This mode also reduces notification level\. Can also be used to list multiple files\. \fBlz4 \-m\fR has a behavior equivalent to \fBgzip \-k\fR (it preserves source files by default)\. . .TP \fB\-r\fR diff --git a/programs/lz4.1.md b/programs/lz4.1.md index ffda3ff..8874467 100644 --- a/programs/lz4.1.md +++ b/programs/lz4.1.md @@ -113,6 +113,10 @@ only the latest one will be applied. * `-b#`: Benchmark mode, using `#` compression level. +* `--list`: + List information about .lz4 files. + note : current implementation is limited to single-frame .lz4 files. + ### Operation modifiers * `-#`: @@ -160,6 +164,7 @@ only the latest one will be applied. Multiple input files. Compressed file names will be appended a `.lz4` suffix. This mode also reduces notification level. + Can also be used to list multiple files. `lz4 -m` has a behavior equivalent to `gzip -k` (it preserves source files by default). diff --git a/programs/lz4cli.c b/programs/lz4cli.c index 82f2fac..39ff1ea 100644 --- a/programs/lz4cli.c +++ b/programs/lz4cli.c @@ -141,6 +141,7 @@ static int usage_advanced(const char* exeName) DISPLAY( " -BX : enable block checksum (default:disabled) \n"); DISPLAY( "--no-frame-crc : disable stream checksum (default:enabled) \n"); DISPLAY( "--content-size : compressed frame includes original size (default:not present)\n"); + DISPLAY( "--list : lists information about .lz4 files. Useful if compressed with --content-size flag.\n"); DISPLAY( "--[no-]sparse : sparse mode (default:enabled on file, disabled on stdout)\n"); DISPLAY( "--favor-decSpeed: compressed files decompress faster, but are less compressed \n"); DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %i)\n", 1); @@ -286,7 +287,7 @@ static int longCommandWArg(const char** stringPtr, const char* longCommand) return result; } -typedef enum { om_auto, om_compress, om_decompress, om_test, om_bench } operationMode_e; +typedef enum { om_auto, om_compress, om_decompress, om_test, om_bench, om_list } operationMode_e; /** determineOpMode() : * auto-determine operation mode, based on input filename extension @@ -383,6 +384,7 @@ int main(int argc, const char** argv) if (!strcmp(argument, "--no-frame-crc")) { LZ4IO_setStreamChecksumMode(prefs, 0); continue; } if (!strcmp(argument, "--content-size")) { LZ4IO_setContentSize(prefs, 1); continue; } if (!strcmp(argument, "--no-content-size")) { LZ4IO_setContentSize(prefs, 0); continue; } + if (!strcmp(argument, "--list")) { mode = om_list; continue; } if (!strcmp(argument, "--sparse")) { LZ4IO_setSparseFile(prefs, 2); continue; } if (!strcmp(argument, "--no-sparse")) { LZ4IO_setSparseFile(prefs, 0); continue; } if (!strcmp(argument, "--favor-decSpeed")) { LZ4IO_favorDecSpeed(prefs, 1); continue; } @@ -691,7 +693,7 @@ int main(int argc, const char** argv) break; } - if (multiple_inputs==0) assert(output_filename); + if (multiple_inputs==0 && mode != om_list) assert(output_filename); /* when multiple_inputs==1, output_filename may simply be useless, * however, output_filename must be !NULL for next strcmp() tests */ if (!output_filename) output_filename = "*\\dummy^!//"; @@ -720,6 +722,12 @@ int main(int argc, const char** argv) } else { operationResult = DEFAULT_DECOMPRESSOR(prefs, input_filename, output_filename); } + } else if (mode == om_list){ + if(!multiple_inputs){ + inFileNames[ifnIdx++] = input_filename; + } + operationResult = LZ4IO_displayCompressedFilesInfo(inFileNames, ifnIdx); + inFileNames=NULL; } else { /* compression is default action */ if (legacy_format) { DISPLAYLEVEL(3, "! Generating LZ4 Legacy format (deprecated) ! \n"); diff --git a/programs/lz4io.c b/programs/lz4io.c index 121bd44..960c451 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -53,11 +53,11 @@ #include <time.h> /* clock */ #include <sys/types.h> /* stat64 */ #include <sys/stat.h> /* stat64 */ -#include "lz4io.h" #include "lz4.h" /* still required for legacy format */ #include "lz4hc.h" /* still required for legacy format */ #define LZ4F_STATIC_LINKING_ONLY #include "lz4frame.h" +#include "lz4io.h" /***************************** @@ -1230,7 +1230,9 @@ int LZ4IO_decompressFilename(LZ4IO_prefs_t* const prefs, const char* input_filen } -int LZ4IO_decompressMultipleFilenames(LZ4IO_prefs_t* const prefs, const char** inFileNamesTable, int ifntSize, const char* suffix) +int LZ4IO_decompressMultipleFilenames(LZ4IO_prefs_t* const prefs, + const char** inFileNamesTable, int ifntSize, + const char* suffix) { int i; int skippedFiles = 0; @@ -1250,7 +1252,12 @@ int LZ4IO_decompressMultipleFilenames(LZ4IO_prefs_t* const prefs, const char** i missingFiles += LZ4IO_decompressSrcFile(prefs, ress, inFileNamesTable[i], stdoutmark); continue; } - if (ofnSize <= ifnSize-suffixSize+1) { free(outFileName); ofnSize = ifnSize + 20; outFileName = (char*)malloc(ofnSize); if (outFileName==NULL) return ifntSize; } + if (ofnSize <= ifnSize-suffixSize+1) { + free(outFileName); + ofnSize = ifnSize + 20; + outFileName = (char*)malloc(ofnSize); + if (outFileName==NULL) return ifntSize; + } if (ifnSize <= suffixSize || strcmp(suffixPtr, suffix) != 0) { DISPLAYLEVEL(1, "File extension doesn't match expected LZ4_EXTENSION (%4s); will not process file: %s\n", suffix, inFileNamesTable[i]); skippedFiles++; @@ -1265,3 +1272,137 @@ int LZ4IO_decompressMultipleFilenames(LZ4IO_prefs_t* const prefs, const char** i free(outFileName); return missingFiles + skippedFiles; } + + +/* ********************************************************************* */ +/* ********************** LZ4 --list command *********************** */ +/* ********************************************************************* */ + +typedef struct { + LZ4F_frameInfo_t frameInfo; + const char* fileName; + unsigned long long fileSize; +} LZ4IO_cFileInfo_t; + +#define LZ4IO_INIT_CFILEINFO { LZ4F_INIT_FRAMEINFO, NULL, 0ULL } + + +typedef enum { LZ4IO_LZ4F_OK, LZ4IO_format_not_known, LZ4IO_not_a_file } LZ4IO_infoResult; + +/* This function is limited, + * it only works fine for a file consisting of a single valid frame using LZ4 Frame specification. + * It will not look at content beyond first frame header. + * It's also unable to parse legacy frames, nor skippable ones. + * + * Things to improve : + * - check the entire file for additional content after first frame + * + combine results from multiple frames, give total + * - Optional : + * + report nb of blocks, hence max. possible decompressed size (when not reported in header) + */ +static LZ4IO_infoResult +LZ4IO_getCompressedFileInfo(LZ4IO_cFileInfo_t* cfinfo, const char* input_filename) +{ + LZ4IO_infoResult result = LZ4IO_format_not_known; /* default result (error) */ + + if (!UTIL_isRegFile(input_filename)) return LZ4IO_not_a_file; + cfinfo->fileSize = UTIL_getFileSize(input_filename); + + /* Get filename without path prefix */ + { const char* b = strrchr(input_filename, '/'); + if (!b) { + b = strrchr(input_filename, '\\'); + } + if (b && b != input_filename) { + b++; + } else { + b = input_filename; + } + cfinfo->fileName = b; + } + + /* Read file and extract header */ + { size_t const hSize = LZ4F_HEADER_SIZE_MAX; + size_t readSize=0; + + void* const buffer = malloc(hSize); + if (!buffer) EXM_THROW(21, "Allocation error : not enough memory"); + + { FILE* const finput = LZ4IO_openSrcFile(input_filename); + if (finput) { + readSize = fread(buffer, 1, hSize, finput); + fclose(finput); + } } + + if (readSize > 0) { + LZ4F_dctx* dctx; + if (!LZ4F_isError(LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION))) { + if (!LZ4F_isError(LZ4F_getFrameInfo(dctx, &cfinfo->frameInfo, buffer, &readSize))) { + result = LZ4IO_LZ4F_OK; + } } + LZ4F_freeDecompressionContext(dctx); + } + + /* clean */ + free(buffer); + } + + return result; +} + + +/* buffer : must be a valid memory area of at least 4 bytes */ +const char* LZ4IO_blockTypeID(int sizeID, int blockMode, char* buffer) +{ + buffer[0] = 'B'; + assert(sizeID >= 4); assert(sizeID <=7); + buffer[1] = (char)(sizeID + '0'); + buffer[2] = (blockMode == LZ4F_blockIndependent) ? 'I' : 'D'; + buffer[3] = 0; + return buffer; +} + + +int LZ4IO_displayCompressedFilesInfo(const char** inFileNames, size_t ifnIdx) +{ + int result = 0; + size_t idx; + + DISPLAY("%5s %20s %20s %10s %7s %s\n", + "Block", "Compressed", "Uncompressed", "Ratio", "Check", "Filename"); + + for (idx=0; idx<ifnIdx; idx++) { + /* Get file info */ + LZ4IO_cFileInfo_t cfinfo = LZ4IO_INIT_CFILEINFO; + LZ4IO_infoResult const op_result = LZ4IO_getCompressedFileInfo(&cfinfo, inFileNames[idx]); + if (op_result != LZ4IO_LZ4F_OK) { + if (op_result == LZ4IO_not_a_file) { + DISPLAYLEVEL(1, "lz4: %s is not a regular file \n", inFileNames[idx]); + } else { + assert(op_result == LZ4IO_format_not_known); + DISPLAYLEVEL(1, "lz4: %s: File format not recognized \n", inFileNames[idx]); + } + result = 1; + continue; + } + if (cfinfo.frameInfo.contentSize) { + char buffer[5]; + double const ratio = (double)cfinfo.fileSize / cfinfo.frameInfo.contentSize; + DISPLAY("%5s %20llu %20llu %8.4f %7s %s \n", + LZ4IO_blockTypeID(cfinfo.frameInfo.blockSizeID, cfinfo.frameInfo.blockMode, buffer), + cfinfo.fileSize, + cfinfo.frameInfo.contentSize, ratio, + cfinfo.frameInfo.contentChecksumFlag ? "XXH32" : "-", + cfinfo.fileName); + } else { + char buffer[5]; + DISPLAY("%5s %20llu %20s %10s %7s %s \n", + LZ4IO_blockTypeID(cfinfo.frameInfo.blockSizeID, cfinfo.frameInfo.blockMode, buffer), + cfinfo.fileSize, + "-", "-", + cfinfo.frameInfo.contentChecksumFlag ? "XXH32" : "-", + cfinfo.fileName); + } + } + return result; +} diff --git a/programs/lz4io.h b/programs/lz4io.h index 54d49be..213dc95 100644 --- a/programs/lz4io.h +++ b/programs/lz4io.h @@ -124,4 +124,9 @@ void LZ4IO_setRemoveSrcFile(LZ4IO_prefs_t* const prefs, unsigned flag); void LZ4IO_favorDecSpeed(LZ4IO_prefs_t* const prefs, int favor); +/* implement --list + * @return 0 on success, 1 on error */ +int LZ4IO_displayCompressedFilesInfo(const char** inFileNames, size_t ifnIdx); + + #endif /* LZ4IO_H_237902873 */ |