diff options
Diffstat (limited to 'src/ToolBox/superpmi/mcs/verbmerge.cpp')
-rw-r--r-- | src/ToolBox/superpmi/mcs/verbmerge.cpp | 470 |
1 files changed, 470 insertions, 0 deletions
diff --git a/src/ToolBox/superpmi/mcs/verbmerge.cpp b/src/ToolBox/superpmi/mcs/verbmerge.cpp new file mode 100644 index 0000000000..c4acfd8769 --- /dev/null +++ b/src/ToolBox/superpmi/mcs/verbmerge.cpp @@ -0,0 +1,470 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +#include "standardpch.h" +#include "verbmerge.h" +#include "simpletimer.h" +#include "logging.h" + +// Do reads/writes in large 256MB chunks. +#define BUFFER_SIZE 0x10000000 + +// MergePathStrings: take two file system path components, compose them together, and return the merged pathname string. +// The caller must delete the returned string with delete[]. +// +// static +char* verbMerge::MergePathStrings(const char* dir, const char* file) +{ + size_t dirlen = strlen(dir); + size_t filelen = strlen(file); + size_t newlen = dirlen + 1 /* slash */ + filelen + 1 /* null */; + char* newpath = new char[newlen]; + strcpy(newpath, dir); + strcat(newpath, DIRECTORY_SEPARATOR_STR_A); + strcat(newpath, file); + return newpath; +} + +// AppendFile: append the file named by 'fileName' to the output file referred to by 'hFileOut'. The 'hFileOut' +// handle is assumed to be open, and the file position is assumed to be at the correct spot for writing, to append. +// +// 'buffer' is memory that can be used to do reading/buffering. +// +// static +int verbMerge::AppendFile(HANDLE hFileOut, const char* fileName, unsigned char* buffer, size_t bufferSize) +{ + int result = 0; // default to zero == success + + LogInfo("Appending file '%s'", fileName); + + HANDLE hFileIn = CreateFileA(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL|FILE_FLAG_SEQUENTIAL_SCAN, NULL); + if (hFileIn == INVALID_HANDLE_VALUE) + { + LogError("Failed to open input file '%s'. GetLastError()=%u", fileName, GetLastError()); + return -1; + } + + LARGE_INTEGER fileSize; + if (GetFileSizeEx(hFileIn, &fileSize) == 0) + { + LogError("GetFileSizeEx on '%s' failed. GetLastError()=%u", fileName, GetLastError()); + result = -1; + goto CLEAN_UP; + } + + for (LONGLONG offset = 0; offset < fileSize.QuadPart; offset += bufferSize) + { + DWORD bytesRead = -1; + BOOL res = ReadFile(hFileIn, buffer, (DWORD)bufferSize, &bytesRead, nullptr); + if (!res) + { + LogError("Failed to read '%s' from offset %lld. GetLastError()=%u", fileName, offset, GetLastError()); + result = -1; + goto CLEAN_UP; + } + DWORD bytesWritten = -1; + BOOL res2 = WriteFile(hFileOut, buffer, bytesRead, &bytesWritten, nullptr); + if (!res2) + { + LogError("Failed to write output file at offset %lld. GetLastError()=%u", offset, GetLastError()); + result = -1; + goto CLEAN_UP; + } + if (bytesRead != bytesWritten) + { + LogError("Failed to read/write matching bytes %u!=%u", bytesRead, bytesWritten); + result = -1; + goto CLEAN_UP; + } + } + +CLEAN_UP: + + if (CloseHandle(hFileIn) == 0) + { + LogError("CloseHandle failed. GetLastError()=%u", GetLastError()); + result = -1; + } + + return result; +} + +// Return true if this is a directory +// +// static +bool verbMerge::DirectoryFilterDirectories(WIN32_FIND_DATAA* findData) +{ + if ((findData->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) + { + // It's a directory. See if we want to exclude it because of other reasons, such as: + // 1. reparse points: avoid the possibility of loops + // 2. system directories + // 3. hidden directories + // 4. "." or ".." + +#ifndef FEATURE_PAL // FILE_ATTRIBUTE_REPARSE_POINT is not defined in the PAL + if ((findData->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) != 0) + return false; +#endif // !FEATURE_PAL + if ((findData->dwFileAttributes & FILE_ATTRIBUTE_SYSTEM) != 0) + return false; + if ((findData->dwFileAttributes & FILE_ATTRIBUTE_HIDDEN) != 0) + return false; + + if (strcmp(findData->cFileName, ".") == 0) + return false; + if (strcmp(findData->cFileName, "..") == 0) + return false; + + return true; + } + + return false; +} + +// Return true if this is a file. +// +// static +bool verbMerge::DirectoryFilterFile(WIN32_FIND_DATAA* findData) +{ + if ((findData->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0) + { + // This is not a directory, so it must be a file. + return true; + } + + return false; +} + +// static +int __cdecl verbMerge::WIN32_FIND_DATAA_qsort_helper(const void* p1, const void* p2) +{ + const WIN32_FIND_DATAA* file1 = (WIN32_FIND_DATAA*)p1; + const WIN32_FIND_DATAA* file2 = (WIN32_FIND_DATAA*)p2; + return strcmp(file1->cFileName, file2->cFileName); +} + +// Enumerate a directory for the files specified by "searchPattern". For each element in the directory, +// pass it to the filter function. If the filter returns true, we keep it, otherwise we ignore it. Return +// an array of information for the files that we kept, sorted by filename. +// +// Returns 0 on success, non-zero on failure. +// If success, fileArray and elemCount are set. +// +// static +int verbMerge::FilterDirectory(const char* searchPattern, DirectoryFilterFunction_t filter, /* out */ WIN32_FIND_DATAA** ppFileArray, int* pElemCount) +{ + // First, build up a list, then create an array and sort it after we know how many elements there are. + struct findDataList + { + findDataList(WIN32_FIND_DATAA* newFindData, findDataList* newNext) + : findData(*newFindData) + , next(newNext) + { + } + + static void DeleteList(findDataList* root) + { + for (findDataList* loop = root; loop != nullptr; ) + { + findDataList* tmp = loop; + loop = loop->next; + delete tmp; + } + } + + WIN32_FIND_DATAA findData; + findDataList* next; + }; + + WIN32_FIND_DATAA* retArray = nullptr; + findDataList* first = nullptr; + + int result = 0; // default to zero == success + int elemCount = 0; + + // NOTE: this function only works on Windows 7 and later. + WIN32_FIND_DATAA findData; + HANDLE hSearch; +#ifdef FEATURE_PAL + // PAL doesn't have FindFirstFileEx(). So just use FindFirstFile(). The only reason we use + // the Ex version is potentially better performance (don't populate short name; use large fetch), + // not functionality. + hSearch = FindFirstFileA(searchPattern, &findData); +#else // !FEATURE_PAL + hSearch = FindFirstFileExA(searchPattern, + FindExInfoBasic, // We don't care about the short names + &findData, + FindExSearchNameMatch, // standard name matching + NULL, + FIND_FIRST_EX_LARGE_FETCH); +#endif // !FEATURE_PAL + + if (hSearch == INVALID_HANDLE_VALUE) + { + DWORD lastErr = GetLastError(); + if (lastErr == ERROR_FILE_NOT_FOUND) + { + // This is ok; there was just nothing matching the pattern. + } + else + { + LogError("Failed to find pattern '%s'. GetLastError()=%u", searchPattern, GetLastError()); + } + goto CLEAN_UP; + } + + while (true) + { + // Do something with findData... + + if (filter(&findData)) + { + // Prepend it to the list. + first = new findDataList(&findData, first); + ++elemCount; + } + + BOOL ok = FindNextFileA(hSearch, &findData); + if (!ok) + { + DWORD err = GetLastError(); + if (err != ERROR_NO_MORE_FILES) + { + LogError("Failed to find next file. GetLastError()=%u", GetLastError()); + result = -1; + goto CLEAN_UP; + } + break; + } + } + + // Now sort the list. Create an array to put everything in. + + int i; + + retArray = new WIN32_FIND_DATAA[elemCount]; + i = 0; + for (findDataList* tmp = first; tmp != nullptr; tmp = tmp->next) + { + retArray[i++] = tmp->findData; + } + + qsort(retArray, elemCount, sizeof(retArray[0]), WIN32_FIND_DATAA_qsort_helper); + +CLEAN_UP: + + findDataList::DeleteList(first); + + if ((hSearch != INVALID_HANDLE_VALUE) && !FindClose(hSearch)) + { + LogError("Failed to close search handle. GetLastError()=%u", GetLastError()); + delete[] retArray; + return -1; + } + + *ppFileArray = retArray; + *pElemCount = elemCount; + return result; +} + +// Append all files in the given directory matching the file pattern. +// +// static +int verbMerge::AppendAllInDir(HANDLE hFileOut, const char* dir, const char* file, unsigned char* buffer, size_t bufferSize, bool recursive, /* out */ LONGLONG* size) +{ + int result = 0; // default to zero == success + LONGLONG totalSize = 0; + + char* searchPattern = MergePathStrings(dir, file); + + WIN32_FIND_DATAA* fileArray = nullptr; + int elemCount = 0; + result = FilterDirectory(searchPattern, DirectoryFilterFile, &fileArray, &elemCount); + if (result != 0) + { + goto CLEAN_UP; + } + + for (int i = 0; i < elemCount; i++) + { + const WIN32_FIND_DATAA& findData = fileArray[i]; + char* fileFullPath = MergePathStrings(dir, findData.cFileName); + + // Is it zero length? If so, skip it. + if ((findData.nFileSizeLow == 0) && (findData.nFileSizeHigh == 0)) + { + LogInfo("Skipping zero-length file '%s'", fileFullPath); + } + else + { + result = AppendFile(hFileOut, fileFullPath, buffer, bufferSize); + if (result != 0) + { + // Error was already logged. + delete[] fileFullPath; + goto CLEAN_UP; + } + } + + delete[] fileFullPath; + totalSize += ((LONGLONG)findData.nFileSizeHigh << 32) + (LONGLONG)findData.nFileSizeLow; + } + + // If we need to recurse, then search the directory again for directories, and recursively search each one. + if (recursive) + { + delete[] searchPattern; + delete[] fileArray; + + searchPattern = MergePathStrings(dir, "*"); + fileArray = nullptr; + elemCount = 0; + result = FilterDirectory(searchPattern, DirectoryFilterDirectories, &fileArray, &elemCount); + if (result != 0) + { + goto CLEAN_UP; + } + + LONGLONG dirSize = 0; + for (int i = 0; i < elemCount; i++) + { + const WIN32_FIND_DATAA& findData = fileArray[i]; + + char* fileFullPath = MergePathStrings(dir, findData.cFileName); + result = AppendAllInDir(hFileOut, fileFullPath, file, buffer, bufferSize, recursive, &dirSize); + delete[] fileFullPath; + if (result != 0) + { + // Error was already logged. + goto CLEAN_UP; + } + + totalSize += dirSize; + } + } + +CLEAN_UP: + + delete[] searchPattern; + delete[] fileArray; + + if (result == 0) + { + *size = totalSize; + } + + return result; +} + +// Merge a set of .MC files into an output .MCH file. The .MC files to merge are given as a pattern, one of: +// 1. *.mc -- simple pattern. Assumes current directory. +// 2. foo\bar\*.mc -- simple pattern with relative directory. +// 3. c:\foo\bar\baz\*.mc -- simple pattern with full path. +// If no pattern is given, then the last component of the path is expected to be a directory name, and the pattern is assumed to be "*" (that is, all files). +// +// If "recursive" is true, then the pattern is searched for in the specified directory (or implicit current directory) and +// all sub-directories, recursively. +// +// static +int verbMerge::DoWork(const char* nameOfOutputFile, const char* pattern, bool recursive) +{ + int result = 0; // default to zero == success + SimpleTimer st1; + + LogInfo("Merging files matching '%s' into '%s'", pattern, nameOfOutputFile); + + HANDLE hFileOut = CreateFileA(nameOfOutputFile, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL|FILE_FLAG_SEQUENTIAL_SCAN, NULL); + if (hFileOut == INVALID_HANDLE_VALUE) + { + LogError("Failed to open output file '%s'. GetLastError()=%u", nameOfOutputFile, GetLastError()); + return -1; + } + + // Create a buffer we can use for all the copies. + unsigned char* buffer = new unsigned char[BUFFER_SIZE]; + char* dir = nullptr; + const char* file = nullptr; + + dir = _strdup(pattern); + char* lastSlash = strrchr(dir, DIRECTORY_SEPARATOR_CHAR_A); + if (lastSlash == NULL) + { + // The user may have passed a relative path without a slash, or the current directory. + // If there is a wildcard, we use it as the file pattern. If there isn't, we assume it's a relative directory name + // and use it as a directory, with "*" as the file pattern. + const char* wildcard = strchr(dir, '*'); + if (wildcard == NULL) + { + file = "*"; + } + else + { + file = dir; + dir = _strdup("."); + } + } + else + { + const char* wildcard = strchr(lastSlash, '*'); + if (wildcard == NULL) + { + file = "*"; + + // Minor canonicalization: if there is a trailing last slash, strip it (probably should do this in a loop...) + if (*(lastSlash + 1) == '\0') + { + *lastSlash = '\0'; + } + } + else + { + // ok, we found a wildcard after the last slash, so assume there is a pattern. Strip it at the last slash. + *lastSlash = '\0'; + file = lastSlash + 1; + } + } + + LONGLONG totalSize = 0; + LONGLONG dirSize = 0; + + st1.Start(); + + result = AppendAllInDir(hFileOut, dir, file, buffer, BUFFER_SIZE, recursive, &dirSize); + if (result != 0) + { + goto CLEAN_UP; + } + totalSize += dirSize; + + st1.Stop(); + + LogInfo("Read/Wrote %lld MB @ %4.2f MB/s.", + totalSize/(1000*1000), + (((double)totalSize)/(1000*1000))/st1.GetSeconds()); //yes yes.. http://en.wikipedia.org/wiki/Megabyte_per_second#Megabyte_per_second + +CLEAN_UP: + + free((void*)dir); + delete[] buffer; + + if (CloseHandle(hFileOut) == 0) + { + LogError("CloseHandle failed. GetLastError()=%u", GetLastError()); + result = -1; + } + + if (result != 0) + { + // There was a failure. Delete the output file, to avoid leaving some half-created file. + BOOL ok = DeleteFileA(nameOfOutputFile); + if (!ok) + { + LogError("Failed to delete file after MCS /merge failed. GetLastError()=%u", GetLastError()); + } + } + + return result; +} |