summaryrefslogtreecommitdiff
path: root/src/ToolBox/superpmi/mcs/verbmerge.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/ToolBox/superpmi/mcs/verbmerge.cpp')
-rw-r--r--src/ToolBox/superpmi/mcs/verbmerge.cpp470
1 files changed, 470 insertions, 0 deletions
diff --git a/src/ToolBox/superpmi/mcs/verbmerge.cpp b/src/ToolBox/superpmi/mcs/verbmerge.cpp
new file mode 100644
index 0000000000..c4acfd8769
--- /dev/null
+++ b/src/ToolBox/superpmi/mcs/verbmerge.cpp
@@ -0,0 +1,470 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+//
+
+#include "standardpch.h"
+#include "verbmerge.h"
+#include "simpletimer.h"
+#include "logging.h"
+
+// Do reads/writes in large 256MB chunks.
+#define BUFFER_SIZE 0x10000000
+
+// MergePathStrings: take two file system path components, compose them together, and return the merged pathname string.
+// The caller must delete the returned string with delete[].
+//
+// static
+char* verbMerge::MergePathStrings(const char* dir, const char* file)
+{
+ size_t dirlen = strlen(dir);
+ size_t filelen = strlen(file);
+ size_t newlen = dirlen + 1 /* slash */ + filelen + 1 /* null */;
+ char* newpath = new char[newlen];
+ strcpy(newpath, dir);
+ strcat(newpath, DIRECTORY_SEPARATOR_STR_A);
+ strcat(newpath, file);
+ return newpath;
+}
+
+// AppendFile: append the file named by 'fileName' to the output file referred to by 'hFileOut'. The 'hFileOut'
+// handle is assumed to be open, and the file position is assumed to be at the correct spot for writing, to append.
+//
+// 'buffer' is memory that can be used to do reading/buffering.
+//
+// static
+int verbMerge::AppendFile(HANDLE hFileOut, const char* fileName, unsigned char* buffer, size_t bufferSize)
+{
+ int result = 0; // default to zero == success
+
+ LogInfo("Appending file '%s'", fileName);
+
+ HANDLE hFileIn = CreateFileA(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL|FILE_FLAG_SEQUENTIAL_SCAN, NULL);
+ if (hFileIn == INVALID_HANDLE_VALUE)
+ {
+ LogError("Failed to open input file '%s'. GetLastError()=%u", fileName, GetLastError());
+ return -1;
+ }
+
+ LARGE_INTEGER fileSize;
+ if (GetFileSizeEx(hFileIn, &fileSize) == 0)
+ {
+ LogError("GetFileSizeEx on '%s' failed. GetLastError()=%u", fileName, GetLastError());
+ result = -1;
+ goto CLEAN_UP;
+ }
+
+ for (LONGLONG offset = 0; offset < fileSize.QuadPart; offset += bufferSize)
+ {
+ DWORD bytesRead = -1;
+ BOOL res = ReadFile(hFileIn, buffer, (DWORD)bufferSize, &bytesRead, nullptr);
+ if (!res)
+ {
+ LogError("Failed to read '%s' from offset %lld. GetLastError()=%u", fileName, offset, GetLastError());
+ result = -1;
+ goto CLEAN_UP;
+ }
+ DWORD bytesWritten = -1;
+ BOOL res2 = WriteFile(hFileOut, buffer, bytesRead, &bytesWritten, nullptr);
+ if (!res2)
+ {
+ LogError("Failed to write output file at offset %lld. GetLastError()=%u", offset, GetLastError());
+ result = -1;
+ goto CLEAN_UP;
+ }
+ if (bytesRead != bytesWritten)
+ {
+ LogError("Failed to read/write matching bytes %u!=%u", bytesRead, bytesWritten);
+ result = -1;
+ goto CLEAN_UP;
+ }
+ }
+
+CLEAN_UP:
+
+ if (CloseHandle(hFileIn) == 0)
+ {
+ LogError("CloseHandle failed. GetLastError()=%u", GetLastError());
+ result = -1;
+ }
+
+ return result;
+}
+
+// Return true if this is a directory
+//
+// static
+bool verbMerge::DirectoryFilterDirectories(WIN32_FIND_DATAA* findData)
+{
+ if ((findData->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0)
+ {
+ // It's a directory. See if we want to exclude it because of other reasons, such as:
+ // 1. reparse points: avoid the possibility of loops
+ // 2. system directories
+ // 3. hidden directories
+ // 4. "." or ".."
+
+#ifndef FEATURE_PAL // FILE_ATTRIBUTE_REPARSE_POINT is not defined in the PAL
+ if ((findData->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) != 0)
+ return false;
+#endif // !FEATURE_PAL
+ if ((findData->dwFileAttributes & FILE_ATTRIBUTE_SYSTEM) != 0)
+ return false;
+ if ((findData->dwFileAttributes & FILE_ATTRIBUTE_HIDDEN) != 0)
+ return false;
+
+ if (strcmp(findData->cFileName, ".") == 0)
+ return false;
+ if (strcmp(findData->cFileName, "..") == 0)
+ return false;
+
+ return true;
+ }
+
+ return false;
+}
+
+// Return true if this is a file.
+//
+// static
+bool verbMerge::DirectoryFilterFile(WIN32_FIND_DATAA* findData)
+{
+ if ((findData->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0)
+ {
+ // This is not a directory, so it must be a file.
+ return true;
+ }
+
+ return false;
+}
+
+// static
+int __cdecl verbMerge::WIN32_FIND_DATAA_qsort_helper(const void* p1, const void* p2)
+{
+ const WIN32_FIND_DATAA* file1 = (WIN32_FIND_DATAA*)p1;
+ const WIN32_FIND_DATAA* file2 = (WIN32_FIND_DATAA*)p2;
+ return strcmp(file1->cFileName, file2->cFileName);
+}
+
+// Enumerate a directory for the files specified by "searchPattern". For each element in the directory,
+// pass it to the filter function. If the filter returns true, we keep it, otherwise we ignore it. Return
+// an array of information for the files that we kept, sorted by filename.
+//
+// Returns 0 on success, non-zero on failure.
+// If success, fileArray and elemCount are set.
+//
+// static
+int verbMerge::FilterDirectory(const char* searchPattern, DirectoryFilterFunction_t filter, /* out */ WIN32_FIND_DATAA** ppFileArray, int* pElemCount)
+{
+ // First, build up a list, then create an array and sort it after we know how many elements there are.
+ struct findDataList
+ {
+ findDataList(WIN32_FIND_DATAA* newFindData, findDataList* newNext)
+ : findData(*newFindData)
+ , next(newNext)
+ {
+ }
+
+ static void DeleteList(findDataList* root)
+ {
+ for (findDataList* loop = root; loop != nullptr; )
+ {
+ findDataList* tmp = loop;
+ loop = loop->next;
+ delete tmp;
+ }
+ }
+
+ WIN32_FIND_DATAA findData;
+ findDataList* next;
+ };
+
+ WIN32_FIND_DATAA* retArray = nullptr;
+ findDataList* first = nullptr;
+
+ int result = 0; // default to zero == success
+ int elemCount = 0;
+
+ // NOTE: this function only works on Windows 7 and later.
+ WIN32_FIND_DATAA findData;
+ HANDLE hSearch;
+#ifdef FEATURE_PAL
+ // PAL doesn't have FindFirstFileEx(). So just use FindFirstFile(). The only reason we use
+ // the Ex version is potentially better performance (don't populate short name; use large fetch),
+ // not functionality.
+ hSearch = FindFirstFileA(searchPattern, &findData);
+#else // !FEATURE_PAL
+ hSearch = FindFirstFileExA(searchPattern,
+ FindExInfoBasic, // We don't care about the short names
+ &findData,
+ FindExSearchNameMatch, // standard name matching
+ NULL,
+ FIND_FIRST_EX_LARGE_FETCH);
+#endif // !FEATURE_PAL
+
+ if (hSearch == INVALID_HANDLE_VALUE)
+ {
+ DWORD lastErr = GetLastError();
+ if (lastErr == ERROR_FILE_NOT_FOUND)
+ {
+ // This is ok; there was just nothing matching the pattern.
+ }
+ else
+ {
+ LogError("Failed to find pattern '%s'. GetLastError()=%u", searchPattern, GetLastError());
+ }
+ goto CLEAN_UP;
+ }
+
+ while (true)
+ {
+ // Do something with findData...
+
+ if (filter(&findData))
+ {
+ // Prepend it to the list.
+ first = new findDataList(&findData, first);
+ ++elemCount;
+ }
+
+ BOOL ok = FindNextFileA(hSearch, &findData);
+ if (!ok)
+ {
+ DWORD err = GetLastError();
+ if (err != ERROR_NO_MORE_FILES)
+ {
+ LogError("Failed to find next file. GetLastError()=%u", GetLastError());
+ result = -1;
+ goto CLEAN_UP;
+ }
+ break;
+ }
+ }
+
+ // Now sort the list. Create an array to put everything in.
+
+ int i;
+
+ retArray = new WIN32_FIND_DATAA[elemCount];
+ i = 0;
+ for (findDataList* tmp = first; tmp != nullptr; tmp = tmp->next)
+ {
+ retArray[i++] = tmp->findData;
+ }
+
+ qsort(retArray, elemCount, sizeof(retArray[0]), WIN32_FIND_DATAA_qsort_helper);
+
+CLEAN_UP:
+
+ findDataList::DeleteList(first);
+
+ if ((hSearch != INVALID_HANDLE_VALUE) && !FindClose(hSearch))
+ {
+ LogError("Failed to close search handle. GetLastError()=%u", GetLastError());
+ delete[] retArray;
+ return -1;
+ }
+
+ *ppFileArray = retArray;
+ *pElemCount = elemCount;
+ return result;
+}
+
+// Append all files in the given directory matching the file pattern.
+//
+// static
+int verbMerge::AppendAllInDir(HANDLE hFileOut, const char* dir, const char* file, unsigned char* buffer, size_t bufferSize, bool recursive, /* out */ LONGLONG* size)
+{
+ int result = 0; // default to zero == success
+ LONGLONG totalSize = 0;
+
+ char* searchPattern = MergePathStrings(dir, file);
+
+ WIN32_FIND_DATAA* fileArray = nullptr;
+ int elemCount = 0;
+ result = FilterDirectory(searchPattern, DirectoryFilterFile, &fileArray, &elemCount);
+ if (result != 0)
+ {
+ goto CLEAN_UP;
+ }
+
+ for (int i = 0; i < elemCount; i++)
+ {
+ const WIN32_FIND_DATAA& findData = fileArray[i];
+ char* fileFullPath = MergePathStrings(dir, findData.cFileName);
+
+ // Is it zero length? If so, skip it.
+ if ((findData.nFileSizeLow == 0) && (findData.nFileSizeHigh == 0))
+ {
+ LogInfo("Skipping zero-length file '%s'", fileFullPath);
+ }
+ else
+ {
+ result = AppendFile(hFileOut, fileFullPath, buffer, bufferSize);
+ if (result != 0)
+ {
+ // Error was already logged.
+ delete[] fileFullPath;
+ goto CLEAN_UP;
+ }
+ }
+
+ delete[] fileFullPath;
+ totalSize += ((LONGLONG)findData.nFileSizeHigh << 32) + (LONGLONG)findData.nFileSizeLow;
+ }
+
+ // If we need to recurse, then search the directory again for directories, and recursively search each one.
+ if (recursive)
+ {
+ delete[] searchPattern;
+ delete[] fileArray;
+
+ searchPattern = MergePathStrings(dir, "*");
+ fileArray = nullptr;
+ elemCount = 0;
+ result = FilterDirectory(searchPattern, DirectoryFilterDirectories, &fileArray, &elemCount);
+ if (result != 0)
+ {
+ goto CLEAN_UP;
+ }
+
+ LONGLONG dirSize = 0;
+ for (int i = 0; i < elemCount; i++)
+ {
+ const WIN32_FIND_DATAA& findData = fileArray[i];
+
+ char* fileFullPath = MergePathStrings(dir, findData.cFileName);
+ result = AppendAllInDir(hFileOut, fileFullPath, file, buffer, bufferSize, recursive, &dirSize);
+ delete[] fileFullPath;
+ if (result != 0)
+ {
+ // Error was already logged.
+ goto CLEAN_UP;
+ }
+
+ totalSize += dirSize;
+ }
+ }
+
+CLEAN_UP:
+
+ delete[] searchPattern;
+ delete[] fileArray;
+
+ if (result == 0)
+ {
+ *size = totalSize;
+ }
+
+ return result;
+}
+
+// Merge a set of .MC files into an output .MCH file. The .MC files to merge are given as a pattern, one of:
+// 1. *.mc -- simple pattern. Assumes current directory.
+// 2. foo\bar\*.mc -- simple pattern with relative directory.
+// 3. c:\foo\bar\baz\*.mc -- simple pattern with full path.
+// If no pattern is given, then the last component of the path is expected to be a directory name, and the pattern is assumed to be "*" (that is, all files).
+//
+// If "recursive" is true, then the pattern is searched for in the specified directory (or implicit current directory) and
+// all sub-directories, recursively.
+//
+// static
+int verbMerge::DoWork(const char* nameOfOutputFile, const char* pattern, bool recursive)
+{
+ int result = 0; // default to zero == success
+ SimpleTimer st1;
+
+ LogInfo("Merging files matching '%s' into '%s'", pattern, nameOfOutputFile);
+
+ HANDLE hFileOut = CreateFileA(nameOfOutputFile, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL|FILE_FLAG_SEQUENTIAL_SCAN, NULL);
+ if (hFileOut == INVALID_HANDLE_VALUE)
+ {
+ LogError("Failed to open output file '%s'. GetLastError()=%u", nameOfOutputFile, GetLastError());
+ return -1;
+ }
+
+ // Create a buffer we can use for all the copies.
+ unsigned char* buffer = new unsigned char[BUFFER_SIZE];
+ char* dir = nullptr;
+ const char* file = nullptr;
+
+ dir = _strdup(pattern);
+ char* lastSlash = strrchr(dir, DIRECTORY_SEPARATOR_CHAR_A);
+ if (lastSlash == NULL)
+ {
+ // The user may have passed a relative path without a slash, or the current directory.
+ // If there is a wildcard, we use it as the file pattern. If there isn't, we assume it's a relative directory name
+ // and use it as a directory, with "*" as the file pattern.
+ const char* wildcard = strchr(dir, '*');
+ if (wildcard == NULL)
+ {
+ file = "*";
+ }
+ else
+ {
+ file = dir;
+ dir = _strdup(".");
+ }
+ }
+ else
+ {
+ const char* wildcard = strchr(lastSlash, '*');
+ if (wildcard == NULL)
+ {
+ file = "*";
+
+ // Minor canonicalization: if there is a trailing last slash, strip it (probably should do this in a loop...)
+ if (*(lastSlash + 1) == '\0')
+ {
+ *lastSlash = '\0';
+ }
+ }
+ else
+ {
+ // ok, we found a wildcard after the last slash, so assume there is a pattern. Strip it at the last slash.
+ *lastSlash = '\0';
+ file = lastSlash + 1;
+ }
+ }
+
+ LONGLONG totalSize = 0;
+ LONGLONG dirSize = 0;
+
+ st1.Start();
+
+ result = AppendAllInDir(hFileOut, dir, file, buffer, BUFFER_SIZE, recursive, &dirSize);
+ if (result != 0)
+ {
+ goto CLEAN_UP;
+ }
+ totalSize += dirSize;
+
+ st1.Stop();
+
+ LogInfo("Read/Wrote %lld MB @ %4.2f MB/s.",
+ totalSize/(1000*1000),
+ (((double)totalSize)/(1000*1000))/st1.GetSeconds()); //yes yes.. http://en.wikipedia.org/wiki/Megabyte_per_second#Megabyte_per_second
+
+CLEAN_UP:
+
+ free((void*)dir);
+ delete[] buffer;
+
+ if (CloseHandle(hFileOut) == 0)
+ {
+ LogError("CloseHandle failed. GetLastError()=%u", GetLastError());
+ result = -1;
+ }
+
+ if (result != 0)
+ {
+ // There was a failure. Delete the output file, to avoid leaving some half-created file.
+ BOOL ok = DeleteFileA(nameOfOutputFile);
+ if (!ok)
+ {
+ LogError("Failed to delete file after MCS /merge failed. GetLastError()=%u", GetLastError());
+ }
+ }
+
+ return result;
+}