From 10ad17e525b7c5f6432e3c1b0a453d291e5d78b6 Mon Sep 17 00:00:00 2001 From: John Chen Date: Mon, 20 Apr 2015 08:36:31 -0700 Subject: Enable build crossgen.exe from build.cmd. Adds and modifies CMakeLists.txt files to enable building of crossgen.exe from build.cmd for x64 processor on Windows. Also adds a step in build.cmd to generate native image for mscorlib. [tfs-changeset: 1456454] --- src/tools/CMakeLists.txt | 1 + src/tools/crossgen/CMakeLists.txt | 67 +++ src/tools/crossgen/crossgen.cpp | 15 +- src/tools/util/.gitmirror | 1 + src/tools/util/consoleargs.cpp | 939 ++++++++++++++++++++++++++++++++++++++ src/tools/util/consoleargs.h | 71 +++ src/tools/util/file_can.h | 70 +++ src/tools/util/list.h | 26 ++ src/tools/util/tree.h | 243 ++++++++++ 9 files changed, 1426 insertions(+), 7 deletions(-) create mode 100644 src/tools/crossgen/CMakeLists.txt create mode 100644 src/tools/util/.gitmirror create mode 100644 src/tools/util/consoleargs.cpp create mode 100644 src/tools/util/consoleargs.h create mode 100644 src/tools/util/file_can.h create mode 100644 src/tools/util/list.h create mode 100644 src/tools/util/tree.h (limited to 'src/tools') diff --git a/src/tools/CMakeLists.txt b/src/tools/CMakeLists.txt index d8f881a92e..d23b7e0b82 100644 --- a/src/tools/CMakeLists.txt +++ b/src/tools/CMakeLists.txt @@ -1,2 +1,3 @@ +add_subdirectory(crossgen) add_subdirectory(GenClrDebugResource) add_subdirectory(InjectResource) \ No newline at end of file diff --git a/src/tools/crossgen/CMakeLists.txt b/src/tools/crossgen/CMakeLists.txt new file mode 100644 index 0000000000..e7e2b65086 --- /dev/null +++ b/src/tools/crossgen/CMakeLists.txt @@ -0,0 +1,67 @@ +project(crossgen) + +include(${CLR_DIR}/crossgen.cmake) + +set(CMAKE_INCLUDE_CURRENT_DIR ON) +include_directories(../util) +include_directories(../../pal/prebuilt/corerror) + +set(crossgen_SOURCES crossgen.cpp ../util/consoleargs.cpp) +set(crossgen_RESOURCES Native.rc) + +add_definitions(-DFX_VER_INTERNALNAME_STR=crossgen.exe) 
+add_definitions(-DNO_NGENPDB) + +if(CLR_CMAKE_PLATFORM_UNIX) + # This does not compile on Linux yet + if(CAN_BE_COMPILED_ON_LINUX) + add_executable(crossgen + ${crossgen_SOURCES} + ${crossgen_RESOURCES} + ) + endif(CAN_BE_COMPILED_ON_LINUX) + +else() + add_executable(crossgen + ${crossgen_SOURCES} + ${crossgen_RESOURCES} + ) + + target_link_libraries(crossgen + advapi32 + ole32 + oleaut32 + uuid + user32 + version + shlwapi + bcrypt + corguids + utilcode_crossgen + corzap_crossgen + jit_crossgen + gcinfo_crossgen + strongname_crossgen + mdcompiler_crossgen + mdwinmd_crossgen + mdruntimerw_crossgen + mdhotdata_crossgen + mdruntime_crossgen + cee_crossgen + mscorlib_crossgen + v3binder_crossgen + ${STATIC_MT_CRT_LIB} + ) + + # Can't compile on linux yet so only add for windows + # add the install targets + install (TARGETS crossgen DESTINATION .) + + # We will generate PDB only for the debug configuration + install (FILES ${CMAKE_CURRENT_BINARY_DIR}/$/crossgen.pdb DESTINATION PDB) + +endif(CLR_CMAKE_PLATFORM_UNIX) + +add_subdirectory(../../zap/crossgen ../../zap/crossgen) +add_subdirectory(../../vm/crossgen ../../vm/crossgen) +add_subdirectory(../../vm/crossgen_mscorlib ../../vm/crossgen_mscorlib) diff --git a/src/tools/crossgen/crossgen.cpp b/src/tools/crossgen/crossgen.cpp index d1bbba80af..db663f0c81 100644 --- a/src/tools/crossgen/crossgen.cpp +++ b/src/tools/crossgen/crossgen.cpp @@ -3,11 +3,6 @@ // Licensed under the MIT license. See LICENSE file in the project root for full license information. // -// ==++== -// - -// -// ==--== // // TO DO: we currently use raw printf() for output. Maybe we need to pick up something like ngen's Output() handling // to handle multiple code pages, etc, better. 
@@ -143,9 +138,11 @@ void PrintUsageHelper() W(" /Platform_Resource_Roots \n") W(" - List of paths containing localized assembly directories\n") W(" /App_Paths - List of paths containing user-application assemblies and resources\n") +#ifndef NO_NGENPDB W(" /App_Ni_Paths \n") W(" - List of paths containing user-application native images\n") W(" - Must be used with /CreatePDB switch\n") +#endif // NO_NGENPDB #endif // FEATURE_CORECLR W(" /Platform_Assemblies_Paths\n") @@ -192,10 +189,12 @@ void PrintUsageHelper() #ifdef FEATURE_CORECLR W(" Size on Disk Parameters\n") W(" /NoMetaData - Do not copy metadata and IL into native image.\n") +#ifndef NO_NGENPDB W(" Debugging Parameters\n") W(" /CreatePDB [/lines [] ]\n") W(" When specifying /CreatePDB, the native image should be created\n") W(" first, and should be the path to the NI.") +#endif // NO_NGENPDB #endif // FEATURE_CORECLR ); } @@ -636,6 +635,7 @@ int _cdecl wmain(int argc, __in_ecount(argc) WCHAR **argv) argv++; argc--; } +#ifndef NO_NGENPDB else if (MatchParameter(*argv, W("App_Ni_Paths")) && (argc > 1)) { pwzAppNiPaths = argv[1]; @@ -644,6 +644,7 @@ int _cdecl wmain(int argc, __in_ecount(argc) WCHAR **argv) argv++; argc--; } +#endif // NO_NGENPDB #endif // FEATURE_CORECLR else if (MatchParameter(*argv, W("Platform_Assemblies_Paths")) && (argc > 1)) { @@ -663,7 +664,7 @@ int _cdecl wmain(int argc, __in_ecount(argc) WCHAR **argv) argc--; } #endif // FEATURE_COMINTEROP -#ifdef FEATURE_CORECLR +#if defined(FEATURE_CORECLR) && !defined(NO_NGENPDB) else if (MatchParameter(*argv, W("CreatePDB")) && (argc > 1)) { // syntax: /CreatePDB [/lines [] ] @@ -735,7 +736,7 @@ int _cdecl wmain(int argc, __in_ecount(argc) WCHAR **argv) argv--; argc++; } -#endif // FEATURE_CORECLR +#endif // FEATURE_CORECLR && !NO_NGENPDB else { if (argc == 1) diff --git a/src/tools/util/.gitmirror b/src/tools/util/.gitmirror new file mode 100644 index 0000000000..f507630f94 --- /dev/null +++ b/src/tools/util/.gitmirror @@ -0,0 +1 @@ +Only 
contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror. \ No newline at end of file diff --git a/src/tools/util/consoleargs.cpp b/src/tools/util/consoleargs.cpp new file mode 100644 index 0000000000..ad316e7a55 --- /dev/null +++ b/src/tools/util/consoleargs.cpp @@ -0,0 +1,939 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +#include "consoleargs.h" +#include + +typedef unsigned char byte; + +size_t SafeStrCopy( _In_ LPCWSTR wszSrc, _In_ size_t cchSrc, _Out_ LPWSTR wszDest, _In_ size_t cchDest) +{ + if (cchSrc == (size_t)-1) + cchSrc = wcslen(wszSrc); + + if (cchSrc >= cchDest) { + SetLastError(ERROR_FILENAME_EXCED_RANGE); + return 0; + } + + if (FAILED(StringCchCopyNW( wszDest, cchDest, wszSrc, cchSrc))) { + SetLastError(ERROR_FILENAME_EXCED_RANGE); + return 0; + } + return cchSrc; +} + +size_t SafeStrLower( _In_ LPCWSTR wszSrc, _In_ size_t cchSrc, _Out_ LPWSTR wszDest, _In_ size_t cchDest) +{ + if (cchSrc == (size_t)-1) + cchSrc = wcslen(wszSrc); + + if (cchSrc >= cchDest) { + SetLastError(ERROR_FILENAME_EXCED_RANGE); + return 0; + } + + SafeStrCopy(wszSrc, cchSrc, wszDest, cchDest); + _wcslwr_s((WCHAR*)wszDest, cchDest); + return wcslen(wszDest); +} + +inline int HexValue (WCHAR c) +{ + return (c >= '0' && c <= '9') ? c - '0' : (c & 0xdf) - 'A' + 10; +} + +// Get canonical file path from a user specified path. wszSrcfileName can include relative paths, etc. +// Much of this function was taken from csc.exe. 
+// Returns the number of characters written to wszDestFileName (not counting the
+// terminator), or 0 on failure with the reason available from GetLastError().
+// wszSrcFileName and wszDestFileName may be the same buffer.
+DWORD GetCanonFilePath(_In_z_ LPCWSTR wszSrcFileName, _Out_z_cap_(cchDestFileName) LPWSTR wszDestFileName, _In_ DWORD cchDestFileName, _In_ bool fPreserveSrcCasing)
+{
+    // A zero-length destination cannot even hold the terminator.
+    if (cchDestFileName == 0) {
+        SetLastError(ERROR_FILENAME_EXCED_RANGE);
+        return 0;
+    }
+
+    DWORD full_len;
+    WCHAR * full_path = new WCHAR[cchDestFileName]; // an intermediate buffer
+    WCHAR * temp_path = new WCHAR[cchDestFileName]; // Used if FindFile fails
+    WCHAR * full_cur;
+    WCHAR * out_cur;
+    WCHAR * out_end;
+    bool hasDrive = false;
+
+    memset(full_path, 0, cchDestFileName * sizeof(WCHAR));
+    out_cur = wszDestFileName;
+    out_end = out_cur + cchDestFileName;
+    if (wszSrcFileName != wszDestFileName)
+        *out_cur = L'\0';
+    full_cur = full_path;
+
+    // Replace '\\' with single backslashes in paths, because W_GetFullPathName fails to do this on win9x.
+    // NOTE(review): the body of this loop was lost between '<' and '>=' in the
+    // text this patch was recovered from; it is reconstructed from the
+    // surrounding declarations (i, j, length, temp_path) -- confirm against upstream.
+    size_t i = 0;
+    size_t j = 0;
+    size_t length = wcslen(wszSrcFileName);
+    while (j < length)
+    {
+        // UNC paths start with '\\' so never collapse the leading pair.
+        if (j != 0 && wszSrcFileName[j] == L'\\' && wszSrcFileName[j + 1] == L'\\')
+            j++;
+        else
+            temp_path[i++] = wszSrcFileName[j++];
+        if (i >= cchDestFileName) {
+            SetLastError(ERROR_FILENAME_EXCED_RANGE);
+            goto FAIL;
+        }
+    }
+    temp_path[i] = L'\0';
+
+    full_len = GetFullPathNameW(temp_path, cchDestFileName, full_path, NULL);
+    if (wszSrcFileName == wszDestFileName)
+        wszDestFileName[cchDestFileName-1] = L'\0';
+    if (full_len == 0) {
+        goto FAIL;
+    } else if (full_len >= cchDestFileName) {
+        SetLastError(ERROR_FILENAME_EXCED_RANGE);
+        goto FAIL;
+    }
+
+    // Allow only 1 ':' for drives and no long paths with "\\?\"
+    if (((full_path[0] >= L'a' && full_path[0] <= L'z') ||
+        (full_path[0] >= L'A' && full_path[0] <= L'Z')) &&
+        full_path[1] == L':')
+        hasDrive = true;
+
+    // We don't allow colons (except after the drive letter)
+    // long paths beginning with "\\?\"
+    // devices beginning with "\\.\"
+    // or wildcards
+    // or characters 0-31
+    if (wcschr( full_path + (hasDrive ? 2 : 0), L':') != NULL ||
+        wcsncmp( full_path, L"\\\\?\\", 4) == 0 ||
+        wcsncmp( full_path, L"\\\\.\\", 4) == 0 ||
+        wcspbrk(full_path, L"?*\x1\x2\x3\x4\x5\x6\x7\x8\x9"
+            L"\xA\xB\xC\xD\xE\xF\x10\x11\x12\x13\x14\x15"
+            L"\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\0") != NULL) {
+        SetLastError(ERROR_INVALID_NAME);
+        goto FAIL;
+    }
+
+
+    if (hasDrive) {
+        size_t len = SafeStrLower( full_path, 3, out_cur, out_end - out_cur);
+        if (len == 0)
+            goto FAIL;
+
+        full_cur += 3;
+        out_cur += len;
+
+    } else if (full_path[0] == L'\\' && full_path[1] == L'\\') {
+        // Must be a UNC pathname, so lower-case the server and share
+        // since there's no known way to get the 'correct casing'
+        WCHAR * slash = wcschr(full_path + 2, L'\\');
+        // slash should now point to the backslash between the server and share
+        if (slash == NULL || slash == full_path + 2) {
+            SetLastError(ERROR_INVALID_NAME);
+            goto FAIL;
+        }
+
+        slash = wcschr(slash + 1, L'\\');
+        if (slash == NULL) {
+            slash = full_path + wcslen(full_path);
+        } else if (slash[-1] == L'\\') {
+            // An empty share-name?
+            SetLastError(ERROR_INVALID_NAME);
+            goto FAIL;
+        } else
+            slash++;
+        // slash should now point to char after the slash after the share name
+        // or the end of the sharename if there's no trailing slash
+
+        size_t len = SafeStrLower( full_path, slash - full_path, out_cur, out_end - out_cur);
+        if (len == 0)
+            goto FAIL;
+
+        full_cur = slash;
+        out_cur += len;
+
+    } else {
+        // Not a drive-letter path or a UNC path, so assume it's invalid
+        SetLastError(ERROR_INVALID_NAME);
+        goto FAIL;
+    }
+
+    // We either have a lower-cased drive letter or a UNC name
+    // with its trailing slash
+    // out_cur points to the trailing NULL
+    // full_cur points to the character after the slash
+
+    // Now iterate over each element of the path and attempt to canonicalize it
+    // It's possible for this loop to never run
+    // (for strings like "C:\" or "\\unc\share" or "\\unc\share2\")
+    while (*full_cur) {
+        WIN32_FIND_DATAW find_data;
+        bool hasSlash = true;
+        WCHAR * slash = wcschr(full_cur, '\\');
+        if (slash == NULL) {
+            // This means we're on the last element of the path
+            // so work with everything left in the string
+            hasSlash = false;
+            slash = full_cur + wcslen(full_cur);
+        }
+
+        // Check to make sure we have enough room for the next part of the path
+        if (out_cur + (slash - full_cur) >= out_end) {
+            SetLastError(ERROR_FILENAME_EXCED_RANGE);
+            goto FAIL;
+        }
+
+        // Copy over the next path part into the output buffer
+        // so we can run FindFile to get the correct casing/long filename
+        memcpy(out_cur, full_cur, (BYTE*)slash - (BYTE*)full_cur);
+        out_cur[slash - full_cur] = L'\0';
+        HANDLE hFind = FindFirstFileW(wszDestFileName, &find_data);
+        if (hFind == INVALID_HANDLE_VALUE) {
+            size_t temp_len;
+
+            // We couldn't find the file, the general causes are the file doesn't exist
+            // or we don't have access to it. Either way we still try to get a canonical filename
+            // but preserve the passed in casing for the filename
+
+            if (!hasSlash && fPreserveSrcCasing) {
+                // This is the last component in the filename, we should preserve the user's input text
+                // even if we can't find it
+                out_cur += slash - full_cur;
+                full_cur = slash;
+                break;
+            }
+
+            // This will succeed even if we don't have access to the file
+            // (And on NT4 if the filename is already in 8.3 form)
+            temp_len = GetShortPathNameW(wszDestFileName, temp_path, cchDestFileName);
+            if (temp_len == 0) {
+                // GetShortPathName failed, we have no other way of figuring out the
+                // The filename, so just lowercase it so it hashes in a case-insensitive manner
+
+                if (!hasSlash) {
+                    // If it doesn't have a slash, then it must be the last part of the filename,
+                    // so don't lowercase it, preserve whatever casing the user gave
+                    temp_len = SafeStrCopy( full_cur, slash - full_cur, out_cur, out_end - out_cur);
+                } else {
+                    temp_len = SafeStrLower( full_cur, slash - full_cur, out_cur, out_end - out_cur);
+                }
+                if (temp_len == 0)
+                    goto FAIL;
+
+                full_cur = slash;
+                out_cur += temp_len;
+
+            } else if (temp_len >= cchDestFileName) {
+                // The short filename is longer than the whole thing?
+                // This shouldn't ever happen, right?
+                SetLastError(ERROR_FILENAME_EXCED_RANGE);
+                goto FAIL;
+            } else {
+                // GetShortPathName succeeded with a path that is less than BUFFER_LEN
+                // find the last slash and copy it. (We don't want to copy previous
+                // path components that we've already 'resolved')
+                // However, GetShortPathName doesn't always correct the casing
+                // so as a safe-guard, lower-case it (unless it's the last filename)
+                WCHAR * temp_slash = wcsrchr(temp_path, L'\\');
+
+                // Step past the separator; if there is none, use the whole
+                // string instead of incrementing a NULL pointer.
+                temp_slash = (temp_slash != NULL) ? temp_slash + 1 : temp_path;
+                size_t len = 0;
+                if (!hasSlash) {
+                    len = SafeStrCopy( temp_slash, -1, out_cur, out_end - out_cur);
+                } else {
+                    len = SafeStrLower( temp_slash, -1, out_cur, out_end - out_cur);
+                }
+                if (len == 0)
+                    goto FAIL;
+
+                full_cur = slash;
+                out_cur += len;
+
+            }
+        } else {
+            // Copy over the properly cased long filename
+            FindClose(hFind);
+            size_t name_len = wcslen(find_data.cFileName);
+            if (out_cur + name_len + (hasSlash ? 1 : 0) >= out_end) {
+                SetLastError(ERROR_FILENAME_EXCED_RANGE);
+                goto FAIL;
+            }
+
+            // out_cur already has the filename with the input casing, so we can just leave it alone
+            // if this is not a directory name and the caller asked to preserve the casing
+            if (hasSlash || !fPreserveSrcCasing) {
+                memcpy(out_cur, find_data.cFileName, name_len * sizeof(WCHAR));
+            }
+            else if (name_len != (slash - full_cur) || _wcsnicmp(find_data.cFileName, full_cur, name_len) != 0) {
+                // The user asked us to preserve the casing of the filename
+                // and the filename is different by more than just casing so report
+                // an error indicating we can't create the file
+                SetLastError(ERROR_FILE_EXISTS);
+                goto FAIL;
+            }
+
+            out_cur += name_len;
+            full_cur = slash;
+        }
+
+        if (hasSlash) {
+            if (out_cur + 1 >= out_end) {
+                SetLastError(ERROR_FILENAME_EXCED_RANGE);
+                goto FAIL;
+            }
+            full_cur++;
+            *out_cur++ = L'\\';
+        }
+        *out_cur = '\0';
+    }
+
+    // Success: release the scratch buffers here too (out_cur points into the
+    // caller's buffer, so the length stays valid after the deletes).
+    delete [] full_path;
+    delete [] temp_path;
+    return (DWORD)(out_cur - wszDestFileName);
+
+FAIL:
+    delete [] full_path;
+    delete [] temp_path;
+    return 0;
+}
+
+// Tree-walk callback: releases one heap-allocated string. Always returns true
+// so that the walk continues over the remaining nodes.
+bool FreeString(LPCWSTR szText)
+{
+    if (szText)
+        delete [] (const_cast<LPWSTR>(szText));
+    return true;
+}
+
+// True for the characters that separate arguments: blank, tab, LF and CR.
+bool IsWhitespace(WCHAR c)
+{
+    return c == L' ' || c == L'\t' || c == L'\n' || c == L'\r';
+}
+
+// Frees the argument list, the argv array and the last error message.
+// Safe to call more than once (the destructor calls it as well).
+void ConsoleArgs::CleanUpArgs()
+{
+    while (m_listArgs)
+    {
+        WStrList * next = m_listArgs->next;
+        if (m_listArgs->arg)
+            delete [] m_listArgs->arg;
+        delete m_listArgs;
+        m_listArgs = next;
+    }
+
+    // NOTE(review): only the array is released here; the strings stored in it
+    // by ExpandResponseFiles are separate allocations and their count is not
+    // recorded, so they are not freed -- confirm whether callers rely on that.
+    if (m_rgArgs)
+        delete[] m_rgArgs;
+
+    m_rgArgs = NULL;
+
+    // kOutOfMemory is a static string, never a heap allocation, so it must not
+    // be passed to delete[]. Reset the pointer so a second call cannot free twice.
+    if (m_lastErrorMessage && m_lastErrorMessage != kOutOfMemory)
+    {
+        delete[] m_lastErrorMessage;
+    }
+    m_lastErrorMessage = nullptr;
+}
+
+// Writes the canonical form of szSource into filenameBuffer. On failure returns
+// false and, if nothing was written, fills the buffer with as much of szSource
+// as fits so that the caller can still print a useful error message.
+bool ConsoleArgs::GetFullFileName(LPCWSTR szSource, __deref_out_ecount(cbFilenameBuffer) LPWSTR filenameBuffer, DWORD cbFilenameBuffer, bool fOutputFilename)
+{
+    if (0 == GetCanonFilePath( szSource, filenameBuffer, cbFilenameBuffer, fOutputFilename))
+    {
+        // Buffers shorter than 4 characters have no room for the "..." marker;
+        // skipping them also keeps cbFilenameBuffer - 4 from wrapping around.
+        if (cbFilenameBuffer >= 4 && filenameBuffer[0] == L'\0')
+        {
+            // This could easily fail because of an overflow, but that's OK
+            // we only want what will fit in the output buffer so we can print
+            // a good error message
+            StringCchCopyW(filenameBuffer, cbFilenameBuffer - 4, szSource);
+            // Don't cat on the ..., only stick it in the last 4 characters
+            // to indicate truncation (if the string is shorter than this it just won't print)
+            StringCchCopyW(filenameBuffer + cbFilenameBuffer - 4, 4, L"...");
+        }
+        return false;
+    }
+    return true;
+}
+
+//
+// Clear previous error message if any and set the new one by copying into m_lastErrorMessage.
+// We are responsible for freeing the memory on destruction.
+//
+void ConsoleArgs::SetErrorMessage(__deref_in LPCWSTR pwzMessage)
+{
+    // kOutOfMemory is a static string, not a heap block: never delete[] it.
+    if (m_lastErrorMessage != nullptr && m_lastErrorMessage != kOutOfMemory)
+    {
+        delete[] m_lastErrorMessage;
+    }
+    m_lastErrorMessage = nullptr;
+    m_errorOccured = true;
+
+    size_t cchMessage = wcslen(pwzMessage) + 1;
+    m_lastErrorMessage = new WCHAR[cchMessage];
+    if (m_lastErrorMessage == nullptr)
+    {
+        //
+        // Out of memory allocating error string
+        //
+        m_lastErrorMessage = kOutOfMemory;
+        return;
+    }
+
+    wcscpy_s(m_lastErrorMessage, cchMessage, pwzMessage);
+}
+
+//
+// Create a simple leaf tree node holding a private copy of the given text.
+// Returns NULL if the copy or the node cannot be created.
+//
+b_tree * ConsoleArgs::MakeLeaf(LPCWSTR text)
+{
+    size_t name_len = wcslen(text) + 1;
+    LPWSTR szCopy = new WCHAR[name_len];
+
+    if (!szCopy)
+    {
+        return NULL;
+    }
+
+    if (FAILED(StringCchCopyW (szCopy, name_len, text)))
+    {
+        delete [] szCopy;
+        return NULL;
+    }
+
+    b_tree * t = new b_tree(szCopy);
+    if (!t)
+    {
+        delete [] szCopy;
+        return NULL;
+    }
+    return t;
+}
+
+//
+// Free the memory allocated by the tree (recursive): first every string the
+// nodes point at, then the nodes themselves.
+//
+void ConsoleArgs::CleanupTree(b_tree *root)
+{
+    if (root == NULL)
+        return ;
+    root->InOrderWalk(FreeString);
+    delete root;
+}
+
+//
+// Search the binary tree and add a copy of the given string.
+// Returns S_OK if it was added, S_FALSE if an equal string (ignoring case)
+// already exists, or E_OUTOFMEMORY if the copy could not be made.
+//
+HRESULT ConsoleArgs::TreeAdd(b_tree **root, LPCWSTR add)
+{
+    // Special case - init the tree if it
+    // doesn't already exist
+    if (*root == NULL)
+    {
+        *root = MakeLeaf(add);
+        return *root == NULL ? E_OUTOFMEMORY : S_OK;
+    }
+
+    size_t name_len = wcslen(add) + 1;
+    LPWSTR szCopy = new WCHAR[name_len];
+
+    if (!szCopy)
+    {
+        // This used to be "return NULL", which is S_OK for an HRESULT.
+        return E_OUTOFMEMORY;
+    }
+
+    HRESULT hr = StringCchCopyW (szCopy, name_len, add);
+    if (SUCCEEDED(hr))
+    {
+        // otherwise, just let the template do the work
+        hr = (*root)->Add(szCopy, _wcsicmp);
+    }
+
+    if (hr != S_OK) // S_FALSE means it already existed
+        delete [] szCopy;
+
+    return hr;
+}
+
+//
+// Parse the text into a list of arguments and splice them into the list at
+// *listReplace (in front of whatever *listReplace pointed to before).
+// This function assumes the text is NULL terminated; a NULL szText adds nothing.
+// Errors are reported through SetErrorMessage.
+//
+void ConsoleArgs::TextToArgs(LPCWSTR szText, WStrList ** listReplace)
+{
+    WStrList **argLast;
+    const WCHAR *pCur;
+    size_t iSlash;
+
+    if (szText == NULL)
+    {
+        return ;
+    }
+
+    argLast = listReplace;
+    pCur = szText;
+
+    // Guaranteed that all tokens are no bigger than the entire file.
+    LPWSTR szTemp = new WCHAR[wcslen(szText) + 1];
+    if (!szTemp)
+    {
+        return ;
+    }
+    while (*pCur != '\0')
+    {
+        WCHAR *pPut, *pFirst, *pLast;
+        WCHAR chIllegal;
+
+LEADINGWHITE:
+        while (IsWhitespace( *pCur) && *pCur != '\0')
+            pCur++;
+
+        if (*pCur == '\0')
+            break;
+        else if (*pCur == L'#')
+        {
+            while ( *pCur != '\0' && *pCur != '\n')
+                pCur++; // Skip to end of line
+            goto LEADINGWHITE;
+        }
+
+        // The treatment of quote marks is a bit different than the standard
+        // treatment. We only remove quote marks at the very beginning and end of the
+        // string. We still consider interior quotemarks for space ignoring purposes.
+        // All the below are considered a single argument:
+        //      "foo bar"        -> foo bar
+        //      "foo bar";"baz"  -> "foo bar";"baz"
+        //      fo"o ba"r        -> fo"o ba"r
+        //
+        // Additionally, in order to allow multi-line arguments we allow a ^ at the
+        // end of a line to specify "invisible space". A character sequence matching
+        // "\^(\r\n|\r|\n)[ \t]*" will be completely ignored (whether inside a quoted
+        // string or not). The below transformations occur (and represent a single
+        // argument):
+        //      "foo ^
+        //      bar"             -> foo bar
+        //      foo;^
+        //      bar              -> foo;bar
+        // Notes:
+        //   1. Comments are not recognized in a multi-line argument
+        //   2. A caret escapes only one new-line followed by an arbitrary number of
+        //      tabs or blanks.
+        // The following will be parsed as the names suggest, into several different
+        // arguments:
+        //      /option1 ^
+        //          val1_1;^
+        //          val1_2;^
+        //          val1_3;^
+        //
+        //      /option2
+        //      /opt^
+        //          ion3         -> /option1 val1_1;val1_2;val1_3; /option2 /option3
+        int cQuotes = 0;
+        pPut = pFirst = szTemp;
+        chIllegal = 0;
+        while ((!IsWhitespace( *pCur) || !!(cQuotes & 1)) && *pCur != '\0')
+        {
+            switch (*pCur)
+            {
+                // All this weird slash stuff follows the standard argument processing routines
+            case L'\\':
+                iSlash = 0;
+                // Copy and advance while counting slashes
+                while (*pCur == L'\\')
+                {
+                    *pPut++ = *pCur++;
+                    iSlash++;
+                }
+
+                // Slashes not followed by a quote character don't matter now
+                if (*pCur != L'\"')
+                    break;
+
+                // If there's an odd count of slashes, it's escaping the quote
+                // Otherwise the quote is a quote
+                if ((iSlash & 1) == 0)
+                {
+                    ++cQuotes;
+                }
+                *pPut++ = *pCur++;
+                break;
+
+            case L'\"':
+                ++cQuotes;
+                *pPut++ = *pCur++;
+                break;
+
+            case L'^':
+                // ignore this sequence: \^[\r\n|\r|\n]( \t)*
+                if (pCur[1] == L'\r' || pCur[1] == L'\n')
+                {
+                    if (pCur[1] == L'\r' && pCur[2] == L'\n')
+                        pCur += 3;
+                    else
+                        pCur += 2;
+
+                    while (*pCur == L' ' || *pCur == L'\t')
+                        ++pCur;
+                }
+                else
+                {
+                    *pPut++ = *pCur++;  // Copy the caret and advance
+                }
+                break;
+
+            case L'\x01': case L'\x02': case L'\x03': case L'\x04':
+            case L'\x05': case L'\x06': case L'\x07': case L'\x08':
+            case L'\x09': case L'\x0A': case L'\x0B': case L'\x0C':
+            case L'\x0D': case L'\x0E': case L'\x0F': case L'\x10':
+            case L'\x11': case L'\x12': case L'\x13': case L'\x14':
+            case L'\x15': case L'\x16': case L'\x17': case L'\x18':
+            case L'\x19': case L'\x1A': case L'\x1B': case L'\x1C':
+            case L'\x1D': case L'\x1E': case L'\x1F':
+            case L'|':
+                // Save the first illegal character and skip over them
+                if (chIllegal == 0)
+                    chIllegal = *pCur;
+                pCur++;
+                break;
+
+            default:
+                *pPut++ = *pCur++;  // Copy the char and advance
+                break;
+            }
+        }
+
+        pLast = pPut;
+        *pPut++ = '\0';
+
+        // If the string is surrounded by quotes, with no interior quotes, remove them.
+        if (cQuotes == 2 && *pFirst == L'\"' && *(pLast - 1) == L'\"')
+        {
+            ++pFirst;
+            --pLast;
+            *pLast = L'\0';
+        }
+
+        if (chIllegal != 0)
+        {
+            SetErrorMessage(L"Illegal option character.");
+            break;
+        }
+
+        size_t cchLen = pLast - pFirst + 1;
+        WCHAR * szArgCopy = new WCHAR[cchLen];
+        if (!szArgCopy || FAILED(StringCchCopyW(szArgCopy, cchLen, pFirst)))
+        {
+            delete [] szArgCopy;
+            SetErrorMessage(L"Out of memory.");
+            break;
+        }
+        WStrList * listArgNew = new WStrList( szArgCopy, (*argLast));
+        if (!listArgNew)
+        {
+            delete [] szArgCopy;
+            SetErrorMessage(L"Out of memory.");
+            break;
+        }
+
+        *argLast = listArgNew;
+        argLast = &listArgNew->next;
+    }
+
+    // szTemp came from new[], so it must be released with delete[].
+    delete [] szTemp;
+
+}
+
+//
+// Pass in the command line args, argc and argv
+//
+// We expand any response files that may be contained in the args and return a new
+// set of args, pargc2 and pppargv2 that contain the full flat command line.
+// Returns false (with the reason available from ErrorMessage) on failure.
+//
+bool ConsoleArgs::ExpandResponseFiles(__in int argc, __deref_in_ecount(argc) const LPCWSTR * argv, __deref_out int * pargc2, __out LPWSTR ** pppargv2)
+{
+    *pargc2 = 0;
+    *pppargv2 = NULL;
+    WStrList **argLast = &m_listArgs;
+    while (argc > 0)
+    {
+        // Make a copy of the original var args so we can just delete[] everything
+        // once parsing is done: original args and new args from response files
+        // mixed in amongst the originals.
+        size_t cchArg = wcslen(argv[0]) + 1;
+        LPWSTR copyArg = new WCHAR[cchArg];
+        if (!copyArg)
+        {
+            SetErrorMessage(L"Out of memory.");
+            return false;
+        }
+        wcscpy_s(copyArg, cchArg, argv[0]);
+
+        WStrList * listArgNew = new WStrList(copyArg, (*argLast));
+        if (!listArgNew)
+        {
+            delete [] copyArg;
+            SetErrorMessage(L"Out of memory.");
+            return false;
+        }
+
+        *argLast = listArgNew;
+        argLast = &listArgNew->next;
+
+        argc--;
+        argv++;
+    }
+
+    // Process Response Files
+    ProcessResponseArgs();
+    if (m_errorOccured)
+        return false;
+
+    // Now convert to an argc/argv form for remaining processing.
+    // Consumed response-file entries have a NULL arg and are skipped.
+    int newArgc = 0;
+    for (WStrList * listCurArg = m_listArgs; listCurArg != NULL; listCurArg = listCurArg->next)
+    {
+        if (listCurArg->arg)
+            ++newArgc;
+    }
+
+    m_rgArgs = new LPWSTR[newArgc];
+    if (!m_rgArgs)
+    {
+        SetErrorMessage(L"Out of memory.");
+        return false;
+    }
+    // NOTE(review): each entry below is a separate heap copy; nothing visible
+    // in this file releases them individually -- confirm intended ownership.
+    int i = 0;
+    for (WStrList * listCurArg = m_listArgs; listCurArg != NULL; listCurArg = listCurArg->next)
+    {
+        if (listCurArg->arg)
+        {
+            size_t cchArg = wcslen(listCurArg->arg) + 1;
+            LPWSTR newString = new WCHAR[cchArg];
+            wcscpy_s(newString, cchArg, listCurArg->arg);
+            m_rgArgs[i++] = newString;
+        }
+    }
+
+    *pargc2 = newArgc;
+    *pppargv2 = m_rgArgs;
+    return !m_errorOccured;
+}
+
+//
+// Read file to end, converting to unicode (UTF-16).
+// Accepts ANSI/UTF-8 (with or without BOM) and little-endian UTF-16 with BOM.
+// On success *ppwzTextBuffer is allocated with new[]; caller is responsible
+// for freeing it. On failure returns false, sets the error message and leaves
+// *ppwzTextBuffer untouched.
+//
+bool ConsoleArgs::ReadTextFile(LPCWSTR pwzFilename, __deref_out LPWSTR *ppwzTextBuffer)
+{
+    // Everything is declared up front so that no "goto ErrExit" skips an
+    // initialization.
+    bool success = false;
+    char *bufA = nullptr;
+    WCHAR *bufW = nullptr;
+    char *postByteOrderMarks = nullptr;
+    const byte *bom = nullptr;
+    DWORD size = 0;
+    DWORD numRead = 0;
+    bool alreadyUtf16 = false;
+
+    HANDLE hFile = CreateFileW(pwzFilename, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL);
+    if (hFile == INVALID_HANDLE_VALUE)
+    {
+        SetErrorMessage(L"Cannot open response file.");
+        goto ErrExit;
+    }
+    size = GetFileSize(hFile, NULL);
+    if (size == INVALID_FILE_SIZE)
+    {
+        SetErrorMessage(L"Failure reading response file.");
+        goto ErrExit;
+    }
+    bufA = new char[size];
+
+    if (!bufA)
+    {
+        SetErrorMessage(L"Out of memory");
+        goto ErrExit;
+    }
+    if (!ReadFile(hFile, bufA, size, &numRead, NULL) || numRead != size)
+    {
+        SetErrorMessage(L"Failure reading response file.");
+        goto ErrExit;
+    }
+
+    postByteOrderMarks = bufA;
+    bom = (const byte *)bufA;
+
+    //
+    // If there are Byte Order Marks, skip them make sure they are ones that don't
+    // require us to handle the wrong endianness. Every probe is guarded by the
+    // file size so short files are never read past their end. The UTF-32 marks
+    // are tested first because the UTF-32 LE mark begins with the UTF-16 LE one.
+    //
+    if (size >= 4 &&
+        ((bom[0] == 0xFF && bom[1] == 0xFE && bom[2] == 0x00 && bom[3] == 0x00) ||
+         (bom[0] == 0x00 && bom[1] == 0x00 && bom[2] == 0xFE && bom[3] == 0xFF)))
+    {
+        SetErrorMessage(L"Invalid response file format.  Use ANSI, UTF-8, or UTF-16");
+        goto ErrExit;
+    }
+    else if (size >= 3 && bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF)
+    {
+        postByteOrderMarks += 3;
+        size -= 3;
+    }
+    else if (size >= 2 && bom[0] == 0xFF && bom[1] == 0xFE)
+    {
+        postByteOrderMarks += 2;
+        size -= 2;
+        alreadyUtf16 = true;
+    }
+    else if (size >= 2 && bom[0] == 0xFE && bom[1] == 0xFF)
+    {
+        SetErrorMessage(L"Invalid response file format.  Use little endian encoding with Unicode");
+        goto ErrExit;
+    }
+
+    if (alreadyUtf16)
+    {
+        //
+        // File is already formatted as UTF-16; just copy the bytes into the output buffer
+        //
+
+        // Sanity check - the payload better be an even number of bytes since we're dealing with UTF-16
+        if (size % 2 != 0)
+        {
+            SetErrorMessage(L"Response file corrupt.  Expected UTF-16 encoding but we had an odd number of bytes");
+            goto ErrExit;
+        }
+
+        DWORD cchText = size / 2;
+
+        bufW = new WCHAR[cchText + 1]; // + 1 for the terminator
+        if (!bufW)
+        {
+            SetErrorMessage(L"Out of memory");
+            goto ErrExit;
+        }
+
+        memcpy(bufW, postByteOrderMarks, size);
+        bufW[cchText] = L'\0';
+    }
+    else
+    {
+        //
+        // File is formatted as ANSI or UTF-8 and needs converting to UTF-16
+        //
+        int requiredSize = 0;
+
+        // MultiByteToWideChar fails for a zero-length input, so an empty file
+        // is handled as an empty string instead of as a read error.
+        if (size != 0)
+        {
+            requiredSize = MultiByteToWideChar(CP_UTF8, 0, postByteOrderMarks, (int)size, nullptr, 0);
+            if (requiredSize == 0)
+            {
+                SetErrorMessage(L"Failure reading response file.");
+                goto ErrExit;
+            }
+        }
+
+        bufW = new WCHAR[requiredSize + 1];
+        if (!bufW)
+        {
+            SetErrorMessage(L"Out of memory");
+            goto ErrExit;
+        }
+
+        if (requiredSize != 0 &&
+            !MultiByteToWideChar(CP_UTF8, 0, postByteOrderMarks, (int)size, bufW, requiredSize))
+        {
+            delete[] bufW;
+            bufW = nullptr;
+            SetErrorMessage(L"Failure reading response file.");
+            goto ErrExit;
+        }
+
+        bufW[requiredSize] = L'\0';
+    }
+
+    *ppwzTextBuffer = bufW;
+
+    success = true;
+
+ErrExit:
+    delete[] bufA;
+    // Only close a handle that was actually opened.
+    if (hFile != INVALID_HANDLE_VALUE)
+    {
+        CloseHandle(hFile);
+    }
+    return success;
+}
+
+/*
+ * Process Response files on the command line: every "@file" argument is
+ * replaced in the list by the arguments read from that file. Arguments coming
+ * from a response file are visited by the same loop, so nested response files
+ * are expanded too; naming the same file twice is reported as an error.
+ */
+void ConsoleArgs::ProcessResponseArgs()
+{
+    HRESULT hr;
+    b_tree *response_files = NULL;
+
+    WCHAR szFilename[MAX_PATH];
+
+    for (WStrList * listCurArg = m_listArgs;
+         listCurArg != NULL && !m_errorOccured;
+         listCurArg = listCurArg->next)
+    {
+        WCHAR * szArg = listCurArg->arg;
+
+        // Declared before any "goto CONTINUE" so no initialization is skipped.
+        LPWSTR pwzFileBuffer = nullptr;
+        LPWSTR szExpandedBuffer = nullptr;
+        LPCWSTR szActualText = nullptr;
+        DWORD dwNumChars = 0;
+
+        // Skip everything except Response files
+        if (szArg == NULL || szArg[0] != '@')
+            continue;
+
+        if (szArg[1] == L'\0')
+        {
+            SetErrorMessage(L"No response file specified");
+            goto CONTINUE;
+        }
+
+        // Check for duplicates
+        // NOTE(review): when the name cannot be canonicalized the "@file"
+        // argument is left in the list and no error is recorded -- confirm
+        // this pass-through is intended.
+        if (!GetFullFileName(&szArg[1], szFilename, MAX_PATH, false))
+            continue;
+
+
+        hr = TreeAdd(&response_files, szFilename);
+        if (hr == E_OUTOFMEMORY)
+        {
+            SetErrorMessage(L"Out of memory.");
+            goto CONTINUE;
+        }
+        else if (hr == S_FALSE)
+        {
+            SetErrorMessage(L"Duplicate response file.");
+            goto CONTINUE;
+        }
+
+        if (!ReadTextFile(szFilename, &pwzFileBuffer))
+        {
+            goto CONTINUE;
+        }
+
+        // Expand %VAR% references. The first call only reports the size needed.
+        dwNumChars = ExpandEnvironmentStringsW(pwzFileBuffer, NULL, 0);
+        if (dwNumChars != 0)
+        {
+            szExpandedBuffer = new WCHAR[dwNumChars];
+            if (szExpandedBuffer != nullptr)
+            {
+                DWORD dwRetVal = ExpandEnvironmentStringsW(pwzFileBuffer, szExpandedBuffer, dwNumChars);
+
+                if (dwRetVal != 0 && dwRetVal <= dwNumChars)
+                {
+                    szActualText = szExpandedBuffer;
+                }
+            }
+        }
+
+        if (szActualText == nullptr)
+        {
+            // Expand failed: parse the text as read rather than handing a
+            // NULL string to TextToArgs.
+            szActualText = pwzFileBuffer;
+        }
+
+        TextToArgs(szActualText, &listCurArg->next);
+
+CONTINUE: // remove the response file argument, and continue to the next.
+        // TextToArgs copies every token, so both buffers can go now.
+        delete [] szExpandedBuffer;
+        delete [] pwzFileBuffer;
+
+        // Every string in the list is a heap copy; free it before dropping it.
+        delete [] listCurArg->arg;
+        listCurArg->arg = NULL;
+    }
+
+    CleanupTree(response_files);
+}
+
diff --git a/src/tools/util/consoleargs.h b/src/tools/util/consoleargs.h
new file mode 100644
index 0000000000..869cee3629
--- /dev/null
+++ b/src/tools/util/consoleargs.h
@@ -0,0 +1,71 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+//
+
+#ifndef __CONSOLEARGS_H__
+#define __CONSOLEARGS_H__
+
+#include "list.h"
+#include "tree.h"
+// NOTE(review): the include target and the two template argument lists below
+// were missing from the text this patch was recovered from. They are inferred
+// from how consoleargs.cpp uses them (StringCchCopyW; _wcsicmp and FreeString
+// on tree elements; WCHAR* list payloads) -- confirm against upstream.
+#include <strsafe.h>
+
+typedef tree<LPCWSTR> b_tree;   // set of canonical response-file names
+typedef list<WCHAR*> WStrList;  // singly linked list of heap-allocated arguments
+
+// Static fallback message for when an error string cannot be allocated.
+// It is not heap memory and must never be passed to delete[].
+const LPWSTR kOutOfMemory = const_cast<LPWSTR>(L"Out of memory");
+
+// Expands "@file" response files on a command line into a flat argv array.
+// The object owns the argument list, the argv array and the last error message.
+class ConsoleArgs
+{
+public:
+    // Place the fully-qualified filename in the given output buffer
+    bool GetFullFileName(LPCWSTR szSource, __deref_out_ecount(cbFilenameBuffer) LPWSTR filenameBuffer, DWORD cbFilenameBuffer, bool fOutputFilename);
+
+    ConsoleArgs() :
+        m_rgArgs(NULL),
+        m_listArgs(NULL),
+        m_errorOccured(false),
+        m_lastErrorMessage(nullptr)
+    {
+    }
+
+    ~ConsoleArgs()
+    {
+        CleanUpArgs();
+    }
+
+    // The object owns raw allocations; a member-wise copy would free them twice.
+    ConsoleArgs(const ConsoleArgs &) = delete;
+    ConsoleArgs & operator=(const ConsoleArgs &) = delete;
+
+    // returns false if there are errors
+    bool ExpandResponseFiles(__in int argc, __deref_in_ecount(argc) const LPCWSTR * argv, __deref_out int * pargc2, __out LPWSTR ** pppargv2);
+
+    // Frees all memory used by the arg list and the argv/argc array
+    void CleanUpArgs();
+
+    // Returns the most recent error message, or nullptr if no error occurred.
+    // The string stays owned by this object.
+    LPWSTR ErrorMessage()
+    {
+        if (m_errorOccured)
+        {
+            return m_lastErrorMessage;
+        }
+        else
+        {
+            return nullptr;
+        }
+    }
+
+private:
+    void SetErrorMessage(__deref_in LPCWSTR pwzMessage);
+    b_tree * MakeLeaf( LPCWSTR szText);
+    void CleanupTree( b_tree * root);
+    HRESULT TreeAdd( b_tree ** root, LPCWSTR szAdd);
+    void TextToArgs( LPCWSTR szText, WStrList ** listReplace);
+    bool ReadTextFile(LPCWSTR pwzFilename, __deref_out LPWSTR *ppwzTextBuffer);
+    void ProcessResponseArgs();
+
+    LPWSTR * m_rgArgs;            // argv array handed out by ExpandResponseFiles
+    WStrList * m_listArgs;        // working list of arguments
+
+    bool m_errorOccured;          // set by SetErrorMessage
+    LPWSTR m_lastErrorMessage;    // text of the last error, or nullptr
+};
+
+#endif // __CONSOLEARGS_H__
diff --git a/src/tools/util/file_can.h b/src/tools/util/file_can.h
new file mode 100644
index 0000000000..a346a201e2
--- /dev/null
+++ b/src/tools/util/file_can.h
@@ -0,0 +1,70 @@
+//
+// Copyright (c) Microsoft. All rights reserved.
+// Licensed under the MIT license. See LICENSE file in the project root for full license information.
+// + +#ifndef __FILE_CAN_H__ +#define __FILE_CAN_H__ + +class CFileChecksum; + +enum FileType +{ + ftUnknown = 0, + ftUnicode, + ftSwappedUnicode, + ftUTF8, + ftASCII, + ftBinary +}; + +HANDLE OpenFileEx( LPCWSTR filename, DWORD *fileLen, LPCWSTR relPath = NULL, bool bWrite = false); +HRESULT ReadTextFile (PCWSTR pszFileName, UINT uiCodePage, WCAllocBuffer & textBuffer, FileType *fileType); +#if !defined(FEATURE_PAL) && !defined(CSEE) +// If you call ReadTextFile a lot you should create one HCRYPTPROV and pass it in to every call, otherwise +// ReadTextFile indirectly creates and destroys a new HCRYPTPROV for every call, which is slow and unnecessary. +// You can use CryptProvider to manage an HCRYPTPROV for you. +HRESULT ReadTextFile (PCWSTR pszFileName, UINT uiCodePage, WCAllocBuffer & textBuffer, FileType *fileType, CFileChecksum *pChecksum, HCRYPTPROV hCryptProv = NULL); +#endif + +// Src and Dest may be the same buffer +// Returns 0 for error (check via GetLastError()) or count of characters +// (not including NULL) copied to Dest. +// if fPreserveSrcCasing is set, ignores on-disk casing of filename (but still gets on-disk casing of directories) +// if fPreserveSrcCasing is set and and existing file matches with different short/longness it will fail +// and set the error code to ERROR_FILE_EXISTS +DWORD GetCanonFilePath(LPCWSTR wszSrcFileName, WCBuffer outBuffer, bool fPreserveSrcCasing); + +// GetCanonFilePath uses a cache to eliminate redundant calls to FindFirstFile. This cache +// is global and is thus long lived. The IDE would like to minimize memory impact, so +// ClearGetCanonFilePathCache is provided here for them to clear the cache when appropriate. +void ClearGetCanonFilePathCache(); + +// Remove quote marks from a string. +// Translation is done in-place +LPWSTR RemoveQuotes(WCBuffer textBuffer); + +// Remove quote marks from a string. +// Replace various characters with other illegal characters if unquoted. +// Translation is done in-place. 
+LPWSTR RemoveQuotesAndReplaceComma(WCBuffer textBuffer); // "," -> "|" +LPWSTR RemoveQuotesAndReplacePathDelim(WCBuffer textBuffer); // ",;" -> "|" +LPWSTR RemoveQuotesAndReplaceAlias(WCBuffer textBuffer); // ",;" -> "|" and "=" -> "\x1" + +// Safe version of ToLowerCase +// Gaurantees null termination even if buffer size is too small +inline PWSTR WINAPI SafeToLowerCase (PCWSTR pSrc, WCBuffer textBuffer) +{ + PWSTR returnValue = ToLowerCase(pSrc, textBuffer.GetData(), textBuffer.Count()); + if (textBuffer.Count() > 0) + { + textBuffer.SetAt(textBuffer.Count() - 1, 0); + } + return returnValue; +} + +// Joins a relative or absolute filename to the given path and stores the new +// filename in lpBuffer +bool MakePath( /*[in]*/LPCWSTR lpPath, /*[in]*/LPCWSTR lpFileName, WCBuffer pathBuffer); + +#endif // __FILE_CAN_H__ diff --git a/src/tools/util/list.h b/src/tools/util/list.h new file mode 100644 index 0000000000..5dc0aa4c6a --- /dev/null +++ b/src/tools/util/list.h @@ -0,0 +1,26 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +#ifndef __GENERIC_LIST_H__ +#define __GENERIC_LIST_H__ + +// Simple parameterized linked list +// with some good ctors +template +struct list +{ + _T arg; + list<_T> *next; + + list(_T t, list<_T> *n) + { + arg = t, next = n; + } + list() : arg(), next(NULL) + { + } +}; + +#endif // __GENERIC_LIST_H__ diff --git a/src/tools/util/tree.h b/src/tools/util/tree.h new file mode 100644 index 0000000000..527f718b00 --- /dev/null +++ b/src/tools/util/tree.h @@ -0,0 +1,243 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +#ifndef __GENERIC_TREE_H__ +#define __GENERIC_TREE_H__ + +#include + +// Partially balanced binary tree +// it does individual rotations on insertion, but does nto allow deletion. 
+// thus the worst case depth is not (n), but (n/2) +// Generic paramter is the element type +// Find and Add require a method that compares 2 elements +template +struct tree +{ + _E name; + tree<_E> *lChild; + tree<_E> *rChild; + size_t lDepth; + size_t rDepth; + + tree(_E e) + { + name = e; + lChild = rChild = NULL; + lDepth = rDepth = 0; + } + ~tree() + { + Cleanup(); + } + + bool InOrderWalk( bool (WalkFunc)(_E)) + { + if (lChild != NULL && !lChild->InOrderWalk(WalkFunc)) + return false; + if (!WalkFunc(name)) + return false; + if (rChild != NULL) + return rChild->InOrderWalk(WalkFunc); + return true; + } + + /* + * return the depths of the tree from here down (minimum of 1) + */ + size_t MaxDepth() + { + return lDepth > rDepth ? lDepth + 1 : rDepth + 1; + } + + /* + * Search the binary tree for the given string + * return a pointer to it was added or NULL if it + * doesn't exist + */ + _E * Find(_E SearchVal, int (__cdecl CompFunc)(_E, _E)) + { + int cmp = CompFunc(name, SearchVal); + if (cmp < 0) + { + if (lChild == NULL) + return NULL; + else + return lChild->Find(SearchVal, CompFunc); + } + else if (cmp > 0) + { + if (rChild == NULL) + return NULL; + else + return rChild->Find(SearchVal, CompFunc); + } + else + return &name; + } + + /* + * Search the binary tree and add the given string + * return S_OK if it was added or S_FALSE if it already + * exists (or E_OUTOFMEMORY) + */ + HRESULT Add(_E add + , int (__cdecl CompFunc)(_E, _E)) + { + int cmp = CompFunc(name, add + ); +REDO: + if (cmp == 0) + return S_FALSE; + + if (cmp < 0) + { + if (lChild == NULL) + { + lDepth = 1; + lChild = new tree<_E>(add + ); + if (lChild == NULL) + return E_OUTOFMEMORY; + return S_OK; + } + else if (rDepth < lDepth) + { + tree<_E> *temp = new tree<_E>(name); + if (temp == NULL) + return E_OUTOFMEMORY; + temp->rChild = rChild; + temp->rDepth = rDepth; + if (lChild != NULL && + (cmp = CompFunc(lChild->name, add + )) > 0) + { + // push right + temp->lChild = NULL; + temp->lDepth = 
0; + name = add + ; + rChild = temp; + rDepth++; + return S_OK; + } + else if (cmp == 0) + { + temp->rChild = NULL; + delete temp; + return S_FALSE; + } + else + { + // Rotate right + temp->lChild = lChild->rChild; + temp->lDepth = lChild->rDepth; + name = lChild->name; + lDepth = lChild->lDepth; + rDepth = temp->MaxDepth(); + rChild = temp; + temp = lChild->lChild; + lChild->lChild = lChild->rChild = NULL; + delete lChild; + lChild = temp; + goto REDO; + } + } + else + { + HRESULT hr = lChild->Add(add + , CompFunc); + lDepth = lChild->MaxDepth(); + return hr; + } + } + else + { + if (rChild == NULL) + { + rDepth = 1; + rChild = new tree<_E>(add + ); + if (rChild == NULL) + return E_OUTOFMEMORY; + return S_OK; + } + else if (lDepth < rDepth) + { + tree<_E> *temp = new tree<_E>(name); + if (temp == NULL) + return E_OUTOFMEMORY; + temp->lChild = lChild; + temp->lDepth = lDepth; + if (rChild != NULL && + (cmp = CompFunc(rChild->name, add + )) < 0) + { + // push left + temp->rChild = NULL; + temp->rDepth = 0; + name = add + ; + lChild = temp; + lDepth++; + return S_OK; + } + else if (cmp == 0) + { + temp->lChild = NULL; + delete temp; + return S_FALSE; + } + else + { + // Rotate left + temp->rChild = rChild->lChild; + temp->rDepth = rChild->lDepth; + name = rChild->name; + rDepth = rChild->rDepth; + lDepth = temp->MaxDepth(); + lChild = temp; + temp = rChild->rChild; + rChild->rChild = rChild->lChild = NULL; + delete rChild; + rChild = temp; + goto REDO; + } + } + else + { + HRESULT hr = rChild->Add(add + , CompFunc); + rDepth = rChild->MaxDepth(); + return hr; + } + } + } + + /* + * Free the memory allocated by the tree (recursive) + */ + void Cleanup() + { + if (this == NULL) + return ; + if (lChild != NULL) + { + lChild->Cleanup(); + delete lChild; + lChild = NULL; + } + if (rChild != NULL) + { + rChild->Cleanup(); + delete rChild; + rChild = NULL; + + } + } + +}; + +#endif // __GENERIC_TREE_H__ -- cgit v1.2.3