diff options
author | Adam Sitnik <adam.sitnik@gmail.com> | 2018-05-11 17:47:21 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-05-11 17:47:21 +0200 |
commit | c48c6ca2ee4f5b83bb3e064c4c132b2b15567e2a (patch) | |
tree | d75cea0a1f2a92967ada8a928ab8a3f76a3b764d /tests/src/performance | |
parent | 958b4e7a2fb18403c68bf7fc075b0c0429e29ddc (diff) | |
download | coreclr-c48c6ca2ee4f5b83bb3e064c4c132b2b15567e2a.tar.gz coreclr-c48c6ca2ee4f5b83bb3e064c4c132b2b15567e2a.tar.bz2 coreclr-c48c6ca2ee4f5b83bb3e064c4c132b2b15567e2a.zip |
Switch to Word2Vec fork (#17945)
* update git repo url, remove the patch file (not needed anymore)
* refactoring: there is no need to have an abstract class if only 1 class derives from it
* use CurrentCulture.NumberFormat.NumberDecimalSeparator instead of hardcoded dot (it's , for polish UI and it fails for me)
* use .NET Standard way of accessing CurrentCulture to fix the build
* updated repor url to our fork + removed dead code
Diffstat (limited to 'tests/src/performance')
-rw-r--r-- | tests/src/performance/Scenario/JitBench/Benchmarks/Word2VecBenchmark.cs (renamed from tests/src/performance/Scenario/JitBench/Benchmarks/MLBenchmark.cs) | 50 | ||||
-rw-r--r-- | tests/src/performance/Scenario/JitBench/JitBench.csproj | 3 | ||||
-rw-r--r-- | tests/src/performance/Scenario/JitBench/Resources/word2vecnet.patch | 605 | ||||
-rw-r--r-- | tests/src/performance/Scenario/JitBench/unofficial_dotnet/JitBench.csproj | 7 |
4 files changed, 18 insertions, 647 deletions
diff --git a/tests/src/performance/Scenario/JitBench/Benchmarks/MLBenchmark.cs b/tests/src/performance/Scenario/JitBench/Benchmarks/Word2VecBenchmark.cs index 88dd9d9237..6d6687d5ae 100644 --- a/tests/src/performance/Scenario/JitBench/Benchmarks/MLBenchmark.cs +++ b/tests/src/performance/Scenario/JitBench/Benchmarks/Word2VecBenchmark.cs @@ -3,15 +3,20 @@ using System.Collections.Generic; using System.IO; using System.Text.RegularExpressions; using System.Threading.Tasks; -using System.Reflection; using Microsoft.Xunit.Performance.Api; using System.Runtime.InteropServices; +using System.Globalization; namespace JitBench { - class Word2VecBenchmark : MLBenchmark + class Word2VecBenchmark : Benchmark { - public Word2VecBenchmark() : base("Word2Vec") { } + private static readonly HashSet<int> DefaultExitCodes = new HashSet<int>(new[] { 0 }); + + public Word2VecBenchmark() : base("Word2Vec") + { + ExePath = "Word2VecScenario.dll"; + } public override bool IsArchitectureSupported(Architecture arch) { @@ -24,28 +29,10 @@ namespace JitBench //unless customers tell us its a problem'. I'm OK telling people not to use tiered jitting //if their app already uses most of the address space on x86, and having an intermitently //failing test in a perf suite won't give us useful info hence x64 only for this one. - return arch == Architecture.X64; - } - - protected override string ExecutableName => "Word2VecScenario.dll"; - - protected override string GetWord2VecNetSrcDirectory(string outputDir) - { - return Path.Combine(GetWord2VecNetRepoRootDir(outputDir), "Word2VecScenario"); - } - } - - abstract class MLBenchmark : Benchmark - { - private static readonly HashSet<int> DefaultExitCodes = new HashSet<int>(new[] { 0 }); - public MLBenchmark(string name) : base(name) - { - ExePath = ExecutableName; + return arch == Architecture.X64; } - protected abstract string ExecutableName { get; } - public override async Task Setup(DotNetInstallation dotNetInstall, string outputDir, bool useExistingSetup, ITestOutputHelper output) { if(!useExistingSetup) @@ -67,11 +54,8 @@ namespace JitBench string word2VecNetRepoRootDir = GetWord2VecNetRepoRootDir(outputDir); FileTasks.DeleteDirectory(word2VecNetRepoRootDir, output); - string word2VecPatchFullPath = Path.Combine(Path.GetDirectoryName(Assembly.GetEntryAssembly().Location), Word2VecNetPatch); - await ExecuteGitCommand($"clone {Word2VecNetRepoUrl} {word2VecNetRepoRootDir}", output); await ExecuteGitCommand($"checkout {Word2VecNetCommitSha1Id}", output, workingDirectory: word2VecNetRepoRootDir); - await ExecuteGitCommand($"apply {word2VecPatchFullPath}", output, workingDirectory: word2VecNetRepoRootDir); } async Task ExecuteGitCommand(string arguments, ITestOutputHelper output, string workingDirectory = null) @@ -146,6 +130,7 @@ namespace JitBench double? trainingTime = null; double? firstSearchTime = null; double? steadyStateMedianTime = null; + var currentDecimalSeparator = CultureInfo.CurrentCulture.NumberFormat.NumberDecimalSeparator; using (var reader = new StringReader(stdout)) { @@ -166,7 +151,7 @@ namespace JitBench continue; } - match = Regex.Match(line, @"^Steadystate median search time: \s*(\d+\.\d+)ms$"); + match = Regex.Match(line, $@"^Steadystate median search time: \s*(\d+\{currentDecimalSeparator}\d+)ms$"); if (match.Success && match.Groups.Count == 2) { //many lines will match, but the final values of these variables will be from the last batch which is presumably the @@ -226,7 +211,10 @@ namespace JitBench return Path.Combine(outputDir, "W"); } - protected abstract string GetWord2VecNetSrcDirectory(string outputDir); + protected string GetWord2VecNetSrcDirectory(string outputDir) + { + return Path.Combine(GetWord2VecNetRepoRootDir(outputDir), "Word2VecScenario"); + } string GetWord2VecNetPublishDirectory(DotNetInstallation dotNetInstall, string outputDir, string tfm) { @@ -257,15 +245,11 @@ namespace JitBench return workspace; } - private const string Word2VecNetRepoUrl = "https://github.com/eabdullin/Word2Vec.Net"; - private const string Word2VecNetCommitSha1Id = "6012a2b5b886926918d51b1b56387d785115f448"; - private const string Word2VecNetPatch = "word2vecnet.patch"; - private const string EnvironmentFileName = "Word2VecNetEnvironment.txt"; - private const string StoreDirName = ".store"; + private const string Word2VecNetRepoUrl = "https://github.com/dotnet-perf-bot/Word2Vec.Net.git"; + private const string Word2VecNetCommitSha1Id = "bbf60216bd735ba2ccc3a54570ce735789968f2d"; private readonly Metric TrainingMetric = new Metric("Training", "ms"); private readonly Metric FirstSearchMetric = new Metric("First Search", "ms"); private readonly Metric MedianSearchMetric = new Metric("Median Search", "ms"); - private readonly Metric MeanSearchMetric = new Metric("Mean Search", "ms"); } } diff --git a/tests/src/performance/Scenario/JitBench/JitBench.csproj b/tests/src/performance/Scenario/JitBench/JitBench.csproj index 0d1f4fb0df..2e384d183e 100644 --- a/tests/src/performance/Scenario/JitBench/JitBench.csproj +++ b/tests/src/performance/Scenario/JitBench/JitBench.csproj @@ -54,8 +54,5 @@ Overwrite="true" Encoding="Unicode"/> </Target> - <Target Name="AfterBuild"> - <Copy SourceFiles="Resources\word2vecnet.patch" DestinationFolder="$(OutDir)" /> - </Target> </Project> diff --git a/tests/src/performance/Scenario/JitBench/Resources/word2vecnet.patch b/tests/src/performance/Scenario/JitBench/Resources/word2vecnet.patch deleted file mode 100644 index dbad57ba5f..0000000000 --- a/tests/src/performance/Scenario/JitBench/Resources/word2vecnet.patch +++ /dev/null @@ -1,605 +0,0 @@ -diff --git a/.gitignore b/.gitignore -index 8098fe2..7c82f99 100644 ---- a/.gitignore -+++ b/.gitignore -@@ -17,7 +17,6 @@ - [Rr]eleases/ - x64/ - x86/ --build/ - bld/ - [Bb]in/ - [Oo]bj/ -diff --git a/NuGet.config b/NuGet.config -new file mode 100644 -index 0000000..bd3a6f8 ---- /dev/null -+++ b/NuGet.config -@@ -0,0 +1,8 @@ -+<?xml version="1.0" encoding="utf-8"?> -+<configuration> -+ <packageSources> -+ <clear /> -+ <add key="dotnet core" value="https://dotnet.myget.org/F/dotnet-core/api/v3/index.json" /> -+ <add key="NuGet" value="https://api.nuget.org/v3/index.json" /> -+ </packageSources> -+</configuration> -\ No newline at end of file -diff --git a/Word2LibConsole/Word2LibConsole.vcxproj b/Word2LibConsole/Word2LibConsole.vcxproj -index 2caa5a0..03b8ada 100644 ---- a/Word2LibConsole/Word2LibConsole.vcxproj -+++ b/Word2LibConsole/Word2LibConsole.vcxproj -@@ -1,5 +1,5 @@ - <?xml version="1.0" encoding="utf-8"?> --<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> -+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> - <ItemGroup Label="ProjectConfigurations"> - <ProjectConfiguration Include="Debug|Win32"> - <Configuration>Debug</Configuration> -@@ -14,19 +14,19 @@ - <ProjectGuid>{9C719670-3571-4B68-A3DA-053B18C654A0}</ProjectGuid> - <Keyword>Win32Proj</Keyword> - <RootNamespace>Word2LibConsole</RootNamespace> -- <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion> -+ <WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion> - </PropertyGroup> - <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> - <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> - <ConfigurationType>Application</ConfigurationType> - <UseDebugLibraries>true</UseDebugLibraries> -- <PlatformToolset>v140</PlatformToolset> -+ <PlatformToolset>v141</PlatformToolset> - <CharacterSet>Unicode</CharacterSet> - </PropertyGroup> - <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> - <ConfigurationType>Application</ConfigurationType> - <UseDebugLibraries>false</UseDebugLibraries> -- <PlatformToolset>v140</PlatformToolset> -+ <PlatformToolset>v141</PlatformToolset> - <WholeProgramOptimization>true</WholeProgramOptimization> - <CharacterSet>Unicode</CharacterSet> - </PropertyGroup> -diff --git a/Word2Vec.Net/Distance.cs b/Word2Vec.Net/Distance.cs -index f2c3cdc..32929cd 100644 ---- a/Word2Vec.Net/Distance.cs -+++ b/Word2Vec.Net/Distance.cs -@@ -46,7 +46,7 @@ namespace Word2Vec.Net - } - if (b == Words) b = -1; - bi[a] = b; -- Console.Write("\nWord: {0} Position in vocabulary: {1}\n", st[a], bi[a]); -+ //Console.Write("\nWord: {0} Position in vocabulary: {1}\n", st[a], bi[a]); - if (b == -1) - { - Console.Write("Out of dictionary word!\n"); -@@ -99,4 +99,4 @@ namespace Word2Vec.Net - public string Word { get; set; } - public float Distance { get; set; } - } --} -\ No newline at end of file -+} -diff --git a/Word2Vec.Net/Properties/AssemblyInfo.cs b/Word2Vec.Net/Properties/AssemblyInfo.cs -deleted file mode 100644 -index 89452bf..0000000 ---- a/Word2Vec.Net/Properties/AssemblyInfo.cs -+++ /dev/null -@@ -1,36 +0,0 @@ --using System.Reflection; --using System.Runtime.CompilerServices; --using System.Runtime.InteropServices; -- --// General Information about an assembly is controlled through the following --// set of attributes. Change these attribute values to modify the information --// associated with an assembly. --[assembly: AssemblyTitle("Word2Vec.Net")] --[assembly: AssemblyDescription("")] --[assembly: AssemblyConfiguration("")] --[assembly: AssemblyCompany("")] --[assembly: AssemblyProduct("Word2Vec.Net")] --[assembly: AssemblyCopyright("Copyright © 2015")] --[assembly: AssemblyTrademark("")] --[assembly: AssemblyCulture("")] -- --// Setting ComVisible to false makes the types in this assembly not visible --// to COM components. If you need to access a type in this assembly from --// COM, set the ComVisible attribute to true on that type. --[assembly: ComVisible(false)] -- --// The following GUID is for the ID of the typelib if this project is exposed to COM --[assembly: Guid("b2bcc46d-a28b-40a4-a873-f0b1ffe65181")] -- --// Version information for an assembly consists of the following four values: --// --// Major Version --// Minor Version --// Build Number --// Revision --// --// You can specify all the values or you can default the Build and Revision Numbers --// by using the '*' as shown below: --// [assembly: AssemblyVersion("1.0.*")] --[assembly: AssemblyVersion("1.0.0.0")] --[assembly: AssemblyFileVersion("1.0.0.0")] -diff --git a/Word2Vec.Net/Word2Vec.Net.csproj b/Word2Vec.Net/Word2Vec.Net.csproj -index ee3ddb9..52cc678 100644 ---- a/Word2Vec.Net/Word2Vec.Net.csproj -+++ b/Word2Vec.Net/Word2Vec.Net.csproj -@@ -1,62 +1,26 @@ --<?xml version="1.0" encoding="utf-8"?> --<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> -- <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" /> -+<Project Sdk="Microsoft.NET.Sdk.Web"> -+ <Import Project="..\build\common.props" /> - <PropertyGroup> -- <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration> -- <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform> -- <ProjectGuid>{FEFCA2DC-137B-4EEE-A779-0194BDFEBE1F}</ProjectGuid> -- <OutputType>Library</OutputType> -- <AppDesignerFolder>Properties</AppDesignerFolder> -- <RootNamespace>Word2Vec.Net</RootNamespace> -+ <Description>Word2Vec.Net</Description> -+ <TargetFramework>netcoreapp2.1</TargetFramework> -+ <DefineConstants>$(DefineConstants);DEMO</DefineConstants> -+ <WarningsAsErrors>true</WarningsAsErrors> -+ <PreserveCompilationContext>true</PreserveCompilationContext> - <AssemblyName>Word2Vec.Net</AssemblyName> -- <TargetFrameworkVersion>v4.5</TargetFrameworkVersion> -- <FileAlignment>512</FileAlignment> -+ <OutputType>library</OutputType> -+ -+ <!-- This will prevent our build system from trying to package this project. --> -+ <IsPackable>false</IsPackable> -+ -+ <!-- This will be set as an environment variable to pin the version. --> -+ <RuntimeFrameworkVersion>$(WORD2VEC_FRAMEWORK_VERSION)</RuntimeFrameworkVersion> - </PropertyGroup> -- <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> -- <DebugSymbols>true</DebugSymbols> -- <DebugType>full</DebugType> -- <Optimize>false</Optimize> -- <OutputPath>bin\Debug\</OutputPath> -- <DefineConstants>DEBUG;TRACE</DefineConstants> -- <ErrorReport>prompt</ErrorReport> -- <WarningLevel>4</WarningLevel> -- <DocumentationFile>bin\Debug\Word2Vec.Net.XML</DocumentationFile> -- <PlatformTarget>AnyCPU</PlatformTarget> -+ -+ <PropertyGroup Condition=" '$(RuntimeFrameworkVersion)' == '' "> -+ <RuntimeFrameworkVersion>2.1.0-*</RuntimeFrameworkVersion> - </PropertyGroup> -- <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' "> -- <DebugType>pdbonly</DebugType> -- <Optimize>true</Optimize> -- <OutputPath>bin\Release\</OutputPath> -- <DefineConstants>TRACE</DefineConstants> -- <ErrorReport>prompt</ErrorReport> -- <WarningLevel>4</WarningLevel> -+ -+ <PropertyGroup Condition=" '$(Configuration)' == 'Release' "> -+ <DefineConstants>$(DefineConstants);RELEASE</DefineConstants> - </PropertyGroup> -- <ItemGroup> -- <Reference Include="System" /> -- <Reference Include="System.Core" /> -- <Reference Include="System.Xml.Linq" /> -- <Reference Include="System.Data.DataSetExtensions" /> -- <Reference Include="Microsoft.CSharp" /> -- <Reference Include="System.Data" /> -- <Reference Include="System.Xml" /> -- </ItemGroup> -- <ItemGroup> -- <Compile Include="Distance.cs" /> -- <Compile Include="StringExtension.cs" /> -- <Compile Include="Utils\FileStreamExtensions.cs" /> -- <Compile Include="VocubWord.cs" /> -- <Compile Include="Word2Vec.cs" /> -- <Compile Include="Properties\AssemblyInfo.cs" /> -- <Compile Include="Word2VecAnalysisBase.cs" /> -- <Compile Include="Word2VecBuilder.cs" /> -- <Compile Include="WordAnalogy.cs" /> -- </ItemGroup> -- <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" /> -- <!-- To modify your build process, add your task inside one of the targets below and uncomment it. -- Other similar extension points exist, see Microsoft.Common.targets. -- <Target Name="BeforeBuild"> -- </Target> -- <Target Name="AfterBuild"> -- </Target> -- --> --</Project> -\ No newline at end of file -+</Project> -diff --git a/Word2Vec.Net/Word2Vec.cs b/Word2Vec.Net/Word2Vec.cs -index 968bf88..4142c7b 100644 ---- a/Word2Vec.Net/Word2Vec.cs -+++ b/Word2Vec.Net/Word2Vec.cs -@@ -57,7 +57,7 @@ namespace Word2Vec.Net - private const int TableSize = (int) 1e8; - private int[] _table; - -- internal Word2Vec( -+ public Word2Vec( - string trainFileName, - string outPutfileName, - string saveVocabFileName, -@@ -186,7 +186,7 @@ namespace Word2Vec.Net - for (var a = 0; a < VocabHashSize; a++) _vocabHash[a] = -1; - int size = _vocabSize; - _trainWords = 0; -- for (var a = 0; a < size; a++) -+ /*for (var a = 0; a < size; a++) - { - // Words occuring less than min_count times will be discarded from the vocab - if (_vocab[a].Cn < _minCount && (a != 0)) -@@ -203,7 +203,7 @@ namespace Word2Vec.Net - _trainWords += _vocab[a].Cn; - } - } -- Array.Resize(ref _vocab, _vocabSize + 1); -+ Array.Resize(ref _vocab, _vocabSize + 1);*/ - - // Allocate memory for the binary tree construction - for (var a = 0; a < _vocabSize; a++) -@@ -331,56 +331,48 @@ namespace Word2Vec.Net - - private void LearnVocabFromTrainFile() - { -- int i; -- for (var a = 0; a < VocabHashSize; a++) _vocabHash[a] = -1; -- using (var fin = File.OpenText(_trainFile)) -+ int i; -+ for (var a = 0; a < VocabHashSize; a++) _vocabHash[a] = -1; -+ string[] fin = System.IO.File.ReadAllLines(_trainFile); -+ _vocabSize = 0; -+ -+ Regex regex = new Regex("\\s"); -+ AddWordToVocab("</s>"); -+ foreach (string line in fin) -+ { -+ string[] words = regex.Split(line); -+ -+ foreach (var word in words) - { -- if (fin == StreamReader.Null) -+ if(string.IsNullOrWhiteSpace(word)) continue; -+ _trainWords++; -+ if ((_debugMode > 1) && (_trainWords%100000 == 0)) - { -- throw new InvalidOperationException("ERROR: training data file not found!\n"); -+ Console.Write("{0}K \r", _trainWords/1000); -+ //printf("%lldK%c", train_words / 1000, 13); -+ //fflush(stdout); - } -- _vocabSize = 0; -- -- string line; -- Regex regex = new Regex("\\s"); -- AddWordToVocab("</s>"); -- while ((line = fin.ReadLine()) != null) -- { -- if (fin.EndOfStream) break; -- string[] words = regex.Split(line); -- -- foreach (var word in words) -- { -- if(string.IsNullOrWhiteSpace(word)) continue; -- _trainWords++; -- if ((_debugMode > 1) && (_trainWords%100000 == 0)) -- { -- Console.Write("{0}K \r", _trainWords/1000); -- //printf("%lldK%c", train_words / 1000, 13); -- //fflush(stdout); -- } -- i = SearchVocab(word); -- if (i == -1) -+ i = SearchVocab(word); -+ if (i == -1) - { - var a = AddWordToVocab(word); - _vocab[a].Cn = 1; - } -- else -- _vocab[i].Cn++; -- if (_vocabSize > VocabHashSize*0.7) -- ReduceVocab(); -- } -- } -- SortVocab(); -- if (_debugMode > 0) -- { -- Console.WriteLine("Vocab size: {0}", _vocabSize); -- Console.WriteLine("Words in train file: {0}", _trainWords); -- } -- //file_size = ftell(fin); -- _fileSize = new FileInfo(_trainFile).Length; -+ else -+ _vocab[i].Cn++; -+ if (_vocabSize > VocabHashSize*0.7) -+ ReduceVocab(); - } - } -+ SortVocab(); -+ if (_debugMode > 0) -+ { -+ Console.WriteLine("Vocab size: {0}", _vocabSize); -+ Console.WriteLine("Words in train file: {0}", _trainWords); -+ } -+ //file_size = ftell(fin); -+ _fileSize = new FileInfo(_trainFile).Length; -+ } - - private void SaveVocab() - { -diff --git a/Word2Vec.Net/WordAnalogy.cs b/Word2Vec.Net/WordAnalogy.cs -index eaa35bf..8347c0f 100644 ---- a/Word2Vec.Net/WordAnalogy.cs -+++ b/Word2Vec.Net/WordAnalogy.cs -@@ -24,7 +24,7 @@ namespace Word2Vec.Net - for (b = 0; b < Words; b++) if (!new string(Vocab, (int)(b * max_w), (int)max_w).Equals(st[a])) break; - if (b == Words) b = -1; - bi[a] = b; -- Console.Write("\nWord: {0} Position in vocabulary: {1}\n", st[a], bi[a]); -+ //Console.Write("\nWord: {0} Position in vocabulary: {1}\n", st[a], bi[a]); - if (b == -1) - { - Console.Write("Out of dictionary word!\n"); -diff --git a/Word2VecScenario/App.config b/Word2VecScenario/App.config -new file mode 100644 -index 0000000..e8482b1 ---- /dev/null -+++ b/Word2VecScenario/App.config -@@ -0,0 +1,12 @@ -+<?xml version="1.0" encoding="utf-8" ?> -+<configuration> -+ -+ <runtime> -+ <gcAllowVeryLargeObjects enabled="true"/> -+ </runtime> -+ -+ <startup> -+ <supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5" /> -+ </startup> -+ -+</configuration> -\ No newline at end of file -diff --git a/Word2VecScenario/Corpus.txt.ReadMe.txt b/Word2VecScenario/Corpus.txt.ReadMe.txt -new file mode 100644 -index 0000000..82c04e5 ---- /dev/null -+++ b/Word2VecScenario/Corpus.txt.ReadMe.txt -@@ -0,0 +1,5 @@ -+Please download and rename the following file: -+ -+http://mattmahoney.net/dc/text8.zip -+ -+Renaming the file inside the zip to: Corpus.txt - In place of this File! -diff --git a/Word2VecScenario/Program.cs b/Word2VecScenario/Program.cs -new file mode 100644 -index 0000000..7e9ad31 ---- /dev/null -+++ b/Word2VecScenario/Program.cs -@@ -0,0 +1,161 @@ -+namespace Word2VecScenario -+{ -+ using System; -+ using System.Diagnostics; -+ using System.Linq; -+ using Word2Vec.Net; -+ -+ class Program -+ { -+ static string path = @"Word2VectorOutputFile.bin"; -+ static Distance distance = null; -+ static WordAnalogy wordAnalogy = null; -+ -+ static void Main(string[] args) -+ { -+ // -train <file> Use text data from <file> to train the model -+ string train = "Corpus.txt"; -+ -+ // -output <file> Use <file> to save the resulting word vectors / word clusters -+ string output = "Vectors.bin"; -+ -+ // -save-vocab <file> The vocabulary will be saved to <file> -+ string savevocab = ""; -+ -+ // -read-vocab <file> The vocabulary will be read from <file>, not constructed from the training data -+ string readvocab = ""; -+ -+ // -size <int> Set size of word vectors; default is 100 -+ int size = 100; -+ -+ // -debug <int> Set the debug mode (default = 2 = more info during training) -+ int debug = 1; -+ -+ // -binary <int> Save the resulting vectors in binary moded; default is 0 (off) -+ int binary = 1; -+ -+ // -cbow <int> Use the continuous bag of words model; default is 1 (use 0 for skip-gram model) -+ int cbow = 1; -+ -+ // -alpha <float> Set the starting learning rate; default is 0.025 for skip-gram and 0.05 for CBOW -+ float alpha = 0.05f; -+ -+ // -sample <float> Set threshold for occurrence of words. Those that appear with higher frequency in the training data -+ float sample = 1e-4f; -+ -+ // -hs <int> Use Hierarchical Softmax; default is 0 (not used) -+ int hs = 0; -+ -+ // -negative <int> Number of negative examples; default is 5, common values are 3 - 10 (0 = not used) -+ int negative = 5; -+ -+ // -threads <int> Use <int> threads (default 12) -+ int threads = 12; -+ -+ // -iter <int> Run more training iterations (default 5) -+ long iter = 15; -+ -+ // -min-count <int> This will discard words that appear less than <int> times; default is 5 -+ int mincount = 5; -+ -+ // -classes <int> Output word classes rather than word vectors; default number of classes is 0 (vectors are written) -+ long classes = 0; -+ -+ // -window <int> Set max skip length between words; default is 5 -+ int window = 12; -+ -+ Word2Vec word2Vec = new Word2Vec(train, output, savevocab, readvocab, size, debug, binary, cbow, alpha, sample, hs, negative, threads, iter, mincount, classes, window); -+ -+ var totalTime = Stopwatch.StartNew(); -+ var highRes = Stopwatch.IsHighResolution; -+ -+ word2Vec.TrainModel(); -+ -+ totalTime.Stop(); -+ -+ var trainingTime = totalTime.ElapsedMilliseconds; -+ Console.WriteLine("Training took {0}ms", trainingTime); -+ -+ path = @"Vectors.bin"; -+ distance = new Distance(path); -+ wordAnalogy = new WordAnalogy(path); -+ -+ string[] wordList = new string[] {"paris france madrid" }; -+ -+ var searchTime = Stopwatch.StartNew(); -+ -+ foreach (string word in wordList) -+ { -+ distance.Search(word); -+ wordAnalogy.Search(word); -+ } -+ -+ searchTime.Stop(); -+ var firstSearchTime = searchTime.ElapsedMilliseconds; -+ Console.WriteLine("Search took {0}ms", firstSearchTime); -+ -+ int outerN = 5; -+ -+ for (int outer = 0; outer < outerN; outer++) -+ { -+ foreach (string word in wordList) -+ { -+ int N = 11; -+ var minSearchTime = long.MaxValue; -+ var maxSearchTime = long.MinValue; -+ long[] searchTimes = new long[N]; -+ -+ Console.WriteLine($"Batch {outer}, searching {word}: running {N} searches"); -+ -+ for (int inner = 0; inner < N; inner++) -+ { -+ searchTime.Restart(); -+ distance.Search(word); -+ BestWord[] result = wordAnalogy.Search(word); -+ searchTime.Stop(); -+ -+ /*foreach (var bestWord in result) -+ { -+ Console.WriteLine("{0}\t\t{1}", bestWord.Word, bestWord.Distance); -+ }*/ -+ -+ long interval = highRes ? searchTime.ElapsedTicks : searchTime.ElapsedMilliseconds; -+ searchTimes[inner] = interval; -+ -+ if (interval < minSearchTime) -+ { -+ minSearchTime = interval; -+ } -+ if (interval > maxSearchTime) -+ { -+ maxSearchTime = interval; -+ } -+ } -+ -+ if (highRes) -+ { -+ double averageSearch = 1000 * ((double)searchTimes.Sum() / N / Stopwatch.Frequency); -+ double medianSearch = 1000 * ((double)searchTimes.OrderBy(t => t).ElementAt(N / 2) / Stopwatch.Frequency); -+ Console.WriteLine("Steadystate min search time: {0:F2}ms", (1000 * minSearchTime) / Stopwatch.Frequency); -+ Console.WriteLine("Steadystate max search time: {0:F2}ms", (1000 * maxSearchTime) / Stopwatch.Frequency); -+ Console.WriteLine("Steadystate average search time: {0:F2}ms", averageSearch); -+ Console.WriteLine("Steadystate median search time: {0:F2}ms", medianSearch); -+ } -+ else -+ { -+ long averageSearch = searchTimes.Sum() / N; -+ long medianSearch = searchTimes.OrderBy(t => t).ElementAt(N / 2); -+ Console.WriteLine("Steadystate min search time: {0}ms", minSearchTime); -+ Console.WriteLine("Steadystate max search time: {0}ms", maxSearchTime); -+ Console.WriteLine("Steadystate average search time: {0}ms", (int)averageSearch); -+ Console.WriteLine("Steadystate median search time: {0}ms", (int)medianSearch); -+ } -+ -+ Console.WriteLine(""); -+ } -+ } -+ } -+ -+ } -+ -+} -diff --git a/Word2VecScenario/Word2VecScenario.csproj b/Word2VecScenario/Word2VecScenario.csproj -new file mode 100644 -index 0000000..cacd48a ---- /dev/null -+++ b/Word2VecScenario/Word2VecScenario.csproj -@@ -0,0 +1,34 @@ -+<Project Sdk="Microsoft.NET.Sdk.Web"> -+ <Import Project="..\build\common.props" /> -+ <PropertyGroup> -+ <Description>Test for Word2Vec</Description> -+ <TargetFramework>netcoreapp2.1</TargetFramework> -+ <DefineConstants>$(DefineConstants);DEMO</DefineConstants> -+ <WarningsAsErrors>true</WarningsAsErrors> -+ <PreserveCompilationContext>true</PreserveCompilationContext> -+ <AssemblyName>Word2VecScenario</AssemblyName> -+ <OutputType>Exe</OutputType> -+ -+ <!-- This will prevent our build system from trying to package this project. --> -+ <IsPackable>false</IsPackable> -+ -+ <!-- This will be set as an environment variable to pin the version. --> -+ <RuntimeFrameworkVersion>$(WORD2VEC_FRAMEWORK_VERSION)</RuntimeFrameworkVersion> -+ </PropertyGroup> -+ -+ <PropertyGroup Condition=" '$(RuntimeFrameworkVersion)' == '' "> -+ <RuntimeFrameworkVersion>2.1.0-*</RuntimeFrameworkVersion> -+ </PropertyGroup> -+ -+ <PropertyGroup Condition=" '$(Configuration)' == 'Release' "> -+ <DefineConstants>$(DefineConstants);RELEASE</DefineConstants> -+ </PropertyGroup> -+ -+ <ItemGroup> -+ <ProjectReference Include="..\Word2Vec.Net\Word2Vec.Net.csproj"> -+ <Name>Word2Vec.Net</Name> -+ </ProjectReference> -+ </ItemGroup> -+ -+</Project> -+ -diff --git a/build/common.props b/build/common.props -new file mode 100644 -index 0000000..36d884c ---- /dev/null -+++ b/build/common.props -@@ -0,0 +1,5 @@ -+<Project> -+ -+ <Import Project="dependencies.props" /> -+ -+</Project> -diff --git a/build/dependencies.props b/build/dependencies.props -new file mode 100644 -index 0000000..95d79b3 ---- /dev/null -+++ b/build/dependencies.props -@@ -0,0 +1,5 @@ -+<Project> -+ <PropertyGroup> -+ <RuntimeFrameworkVersion>2.0.0-*</RuntimeFrameworkVersion> -+ </PropertyGroup> -+</Project> diff --git a/tests/src/performance/Scenario/JitBench/unofficial_dotnet/JitBench.csproj b/tests/src/performance/Scenario/JitBench/unofficial_dotnet/JitBench.csproj index 74e633a327..a8616c21e9 100644 --- a/tests/src/performance/Scenario/JitBench/unofficial_dotnet/JitBench.csproj +++ b/tests/src/performance/Scenario/JitBench/unofficial_dotnet/JitBench.csproj @@ -55,10 +55,5 @@ <!-- The CoreCLR test build system requires a target named RestorePackage in order to do BatchRestore --> <Target Name="RestorePackage" DependsOnTargets="Restore" /> - <Target Name="CustomCopyContent" AfterTargets="AfterBuild"> - <Copy SourceFiles="..\Resources\word2vecnet.patch" DestinationFolder="$(OutDir)" /> - </Target> - <Target Name="CustomCopyContentOnPublish" AfterTargets="AfterPublish"> - <Copy SourceFiles="..\Resources\word2vecnet.patch" DestinationFolder="$(PublishDir)" /> - </Target> + </Project> |