summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitattributes1
-rwxr-xr-xtests/scripts/run-xunit-perf.py4
-rw-r--r--tests/src/performance/Scenario/JitBench/Benchmarks/MLBenchmark.cs256
-rw-r--r--tests/src/performance/Scenario/JitBench/JitBench.csproj3
-rw-r--r--tests/src/performance/Scenario/JitBench/Resources/word2vecnet.patch605
-rw-r--r--tests/src/performance/Scenario/JitBench/Runner/Benchmark.cs3
-rw-r--r--tests/src/performance/Scenario/JitBench/Utilities/FileTasks.cs46
-rw-r--r--tests/src/performance/Scenario/JitBench/Utilities/ProcessRunner.cs2
-rw-r--r--tests/src/performance/Scenario/JitBench/unofficial_dotnet/JitBench.csproj6
9 files changed, 922 insertions, 4 deletions
diff --git a/.gitattributes b/.gitattributes
index 27e6703185..4777297037 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -75,4 +75,5 @@ tests/src/JIT/Performance/CodeQuality/BenchmarksGame/reverse-complement/revcomp-
tests/src/JIT/Performance/CodeQuality/BenchmarksGame/reverse-complement/revcomp-input25000.txt text eol=lf
tests/src/JIT/Performance/CodeQuality/BenchmarksGame/k-nucleotide/knucleotide-input.txt text eol=lf
tests/src/JIT/Performance/CodeQuality/BenchmarksGame/k-nucleotide/knucleotide-input-big.txt text eol=lf
+tests/src/performance/Scenario/JitBench/Resources/word2vecnet.patch text eol=lf
diff --git a/tests/scripts/run-xunit-perf.py b/tests/scripts/run-xunit-perf.py
index 3c1cb89dca..18a0b8e446 100755
--- a/tests/scripts/run-xunit-perf.py
+++ b/tests/scripts/run-xunit-perf.py
@@ -182,9 +182,9 @@ def run_benchmark(benchname, benchdir, env, sandboxDir, benchmarkOutputDir, test
myEnv = dict(env)
benchnameWithExt = benchname + '.' + testFileExt
fullPath = os.path.join(benchdir, benchnameWithExt)
- shutil.copy2(fullPath, sandboxDir)
- files = glob.iglob(os.path.join(benchdir, "*.txt"))
+ # Copy all files in the benchmark directory to the sandbox
+ files = glob.iglob(os.path.join(benchdir, "*.*"))
for filename in files:
if os.path.isfile(filename):
shutil.copy2(filename, sandboxDir)
diff --git a/tests/src/performance/Scenario/JitBench/Benchmarks/MLBenchmark.cs b/tests/src/performance/Scenario/JitBench/Benchmarks/MLBenchmark.cs
new file mode 100644
index 0000000000..ce4a45d83f
--- /dev/null
+++ b/tests/src/performance/Scenario/JitBench/Benchmarks/MLBenchmark.cs
@@ -0,0 +1,256 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Text.RegularExpressions;
+using System.Threading.Tasks;
+using System.Reflection;
+using Microsoft.Xunit.Performance.Api;
+
+namespace JitBench
+{
+ class Word2VecBenchmark : MLBenchmark
+ {
+ public Word2VecBenchmark() : base("Word2Vec") { }
+
+ protected override string ExecutableName => "Word2VecScenario.dll";
+
+ protected override string GetWord2VecNetSrcDirectory(string outputDir)
+ {
+ return Path.Combine(GetWord2VecNetRepoRootDir(outputDir), "Word2VecScenario");
+ }
+ }
+
+ abstract class MLBenchmark : Benchmark
+ {
+ private static readonly HashSet<int> DefaultExitCodes = new HashSet<int>(new[] { 0 });
+
+ public MLBenchmark(string name) : base(name)
+ {
+ ExePath = ExecutableName;
+ }
+
+ protected abstract string ExecutableName { get; }
+
+ public override async Task Setup(DotNetInstallation dotNetInstall, string outputDir, bool useExistingSetup, ITestOutputHelper output)
+ {
+ if(!useExistingSetup)
+ {
+ using (var setupSection = new IndentedTestOutputHelper("Setup " + Name, output))
+ {
+ await CloneWord2VecNetRepo(outputDir, setupSection);
+ await Publish(dotNetInstall, outputDir, setupSection);
+ await DownloadAndExtractTextCorpus(dotNetInstall, outputDir, setupSection);
+ }
+ }
+ string tfm = DotNetSetup.GetTargetFrameworkMonikerForFrameworkVersion(dotNetInstall.FrameworkVersion);
+ WorkingDirPath = GetWord2VecNetPublishDirectory(dotNetInstall, outputDir, tfm);
+ }
+
+ async Task CloneWord2VecNetRepo(string outputDir, ITestOutputHelper output)
+ {
+ // If the repo already exists, we delete it and extract it again.
+ string word2VecNetRepoRootDir = GetWord2VecNetRepoRootDir(outputDir);
+ FileTasks.DeleteDirectory(word2VecNetRepoRootDir, output);
+
+ string word2VecPatchFullPath = Path.Combine(Path.GetDirectoryName(Assembly.GetEntryAssembly().Location), Word2VecNetPatch);
+
+ await ExecuteGitCommand($"clone {Word2VecNetRepoUrl} {word2VecNetRepoRootDir}", output);
+ await ExecuteGitCommand($"checkout {Word2VecNetCommitSha1Id}", output, workingDirectory: word2VecNetRepoRootDir);
+ await ExecuteGitCommand($"apply {word2VecPatchFullPath}", output, workingDirectory: word2VecNetRepoRootDir);
+ }
+
+ async Task ExecuteGitCommand(string arguments, ITestOutputHelper output, string workingDirectory = null)
+ {
+ int exitCode = await new ProcessRunner("git", arguments).WithLog(output).WithWorkingDirectory(workingDirectory).Run();
+
+ if (!DefaultExitCodes.Contains(exitCode))
+ throw new Exception($"git {arguments} has failed, the exit code was {exitCode}");
+ }
+
+ async Task DownloadAndExtractTextCorpus(DotNetInstallation dotNetInstall, string outputDir, ITestOutputHelper output)
+ {
+ // If the file already exists, exit
+ string word2VecNetRepoRootDir = GetWord2VecNetRepoRootDir(outputDir);
+ string tfm = DotNetSetup.GetTargetFrameworkMonikerForFrameworkVersion(dotNetInstall.FrameworkVersion);
+ string word2VecNetPublishDir = GetWord2VecNetPublishDirectory(dotNetInstall, outputDir, tfm);
+
+ // Download the corpus of text. This is a zip file that contains a text file of 100M of text from Wikipedia
+ var url = "http://mattmahoney.net/dc/text8.zip";
+ await FileTasks.DownloadAndUnzip(url, word2VecNetRepoRootDir + "_temp", output);
+
+ FileTasks.MoveFile(Path.Combine(word2VecNetRepoRootDir + "_temp", "text8"),
+ Path.Combine(word2VecNetPublishDir, "Corpus.txt"), output);
+ }
+
+ private async Task<string> Publish(DotNetInstallation dotNetInstall, string outputDir, ITestOutputHelper output)
+ {
+ string tfm = DotNetSetup.GetTargetFrameworkMonikerForFrameworkVersion(dotNetInstall.FrameworkVersion);
+ string publishDir = GetWord2VecNetPublishDirectory(dotNetInstall, outputDir, tfm);
+ if (publishDir != null)
+ {
+ FileTasks.DeleteDirectory(publishDir, output);
+ }
+ string dotNetExePath = dotNetInstall.DotNetExe;
+ await new ProcessRunner(dotNetExePath, $"publish -c Release -f {tfm}")
+ .WithWorkingDirectory(GetWord2VecNetSrcDirectory(outputDir))
+ .WithEnvironmentVariable("DOTNET_MULTILEVEL_LOOKUP", "0")
+ .WithEnvironmentVariable("WORD2VEC_FRAMEWORK_VERSION", dotNetInstall.FrameworkVersion)
+ .WithEnvironmentVariable("UseSharedCompilation", "false")
+ .WithLog(output)
+ .Run();
+
+ publishDir = GetWord2VecNetPublishDirectory(dotNetInstall, outputDir, tfm);
+ if (publishDir == null)
+ {
+ throw new DirectoryNotFoundException("Could not find 'publish' directory");
+ }
+ return publishDir;
+ }
+
+ public override Metric[] GetDefaultDisplayMetrics()
+ {
+ return new Metric[]
+ {
+ TrainingMetric,
+ FirstSearchMetric,
+ MedianSearchMetric
+ };
+ }
+
+ protected override IterationResult RecordIterationMetrics(ScenarioExecutionResult scenarioIteration, string stdout, string stderr, ITestOutputHelper output)
+ {
+ IterationResult result = base.RecordIterationMetrics(scenarioIteration, stdout, stderr, output);
+ AddConsoleMetrics(result, stdout, output);
+ return result;
+ }
+
+ void AddConsoleMetrics(IterationResult result, string stdout, ITestOutputHelper output)
+ {
+ output.WriteLine("Processing iteration results.");
+
+ double? trainingTime = null;
+ double? firstSearchTime = null;
+ double? steadyStateMedianTime = null;
+
+ using (var reader = new StringReader(stdout))
+ {
+ string line;
+ while ((line = reader.ReadLine()) != null)
+ {
+ Match match = Regex.Match(line, @"^Training took \s*(\d+)ms$");
+ if (match.Success && match.Groups.Count == 2)
+ {
+ trainingTime = Convert.ToDouble(match.Groups[1].Value);
+ continue;
+ }
+
+ match = Regex.Match(line, @"^Search took \s*(\d+)ms$");
+ if (match.Success && match.Groups.Count == 2)
+ {
+ firstSearchTime = Convert.ToDouble(match.Groups[1].Value);
+ continue;
+ }
+
+ match = Regex.Match(line, @"^Steadystate median search time: \s*(\d+\.\d+)ms$");
+ if (match.Success && match.Groups.Count == 2)
+ {
+ //many lines will match, but the final values of these variables will be from the last batch which is presumably the
+ //best measurement of steady state performance
+ steadyStateMedianTime = Convert.ToDouble(match.Groups[1].Value);
+ continue;
+ }
+ }
+ }
+
+ if (!trainingTime.HasValue)
+ throw new FormatException("Training time was not found.");
+ if (!firstSearchTime.HasValue)
+ throw new FormatException("First Search time was not found.");
+ if (!steadyStateMedianTime.HasValue)
+ throw new FormatException("Steady state median response time not found.");
+
+
+ result.Measurements.Add(TrainingMetric, trainingTime.Value);
+ result.Measurements.Add(FirstSearchMetric, firstSearchTime.Value);
+ result.Measurements.Add(MedianSearchMetric, steadyStateMedianTime.Value);
+
+ output.WriteLine($"Training took {trainingTime}ms");
+ output.WriteLine($"Search took {firstSearchTime}ms");
+ output.WriteLine($"Median steady state search {steadyStateMedianTime.Value}ms");
+ }
+
+ /// <summary>
+ /// When serializing the result data to benchview this is called to determine if any of the metrics should be reported differently
+ /// than they were collected. Both web apps use this to collect several measurements in each iteration, then present those measurements
+ /// to benchview as if each was the Duration metric of a distinct scenario test with its own set of iterations.
+ /// </summary>
+ public override bool TryGetBenchviewCustomMetricReporting(Metric originalMetric, out Metric newMetric, out string newScenarioModelName)
+ {
+ if(originalMetric.Equals(TrainingMetric))
+ {
+ newScenarioModelName = "Training";
+ }
+ else if (originalMetric.Equals(FirstSearchMetric))
+ {
+ newScenarioModelName = "First Search";
+ }
+ else if (originalMetric.Equals(MedianSearchMetric))
+ {
+ newScenarioModelName = "Median Search";
+ }
+ else
+ {
+ return base.TryGetBenchviewCustomMetricReporting(originalMetric, out newMetric, out newScenarioModelName);
+ }
+ newMetric = Metric.ElapsedTimeMilliseconds;
+ return true;
+ }
+
+ protected static string GetWord2VecNetRepoRootDir(string outputDir)
+ {
+ return Path.Combine(outputDir, "W");
+ }
+
+ protected abstract string GetWord2VecNetSrcDirectory(string outputDir);
+
+ string GetWord2VecNetPublishDirectory(DotNetInstallation dotNetInstall, string outputDir, string tfm)
+ {
+ string dir = Path.Combine(GetWord2VecNetSrcDirectory(outputDir), "bin", dotNetInstall.Architecture, "Release", tfm, "publish");
+ if (Directory.Exists(dir))
+ {
+ return dir;
+ }
+
+ dir = Path.Combine(GetWord2VecNetSrcDirectory(outputDir), "bin", "Release", tfm, "publish");
+ if (Directory.Exists(dir))
+ {
+ return dir;
+ }
+
+ return null;
+ }
+
+ string GetCoreClrRoot()
+ {
+ string currentDirectory = Directory.GetCurrentDirectory();
+ string workspace = Environment.GetEnvironmentVariable("CORECLR_REPO");
+ if (workspace == null)
+ {
+ workspace = currentDirectory;
+ }
+
+ return workspace;
+ }
+
+ private const string Word2VecNetRepoUrl = "https://github.com/eabdullin/Word2Vec.Net";
+ private const string Word2VecNetCommitSha1Id = "6012a2b5b886926918d51b1b56387d785115f448";
+ private const string Word2VecNetPatch = "word2vecnet.patch";
+ private const string EnvironmentFileName = "Word2VecNetEnvironment.txt";
+ private const string StoreDirName = ".store";
+ private readonly Metric TrainingMetric = new Metric("Training", "ms");
+ private readonly Metric FirstSearchMetric = new Metric("First Search", "ms");
+ private readonly Metric MedianSearchMetric = new Metric("Median Search", "ms");
+ private readonly Metric MeanSearchMetric = new Metric("Mean Search", "ms");
+ }
+}
+
diff --git a/tests/src/performance/Scenario/JitBench/JitBench.csproj b/tests/src/performance/Scenario/JitBench/JitBench.csproj
index 2e384d183e..0d1f4fb0df 100644
--- a/tests/src/performance/Scenario/JitBench/JitBench.csproj
+++ b/tests/src/performance/Scenario/JitBench/JitBench.csproj
@@ -54,5 +54,8 @@
Overwrite="true"
Encoding="Unicode"/>
</Target>
+ <Target Name="AfterBuild">
+ <Copy SourceFiles="Resources\word2vecnet.patch" DestinationFolder="$(OutDir)" />
+ </Target>
</Project>
diff --git a/tests/src/performance/Scenario/JitBench/Resources/word2vecnet.patch b/tests/src/performance/Scenario/JitBench/Resources/word2vecnet.patch
new file mode 100644
index 0000000000..dbad57ba5f
--- /dev/null
+++ b/tests/src/performance/Scenario/JitBench/Resources/word2vecnet.patch
@@ -0,0 +1,605 @@
+diff --git a/.gitignore b/.gitignore
+index 8098fe2..7c82f99 100644
+--- a/.gitignore
++++ b/.gitignore
+@@ -17,7 +17,6 @@
+ [Rr]eleases/
+ x64/
+ x86/
+-build/
+ bld/
+ [Bb]in/
+ [Oo]bj/
+diff --git a/NuGet.config b/NuGet.config
+new file mode 100644
+index 0000000..bd3a6f8
+--- /dev/null
++++ b/NuGet.config
+@@ -0,0 +1,8 @@
++<?xml version="1.0" encoding="utf-8"?>
++<configuration>
++ <packageSources>
++ <clear />
++ <add key="dotnet core" value="https://dotnet.myget.org/F/dotnet-core/api/v3/index.json" />
++ <add key="NuGet" value="https://api.nuget.org/v3/index.json" />
++ </packageSources>
++</configuration>
+\ No newline at end of file
+diff --git a/Word2LibConsole/Word2LibConsole.vcxproj b/Word2LibConsole/Word2LibConsole.vcxproj
+index 2caa5a0..03b8ada 100644
+--- a/Word2LibConsole/Word2LibConsole.vcxproj
++++ b/Word2LibConsole/Word2LibConsole.vcxproj
+@@ -1,5 +1,5 @@
+ <?xml version="1.0" encoding="utf-8"?>
+-<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
++<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+@@ -14,19 +14,19 @@
+ <ProjectGuid>{9C719670-3571-4B68-A3DA-053B18C654A0}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>Word2LibConsole</RootNamespace>
+- <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
++ <WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+- <PlatformToolset>v140</PlatformToolset>
++ <PlatformToolset>v141</PlatformToolset>
+ <CharacterSet>Unicode</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+- <PlatformToolset>v140</PlatformToolset>
++ <PlatformToolset>v141</PlatformToolset>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>Unicode</CharacterSet>
+ </PropertyGroup>
+diff --git a/Word2Vec.Net/Distance.cs b/Word2Vec.Net/Distance.cs
+index f2c3cdc..32929cd 100644
+--- a/Word2Vec.Net/Distance.cs
++++ b/Word2Vec.Net/Distance.cs
+@@ -46,7 +46,7 @@ namespace Word2Vec.Net
+ }
+ if (b == Words) b = -1;
+ bi[a] = b;
+- Console.Write("\nWord: {0} Position in vocabulary: {1}\n", st[a], bi[a]);
++ //Console.Write("\nWord: {0} Position in vocabulary: {1}\n", st[a], bi[a]);
+ if (b == -1)
+ {
+ Console.Write("Out of dictionary word!\n");
+@@ -99,4 +99,4 @@ namespace Word2Vec.Net
+ public string Word { get; set; }
+ public float Distance { get; set; }
+ }
+-}
+\ No newline at end of file
++}
+diff --git a/Word2Vec.Net/Properties/AssemblyInfo.cs b/Word2Vec.Net/Properties/AssemblyInfo.cs
+deleted file mode 100644
+index 89452bf..0000000
+--- a/Word2Vec.Net/Properties/AssemblyInfo.cs
++++ /dev/null
+@@ -1,36 +0,0 @@
+-using System.Reflection;
+-using System.Runtime.CompilerServices;
+-using System.Runtime.InteropServices;
+-
+-// General Information about an assembly is controlled through the following
+-// set of attributes. Change these attribute values to modify the information
+-// associated with an assembly.
+-[assembly: AssemblyTitle("Word2Vec.Net")]
+-[assembly: AssemblyDescription("")]
+-[assembly: AssemblyConfiguration("")]
+-[assembly: AssemblyCompany("")]
+-[assembly: AssemblyProduct("Word2Vec.Net")]
+-[assembly: AssemblyCopyright("Copyright © 2015")]
+-[assembly: AssemblyTrademark("")]
+-[assembly: AssemblyCulture("")]
+-
+-// Setting ComVisible to false makes the types in this assembly not visible
+-// to COM components. If you need to access a type in this assembly from
+-// COM, set the ComVisible attribute to true on that type.
+-[assembly: ComVisible(false)]
+-
+-// The following GUID is for the ID of the typelib if this project is exposed to COM
+-[assembly: Guid("b2bcc46d-a28b-40a4-a873-f0b1ffe65181")]
+-
+-// Version information for an assembly consists of the following four values:
+-//
+-// Major Version
+-// Minor Version
+-// Build Number
+-// Revision
+-//
+-// You can specify all the values or you can default the Build and Revision Numbers
+-// by using the '*' as shown below:
+-// [assembly: AssemblyVersion("1.0.*")]
+-[assembly: AssemblyVersion("1.0.0.0")]
+-[assembly: AssemblyFileVersion("1.0.0.0")]
+diff --git a/Word2Vec.Net/Word2Vec.Net.csproj b/Word2Vec.Net/Word2Vec.Net.csproj
+index ee3ddb9..52cc678 100644
+--- a/Word2Vec.Net/Word2Vec.Net.csproj
++++ b/Word2Vec.Net/Word2Vec.Net.csproj
+@@ -1,62 +1,26 @@
+-<?xml version="1.0" encoding="utf-8"?>
+-<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+- <Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
++<Project Sdk="Microsoft.NET.Sdk.Web">
++ <Import Project="..\build\common.props" />
+ <PropertyGroup>
+- <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
+- <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
+- <ProjectGuid>{FEFCA2DC-137B-4EEE-A779-0194BDFEBE1F}</ProjectGuid>
+- <OutputType>Library</OutputType>
+- <AppDesignerFolder>Properties</AppDesignerFolder>
+- <RootNamespace>Word2Vec.Net</RootNamespace>
++ <Description>Word2Vec.Net</Description>
++ <TargetFramework>netcoreapp2.1</TargetFramework>
++ <DefineConstants>$(DefineConstants);DEMO</DefineConstants>
++ <WarningsAsErrors>true</WarningsAsErrors>
++ <PreserveCompilationContext>true</PreserveCompilationContext>
+ <AssemblyName>Word2Vec.Net</AssemblyName>
+- <TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
+- <FileAlignment>512</FileAlignment>
++ <OutputType>library</OutputType>
++
++ <!-- This will prevent our build system from trying to package this project. -->
++ <IsPackable>false</IsPackable>
++
++ <!-- This will be set as an environment variable to pin the version. -->
++ <RuntimeFrameworkVersion>$(WORD2VEC_FRAMEWORK_VERSION)</RuntimeFrameworkVersion>
+ </PropertyGroup>
+- <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
+- <DebugSymbols>true</DebugSymbols>
+- <DebugType>full</DebugType>
+- <Optimize>false</Optimize>
+- <OutputPath>bin\Debug\</OutputPath>
+- <DefineConstants>DEBUG;TRACE</DefineConstants>
+- <ErrorReport>prompt</ErrorReport>
+- <WarningLevel>4</WarningLevel>
+- <DocumentationFile>bin\Debug\Word2Vec.Net.XML</DocumentationFile>
+- <PlatformTarget>AnyCPU</PlatformTarget>
++
++ <PropertyGroup Condition=" '$(RuntimeFrameworkVersion)' == '' ">
++ <RuntimeFrameworkVersion>2.1.0-*</RuntimeFrameworkVersion>
+ </PropertyGroup>
+- <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
+- <DebugType>pdbonly</DebugType>
+- <Optimize>true</Optimize>
+- <OutputPath>bin\Release\</OutputPath>
+- <DefineConstants>TRACE</DefineConstants>
+- <ErrorReport>prompt</ErrorReport>
+- <WarningLevel>4</WarningLevel>
++
++ <PropertyGroup Condition=" '$(Configuration)' == 'Release' ">
++ <DefineConstants>$(DefineConstants);RELEASE</DefineConstants>
+ </PropertyGroup>
+- <ItemGroup>
+- <Reference Include="System" />
+- <Reference Include="System.Core" />
+- <Reference Include="System.Xml.Linq" />
+- <Reference Include="System.Data.DataSetExtensions" />
+- <Reference Include="Microsoft.CSharp" />
+- <Reference Include="System.Data" />
+- <Reference Include="System.Xml" />
+- </ItemGroup>
+- <ItemGroup>
+- <Compile Include="Distance.cs" />
+- <Compile Include="StringExtension.cs" />
+- <Compile Include="Utils\FileStreamExtensions.cs" />
+- <Compile Include="VocubWord.cs" />
+- <Compile Include="Word2Vec.cs" />
+- <Compile Include="Properties\AssemblyInfo.cs" />
+- <Compile Include="Word2VecAnalysisBase.cs" />
+- <Compile Include="Word2VecBuilder.cs" />
+- <Compile Include="WordAnalogy.cs" />
+- </ItemGroup>
+- <Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
+- <!-- To modify your build process, add your task inside one of the targets below and uncomment it.
+- Other similar extension points exist, see Microsoft.Common.targets.
+- <Target Name="BeforeBuild">
+- </Target>
+- <Target Name="AfterBuild">
+- </Target>
+- -->
+-</Project>
+\ No newline at end of file
++</Project>
+diff --git a/Word2Vec.Net/Word2Vec.cs b/Word2Vec.Net/Word2Vec.cs
+index 968bf88..4142c7b 100644
+--- a/Word2Vec.Net/Word2Vec.cs
++++ b/Word2Vec.Net/Word2Vec.cs
+@@ -57,7 +57,7 @@ namespace Word2Vec.Net
+ private const int TableSize = (int) 1e8;
+ private int[] _table;
+
+- internal Word2Vec(
++ public Word2Vec(
+ string trainFileName,
+ string outPutfileName,
+ string saveVocabFileName,
+@@ -186,7 +186,7 @@ namespace Word2Vec.Net
+ for (var a = 0; a < VocabHashSize; a++) _vocabHash[a] = -1;
+ int size = _vocabSize;
+ _trainWords = 0;
+- for (var a = 0; a < size; a++)
++ /*for (var a = 0; a < size; a++)
+ {
+ // Words occuring less than min_count times will be discarded from the vocab
+ if (_vocab[a].Cn < _minCount && (a != 0))
+@@ -203,7 +203,7 @@ namespace Word2Vec.Net
+ _trainWords += _vocab[a].Cn;
+ }
+ }
+- Array.Resize(ref _vocab, _vocabSize + 1);
++ Array.Resize(ref _vocab, _vocabSize + 1);*/
+
+ // Allocate memory for the binary tree construction
+ for (var a = 0; a < _vocabSize; a++)
+@@ -331,56 +331,48 @@ namespace Word2Vec.Net
+
+ private void LearnVocabFromTrainFile()
+ {
+- int i;
+- for (var a = 0; a < VocabHashSize; a++) _vocabHash[a] = -1;
+- using (var fin = File.OpenText(_trainFile))
++ int i;
++ for (var a = 0; a < VocabHashSize; a++) _vocabHash[a] = -1;
++ string[] fin = System.IO.File.ReadAllLines(_trainFile);
++ _vocabSize = 0;
++
++ Regex regex = new Regex("\\s");
++ AddWordToVocab("</s>");
++ foreach (string line in fin)
++ {
++ string[] words = regex.Split(line);
++
++ foreach (var word in words)
+ {
+- if (fin == StreamReader.Null)
++ if(string.IsNullOrWhiteSpace(word)) continue;
++ _trainWords++;
++ if ((_debugMode > 1) && (_trainWords%100000 == 0))
+ {
+- throw new InvalidOperationException("ERROR: training data file not found!\n");
++ Console.Write("{0}K \r", _trainWords/1000);
++ //printf("%lldK%c", train_words / 1000, 13);
++ //fflush(stdout);
+ }
+- _vocabSize = 0;
+-
+- string line;
+- Regex regex = new Regex("\\s");
+- AddWordToVocab("</s>");
+- while ((line = fin.ReadLine()) != null)
+- {
+- if (fin.EndOfStream) break;
+- string[] words = regex.Split(line);
+-
+- foreach (var word in words)
+- {
+- if(string.IsNullOrWhiteSpace(word)) continue;
+- _trainWords++;
+- if ((_debugMode > 1) && (_trainWords%100000 == 0))
+- {
+- Console.Write("{0}K \r", _trainWords/1000);
+- //printf("%lldK%c", train_words / 1000, 13);
+- //fflush(stdout);
+- }
+- i = SearchVocab(word);
+- if (i == -1)
++ i = SearchVocab(word);
++ if (i == -1)
+ {
+ var a = AddWordToVocab(word);
+ _vocab[a].Cn = 1;
+ }
+- else
+- _vocab[i].Cn++;
+- if (_vocabSize > VocabHashSize*0.7)
+- ReduceVocab();
+- }
+- }
+- SortVocab();
+- if (_debugMode > 0)
+- {
+- Console.WriteLine("Vocab size: {0}", _vocabSize);
+- Console.WriteLine("Words in train file: {0}", _trainWords);
+- }
+- //file_size = ftell(fin);
+- _fileSize = new FileInfo(_trainFile).Length;
++ else
++ _vocab[i].Cn++;
++ if (_vocabSize > VocabHashSize*0.7)
++ ReduceVocab();
+ }
+ }
++ SortVocab();
++ if (_debugMode > 0)
++ {
++ Console.WriteLine("Vocab size: {0}", _vocabSize);
++ Console.WriteLine("Words in train file: {0}", _trainWords);
++ }
++ //file_size = ftell(fin);
++ _fileSize = new FileInfo(_trainFile).Length;
++ }
+
+ private void SaveVocab()
+ {
+diff --git a/Word2Vec.Net/WordAnalogy.cs b/Word2Vec.Net/WordAnalogy.cs
+index eaa35bf..8347c0f 100644
+--- a/Word2Vec.Net/WordAnalogy.cs
++++ b/Word2Vec.Net/WordAnalogy.cs
+@@ -24,7 +24,7 @@ namespace Word2Vec.Net
+ for (b = 0; b < Words; b++) if (!new string(Vocab, (int)(b * max_w), (int)max_w).Equals(st[a])) break;
+ if (b == Words) b = -1;
+ bi[a] = b;
+- Console.Write("\nWord: {0} Position in vocabulary: {1}\n", st[a], bi[a]);
++ //Console.Write("\nWord: {0} Position in vocabulary: {1}\n", st[a], bi[a]);
+ if (b == -1)
+ {
+ Console.Write("Out of dictionary word!\n");
+diff --git a/Word2VecScenario/App.config b/Word2VecScenario/App.config
+new file mode 100644
+index 0000000..e8482b1
+--- /dev/null
++++ b/Word2VecScenario/App.config
+@@ -0,0 +1,12 @@
++<?xml version="1.0" encoding="utf-8" ?>
++<configuration>
++
++ <runtime>
++ <gcAllowVeryLargeObjects enabled="true"/>
++ </runtime>
++
++ <startup>
++ <supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5" />
++ </startup>
++
++</configuration>
+\ No newline at end of file
+diff --git a/Word2VecScenario/Corpus.txt.ReadMe.txt b/Word2VecScenario/Corpus.txt.ReadMe.txt
+new file mode 100644
+index 0000000..82c04e5
+--- /dev/null
++++ b/Word2VecScenario/Corpus.txt.ReadMe.txt
+@@ -0,0 +1,5 @@
++Please download and rename the following file:
++
++http://mattmahoney.net/dc/text8.zip
++
++Renaming the file inside the zip to: Corpus.txt - In place of this File!
+diff --git a/Word2VecScenario/Program.cs b/Word2VecScenario/Program.cs
+new file mode 100644
+index 0000000..7e9ad31
+--- /dev/null
++++ b/Word2VecScenario/Program.cs
+@@ -0,0 +1,161 @@
++namespace Word2VecScenario
++{
++ using System;
++ using System.Diagnostics;
++ using System.Linq;
++ using Word2Vec.Net;
++
++ class Program
++ {
++ static string path = @"Word2VectorOutputFile.bin";
++ static Distance distance = null;
++ static WordAnalogy wordAnalogy = null;
++
++ static void Main(string[] args)
++ {
++ // -train <file> Use text data from <file> to train the model
++ string train = "Corpus.txt";
++
++ // -output <file> Use <file> to save the resulting word vectors / word clusters
++ string output = "Vectors.bin";
++
++ // -save-vocab <file> The vocabulary will be saved to <file>
++ string savevocab = "";
++
++ // -read-vocab <file> The vocabulary will be read from <file>, not constructed from the training data
++ string readvocab = "";
++
++ // -size <int> Set size of word vectors; default is 100
++ int size = 100;
++
++ // -debug <int> Set the debug mode (default = 2 = more info during training)
++ int debug = 1;
++
++ // -binary <int> Save the resulting vectors in binary moded; default is 0 (off)
++ int binary = 1;
++
++ // -cbow <int> Use the continuous bag of words model; default is 1 (use 0 for skip-gram model)
++ int cbow = 1;
++
++ // -alpha <float> Set the starting learning rate; default is 0.025 for skip-gram and 0.05 for CBOW
++ float alpha = 0.05f;
++
++ // -sample <float> Set threshold for occurrence of words. Those that appear with higher frequency in the training data
++ float sample = 1e-4f;
++
++ // -hs <int> Use Hierarchical Softmax; default is 0 (not used)
++ int hs = 0;
++
++ // -negative <int> Number of negative examples; default is 5, common values are 3 - 10 (0 = not used)
++ int negative = 5;
++
++ // -threads <int> Use <int> threads (default 12)
++ int threads = 12;
++
++ // -iter <int> Run more training iterations (default 5)
++ long iter = 15;
++
++ // -min-count <int> This will discard words that appear less than <int> times; default is 5
++ int mincount = 5;
++
++ // -classes <int> Output word classes rather than word vectors; default number of classes is 0 (vectors are written)
++ long classes = 0;
++
++ // -window <int> Set max skip length between words; default is 5
++ int window = 12;
++
++ Word2Vec word2Vec = new Word2Vec(train, output, savevocab, readvocab, size, debug, binary, cbow, alpha, sample, hs, negative, threads, iter, mincount, classes, window);
++
++ var totalTime = Stopwatch.StartNew();
++ var highRes = Stopwatch.IsHighResolution;
++
++ word2Vec.TrainModel();
++
++ totalTime.Stop();
++
++ var trainingTime = totalTime.ElapsedMilliseconds;
++ Console.WriteLine("Training took {0}ms", trainingTime);
++
++ path = @"Vectors.bin";
++ distance = new Distance(path);
++ wordAnalogy = new WordAnalogy(path);
++
++ string[] wordList = new string[] {"paris france madrid" };
++
++ var searchTime = Stopwatch.StartNew();
++
++ foreach (string word in wordList)
++ {
++ distance.Search(word);
++ wordAnalogy.Search(word);
++ }
++
++ searchTime.Stop();
++ var firstSearchTime = searchTime.ElapsedMilliseconds;
++ Console.WriteLine("Search took {0}ms", firstSearchTime);
++
++ int outerN = 5;
++
++ for (int outer = 0; outer < outerN; outer++)
++ {
++ foreach (string word in wordList)
++ {
++ int N = 11;
++ var minSearchTime = long.MaxValue;
++ var maxSearchTime = long.MinValue;
++ long[] searchTimes = new long[N];
++
++ Console.WriteLine($"Batch {outer}, searching {word}: running {N} searches");
++
++ for (int inner = 0; inner < N; inner++)
++ {
++ searchTime.Restart();
++ distance.Search(word);
++ BestWord[] result = wordAnalogy.Search(word);
++ searchTime.Stop();
++
++ /*foreach (var bestWord in result)
++ {
++ Console.WriteLine("{0}\t\t{1}", bestWord.Word, bestWord.Distance);
++ }*/
++
++ long interval = highRes ? searchTime.ElapsedTicks : searchTime.ElapsedMilliseconds;
++ searchTimes[inner] = interval;
++
++ if (interval < minSearchTime)
++ {
++ minSearchTime = interval;
++ }
++ if (interval > maxSearchTime)
++ {
++ maxSearchTime = interval;
++ }
++ }
++
++ if (highRes)
++ {
++ double averageSearch = 1000 * ((double)searchTimes.Sum() / N / Stopwatch.Frequency);
++ double medianSearch = 1000 * ((double)searchTimes.OrderBy(t => t).ElementAt(N / 2) / Stopwatch.Frequency);
++ Console.WriteLine("Steadystate min search time: {0:F2}ms", (1000 * minSearchTime) / Stopwatch.Frequency);
++ Console.WriteLine("Steadystate max search time: {0:F2}ms", (1000 * maxSearchTime) / Stopwatch.Frequency);
++ Console.WriteLine("Steadystate average search time: {0:F2}ms", averageSearch);
++ Console.WriteLine("Steadystate median search time: {0:F2}ms", medianSearch);
++ }
++ else
++ {
++ long averageSearch = searchTimes.Sum() / N;
++ long medianSearch = searchTimes.OrderBy(t => t).ElementAt(N / 2);
++ Console.WriteLine("Steadystate min search time: {0}ms", minSearchTime);
++ Console.WriteLine("Steadystate max search time: {0}ms", maxSearchTime);
++ Console.WriteLine("Steadystate average search time: {0}ms", (int)averageSearch);
++ Console.WriteLine("Steadystate median search time: {0}ms", (int)medianSearch);
++ }
++
++ Console.WriteLine("");
++ }
++ }
++ }
++
++ }
++
++}
+diff --git a/Word2VecScenario/Word2VecScenario.csproj b/Word2VecScenario/Word2VecScenario.csproj
+new file mode 100644
+index 0000000..cacd48a
+--- /dev/null
++++ b/Word2VecScenario/Word2VecScenario.csproj
+@@ -0,0 +1,34 @@
++<Project Sdk="Microsoft.NET.Sdk.Web">
++ <Import Project="..\build\common.props" />
++ <PropertyGroup>
++ <Description>Test for Word2Vec</Description>
++ <TargetFramework>netcoreapp2.1</TargetFramework>
++ <DefineConstants>$(DefineConstants);DEMO</DefineConstants>
++ <WarningsAsErrors>true</WarningsAsErrors>
++ <PreserveCompilationContext>true</PreserveCompilationContext>
++ <AssemblyName>Word2VecScenario</AssemblyName>
++ <OutputType>Exe</OutputType>
++
++ <!-- This will prevent our build system from trying to package this project. -->
++ <IsPackable>false</IsPackable>
++
++ <!-- This will be set as an environment variable to pin the version. -->
++ <RuntimeFrameworkVersion>$(WORD2VEC_FRAMEWORK_VERSION)</RuntimeFrameworkVersion>
++ </PropertyGroup>
++
++ <PropertyGroup Condition=" '$(RuntimeFrameworkVersion)' == '' ">
++ <RuntimeFrameworkVersion>2.1.0-*</RuntimeFrameworkVersion>
++ </PropertyGroup>
++
++ <PropertyGroup Condition=" '$(Configuration)' == 'Release' ">
++ <DefineConstants>$(DefineConstants);RELEASE</DefineConstants>
++ </PropertyGroup>
++
++ <ItemGroup>
++ <ProjectReference Include="..\Word2Vec.Net\Word2Vec.Net.csproj">
++ <Name>Word2Vec.Net</Name>
++ </ProjectReference>
++ </ItemGroup>
++
++</Project>
++
+diff --git a/build/common.props b/build/common.props
+new file mode 100644
+index 0000000..36d884c
+--- /dev/null
++++ b/build/common.props
+@@ -0,0 +1,5 @@
++<Project>
++
++ <Import Project="dependencies.props" />
++
++</Project>
+diff --git a/build/dependencies.props b/build/dependencies.props
+new file mode 100644
+index 0000000..95d79b3
+--- /dev/null
++++ b/build/dependencies.props
+@@ -0,0 +1,5 @@
++<Project>
++ <PropertyGroup>
++ <RuntimeFrameworkVersion>2.0.0-*</RuntimeFrameworkVersion>
++ </PropertyGroup>
++</Project>
diff --git a/tests/src/performance/Scenario/JitBench/Runner/Benchmark.cs b/tests/src/performance/Scenario/JitBench/Runner/Benchmark.cs
index 206cd556da..d47d127851 100644
--- a/tests/src/performance/Scenario/JitBench/Runner/Benchmark.cs
+++ b/tests/src/performance/Scenario/JitBench/Runner/Benchmark.cs
@@ -84,7 +84,7 @@ namespace JitBench
BenchmarkRunResult result = new BenchmarkRunResult(this, config);
StringBuilder stderr = new StringBuilder();
StringBuilder stdout = new StringBuilder();
- var scenarioConfiguration = new ScenarioTestConfiguration(TimeSpan.FromMinutes(1), startInfo)
+ var scenarioConfiguration = new ScenarioTestConfiguration(TimeSpan.FromMinutes(20), startInfo)
{
//XUnitPerformanceHarness writes files to disk starting with {runid}-{ScenarioBenchmarkName}-{TestName}
TestName = (Name + "-" + config.Name).Replace(' ', '_'),
@@ -143,6 +143,7 @@ namespace JitBench
"dotnet.exe",
"MusicStore.dll",
"AllReady.dll",
+ "Word2VecScenario.dll",
"ntoskrnl.exe",
"System.Private.CoreLib.dll",
"Unknown",
diff --git a/tests/src/performance/Scenario/JitBench/Utilities/FileTasks.cs b/tests/src/performance/Scenario/JitBench/Utilities/FileTasks.cs
index 5e9efa2ffb..e5391cafd0 100644
--- a/tests/src/performance/Scenario/JitBench/Utilities/FileTasks.cs
+++ b/tests/src/performance/Scenario/JitBench/Utilities/FileTasks.cs
@@ -178,6 +178,9 @@ namespace JitBench
}
try
{
+ // On some systems, directories/files created programmatically are created with attributes
+ // that prevent them from being deleted. Set those attributes to be normal
+ SetAttributesNormal(path);
Directory.Delete(path, true);
return;
}
@@ -194,6 +197,18 @@ namespace JitBench
}
}
+ public static void SetAttributesNormal(string path)
+ {
+ foreach (var subDir in Directory.GetDirectories(path))
+ {
+ SetAttributesNormal(subDir);
+ }
+ foreach (var file in Directory.GetFiles(path))
+ {
+ File.SetAttributes(file, FileAttributes.Normal);
+ }
+ }
+
public static void MoveDirectory(string sourceDirName, string destDirName, ITestOutputHelper output)
{
if (output != null)
@@ -225,6 +240,37 @@ namespace JitBench
}
}
+ public static void MoveFile(string sourceFileName, string destFileName, ITestOutputHelper output)
+ {
+ if (output != null)
+ {
+ output.WriteLine("Moving " + sourceFileName + " -> " + destFileName);
+ }
+ int retries = 10;
+ for (int i = 0; i < retries; i++)
+ {
+ if (!File.Exists(sourceFileName) && File.Exists(destFileName))
+ {
+ return;
+ }
+ try
+ {
+ File.Move(sourceFileName, destFileName);
+ return;
+ }
+ catch (IOException e) when (i < retries - 1)
+ {
+ output.WriteLine($" Attempt #{i + 1} failed: {e.Message}");
+ }
+ catch (UnauthorizedAccessException e) when (i < retries - 1)
+ {
+ output.WriteLine($" Attempt #{i + 1} failed: {e.Message}");
+ }
+ // if something has a transient lock on the file waiting may resolve the issue
+ Thread.Sleep((i + 1) * 10);
+ }
+ }
+
public static void CreateDirectory(string path, ITestOutputHelper output)
{
output.WriteLine("Creating " + path);
diff --git a/tests/src/performance/Scenario/JitBench/Utilities/ProcessRunner.cs b/tests/src/performance/Scenario/JitBench/Utilities/ProcessRunner.cs
index 60d30e1af6..467ba7dbf7 100644
--- a/tests/src/performance/Scenario/JitBench/Utilities/ProcessRunner.cs
+++ b/tests/src/performance/Scenario/JitBench/Utilities/ProcessRunner.cs
@@ -67,7 +67,7 @@ namespace JitBench
_p.StartInfo = psi;
_p.EnableRaisingEvents = false;
_loggers = new List<IProcessLogger>();
- _timeout = TimeSpan.FromMinutes(10);
+ _timeout = TimeSpan.FromMinutes(60);
_cancelSource = new CancellationTokenSource();
_killReason = null;
_waitForProcessStartTaskSource = new TaskCompletionSource<Process>();
diff --git a/tests/src/performance/Scenario/JitBench/unofficial_dotnet/JitBench.csproj b/tests/src/performance/Scenario/JitBench/unofficial_dotnet/JitBench.csproj
index 009949ab1b..74e633a327 100644
--- a/tests/src/performance/Scenario/JitBench/unofficial_dotnet/JitBench.csproj
+++ b/tests/src/performance/Scenario/JitBench/unofficial_dotnet/JitBench.csproj
@@ -55,4 +55,10 @@
<!-- The CoreCLR test build system requires a target named RestorePackage in order to do BatchRestore -->
<Target Name="RestorePackage" DependsOnTargets="Restore" />
+ <Target Name="CustomCopyContent" AfterTargets="AfterBuild">
+ <Copy SourceFiles="..\Resources\word2vecnet.patch" DestinationFolder="$(OutDir)" />
+ </Target>
+ <Target Name="CustomCopyContentOnPublish" AfterTargets="AfterPublish">
+ <Copy SourceFiles="..\Resources\word2vecnet.patch" DestinationFolder="$(PublishDir)" />
+ </Target>
</Project>