summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/jit/CMakeLists.txt7
-rw-r--r--src/jit/codegenarm64.cpp146
-rw-r--r--src/jit/codegenarmarch.cpp6
-rw-r--r--src/jit/codegenlinear.h16
-rw-r--r--src/jit/compiler.cpp5
-rw-r--r--src/jit/compiler.h24
-rw-r--r--src/jit/hwintrinsicArm64.cpp302
-rw-r--r--src/jit/hwintrinsicArm64.h49
-rw-r--r--src/jit/hwintrinsiccodegenxarch.cpp2
-rw-r--r--src/jit/hwintrinsiclistArm64.h92
-rw-r--r--src/jit/importer.cpp32
-rw-r--r--src/jit/lowerarmarch.cpp35
-rw-r--r--src/jit/lsraarm64.cpp27
-rw-r--r--src/jit/namedintrinsiclist.h10
-rw-r--r--src/jit/protononjit/CMakeLists.txt3
-rw-r--r--src/jit/simd.cpp33
-rw-r--r--src/mscorlib/System.Private.CoreLib.csproj6
-rw-r--r--src/mscorlib/src/System/Runtime/Intrinsics/Arm/Arm64/Simd.PlatformNotSupported.cs344
-rw-r--r--src/mscorlib/src/System/Runtime/Intrinsics/Arm/Arm64/Simd.cs344
19 files changed, 1406 insertions, 77 deletions
diff --git a/src/jit/CMakeLists.txt b/src/jit/CMakeLists.txt
index 946431209e..fa5bbc1b18 100644
--- a/src/jit/CMakeLists.txt
+++ b/src/jit/CMakeLists.txt
@@ -4,15 +4,11 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
include_directories("./jitstd")
include_directories("../inc")
-if (CLR_CMAKE_TARGET_ARCH_AMD64 OR (CLR_CMAKE_TARGET_ARCH_I386 AND NOT CLR_CMAKE_PLATFORM_UNIX))
+if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR (CLR_CMAKE_TARGET_ARCH_I386 AND NOT CLR_CMAKE_PLATFORM_UNIX))
add_definitions(-DFEATURE_SIMD)
add_definitions(-DFEATURE_HW_INTRINSICS)
endif ()
-if (CLR_CMAKE_TARGET_ARCH_ARM64)
- add_definitions(-DFEATURE_SIMD)
-endif ()
-
# JIT_BUILD disables certain PAL_TRY debugging features
add_definitions(-DJIT_BUILD=1)
@@ -265,6 +261,7 @@ set( JIT_ARM64_SOURCES
targetarm64.cpp
unwindarm.cpp
unwindarm64.cpp
+ hwintrinsicArm64.cpp
)
if(CLR_CMAKE_TARGET_ARCH_AMD64)
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index 8515103a5d..0928b6f40e 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -4958,6 +4958,152 @@ void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode)
#endif // FEATURE_SIMD
+#ifdef FEATURE_HW_INTRINSICS
+#include "hwintrinsicArm64.h"
+
+instruction CodeGen::getOpForHWIntrinsic(GenTreeHWIntrinsic* node, var_types instrType)
+{
+ NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
+
+ unsigned int instrTypeIndex = varTypeIsFloating(instrType) ? 0 : varTypeIsUnsigned(instrType) ? 2 : 1;
+
+ return compiler->getHWIntrinsicInfo(intrinsicID).instrs[instrTypeIndex];
+}
+
+void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
+{
+ NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
+
+ switch (compiler->getHWIntrinsicInfo(intrinsicID).form)
+ {
+ case HWIntrinsicInfo::UnaryOp:
+ genHWIntrinsicUnaryOp(node);
+ break;
+ case HWIntrinsicInfo::CrcOp:
+ genHWIntrinsicCrcOp(node);
+ break;
+ case HWIntrinsicInfo::SimdBinaryOp:
+ genHWIntrinsicSimdBinaryOp(node);
+ break;
+ case HWIntrinsicInfo::SimdExtractOp:
+ genHWIntrinsicSimdExtractOp(node);
+ break;
+ case HWIntrinsicInfo::SimdInsertOp:
+ genHWIntrinsicSimdInsertOp(node);
+ break;
+ case HWIntrinsicInfo::SimdSelectOp:
+ genHWIntrinsicSimdSelectOp(node);
+ break;
+ case HWIntrinsicInfo::SimdUnaryOp:
+ genHWIntrinsicSimdUnaryOp(node);
+ break;
+ default:
+ NYI("HWIntrinsic form not implemented");
+ }
+}
+
+void CodeGen::genHWIntrinsicUnaryOp(GenTreeHWIntrinsic* node)
+{
+ GenTree* op1 = node->gtGetOp1();
+ regNumber targetReg = node->gtRegNum;
+ emitAttr attr = emitActualTypeSize(node);
+
+ assert(targetReg != REG_NA);
+ var_types targetType = node->TypeGet();
+
+ genConsumeOperands(node);
+
+ regNumber op1Reg = op1->gtRegNum;
+
+ instruction ins = getOpForHWIntrinsic(node, node->TypeGet());
+ assert(ins != INS_invalid);
+
+ getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg);
+
+ genProduceReg(node);
+}
+
+void CodeGen::genHWIntrinsicCrcOp(GenTreeHWIntrinsic* node)
+{
+ NYI("genHWIntrinsicCrcOp not implemented");
+}
+
+void CodeGen::genHWIntrinsicSimdBinaryOp(GenTreeHWIntrinsic* node)
+{
+ GenTree* op1 = node->gtGetOp1();
+ GenTree* op2 = node->gtGetOp2();
+ var_types baseType = node->gtSIMDBaseType;
+ regNumber targetReg = node->gtRegNum;
+
+ assert(targetReg != REG_NA);
+ var_types targetType = node->TypeGet();
+
+ genConsumeOperands(node);
+
+ regNumber op1Reg = op1->gtRegNum;
+ regNumber op2Reg = op2->gtRegNum;
+
+ assert(genIsValidFloatReg(op1Reg));
+ assert(genIsValidFloatReg(op2Reg));
+ assert(genIsValidFloatReg(targetReg));
+
+ instruction ins = getOpForHWIntrinsic(node, baseType);
+ assert(ins != INS_invalid);
+
+ bool is16Byte = (node->gtSIMDSize > 8);
+ emitAttr attr = is16Byte ? EA_16BYTE : EA_8BYTE;
+ insOpts opt = genGetSimdInsOpt(is16Byte, baseType);
+
+ getEmitter()->emitIns_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, opt);
+
+ genProduceReg(node);
+}
+
+void CodeGen::genHWIntrinsicSimdExtractOp(GenTreeHWIntrinsic* node)
+{
+    NYI("genHWIntrinsicSimdExtractOp not implemented");
+}
+
+void CodeGen::genHWIntrinsicSimdInsertOp(GenTreeHWIntrinsic* node)
+{
+    NYI("genHWIntrinsicSimdInsertOp not implemented");
+}
+
+void CodeGen::genHWIntrinsicSimdSelectOp(GenTreeHWIntrinsic* node)
+{
+ NYI("genHWIntrinsicSimdSelectOp not implemented");
+}
+
+void CodeGen::genHWIntrinsicSimdUnaryOp(GenTreeHWIntrinsic* node)
+{
+ GenTree* op1 = node->gtGetOp1();
+ var_types baseType = node->gtSIMDBaseType;
+ regNumber targetReg = node->gtRegNum;
+
+ assert(targetReg != REG_NA);
+ var_types targetType = node->TypeGet();
+
+ genConsumeOperands(node);
+
+ regNumber op1Reg = op1->gtRegNum;
+
+ assert(genIsValidFloatReg(op1Reg));
+ assert(genIsValidFloatReg(targetReg));
+
+ instruction ins = getOpForHWIntrinsic(node, baseType);
+ assert(ins != INS_invalid);
+
+ bool is16Byte = (node->gtSIMDSize > 8);
+ emitAttr attr = is16Byte ? EA_16BYTE : EA_8BYTE;
+ insOpts opt = genGetSimdInsOpt(is16Byte, baseType);
+
+ getEmitter()->emitIns_R_R(ins, attr, targetReg, op1Reg, opt);
+
+ genProduceReg(node);
+}
+
+#endif // FEATURE_HW_INTRINSICS
+
/*****************************************************************************
* Unit testing of the ARM64 emitter: generate a bunch of instructions into the prolog
* (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp
index 9ed3617f30..680ff6db4f 100644
--- a/src/jit/codegenarmarch.cpp
+++ b/src/jit/codegenarmarch.cpp
@@ -263,6 +263,12 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
#endif // FEATURE_SIMD
+#ifdef FEATURE_HW_INTRINSICS
+ case GT_HWIntrinsic:
+ genHWIntrinsic(treeNode->AsHWIntrinsic());
+ break;
+#endif // FEATURE_HW_INTRINSICS
+
case GT_EQ:
case GT_NE:
case GT_LT:
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index 4263b2cc40..f7d43d73e5 100644
--- a/src/jit/codegenlinear.h
+++ b/src/jit/codegenlinear.h
@@ -114,8 +114,9 @@ void genPutArgStkSIMD12(GenTree* treeNode);
#endif // _TARGET_X86_
#endif // FEATURE_SIMD
-#if defined(FEATURE_HW_INTRINSICS) && defined(_TARGET_XARCH_)
+#ifdef FEATURE_HW_INTRINSICS
void genHWIntrinsic(GenTreeHWIntrinsic* node);
+#if defined(_TARGET_XARCH_)
void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins);
void genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins);
void genSSEIntrinsic(GenTreeHWIntrinsic* node);
@@ -133,7 +134,18 @@ void genFMAIntrinsic(GenTreeHWIntrinsic* node);
void genLZCNTIntrinsic(GenTreeHWIntrinsic* node);
void genPCLMULQDQIntrinsic(GenTreeHWIntrinsic* node);
void genPOPCNTIntrinsic(GenTreeHWIntrinsic* node);
-#endif // defined(FEATURE_HW_INTRINSICS) && defined(_TARGET_XARCH_)
+#endif // defined(_TARGET_XARCH_)
+#if defined(_TARGET_ARM64_)
+instruction getOpForHWIntrinsic(GenTreeHWIntrinsic* node, var_types instrType);
+void genHWIntrinsicUnaryOp(GenTreeHWIntrinsic* node);
+void genHWIntrinsicCrcOp(GenTreeHWIntrinsic* node);
+void genHWIntrinsicSimdBinaryOp(GenTreeHWIntrinsic* node);
+void genHWIntrinsicSimdExtractOp(GenTreeHWIntrinsic* node);
+void genHWIntrinsicSimdInsertOp(GenTreeHWIntrinsic* node);
+void genHWIntrinsicSimdSelectOp(GenTreeHWIntrinsic* node);
+void genHWIntrinsicSimdUnaryOp(GenTreeHWIntrinsic* node);
+#endif // defined(_TARGET_ARM64_)
+#endif // FEATURE_HW_INTRINSICS
#if !defined(_TARGET_64BIT_)
diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
index e297faac14..fd3264e0c2 100644
--- a/src/jit/compiler.cpp
+++ b/src/jit/compiler.cpp
@@ -2119,13 +2119,12 @@ void Compiler::compInit(ArenaAllocator* pAlloc, InlineInfo* inlineInfo)
#ifdef FEATURE_HW_INTRINSICS
#if defined(_TARGET_ARM64_)
Vector64FloatHandle = nullptr;
- Vector64DoubleHandle = nullptr;
- Vector64IntHandle = nullptr;
+ Vector64UIntHandle = nullptr;
Vector64UShortHandle = nullptr;
Vector64UByteHandle = nullptr;
+ Vector64IntHandle = nullptr;
Vector64ShortHandle = nullptr;
Vector64ByteHandle = nullptr;
- Vector64LongHandle = nullptr;
#endif // defined(_TARGET_ARM64_)
Vector128FloatHandle = nullptr;
Vector128DoubleHandle = nullptr;
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index a97530c6c1..5146d4f37f 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -1507,6 +1507,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/
+struct HWIntrinsicInfo;
+
class Compiler
{
friend class emitter;
@@ -3040,12 +3042,12 @@ protected:
NamedIntrinsic lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method);
#ifdef FEATURE_HW_INTRINSICS
+#ifdef _TARGET_XARCH_
static InstructionSet lookupHWIntrinsicISA(const char* className);
static NamedIntrinsic lookupHWIntrinsic(const char* methodName, InstructionSet isa);
static InstructionSet isaOfHWIntrinsic(NamedIntrinsic intrinsic);
static bool isIntrinsicAnIsSupportedPropertyGetter(NamedIntrinsic intrinsic);
static bool isFullyImplmentedISAClass(InstructionSet isa);
-#ifdef _TARGET_XARCH_
GenTree* impUnsupportedHWIntrinsic(unsigned helper,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
@@ -3124,6 +3126,19 @@ protected:
GenTree* getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass);
GenTreeArgList* buildArgList(CORINFO_SIG_INFO* sig);
#endif // _TARGET_XARCH_
+#ifdef _TARGET_ARM64_
+ InstructionSet lookupHWIntrinsicISA(const char* className);
+ NamedIntrinsic lookupHWIntrinsic(const char* className, const char* methodName);
+ GenTree* impHWIntrinsic(NamedIntrinsic intrinsic,
+ CORINFO_METHOD_HANDLE method,
+ CORINFO_SIG_INFO* sig,
+ bool mustExpand);
+ GenTree* impUnsupportedHWIntrinsic(unsigned helper,
+ CORINFO_METHOD_HANDLE method,
+ CORINFO_SIG_INFO* sig,
+ bool mustExpand);
+ const HWIntrinsicInfo& getHWIntrinsicInfo(NamedIntrinsic);
+#endif // _TARGET_ARM64_
#endif // FEATURE_HW_INTRINSICS
GenTreePtr impArrayAccessIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
CORINFO_SIG_INFO* sig,
@@ -7476,15 +7491,12 @@ private:
#ifdef FEATURE_HW_INTRINSICS
#if defined(_TARGET_ARM64_)
CORINFO_CLASS_HANDLE Vector64FloatHandle;
- CORINFO_CLASS_HANDLE Vector64DoubleHandle;
- CORINFO_CLASS_HANDLE Vector64IntHandle;
+ CORINFO_CLASS_HANDLE Vector64UIntHandle;
CORINFO_CLASS_HANDLE Vector64UShortHandle;
CORINFO_CLASS_HANDLE Vector64UByteHandle;
CORINFO_CLASS_HANDLE Vector64ShortHandle;
CORINFO_CLASS_HANDLE Vector64ByteHandle;
- CORINFO_CLASS_HANDLE Vector64LongHandle;
- CORINFO_CLASS_HANDLE Vector64UIntHandle;
- CORINFO_CLASS_HANDLE Vector64ULongHandle;
+ CORINFO_CLASS_HANDLE Vector64IntHandle;
#endif // defined(_TARGET_ARM64_)
CORINFO_CLASS_HANDLE Vector128FloatHandle;
CORINFO_CLASS_HANDLE Vector128DoubleHandle;
diff --git a/src/jit/hwintrinsicArm64.cpp b/src/jit/hwintrinsicArm64.cpp
new file mode 100644
index 0000000000..3a49ff9d18
--- /dev/null
+++ b/src/jit/hwintrinsicArm64.cpp
@@ -0,0 +1,302 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#include "hwintrinsicArm64.h"
+
+#ifdef FEATURE_HW_INTRINSICS
+
+namespace IsaFlag
+{
+enum Flag
+{
+#define HARDWARE_INTRINSIC_CLASS(flag, isa) isa = 1ULL << InstructionSet_##isa,
+#include "hwintrinsiclistArm64.h"
+ None = 0,
+ Base = 1ULL << InstructionSet_Base,
+ EveryISA = ~0ULL
+};
+
+Flag operator|(Flag a, Flag b)
+{
+ return Flag(uint64_t(a) | uint64_t(b));
+}
+
+Flag flag(InstructionSet isa)
+{
+ return Flag(1ULL << isa);
+}
+}
+
+// clang-format off
+static const HWIntrinsicInfo hwIntrinsicInfoArray[] = {
+ // Add lookupHWIntrinsic special cases see lookupHWIntrinsic() below
+ // NI_ARM64_IsSupported_True is used to expand get_IsSupported to const true
+ // NI_ARM64_IsSupported_False is used to expand get_IsSupported to const false
+ // NI_ARM64_PlatformNotSupported to throw PlatformNotSupported exception for every intrinsic not supported on the running platform
+ {NI_ARM64_IsSupported_True, "get_IsSupported", IsaFlag::EveryISA, HWIntrinsicInfo::IsSupported, HWIntrinsicInfo::None, {}},
+ {NI_ARM64_IsSupported_False, "::NI_ARM64_IsSupported_False", IsaFlag::EveryISA, HWIntrinsicInfo::IsSupported, HWIntrinsicInfo::None, {}},
+ {NI_ARM64_PlatformNotSupported, "::NI_ARM64_PlatformNotSupported", IsaFlag::EveryISA, HWIntrinsicInfo::Unsupported, HWIntrinsicInfo::None, {}},
+#define HARDWARE_INTRINSIC(id, isa, name, form, i0, i1, i2, flags) \
+ {id, #name, IsaFlag::isa, HWIntrinsicInfo::form, HWIntrinsicInfo::flags, { i0, i1, i2 }},
+#include "hwintrinsiclistArm64.h"
+};
+// clang-format on
+
+extern const char* getHWIntrinsicName(NamedIntrinsic intrinsic)
+{
+ return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1].intrinsicName;
+}
+
+const HWIntrinsicInfo& Compiler::getHWIntrinsicInfo(NamedIntrinsic intrinsic)
+{
+ assert(intrinsic > NI_HW_INTRINSIC_START);
+ assert(intrinsic < NI_HW_INTRINSIC_END);
+
+ return hwIntrinsicInfoArray[intrinsic - NI_HW_INTRINSIC_START - 1];
+}
+
+//------------------------------------------------------------------------
+// lookupHWIntrinsicISA: map class name to InstructionSet value
+//
+// Arguments:
+// className -- class name in System.Runtime.Intrinsics.Arm.Arm64
+//
+// Return Value:
+// Id for the ISA class if enabled.
+//
+InstructionSet Compiler::lookupHWIntrinsicISA(const char* className)
+{
+ if (className != nullptr)
+ {
+ if (strcmp(className, "Base") == 0)
+ return InstructionSet_Base;
+#define HARDWARE_INTRINSIC_CLASS(flag, isa) \
+ if (strcmp(className, #isa) == 0) \
+ return InstructionSet_##isa;
+#include "hwintrinsiclistArm64.h"
+ }
+
+ return InstructionSet_NONE;
+}
+
+//------------------------------------------------------------------------
+// lookupHWIntrinsic: map intrinsic name to named intrinsic value
+//
+// Arguments:
+// methodName -- name of the intrinsic function.
+// isa -- instruction set of the intrinsic.
+//
+// Return Value:
+// Id for the hardware intrinsic.
+//
+// TODO-Throughput: replace sequential search by hash lookup
+NamedIntrinsic Compiler::lookupHWIntrinsic(const char* className, const char* methodName)
+{
+ InstructionSet isa = lookupHWIntrinsicISA(className);
+ NamedIntrinsic result = NI_Illegal;
+ if (isa != InstructionSet_NONE)
+ {
+ IsaFlag::Flag isaFlag = IsaFlag::flag(isa);
+ for (int i = 0; i < NI_HW_INTRINSIC_END - NI_HW_INTRINSIC_START; i++)
+ {
+ if ((isaFlag & hwIntrinsicInfoArray[i].isaflags) &&
+ strcmp(methodName, hwIntrinsicInfoArray[i].intrinsicName) == 0)
+ {
+ if (compSupports(isa))
+ {
+ // Intrinsic is supported on platform
+ result = hwIntrinsicInfoArray[i].intrinsicID;
+ }
+ else
+ {
+ // When the intrinsic class is not supported
+ // Return NI_ARM64_PlatformNotSupported for all intrinsics
+ // Return NI_ARM64_IsSupported_False for the IsSupported property
+ result = (hwIntrinsicInfoArray[i].intrinsicID != NI_ARM64_IsSupported_True)
+ ? NI_ARM64_PlatformNotSupported
+ : NI_ARM64_IsSupported_False;
+ }
+ break;
+ }
+ }
+ }
+ return result;
+}
+
+//------------------------------------------------------------------------
+// impUnsupportedHWIntrinsic: returns a node for an unsupported HWIntrinsic
+//
+// Arguments:
+// helper - JIT helper ID for the exception to be thrown
+// method - method handle of the intrinsic function.
+// sig - signature of the intrinsic call
+// mustExpand - true if the intrinsic must return a GenTree*; otherwise, false
+//
+// Return Value:
+// a gtNewMustThrowException if mustExpand is true; otherwise, nullptr
+//
+GenTree* Compiler::impUnsupportedHWIntrinsic(unsigned helper,
+ CORINFO_METHOD_HANDLE method,
+ CORINFO_SIG_INFO* sig,
+ bool mustExpand)
+{
+ // We've hit some error case and may need to return a node for the given error.
+ //
+ // When `mustExpand=false`, we are attempting to inline the intrinsic directly into another method. In this
+ // scenario, we need to return `nullptr` so that a GT_CALL to the intrinsic is emitted instead. This is to
+ // ensure that everything continues to behave correctly when optimizations are enabled (e.g. things like the
+ // inliner may expect the node we return to have a certain signature, and the `MustThrowException` node won't
+ // match that).
+ //
+ // When `mustExpand=true`, we are in a GT_CALL to the intrinsic and are attempting to JIT it. This will generally
+ // be in response to an indirect call (e.g. done via reflection) or in response to an earlier attempt returning
+ // `nullptr` (under `mustExpand=false`). In that scenario, we are safe to return the `MustThrowException` node.
+
+ if (mustExpand)
+ {
+ for (unsigned i = 0; i < sig->numArgs; i++)
+ {
+ impPopStack();
+ }
+
+ return gtNewMustThrowException(helper, JITtype2varType(sig->retType), sig->retTypeClass);
+ }
+ else
+ {
+ return nullptr;
+ }
+}
+
+//------------------------------------------------------------------------
+// impHWIntrinsic: dispatch hardware intrinsics to their own implementation
+// function
+//
+// Arguments:
+// intrinsic -- id of the intrinsic function.
+// method -- method handle of the intrinsic function.
+// sig -- signature of the intrinsic call
+//
+// Return Value:
+// the expanded intrinsic.
+//
+GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
+ CORINFO_METHOD_HANDLE method,
+ CORINFO_SIG_INFO* sig,
+ bool mustExpand)
+{
+ GenTree* retNode = nullptr;
+ GenTree* op1 = nullptr;
+ GenTree* op2 = nullptr;
+ var_types simdType = TYP_UNKNOWN;
+ var_types simdBaseType = TYP_UNKNOWN;
+ unsigned simdSizeBytes = 0;
+
+ // Instantiation type check
+ switch (getHWIntrinsicInfo(intrinsic).form)
+ {
+ case HWIntrinsicInfo::SimdBinaryOp:
+ case HWIntrinsicInfo::SimdUnaryOp:
+ simdBaseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &simdSizeBytes);
+
+ if (simdBaseType == TYP_UNKNOWN)
+ {
+ // TODO-FIXME Add CORINFO_HELP_THROW_TYPE_NOT_SUPPORTED
+ unsigned CORINFO_HELP_THROW_TYPE_NOT_SUPPORTED = CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED;
+
+ return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_TYPE_NOT_SUPPORTED, method, sig, mustExpand);
+ }
+ simdType = getSIMDTypeForSize(simdSizeBytes);
+ break;
+ default:
+ break;
+ }
+
+ switch (getHWIntrinsicInfo(intrinsic).form)
+ {
+ case HWIntrinsicInfo::IsSupported:
+ return gtNewIconNode((intrinsic == NI_ARM64_IsSupported_True) ? 1 : 0);
+
+ case HWIntrinsicInfo::Unsupported:
+ return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
+
+ case HWIntrinsicInfo::SimdBinaryOp:
+ // op1 is the first operand
+ // op2 is the second operand
+ op2 = impSIMDPopStack(simdType);
+ op1 = impSIMDPopStack(simdType);
+
+ return gtNewSimdHWIntrinsicNode(simdType, op1, op2, intrinsic, simdBaseType, simdSizeBytes);
+
+ case HWIntrinsicInfo::SimdUnaryOp:
+ op1 = impSIMDPopStack(simdType);
+
+ return gtNewSimdHWIntrinsicNode(simdType, op1, nullptr, intrinsic, simdBaseType, simdSizeBytes);
+
+ default:
+ JITDUMP("Not implemented hardware intrinsic form");
+ assert(!"Unimplemented SIMD Intrinsic form");
+
+ break;
+ }
+ return retNode;
+}
+
+CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, var_types simdBaseType)
+{
+ if (simdType == TYP_SIMD16)
+ {
+ switch (simdBaseType)
+ {
+ case TYP_FLOAT:
+ return Vector128FloatHandle;
+ case TYP_DOUBLE:
+ return Vector128DoubleHandle;
+ case TYP_INT:
+ return Vector128IntHandle;
+ case TYP_USHORT:
+ return Vector128UShortHandle;
+ case TYP_UBYTE:
+ return Vector128UByteHandle;
+ case TYP_SHORT:
+ return Vector128ShortHandle;
+ case TYP_BYTE:
+ return Vector128ByteHandle;
+ case TYP_LONG:
+ return Vector128LongHandle;
+ case TYP_UINT:
+ return Vector128UIntHandle;
+ case TYP_ULONG:
+ return Vector128ULongHandle;
+ default:
+ assert(!"Didn't find a class handle for simdType");
+ }
+ }
+ else if (simdType == TYP_SIMD8)
+ {
+ switch (simdBaseType)
+ {
+ case TYP_FLOAT:
+ return Vector64FloatHandle;
+ case TYP_UINT:
+ return Vector64UIntHandle;
+ case TYP_USHORT:
+ return Vector64UShortHandle;
+ case TYP_UBYTE:
+ return Vector64UByteHandle;
+ case TYP_SHORT:
+ return Vector64ShortHandle;
+ case TYP_BYTE:
+ return Vector64ByteHandle;
+ case TYP_INT:
+ return Vector64IntHandle;
+ default:
+ assert(!"Didn't find a class handle for simdType");
+ }
+ }
+
+ return NO_CLASS_HANDLE;
+}
+
+#endif // FEATURE_HW_INTRINSICS
diff --git a/src/jit/hwintrinsicArm64.h b/src/jit/hwintrinsicArm64.h
new file mode 100644
index 0000000000..8647702b74
--- /dev/null
+++ b/src/jit/hwintrinsicArm64.h
@@ -0,0 +1,49 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _HW_INTRINSIC_ARM64_H_
+#define _HW_INTRINSIC_ARM64_H_
+
+#ifdef FEATURE_HW_INTRINSICS
+
+struct HWIntrinsicInfo
+{
+    // Forms are used to gather intrinsics with similar characteristics
+ // Generally instructions with the same form will be treated
+ // identically by the Importer, LSRA, Lowering, and CodeGen
+ enum Form
+ {
+ // Shared forms
+ IsSupported, // The IsSupported property will use this form
+        Unsupported, // Any intrinsic which is unsupported and must throw PlatformNotSupportedException will use this form
+ // Non SIMD forms
+ UnaryOp, // Non SIMD intrinsics which take a single argument
+ CrcOp, // Crc intrinsics.
+ // SIMD common forms
+ SimdBinaryOp, // SIMD intrinsics which take two vector operands and return a vector
+ SimdUnaryOp, // SIMD intrinsics which take one vector operand and return a vector
+ // SIMD custom forms
+ SimdExtractOp, // SIMD intrinsics which take one vector operand and a lane index and return an element
+ SimdInsertOp, // SIMD intrinsics which take one vector operand and a lane index and value and return a vector
+ SimdSelectOp, // BitwiseSelect intrinsic which takes three vector operands and returns a vector
+ SimdSetAllOp, // Simd intrinsics which take one numeric operand and return a vector
+ };
+
+ // Flags will be used to handle secondary meta-data which will help
+ // Reduce the number of forms
+ enum Flags
+ {
+ None
+ };
+
+ NamedIntrinsic intrinsicID;
+ const char* intrinsicName;
+ uint64_t isaflags;
+ Form form;
+ Flags flags;
+ instruction instrs[3];
+};
+
+#endif // FEATURE_HW_INTRINSICS
+#endif // _HW_INTRINSIC_ARM64_H_
diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp
index 69b3cf54ba..c9ff3edb49 100644
--- a/src/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/jit/hwintrinsiccodegenxarch.cpp
@@ -15,7 +15,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#pragma hdrstop
#endif
-#if FEATURE_HW_INTRINSICS
+#ifdef FEATURE_HW_INTRINSICS
#include "emit.h"
#include "codegen.h"
diff --git a/src/jit/hwintrinsiclistArm64.h b/src/jit/hwintrinsiclistArm64.h
index e71ac6cfee..a6ec892039 100644
--- a/src/jit/hwintrinsiclistArm64.h
+++ b/src/jit/hwintrinsiclistArm64.h
@@ -11,29 +11,79 @@
// clang-format off
#if defined(HARDWARE_INTRINSIC_CLASS)
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_AES, Aes )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_ATOMICS, Atomics )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_CRC32, Crc32 )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_DCPOP, Dcpop )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_DP, Dp )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_FCMA, Fcma )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_FP, Fp )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_FP16, Fp16 )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_JSCVT, Jscvt )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_LRCPC, Lrcpc )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_PMULL, Pmull )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SHA1, Sha1 )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SHA2, Sha2 )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SHA512, Sha512 )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SHA3, Sha3 )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SIMD, Simd )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SIMD_V81, Simd_v81 )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SIMD_FP16, Simd_fp16)
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SM3, Sm3 )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SM4, Sm4 )
-HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SVE, Sve )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_AES , Aes )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_ATOMICS , Atomics )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_CRC32 , Crc32 )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_DCPOP , Dcpop )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_DP , Dp )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_FCMA , Fcma )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_FP , Fp )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_FP16 , Fp16 )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_JSCVT , Jscvt )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_LRCPC , Lrcpc )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_PMULL , Pmull )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SHA1 , Sha1 )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SHA2 , Sha2 )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SHA512 , Sha512 )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SHA3 , Sha3 )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SIMD , Simd )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SIMD_V81 , Simd_v81 )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SIMD_FP16 , Simd_fp16)
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SM3 , Sm3 )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SM4 , Sm4 )
+HARDWARE_INTRINSIC_CLASS(JIT_FLAG_HAS_ARM64_SVE , Sve )
#endif // defined(HARDWARE_INTRINSIC_CLASS)
+#if defined(HARDWARE_INTRINSIC)
+// (ID Class Function name Form Floating, Signed, Unsigned, Flags)
+// None (For internal use only)
+HARDWARE_INTRINSIC(NI_ARM64_NONE_MOV, None, None, UnaryOp, INS_mov, INS_mov, INS_mov, None )
+// Base
+HARDWARE_INTRINSIC(NI_ARM64_BASE_CLS, Base, LeadingSignCount, UnaryOp, INS_invalid, INS_cls, INS_cls, None )
+HARDWARE_INTRINSIC(NI_ARM64_BASE_CLZ, Base, LeadingZeroCount, UnaryOp, INS_invalid, INS_clz, INS_clz, None )
+#if NYI
+// Crc32
+HARDWARE_INTRINSIC(NI_ARM64_CRC32_CRC32, Crc32, Crc32, CrcOp, INS_invalid, INS_invalid, INS_crc32, None )
+HARDWARE_INTRINSIC(NI_ARM64_CRC32_CRC32C, Crc32, Crc32C, CrcOp, INS_invalid, INS_invalid, INS_crc32c, None )
+#endif
+// Simd
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_Abs, Simd, Abs, SimdUnaryOp, INS_fabs, INS_invalid, INS_abs, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_Add, Simd, Add, SimdBinaryOp, INS_fadd, INS_add, INS_add, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_BitwiseAnd, Simd, And, SimdBinaryOp, INS_and, INS_and, INS_and, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_BitwiseAndNot, Simd, AndNot, SimdBinaryOp, INS_bic, INS_bic, INS_bic, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_BitwiseOr, Simd, Or, SimdBinaryOp, INS_orr, INS_orr, INS_orr, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_BitwiseOrNot, Simd, OrNot, SimdBinaryOp, INS_orn, INS_orn, INS_orn, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_BitwiseNot, Simd, Not, SimdUnaryOp, INS_not, INS_not, INS_not, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_BitwiseSelect, Simd, BitwiseSelect, SimdSelectOp, INS_bsl, INS_bsl, INS_bsl, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_BitwiseXor, Simd, Xor, SimdBinaryOp, INS_eor, INS_eor, INS_eor, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_CLS, Simd, LeadingSignCount, SimdUnaryOp, INS_invalid, INS_cls, INS_cls, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_CLZ, Simd, LeadingZeroCount, SimdUnaryOp, INS_invalid, INS_clz, INS_clz, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_CNT, Simd, PopCount, SimdUnaryOp, INS_invalid, INS_cnt, INS_cnt, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_EQ, Simd, CompareEqual, SimdBinaryOp, INS_fcmeq, INS_cmeq, INS_cmeq, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_EQ_ZERO, Simd, CompareEqualZero, SimdUnaryOp, INS_fcmeq, INS_cmeq, INS_cmeq, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_GE, Simd, CompareGreaterThanOrEqual, SimdBinaryOp, INS_fcmge, INS_cmge, INS_cmhs, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_GE_ZERO, Simd, CompareGreaterThanOrEqualZero, SimdUnaryOp, INS_fcmge, INS_cmge, INS_invalid, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_GT, Simd, CompareGreaterThan, SimdBinaryOp, INS_fcmgt, INS_cmgt, INS_cmhi, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_GT_ZERO, Simd, CompareGreaterThanZero, SimdUnaryOp, INS_fcmgt, INS_cmgt, INS_invalid, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_LE_ZERO, Simd, CompareLessThanOrEqualZero, SimdUnaryOp, INS_fcmle, INS_cmle, INS_cmeq, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_LT_ZERO, Simd, CompareLessThanZero, SimdUnaryOp, INS_fcmlt, INS_cmlt, INS_invalid, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_TST, Simd, CompareTest, SimdBinaryOp, INS_cmtst, INS_cmtst, INS_cmtst, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_Div, Simd, Divide, SimdBinaryOp, INS_fdiv, INS_invalid, INS_invalid, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_Negate, Simd, Negate, SimdUnaryOp, INS_fneg, INS_neg, INS_invalid, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_Max, Simd, Max, SimdBinaryOp, INS_fmax, INS_smax, INS_umax, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_Min, Simd, Min, SimdBinaryOp, INS_fmin, INS_smin, INS_umin, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_Mul, Simd, Multiply, SimdBinaryOp, INS_fmul, INS_mul, INS_mul, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_Sqrt, Simd, Sqrt, SimdUnaryOp, INS_fsqrt, INS_invalid, INS_invalid, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_StaticCast, Simd, StaticCast, SimdUnaryOp, INS_mov, INS_mov, INS_mov, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_Sub, Simd, Subtract, SimdBinaryOp, INS_fsub, INS_sub, INS_sub, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_GetItem, Simd, Extract, SimdExtractOp, INS_mov, INS_mov, INS_mov, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_SetItem, Simd, Insert, SimdInsertOp, INS_mov, INS_mov, INS_mov, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_SetAllVector64, Simd, SetAllVector64, SimdSetAllOp, INS_dup, INS_dup, INS_dup, None )
+HARDWARE_INTRINSIC(NI_ARM64_SIMD_SetAllVector128, Simd, SetAllVector128, SimdSetAllOp, INS_dup, INS_dup, INS_dup, None )
+
+#endif
+
+
#undef HARDWARE_INTRINSIC_CLASS
#undef HARDWARE_INTRINSIC
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
index 0321bc2213..e242d3f9d1 100644
--- a/src/jit/importer.cpp
+++ b/src/jit/importer.cpp
@@ -1,4 +1,3 @@
-
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
@@ -3402,6 +3401,12 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
return impX86HWIntrinsic(ni, method, sig, mustExpand);
}
#endif // _TARGET_XARCH_
+#ifdef _TARGET_ARM64_
+ if (ni > NI_HW_INTRINSIC_START && ni < NI_HW_INTRINSIC_END)
+ {
+ return impHWIntrinsic(ni, method, sig, mustExpand);
+ }
+#endif // _TARGET_ARM64_
#endif // FEATURE_HW_INTRINSICS
}
}
@@ -4118,13 +4123,22 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method)
}
}
-#if defined(FEATURE_HW_INTRINSICS) && defined(_TARGET_XARCH_)
+#ifdef FEATURE_HW_INTRINSICS
+#if defined(_TARGET_XARCH_)
if ((namespaceName != nullptr) && strcmp(namespaceName, "System.Runtime.Intrinsics.X86") == 0)
{
InstructionSet isa = lookupHWIntrinsicISA(className);
result = lookupHWIntrinsic(methodName, isa);
}
-#endif // defined(FEATURE_HW_INTRINSICS) && defined(_TARGET_XARCH_)
+#elif defined(_TARGET_ARM64_)
+ if ((namespaceName != nullptr) && strcmp(namespaceName, "System.Runtime.Intrinsics.Arm.Arm64") == 0)
+ {
+ result = lookupHWIntrinsic(className, methodName);
+ }
+#else // !defined(_TARGET_XARCH_) && !defined(_TARGET_ARM64_)
+#error Unsupported platform
+#endif // !defined(_TARGET_XARCH_) && !defined(_TARGET_ARM64_)
+#endif // FEATURE_HW_INTRINSICS
return result;
}
@@ -8739,6 +8753,18 @@ REDO_RETURN_NODE:
return op;
}
}
+#if defined(FEATURE_HW_INTRINSICS) && defined(_TARGET_ARM64_)
+ else if ((op->gtOper == GT_HWIntrinsic) && varTypeIsSIMD(op->gtType))
+ {
+ // TODO-ARM64-FIXME Implement ARM64 ABI for Short Vectors properly
+ // assert(op->gtType == info.compRetNativeType)
+ if (op->gtType != info.compRetNativeType)
+ {
+ // Insert a register move to keep target type of SIMD intrinsic intact
+ op = gtNewScalarHWIntrinsicNode(info.compRetNativeType, op, NI_ARM64_NONE_MOV);
+ }
+ }
+#endif
else if (op->gtOper == GT_COMMA)
{
op->gtOp.gtOp2 = impFixupStructReturnType(op->gtOp.gtOp2, retClsHnd);
diff --git a/src/jit/lowerarmarch.cpp b/src/jit/lowerarmarch.cpp
index 5aa3ff1e6b..757ac52db8 100644
--- a/src/jit/lowerarmarch.cpp
+++ b/src/jit/lowerarmarch.cpp
@@ -506,6 +506,19 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode)
}
#endif // FEATURE_SIMD
+#ifdef FEATURE_HW_INTRINSICS
+//----------------------------------------------------------------------------------------------
+// Lowering::LowerHWIntrinsic: Perform containment analysis for a hardware intrinsic node.
+//
+// Arguments:
+// node - The hardware intrinsic node.
+//
+void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
+{
+ ContainCheckHWIntrinsic(node);
+}
+#endif // FEATURE_HW_INTRINSICS
+
//------------------------------------------------------------------------
// Containment analysis
//------------------------------------------------------------------------
@@ -815,6 +828,28 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode)
}
#endif // FEATURE_SIMD
+#ifdef FEATURE_HW_INTRINSICS
+//----------------------------------------------------------------------------------------------
+// ContainCheckHWIntrinsic: Perform containment analysis for a hardware intrinsic node.
+//
+// Arguments:
+// node - The hardware intrinsic node.
+//
+void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
+{
+ NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
+ GenTree* op1 = node->gtOp.gtOp1;
+ GenTree* op2 = node->gtOp.gtOp2;
+
+ switch (node->gtHWIntrinsicId)
+ {
+ default:
+ assert((intrinsicID > NI_HW_INTRINSIC_START) && (intrinsicID < NI_HW_INTRINSIC_END));
+ break;
+ }
+}
+#endif // FEATURE_HW_INTRINSICS
+
#endif // _TARGET_ARMARCH_
#endif // !LEGACY_BACKEND
diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp
index abfdcb9009..e549976f6e 100644
--- a/src/jit/lsraarm64.cpp
+++ b/src/jit/lsraarm64.cpp
@@ -285,6 +285,12 @@ void LinearScan::TreeNodeInfoInit(GenTree* tree, TreeNodeInfo* info)
break;
#endif // FEATURE_SIMD
+#ifdef FEATURE_HW_INTRINSICS
+ case GT_HWIntrinsic:
+ TreeNodeInfoInitHWIntrinsic(tree->AsHWIntrinsic(), info);
+ break;
+#endif // FEATURE_HW_INTRINSICS
+
case GT_CAST:
{
// TODO-ARM64-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned
@@ -975,6 +981,27 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree, TreeNodeInfo* info)
}
#endif // FEATURE_SIMD
+#ifdef FEATURE_HW_INTRINSICS
+//------------------------------------------------------------------------
+// TreeNodeInfoInitHWIntrinsic: Set the NodeInfo for a GT_HWIntrinsic tree.
+//
+// Arguments:
+// tree - The GT_HWIntrinsic node of interest
+//
+// Return Value:
+// None.
+
+void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, TreeNodeInfo* info)
+{
+ NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId;
+ info->srcCount += GetOperandInfo(intrinsicTree->gtOp.gtOp1);
+ if (intrinsicTree->gtGetOp2IfPresent() != nullptr)
+ {
+ info->srcCount += GetOperandInfo(intrinsicTree->gtOp.gtOp2);
+ }
+}
+#endif
+
#endif // _TARGET_ARM64_
#endif // !LEGACY_BACKEND
diff --git a/src/jit/namedintrinsiclist.h b/src/jit/namedintrinsiclist.h
index 6387f60cbe..772f40368e 100644
--- a/src/jit/namedintrinsiclist.h
+++ b/src/jit/namedintrinsiclist.h
@@ -14,11 +14,19 @@ enum NamedIntrinsic : unsigned int
NI_MathF_Round = 2,
NI_Math_Round = 3,
NI_System_Collections_Generic_EqualityComparer_get_Default = 4,
-#if FEATURE_HW_INTRINSICS
+#ifdef FEATURE_HW_INTRINSICS
NI_HW_INTRINSIC_START,
+#if defined(_TARGET_XARCH_)
#define HARDWARE_INTRINSIC(id, name, isa, ival, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \
NI_##id,
#include "hwintrinsiclistxarch.h"
+#elif defined(_TARGET_ARM64_)
+ NI_ARM64_IsSupported_False,
+ NI_ARM64_IsSupported_True,
+ NI_ARM64_PlatformNotSupported,
+#define HARDWARE_INTRINSIC(id, isa, name, form, ins0, ins1, ins2, flags) id,
+#include "hwintrinsiclistArm64.h"
+#endif // defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
NI_HW_INTRINSIC_END
#endif
};
diff --git a/src/jit/protononjit/CMakeLists.txt b/src/jit/protononjit/CMakeLists.txt
index 1d82086bed..cb1c42cc43 100644
--- a/src/jit/protononjit/CMakeLists.txt
+++ b/src/jit/protononjit/CMakeLists.txt
@@ -5,8 +5,6 @@ add_definitions(-DFEATURE_NO_HOST)
add_definitions(-DSELF_NO_HOST)
remove_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE)
-remove_definitions(-DFEATURE_HW_INTRINSICS)
-
if(FEATURE_READYTORUN)
add_definitions(-DFEATURE_READYTORUN_COMPILER)
endif(FEATURE_READYTORUN)
@@ -14,6 +12,7 @@ endif(FEATURE_READYTORUN)
if (CLR_CMAKE_PLATFORM_ARCH_I386)
remove_definitions(-D_TARGET_X86_=1)
remove_definitions(-DFEATURE_SIMD)
+ remove_definitions(-DFEATURE_HW_INTRINSICS)
add_definitions(-D_TARGET_ARM_)
set(JIT_ARCH_ALTJIT_SOURCES ${JIT_ARM_SOURCES})
set(JIT_ARCH_LINK_LIBRARIES gcinfo_arm)
diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp
index 520d62416c..e67662361a 100644
--- a/src/jit/simd.cpp
+++ b/src/jit/simd.cpp
@@ -482,12 +482,6 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u
JITDUMP(" Known type Vector128<ulong>\n");
}
#if defined(_TARGET_ARM64_)
- else if (typeHnd == Vector64DoubleHandle)
- {
- simdBaseType = TYP_DOUBLE;
- size = Vector64SizeBytes;
- JITDUMP(" Known type Vector64<double>\n");
- }
else if (typeHnd == Vector64IntHandle)
{
simdBaseType = TYP_INT;
@@ -524,18 +518,6 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u
size = Vector64SizeBytes;
JITDUMP(" Known type Vector64<byte>\n");
}
- else if (typeHnd == Vector64LongHandle)
- {
- simdBaseType = TYP_LONG;
- size = Vector64SizeBytes;
- JITDUMP(" Known type Vector64<long>\n");
- }
- else if (typeHnd == Vector64ULongHandle)
- {
- simdBaseType = TYP_ULONG;
- size = Vector64SizeBytes;
- JITDUMP(" Known type Vector64<ulong>\n");
- }
#endif // defined(_TARGET_ARM64_)
// slow path search
@@ -686,11 +668,6 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u
simdBaseType = TYP_FLOAT;
JITDUMP(" Found type Hardware Intrinsic SIMD Vector64<float>\n");
break;
- case CORINFO_TYPE_DOUBLE:
- Vector64DoubleHandle = typeHnd;
- simdBaseType = TYP_DOUBLE;
- JITDUMP(" Found type Hardware Intrinsic SIMD Vector64<double>\n");
- break;
case CORINFO_TYPE_INT:
Vector64IntHandle = typeHnd;
simdBaseType = TYP_INT;
@@ -711,16 +688,6 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u
simdBaseType = TYP_USHORT;
JITDUMP(" Found type Hardware Intrinsic SIMD Vector64<ushort>\n");
break;
- case CORINFO_TYPE_LONG:
- Vector64LongHandle = typeHnd;
- simdBaseType = TYP_LONG;
- JITDUMP(" Found type Hardware Intrinsic SIMD Vector64<long>\n");
- break;
- case CORINFO_TYPE_ULONG:
- Vector64ULongHandle = typeHnd;
- simdBaseType = TYP_ULONG;
- JITDUMP(" Found type Hardware Intrinsic SIMD Vector64<ulong>\n");
- break;
case CORINFO_TYPE_UBYTE:
Vector64UByteHandle = typeHnd;
simdBaseType = TYP_UBYTE;
diff --git a/src/mscorlib/System.Private.CoreLib.csproj b/src/mscorlib/System.Private.CoreLib.csproj
index 904bb48980..ec57b2a622 100644
--- a/src/mscorlib/System.Private.CoreLib.csproj
+++ b/src/mscorlib/System.Private.CoreLib.csproj
@@ -296,6 +296,12 @@
<Compile Include="$(BclSourcesRoot)\System\Runtime\Intrinsics\X86\Sse42.PlatformNotSupported.cs" />
<Compile Include="$(BclSourcesRoot)\System\Runtime\Intrinsics\X86\Ssse3.PlatformNotSupported.cs" />
</ItemGroup>
+ <ItemGroup Condition="'$(Platform)' == 'arm64'">
+ <Compile Include="$(BclSourcesRoot)\System\Runtime\Intrinsics\Arm\Arm64\Simd.cs" />
+ </ItemGroup>
+ <ItemGroup Condition="'$(Platform)' != 'arm64'">
+ <Compile Include="$(BclSourcesRoot)\System\Runtime\Intrinsics\Arm\Arm64\Simd.PlatformNotSupported.cs" />
+ </ItemGroup>
<ItemGroup>
<Compile Include="$(BclSourcesRoot)\System\AppContext\AppContext.cs" />
<Compile Include="$(BclSourcesRoot)\System\AppContext\AppContextSwitches.cs" />
diff --git a/src/mscorlib/src/System/Runtime/Intrinsics/Arm/Arm64/Simd.PlatformNotSupported.cs b/src/mscorlib/src/System/Runtime/Intrinsics/Arm/Arm64/Simd.PlatformNotSupported.cs
new file mode 100644
index 0000000000..4b377902c7
--- /dev/null
+++ b/src/mscorlib/src/System/Runtime/Intrinsics/Arm/Arm64/Simd.PlatformNotSupported.cs
@@ -0,0 +1,344 @@
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+
+
+namespace System.Runtime.Intrinsics.Arm.Arm64
+{
+ /// <summary>
+ /// This class provides access to the Arm64 AdvSIMD intrinsics
+ ///
+ /// Arm64 CPUs indicate support for this feature by setting
+ /// ID_AA64PFR0_EL1.AdvSIMD == 0 or better.
+ /// </summary>
+ [CLSCompliant(false)]
+ public static class Simd
+ {
+ /// <summary>
+ /// IsSupported property indicates whether any method provided
+ /// by this class is supported by the current runtime.
+ /// </summary>
+ public static bool IsSupported { get { return false; }}
+
+ /// <summary>
+ /// Vector abs
+ /// Corresponds to vector forms of ARM64 ABS & FABS
+ /// </summary>
+ public static Vector64<byte> Abs(Vector64<sbyte> value) { throw new PlatformNotSupportedException(); }
+ public static Vector64<ushort> Abs(Vector64<short> value) { throw new PlatformNotSupportedException(); }
+ public static Vector64<uint> Abs(Vector64<int> value) { throw new PlatformNotSupportedException(); }
+ public static Vector64<float> Abs(Vector64<float> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<byte> Abs(Vector128<sbyte> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<ushort> Abs(Vector128<short> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<uint> Abs(Vector128<int> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<ulong> Abs(Vector128<long> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<float> Abs(Vector128<float> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<double> Abs(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector add
+ /// Corresponds to vector forms of ARM64 ADD & FADD
+ /// </summary>
+ public static Vector64<T> Add<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> Add<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector and
+ /// Corresponds to vector forms of ARM64 AND
+ /// </summary>
+ public static Vector64<T> And<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> And<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector and not
+ /// Corresponds to vector forms of ARM64 BIC
+ /// </summary>
+ public static Vector64<T> AndNot<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> AndNot<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector BitwiseSelect
+ /// For each bit in the vector result[bit] = sel[bit] ? left[bit] : right[bit]
+ /// Corresponds to vector forms of ARM64 BSL (Also BIF & BIT)
+ /// </summary>
+ public static Vector64<T> BitwiseSelect<T>(Vector64<T> sel, Vector64<T> left, Vector64<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> BitwiseSelect<T>(Vector128<T> sel, Vector128<T> left, Vector128<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector CompareEqual
+ /// For each element result[elem] = (left[elem] == right[elem]) ? ~0 : 0
+ /// Corresponds to vector forms of ARM64 CMEQ & FCMEQ
+ /// </summary>
+ public static Vector64<T> CompareEqual<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> CompareEqual<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector CompareEqualZero
+ /// For each element result[elem] = (left[elem] == 0) ? ~0 : 0
+ /// Corresponds to vector forms of ARM64 CMEQ & FCMEQ
+ /// </summary>
+ public static Vector64<T> CompareEqualZero<T>(Vector64<T> value) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> CompareEqualZero<T>(Vector128<T> value) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector CompareGreaterThan
+ /// For each element result[elem] = (left[elem] > right[elem]) ? ~0 : 0
+ /// Corresponds to vector forms of ARM64 CMGT/CMHI & FCMGT
+ /// </summary>
+ public static Vector64<T> CompareGreaterThan<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> CompareGreaterThan<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector CompareGreaterThanZero
+ /// For each element result[elem] = (left[elem] > 0) ? ~0 : 0
+ /// Corresponds to vector forms of ARM64 CMGT & FCMGT
+ /// </summary>
+ public static Vector64<T> CompareGreaterThanZero<T>(Vector64<T> value) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> CompareGreaterThanZero<T>(Vector128<T> value) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector CompareGreaterThanOrEqual
+ /// For each element result[elem] = (left[elem] >= right[elem]) ? ~0 : 0
+ /// Corresponds to vector forms of ARM64 CMGE/CMHS & FCMGE
+ /// </summary>
+ public static Vector64<T> CompareGreaterThanOrEqual<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> CompareGreaterThanOrEqual<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector CompareGreaterThanOrEqualZero
+ /// For each element result[elem] = (left[elem] >= 0) ? ~0 : 0
+ /// Corresponds to vector forms of ARM64 CMGE & FCMGE
+ /// </summary>
+ public static Vector64<T> CompareGreaterThanOrEqualZero<T>(Vector64<T> value) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> CompareGreaterThanOrEqualZero<T>(Vector128<T> value) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector CompareLessThanZero
+ /// For each element result[elem] = (left[elem] < 0) ? ~0 : 0
+ /// Corresponds to vector forms of ARM64 CMLT & FCMLT
+ /// </summary>
+ public static Vector64<T> CompareLessThanZero<T>(Vector64<T> value) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> CompareLessThanZero<T>(Vector128<T> value) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector CompareLessThanOrEqualZero
+ /// For each element result[elem] = (left[elem] <= 0) ? ~0 : 0
+ /// Corresponds to vector forms of ARM64 CMLE & FCMLE
+ /// </summary>
+ public static Vector64<T> CompareLessThanOrEqualZero<T>(Vector64<T> value) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> CompareLessThanOrEqualZero<T>(Vector128<T> value) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector CompareTest
+ /// For each element result[elem] = (left[elem] & right[elem]) ? ~0 : 0
+ /// Corresponds to vector forms of ARM64 CMTST
+ /// </summary>
+ public static Vector64<T> CompareTest<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> CompareTest<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// TBD Convert...
+
+ /// <summary>
+ /// Vector Divide
+ /// Corresponds to vector forms of ARM64 FDIV
+ /// </summary>
+ public static Vector64<float> Divide(Vector64<float> left, Vector64<float> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<float> Divide(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<double> Divide(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector extract item
+ ///
+ /// result = vector[index]
+ ///
+ /// Note: In order to be inlined, index must be a JIT time const expression which can be used to
+ /// populate the literal immediate field. Use of a non constant will result in generation of a switch table
+ ///
+ /// Corresponds to vector forms of ARM64 MOV
+ /// </summary>
+ public static T Extract<T>(Vector64<T> vector, byte index) where T : struct { throw new PlatformNotSupportedException(); }
+ public static T Extract<T>(Vector128<T> vector, byte index) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector insert item
+ ///
+ /// result = vector;
+ /// result[index] = data;
+ ///
+ /// Note: In order to be inlined, index must be a JIT time const expression which can be used to
+ /// populate the literal immediate field. Use of a non constant will result in generation of a switch table
+ ///
+ /// Corresponds to vector forms of ARM64 INS
+ /// </summary>
+ public static Vector64<T> Insert<T>(Vector64<T> vector, byte index, T data) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> Insert<T>(Vector128<T> vector, byte index, T data) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector LeadingSignCount
+ /// Corresponds to vector forms of ARM64 CLS
+ /// </summary>
+ public static Vector64<sbyte> LeadingSignCount(Vector64<sbyte> value) { throw new PlatformNotSupportedException(); }
+ public static Vector64<short> LeadingSignCount(Vector64<short> value) { throw new PlatformNotSupportedException(); }
+ public static Vector64<int> LeadingSignCount(Vector64<int> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<sbyte> LeadingSignCount(Vector128<sbyte> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<short> LeadingSignCount(Vector128<short> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<int> LeadingSignCount(Vector128<int> value) { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector LeadingZeroCount
+ /// Corresponds to vector forms of ARM64 CLZ
+ /// </summary>
+ public static Vector64<byte> LeadingZeroCount(Vector64<byte> value) { throw new PlatformNotSupportedException(); }
+ public static Vector64<sbyte> LeadingZeroCount(Vector64<sbyte> value) { throw new PlatformNotSupportedException(); }
+ public static Vector64<ushort> LeadingZeroCount(Vector64<ushort> value) { throw new PlatformNotSupportedException(); }
+ public static Vector64<short> LeadingZeroCount(Vector64<short> value) { throw new PlatformNotSupportedException(); }
+ public static Vector64<uint> LeadingZeroCount(Vector64<uint> value) { throw new PlatformNotSupportedException(); }
+ public static Vector64<int> LeadingZeroCount(Vector64<int> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<byte> LeadingZeroCount(Vector128<byte> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<sbyte> LeadingZeroCount(Vector128<sbyte> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<ushort> LeadingZeroCount(Vector128<ushort> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<short> LeadingZeroCount(Vector128<short> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<uint> LeadingZeroCount(Vector128<uint> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<int> LeadingZeroCount(Vector128<int> value) { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector max
+ /// Corresponds to vector forms of ARM64 SMAX, UMAX & FMAX
+ /// </summary>
+ public static Vector64<byte> Max(Vector64<byte> left, Vector64<byte> right) { throw new PlatformNotSupportedException(); }
+ public static Vector64<sbyte> Max(Vector64<sbyte> left, Vector64<sbyte> right) { throw new PlatformNotSupportedException(); }
+ public static Vector64<ushort> Max(Vector64<ushort> left, Vector64<ushort> right) { throw new PlatformNotSupportedException(); }
+ public static Vector64<short> Max(Vector64<short> left, Vector64<short> right) { throw new PlatformNotSupportedException(); }
+ public static Vector64<uint> Max(Vector64<uint> left, Vector64<uint> right) { throw new PlatformNotSupportedException(); }
+ public static Vector64<int> Max(Vector64<int> left, Vector64<int> right) { throw new PlatformNotSupportedException(); }
+ public static Vector64<float> Max(Vector64<float> left, Vector64<float> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<byte> Max(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<sbyte> Max(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<ushort> Max(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<short> Max(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<uint> Max(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<int> Max(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<float> Max(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<double> Max(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector min
+ /// Corresponds to vector forms of ARM64 SMIN, UMIN & FMIN
+ /// </summary>
+ public static Vector64<byte> Min(Vector64<byte> left, Vector64<byte> right) { throw new PlatformNotSupportedException(); }
+ public static Vector64<sbyte> Min(Vector64<sbyte> left, Vector64<sbyte> right) { throw new PlatformNotSupportedException(); }
+ public static Vector64<ushort> Min(Vector64<ushort> left, Vector64<ushort> right) { throw new PlatformNotSupportedException(); }
+ public static Vector64<short> Min(Vector64<short> left, Vector64<short> right) { throw new PlatformNotSupportedException(); }
+ public static Vector64<uint> Min(Vector64<uint> left, Vector64<uint> right) { throw new PlatformNotSupportedException(); }
+ public static Vector64<int> Min(Vector64<int> left, Vector64<int> right) { throw new PlatformNotSupportedException(); }
+ public static Vector64<float> Min(Vector64<float> left, Vector64<float> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<byte> Min(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<sbyte> Min(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<ushort> Min(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<short> Min(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<uint> Min(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<int> Min(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<float> Min(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<double> Min(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
+
+ /// TBD MOV, FMOV
+
+ /// <summary>
+ /// Vector multiply
+ ///
+ /// For each element result[elem] = left[elem] * right[elem]
+ ///
+ /// Corresponds to vector forms of ARM64 MUL & FMUL
+ /// </summary>
+ public static Vector64<byte> Multiply(Vector64<byte> left, Vector64<byte> right) { throw new PlatformNotSupportedException(); }
+ public static Vector64<sbyte> Multiply(Vector64<sbyte> left, Vector64<sbyte> right) { throw new PlatformNotSupportedException(); }
+ public static Vector64<ushort> Multiply(Vector64<ushort> left, Vector64<ushort> right) { throw new PlatformNotSupportedException(); }
+ public static Vector64<short> Multiply(Vector64<short> left, Vector64<short> right) { throw new PlatformNotSupportedException(); }
+ public static Vector64<uint> Multiply(Vector64<uint> left, Vector64<uint> right) { throw new PlatformNotSupportedException(); }
+ public static Vector64<int> Multiply(Vector64<int> left, Vector64<int> right) { throw new PlatformNotSupportedException(); }
+ public static Vector64<float> Multiply(Vector64<float> left, Vector64<float> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<byte> Multiply(Vector128<byte> left, Vector128<byte> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<sbyte> Multiply(Vector128<sbyte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<ushort> Multiply(Vector128<ushort> left, Vector128<ushort> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<short> Multiply(Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<uint> Multiply(Vector128<uint> left, Vector128<uint> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<int> Multiply(Vector128<int> left, Vector128<int> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<float> Multiply(Vector128<float> left, Vector128<float> right) { throw new PlatformNotSupportedException(); }
+ public static Vector128<double> Multiply(Vector128<double> left, Vector128<double> right) { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector negate
+ /// Corresponds to vector forms of ARM64 NEG & FNEG
+ /// </summary>
+ public static Vector64<sbyte> Negate(Vector64<sbyte> value) { throw new PlatformNotSupportedException(); }
+ public static Vector64<short> Negate(Vector64<short> value) { throw new PlatformNotSupportedException(); }
+ public static Vector64<int> Negate(Vector64<int> value) { throw new PlatformNotSupportedException(); }
+ public static Vector64<float> Negate(Vector64<float> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<sbyte> Negate(Vector128<sbyte> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<short> Negate(Vector128<short> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<int> Negate(Vector128<int> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<long> Negate(Vector128<long> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<float> Negate(Vector128<float> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<double> Negate(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector not
+ /// Corresponds to vector forms of ARM64 NOT
+ /// </summary>
+ public static Vector64<T> Not<T>(Vector64<T> value) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> Not<T>(Vector128<T> value) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector or
+ /// Corresponds to vector forms of ARM64 ORR
+ /// </summary>
+ public static Vector64<T> Or<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> Or<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector or not
+ /// Corresponds to vector forms of ARM64 ORN
+ /// </summary>
+ public static Vector64<T> OrNot<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> OrNot<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector PopCount
+ /// Corresponds to vector forms of ARM64 CNT
+ /// </summary>
+ public static Vector64<byte> PopCount(Vector64<byte> value) { throw new PlatformNotSupportedException(); }
+ public static Vector64<sbyte> PopCount(Vector64<sbyte> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<byte> PopCount(Vector128<byte> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<sbyte> PopCount(Vector128<sbyte> value) { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// SetVector* Fill vector elements by replicating element value
+ ///
+ /// Corresponds to vector forms of ARM64 DUP (general), DUP (element 0), FMOV (vector, immediate)
+ /// </summary>
+ public static Vector64<T> SetAllVector64<T>(T value) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> SetAllVector128<T>(T value) where T : struct { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector square root
+ /// Corresponds to vector forms of ARM64 FSQRT
+ /// </summary>
+ public static Vector64<float> Sqrt(Vector64<float> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<float> Sqrt(Vector128<float> value) { throw new PlatformNotSupportedException(); }
+ public static Vector128<double> Sqrt(Vector128<double> value) { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// Vector subtract
+ /// Corresponds to vector forms of ARM64 SUB & FSUB
+ /// </summary>
+ public static Vector64<T> Subtract<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> Subtract<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+
+
+ /// <summary>
+ /// Vector exclusive or
+ /// Corresponds to vector forms of ARM64 EOR
+ /// </summary>
+ public static Vector64<T> Xor<T>(Vector64<T> left, Vector64<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+ public static Vector128<T> Xor<T>(Vector128<T> left, Vector128<T> right) where T : struct { throw new PlatformNotSupportedException(); }
+ }
+}
diff --git a/src/mscorlib/src/System/Runtime/Intrinsics/Arm/Arm64/Simd.cs b/src/mscorlib/src/System/Runtime/Intrinsics/Arm/Arm64/Simd.cs
new file mode 100644
index 0000000000..f162483daf
--- /dev/null
+++ b/src/mscorlib/src/System/Runtime/Intrinsics/Arm/Arm64/Simd.cs
@@ -0,0 +1,344 @@
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+
+
+namespace System.Runtime.Intrinsics.Arm.Arm64
+{
+ /// <summary>
+ /// This class provides access to the Arm64 AdvSIMD intrinsics
+ ///
+ /// Arm64 CPU indicate support for this feature by setting
+ /// ID_AA64PFR0_EL1.AdvSIMD == 0 or better.
+ /// </summary>
+ [CLSCompliant(false)]
+ public static class Simd
+ {
+ /// <summary>
+ /// IsSupported property indicates whether any method provided
+ /// by this class is supported by the current runtime.
+ /// </summary>
+ public static bool IsSupported { get => IsSupported; }
+
+ /// <summary>
+ /// Vector abs
+ /// Corresponds to vector forms of ARM64 ABS & FABS
+ /// </summary>
+ public static Vector64<byte> Abs(Vector64<sbyte> value) => Abs(value);
+ public static Vector64<ushort> Abs(Vector64<short> value) => Abs(value);
+ public static Vector64<uint> Abs(Vector64<int> value) => Abs(value);
+ public static Vector64<float> Abs(Vector64<float> value) => Abs(value);
+ public static Vector128<byte> Abs(Vector128<sbyte> value) => Abs(value);
+ public static Vector128<ushort> Abs(Vector128<short> value) => Abs(value);
+ public static Vector128<uint> Abs(Vector128<int> value) => Abs(value);
+ public static Vector128<ulong> Abs(Vector128<long> value) => Abs(value);
+ public static Vector128<float> Abs(Vector128<float> value) => Abs(value);
+ public static Vector128<double> Abs(Vector128<double> value) => Abs(value);
+
+ /// <summary>
+ /// Vector add
+ /// Corresponds to vector forms of ARM64 ADD & FADD
+ /// </summary>
+ public static Vector64<T> Add<T>(Vector64<T> left, Vector64<T> right) where T : struct => Add(left, right);
+ public static Vector128<T> Add<T>(Vector128<T> left, Vector128<T> right) where T : struct => Add(left, right);
+
+ /// <summary>
+ /// Vector and
+ /// Corresponds to vector forms of ARM64 AND
+ /// </summary>
+ public static Vector64<T> And<T>(Vector64<T> left, Vector64<T> right) where T : struct => And(left, right);
+ public static Vector128<T> And<T>(Vector128<T> left, Vector128<T> right) where T : struct => And(left, right);
+
+ /// <summary>
+ /// Vector and not
+ /// Corresponds to vector forms of ARM64 BIC
+ /// </summary>
+ public static Vector64<T> AndNot<T>(Vector64<T> left, Vector64<T> right) where T : struct => AndNot(left, right);
+ public static Vector128<T> AndNot<T>(Vector128<T> left, Vector128<T> right) where T : struct => AndNot(left, right);
+
+ /// <summary>
+ /// Vector BitwiseSelect
+ /// For each bit in the vector result[bit] = sel[bit] ? left[bit] : right[bit]
+ /// Corresponds to vector forms of ARM64 BSL (Also BIF & BIT)
+ /// </summary>
+ public static Vector64<T> BitwiseSelect<T>(Vector64<T> sel, Vector64<T> left, Vector64<T> right) where T : struct => BitwiseSelect(sel, left, right);
+ public static Vector128<T> BitwiseSelect<T>(Vector128<T> sel, Vector128<T> left, Vector128<T> right) where T : struct => BitwiseSelect(sel, left, right);
+
+ /// <summary>
+ /// Vector CompareEqual
+ /// For each element result[elem] = (left[elem] == right[elem]) ? ~0 : 0
+ /// Corresponds to vector forms of ARM64 CMEQ & FCMEQ
+ /// </summary>
+ public static Vector64<T> CompareEqual<T>(Vector64<T> left, Vector64<T> right) where T : struct => CompareEqual(left, right);
+ public static Vector128<T> CompareEqual<T>(Vector128<T> left, Vector128<T> right) where T : struct => CompareEqual(left, right);
+
+ /// <summary>
+ /// Vector CompareEqualZero
+ /// For each element result[elem] = (left[elem] == 0) ? ~0 : 0
+ /// Corresponds to vector forms of ARM64 CMEQ & FCMEQ
+ /// </summary>
+ public static Vector64<T> CompareEqualZero<T>(Vector64<T> value) where T : struct => CompareEqualZero(value);
+ public static Vector128<T> CompareEqualZero<T>(Vector128<T> value) where T : struct => CompareEqualZero(value);
+
+ /// <summary>
+ /// Vector CompareGreaterThan
+ /// For each element result[elem] = (left[elem] > right[elem]) ? ~0 : 0
+ /// Corresponds to vector forms of ARM64 CMGT/CMHI & FCMGT
+ /// </summary>
+ public static Vector64<T> CompareGreaterThan<T>(Vector64<T> left, Vector64<T> right) where T : struct => CompareGreaterThan(left, right);
+ public static Vector128<T> CompareGreaterThan<T>(Vector128<T> left, Vector128<T> right) where T : struct => CompareGreaterThan(left, right);
+
+ /// <summary>
+ /// Vector CompareGreaterThanZero
+ /// For each element result[elem] = (left[elem] > 0) ? ~0 : 0
+ /// Corresponds to vector forms of ARM64 CMGT & FCMGT
+ /// </summary>
+ public static Vector64<T> CompareGreaterThanZero<T>(Vector64<T> value) where T : struct => CompareGreaterThanZero(value);
+ public static Vector128<T> CompareGreaterThanZero<T>(Vector128<T> value) where T : struct => CompareGreaterThanZero(value);
+
+ /// <summary>
+ /// Vector CompareGreaterThanOrEqual
+ /// For each element result[elem] = (left[elem] >= right[elem]) ? ~0 : 0
+ /// Corresponds to vector forms of ARM64 CMGE/CMHS & FCMGE
+ /// </summary>
+ public static Vector64<T> CompareGreaterThanOrEqual<T>(Vector64<T> left, Vector64<T> right) where T : struct => CompareGreaterThanOrEqual(left, right);
+ public static Vector128<T> CompareGreaterThanOrEqual<T>(Vector128<T> left, Vector128<T> right) where T : struct => CompareGreaterThanOrEqual(left, right);
+
+ /// <summary>
+ /// Vector CompareGreaterThanOrEqualZero
+ /// For each element result[elem] = (left[elem] >= 0) ? ~0 : 0
+ /// Corresponds to vector forms of ARM64 CMGE & FCMGE
+ /// </summary>
+ public static Vector64<T> CompareGreaterThanOrEqualZero<T>(Vector64<T> value) where T : struct => CompareGreaterThanOrEqualZero(value);
+ public static Vector128<T> CompareGreaterThanOrEqualZero<T>(Vector128<T> value) where T : struct => CompareGreaterThanOrEqualZero(value);
+
+ /// <summary>
+ /// Vector CompareLessThanZero
+ /// For each element result[elem] = (left[elem] < 0) ? ~0 : 0
+        /// Corresponds to vector forms of ARM64 CMLT & FCMLT
+ /// </summary>
+ public static Vector64<T> CompareLessThanZero<T>(Vector64<T> value) where T : struct => CompareLessThanZero(value);
+ public static Vector128<T> CompareLessThanZero<T>(Vector128<T> value) where T : struct => CompareLessThanZero(value);
+
+ /// <summary>
+ /// Vector CompareLessThanOrEqualZero
+        /// For each element result[elem] = (left[elem] <= 0) ? ~0 : 0
+        /// Corresponds to vector forms of ARM64 CMLE & FCMLE
+ /// </summary>
+ public static Vector64<T> CompareLessThanOrEqualZero<T>(Vector64<T> value) where T : struct => CompareLessThanOrEqualZero(value);
+ public static Vector128<T> CompareLessThanOrEqualZero<T>(Vector128<T> value) where T : struct => CompareLessThanOrEqualZero(value);
+
+ /// <summary>
+ /// Vector CompareTest
+ /// For each element result[elem] = (left[elem] & right[elem]) ? ~0 : 0
+ /// Corresponds to vector forms of ARM64 CMTST
+ /// </summary>
+ public static Vector64<T> CompareTest<T>(Vector64<T> left, Vector64<T> right) where T : struct => CompareTest(left, right);
+ public static Vector128<T> CompareTest<T>(Vector128<T> left, Vector128<T> right) where T : struct => CompareTest(left, right);
+
+ /// TBD Convert...
+
+ /// <summary>
+ /// Vector Divide
+ /// Corresponds to vector forms of ARM64 FDIV
+ /// </summary>
+ public static Vector64<float> Divide(Vector64<float> left, Vector64<float> right) => Divide(left, right);
+ public static Vector128<float> Divide(Vector128<float> left, Vector128<float> right) => Divide(left, right);
+ public static Vector128<double> Divide(Vector128<double> left, Vector128<double> right) => Divide(left, right);
+
+ /// <summary>
+ /// Vector extract item
+ ///
+ /// result = vector[index]
+ ///
+ /// Note: In order to be inlined, index must be a JIT time const expression which can be used to
+ /// populate the literal immediate field. Use of a non constant will result in generation of a switch table
+ ///
+ /// Corresponds to vector forms of ARM64 MOV
+ /// </summary>
+ public static T Extract<T>(Vector64<T> vector, byte index) where T : struct => Extract(vector, index);
+ public static T Extract<T>(Vector128<T> vector, byte index) where T : struct => Extract(vector, index);
+
+ /// <summary>
+ /// Vector insert item
+ ///
+ /// result = vector;
+ /// result[index] = data;
+ ///
+ /// Note: In order to be inlined, index must be a JIT time const expression which can be used to
+ /// populate the literal immediate field. Use of a non constant will result in generation of a switch table
+ ///
+ /// Corresponds to vector forms of ARM64 INS
+ /// </summary>
+ public static Vector64<T> Insert<T>(Vector64<T> vector, byte index, T data) where T : struct => Insert(vector, index, data);
+ public static Vector128<T> Insert<T>(Vector128<T> vector, byte index, T data) where T : struct => Insert(vector, index, data);
+
+ /// <summary>
+ /// Vector LeadingSignCount
+ /// Corresponds to vector forms of ARM64 CLS
+ /// </summary>
+ public static Vector64<sbyte> LeadingSignCount(Vector64<sbyte> value) => LeadingSignCount(value);
+ public static Vector64<short> LeadingSignCount(Vector64<short> value) => LeadingSignCount(value);
+ public static Vector64<int> LeadingSignCount(Vector64<int> value) => LeadingSignCount(value);
+ public static Vector128<sbyte> LeadingSignCount(Vector128<sbyte> value) => LeadingSignCount(value);
+ public static Vector128<short> LeadingSignCount(Vector128<short> value) => LeadingSignCount(value);
+ public static Vector128<int> LeadingSignCount(Vector128<int> value) => LeadingSignCount(value);
+
+ /// <summary>
+ /// Vector LeadingZeroCount
+ /// Corresponds to vector forms of ARM64 CLZ
+ /// </summary>
+ public static Vector64<byte> LeadingZeroCount(Vector64<byte> value) => LeadingZeroCount(value);
+ public static Vector64<sbyte> LeadingZeroCount(Vector64<sbyte> value) => LeadingZeroCount(value);
+ public static Vector64<ushort> LeadingZeroCount(Vector64<ushort> value) => LeadingZeroCount(value);
+ public static Vector64<short> LeadingZeroCount(Vector64<short> value) => LeadingZeroCount(value);
+ public static Vector64<uint> LeadingZeroCount(Vector64<uint> value) => LeadingZeroCount(value);
+ public static Vector64<int> LeadingZeroCount(Vector64<int> value) => LeadingZeroCount(value);
+ public static Vector128<byte> LeadingZeroCount(Vector128<byte> value) => LeadingZeroCount(value);
+ public static Vector128<sbyte> LeadingZeroCount(Vector128<sbyte> value) => LeadingZeroCount(value);
+ public static Vector128<ushort> LeadingZeroCount(Vector128<ushort> value) => LeadingZeroCount(value);
+ public static Vector128<short> LeadingZeroCount(Vector128<short> value) => LeadingZeroCount(value);
+ public static Vector128<uint> LeadingZeroCount(Vector128<uint> value) => LeadingZeroCount(value);
+ public static Vector128<int> LeadingZeroCount(Vector128<int> value) => LeadingZeroCount(value);
+
+ /// <summary>
+ /// Vector max
+ /// Corresponds to vector forms of ARM64 SMAX, UMAX & FMAX
+ /// </summary>
+ public static Vector64<byte> Max(Vector64<byte> left, Vector64<byte> right) => Max(left, right);
+ public static Vector64<sbyte> Max(Vector64<sbyte> left, Vector64<sbyte> right) => Max(left, right);
+ public static Vector64<ushort> Max(Vector64<ushort> left, Vector64<ushort> right) => Max(left, right);
+ public static Vector64<short> Max(Vector64<short> left, Vector64<short> right) => Max(left, right);
+ public static Vector64<uint> Max(Vector64<uint> left, Vector64<uint> right) => Max(left, right);
+ public static Vector64<int> Max(Vector64<int> left, Vector64<int> right) => Max(left, right);
+ public static Vector64<float> Max(Vector64<float> left, Vector64<float> right) => Max(left, right);
+ public static Vector128<byte> Max(Vector128<byte> left, Vector128<byte> right) => Max(left, right);
+ public static Vector128<sbyte> Max(Vector128<sbyte> left, Vector128<sbyte> right) => Max(left, right);
+ public static Vector128<ushort> Max(Vector128<ushort> left, Vector128<ushort> right) => Max(left, right);
+ public static Vector128<short> Max(Vector128<short> left, Vector128<short> right) => Max(left, right);
+ public static Vector128<uint> Max(Vector128<uint> left, Vector128<uint> right) => Max(left, right);
+ public static Vector128<int> Max(Vector128<int> left, Vector128<int> right) => Max(left, right);
+ public static Vector128<float> Max(Vector128<float> left, Vector128<float> right) => Max(left, right);
+ public static Vector128<double> Max(Vector128<double> left, Vector128<double> right) => Max(left, right);
+
+ /// <summary>
+ /// Vector min
+ /// Corresponds to vector forms of ARM64 SMIN, UMIN & FMIN
+ /// </summary>
+ public static Vector64<byte> Min(Vector64<byte> left, Vector64<byte> right) => Min(left, right);
+ public static Vector64<sbyte> Min(Vector64<sbyte> left, Vector64<sbyte> right) => Min(left, right);
+ public static Vector64<ushort> Min(Vector64<ushort> left, Vector64<ushort> right) => Min(left, right);
+ public static Vector64<short> Min(Vector64<short> left, Vector64<short> right) => Min(left, right);
+ public static Vector64<uint> Min(Vector64<uint> left, Vector64<uint> right) => Min(left, right);
+ public static Vector64<int> Min(Vector64<int> left, Vector64<int> right) => Min(left, right);
+ public static Vector64<float> Min(Vector64<float> left, Vector64<float> right) => Min(left, right);
+ public static Vector128<byte> Min(Vector128<byte> left, Vector128<byte> right) => Min(left, right);
+ public static Vector128<sbyte> Min(Vector128<sbyte> left, Vector128<sbyte> right) => Min(left, right);
+ public static Vector128<ushort> Min(Vector128<ushort> left, Vector128<ushort> right) => Min(left, right);
+ public static Vector128<short> Min(Vector128<short> left, Vector128<short> right) => Min(left, right);
+ public static Vector128<uint> Min(Vector128<uint> left, Vector128<uint> right) => Min(left, right);
+ public static Vector128<int> Min(Vector128<int> left, Vector128<int> right) => Min(left, right);
+ public static Vector128<float> Min(Vector128<float> left, Vector128<float> right) => Min(left, right);
+ public static Vector128<double> Min(Vector128<double> left, Vector128<double> right) => Min(left, right);
+
+ /// TBD MOV, FMOV
+
+ /// <summary>
+ /// Vector multiply
+ ///
+ /// For each element result[elem] = left[elem] * right[elem]
+ ///
+ /// Corresponds to vector forms of ARM64 MUL & FMUL
+ /// </summary>
+ public static Vector64<byte> Multiply(Vector64<byte> left, Vector64<byte> right) => Multiply(left, right);
+ public static Vector64<sbyte> Multiply(Vector64<sbyte> left, Vector64<sbyte> right) => Multiply(left, right);
+ public static Vector64<ushort> Multiply(Vector64<ushort> left, Vector64<ushort> right) => Multiply(left, right);
+ public static Vector64<short> Multiply(Vector64<short> left, Vector64<short> right) => Multiply(left, right);
+ public static Vector64<uint> Multiply(Vector64<uint> left, Vector64<uint> right) => Multiply(left, right);
+ public static Vector64<int> Multiply(Vector64<int> left, Vector64<int> right) => Multiply(left, right);
+ public static Vector64<float> Multiply(Vector64<float> left, Vector64<float> right) => Multiply(left, right);
+ public static Vector128<byte> Multiply(Vector128<byte> left, Vector128<byte> right) => Multiply(left, right);
+ public static Vector128<sbyte> Multiply(Vector128<sbyte> left, Vector128<sbyte> right) => Multiply(left, right);
+ public static Vector128<ushort> Multiply(Vector128<ushort> left, Vector128<ushort> right) => Multiply(left, right);
+ public static Vector128<short> Multiply(Vector128<short> left, Vector128<short> right) => Multiply(left, right);
+ public static Vector128<uint> Multiply(Vector128<uint> left, Vector128<uint> right) => Multiply(left, right);
+ public static Vector128<int> Multiply(Vector128<int> left, Vector128<int> right) => Multiply(left, right);
+ public static Vector128<float> Multiply(Vector128<float> left, Vector128<float> right) => Multiply(left, right);
+ public static Vector128<double> Multiply(Vector128<double> left, Vector128<double> right) => Multiply(left, right);
+
+ /// <summary>
+ /// Vector negate
+ /// Corresponds to vector forms of ARM64 NEG & FNEG
+ /// </summary>
+ public static Vector64<sbyte> Negate(Vector64<sbyte> value) => Negate(value);
+ public static Vector64<short> Negate(Vector64<short> value) => Negate(value);
+ public static Vector64<int> Negate(Vector64<int> value) => Negate(value);
+ public static Vector64<float> Negate(Vector64<float> value) => Negate(value);
+ public static Vector128<sbyte> Negate(Vector128<sbyte> value) => Negate(value);
+ public static Vector128<short> Negate(Vector128<short> value) => Negate(value);
+ public static Vector128<int> Negate(Vector128<int> value) => Negate(value);
+ public static Vector128<long> Negate(Vector128<long> value) => Negate(value);
+ public static Vector128<float> Negate(Vector128<float> value) => Negate(value);
+ public static Vector128<double> Negate(Vector128<double> value) => Negate(value);
+
+ /// <summary>
+ /// Vector not
+ /// Corresponds to vector forms of ARM64 NOT
+ /// </summary>
+ public static Vector64<T> Not<T>(Vector64<T> value) where T : struct => Not(value);
+ public static Vector128<T> Not<T>(Vector128<T> value) where T : struct => Not(value);
+
+ /// <summary>
+ /// Vector or
+ /// Corresponds to vector forms of ARM64 ORR
+ /// </summary>
+ public static Vector64<T> Or<T>(Vector64<T> left, Vector64<T> right) where T : struct => Or(left, right);
+ public static Vector128<T> Or<T>(Vector128<T> left, Vector128<T> right) where T : struct => Or(left, right);
+
+ /// <summary>
+ /// Vector or not
+ /// Corresponds to vector forms of ARM64 ORN
+ /// </summary>
+ public static Vector64<T> OrNot<T>(Vector64<T> left, Vector64<T> right) where T : struct => OrNot(left, right);
+ public static Vector128<T> OrNot<T>(Vector128<T> left, Vector128<T> right) where T : struct => OrNot(left, right);
+
+ /// <summary>
+ /// Vector PopCount
+ /// Corresponds to vector forms of ARM64 CNT
+ /// </summary>
+ public static Vector64<byte> PopCount(Vector64<byte> value) => PopCount(value);
+ public static Vector64<sbyte> PopCount(Vector64<sbyte> value) => PopCount(value);
+ public static Vector128<byte> PopCount(Vector128<byte> value) => PopCount(value);
+ public static Vector128<sbyte> PopCount(Vector128<sbyte> value) => PopCount(value);
+
+ /// <summary>
+ /// SetVector* Fill vector elements by replicating element value
+ ///
+ /// Corresponds to vector forms of ARM64 DUP (general), DUP (element 0), FMOV (vector, immediate)
+ /// </summary>
+ public static Vector64<T> SetAllVector64<T>(T value) where T : struct => SetAllVector64(value);
+ public static Vector128<T> SetAllVector128<T>(T value) where T : struct => SetAllVector128(value);
+
+ /// <summary>
+ /// Vector square root
+        /// Corresponds to vector forms of ARM64 FSQRT
+ /// </summary>
+ public static Vector64<float> Sqrt(Vector64<float> value) => Sqrt(value);
+ public static Vector128<float> Sqrt(Vector128<float> value) => Sqrt(value);
+ public static Vector128<double> Sqrt(Vector128<double> value) => Sqrt(value);
+
+ /// <summary>
+ /// Vector subtract
+ /// Corresponds to vector forms of ARM64 SUB & FSUB
+ /// </summary>
+ public static Vector64<T> Subtract<T>(Vector64<T> left, Vector64<T> right) where T : struct => Subtract(left, right);
+ public static Vector128<T> Subtract<T>(Vector128<T> left, Vector128<T> right) where T : struct => Subtract(left, right);
+
+
+ /// <summary>
+ /// Vector exclusive or
+ /// Corresponds to vector forms of ARM64 EOR
+ /// </summary>
+ public static Vector64<T> Xor<T>(Vector64<T> left, Vector64<T> right) where T : struct => Xor(left, right);
+ public static Vector128<T> Xor<T>(Vector128<T> left, Vector128<T> right) where T : struct => Xor(left, right);
+ }
+}