summaryrefslogtreecommitdiff
path: root/src/cpu.h
blob: 7d6bfce1108a1dc5a27b22b1cdbfb1f4df5f4808 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifndef NCNN_CPU_H
#define NCNN_CPU_H

#include <stddef.h>

#if (defined _WIN32 && !(defined __MINGW32__))
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
#if defined __ANDROID__ || defined __linux__
#include <sched.h> // cpu_set_t
#endif

#include "platform.h"

namespace ncnn {

class NCNN_EXPORT CpuSet
{
public:
    CpuSet();
    void enable(int cpu);
    void disable(int cpu);
    void disable_all();
    bool is_enabled(int cpu) const;
    int num_enabled() const;

public:
#if (defined _WIN32 && !(defined __MINGW32__))
    ULONG_PTR mask;
#endif
#if defined __ANDROID__ || defined __linux__
    cpu_set_t cpu_set;
#endif
#if __APPLE__
    unsigned int policy;
#endif
};

// test optional cpu features
// edsp = armv7 edsp
NCNN_EXPORT int cpu_support_arm_edsp();
// neon = armv7 neon or aarch64 asimd
NCNN_EXPORT int cpu_support_arm_neon();
// vfpv4 = armv7 fp16 + fma
NCNN_EXPORT int cpu_support_arm_vfpv4();
// asimdhp = aarch64 asimd half precision
NCNN_EXPORT int cpu_support_arm_asimdhp();
// cpuid = aarch64 cpuid info
NCNN_EXPORT int cpu_support_arm_cpuid();
// asimddp = aarch64 asimd dot product
NCNN_EXPORT int cpu_support_arm_asimddp();
// asimdfhm = aarch64 asimd fhm
NCNN_EXPORT int cpu_support_arm_asimdfhm();
// bf16 = aarch64 bf16
NCNN_EXPORT int cpu_support_arm_bf16();
// i8mm = aarch64 i8mm
NCNN_EXPORT int cpu_support_arm_i8mm();
// sve = aarch64 sve
NCNN_EXPORT int cpu_support_arm_sve();
// sve2 = aarch64 sve2
NCNN_EXPORT int cpu_support_arm_sve2();
// svebf16 = aarch64 svebf16
NCNN_EXPORT int cpu_support_arm_svebf16();
// svei8mm = aarch64 svei8mm
NCNN_EXPORT int cpu_support_arm_svei8mm();
// svef32mm = aarch64 svef32mm
NCNN_EXPORT int cpu_support_arm_svef32mm();

// avx = x86 avx
NCNN_EXPORT int cpu_support_x86_avx();
// fma = x86 fma
NCNN_EXPORT int cpu_support_x86_fma();
// xop = x86 xop
NCNN_EXPORT int cpu_support_x86_xop();
// f16c = x86 f16c
NCNN_EXPORT int cpu_support_x86_f16c();
// avx2 = x86 avx2 + fma + f16c
NCNN_EXPORT int cpu_support_x86_avx2();
// avx_vnni = x86 avx vnni
NCNN_EXPORT int cpu_support_x86_avx_vnni();
// avx512 = x86 avx512f + avx512cd + avx512bw + avx512dq + avx512vl
NCNN_EXPORT int cpu_support_x86_avx512();
// avx512_vnni = x86 avx512 vnni
NCNN_EXPORT int cpu_support_x86_avx512_vnni();
// avx512_bf16 = x86 avx512 bf16
NCNN_EXPORT int cpu_support_x86_avx512_bf16();
// avx512_fp16 = x86 avx512 fp16
NCNN_EXPORT int cpu_support_x86_avx512_fp16();

// lsx = loongarch lsx
NCNN_EXPORT int cpu_support_loongarch_lsx();
// lasx = loongarch lasx
NCNN_EXPORT int cpu_support_loongarch_lasx();

// msa = mips mas
NCNN_EXPORT int cpu_support_mips_msa();
// mmi = loongson mmi
NCNN_EXPORT int cpu_support_loongson_mmi();

// v = riscv vector
NCNN_EXPORT int cpu_support_riscv_v();
// zfh = riscv half-precision float
NCNN_EXPORT int cpu_support_riscv_zfh();
// vlenb = riscv vector length in bytes
NCNN_EXPORT int cpu_riscv_vlenb();

// cpu info
NCNN_EXPORT int get_cpu_count();
NCNN_EXPORT int get_little_cpu_count();
NCNN_EXPORT int get_big_cpu_count();

NCNN_EXPORT int get_physical_cpu_count();
NCNN_EXPORT int get_physical_little_cpu_count();
NCNN_EXPORT int get_physical_big_cpu_count();

// cpu l2 varies from 64k to 1M, but l3 can be zero
NCNN_EXPORT int get_cpu_level2_cache_size();
NCNN_EXPORT int get_cpu_level3_cache_size();

// bind all threads on little clusters if powersave enabled
// affects HMP arch cpu like ARM big.LITTLE
// only implemented on android at the moment
// switching powersave is expensive and not thread-safe
// 0 = all cores enabled(default)
// 1 = only little clusters enabled
// 2 = only big clusters enabled
// return 0 if success for setter function
NCNN_EXPORT int get_cpu_powersave();
NCNN_EXPORT int set_cpu_powersave(int powersave);

// convenient wrapper
NCNN_EXPORT const CpuSet& get_cpu_thread_affinity_mask(int powersave);

// set explicit thread affinity
NCNN_EXPORT int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask);

// runtime thread affinity info
NCNN_EXPORT int is_current_thread_running_on_a53_a55();

// misc function wrapper for openmp routines
NCNN_EXPORT int get_omp_num_threads();
NCNN_EXPORT void set_omp_num_threads(int num_threads);

NCNN_EXPORT int get_omp_dynamic();
NCNN_EXPORT void set_omp_dynamic(int dynamic);

NCNN_EXPORT int get_omp_thread_num();

NCNN_EXPORT int get_kmp_blocktime();
NCNN_EXPORT void set_kmp_blocktime(int time_ms);

// need to flush denormals on Intel Chipset.
// Other architectures such as ARM can be added as needed.
// 0 = DAZ OFF, FTZ OFF
// 1 = DAZ ON , FTZ OFF
// 2 = DAZ OFF, FTZ ON
// 3 = DAZ ON,  FTZ ON
NCNN_EXPORT int get_flush_denormals();
NCNN_EXPORT int set_flush_denormals(int flush_denormals);

} // namespace ncnn

#endif // NCNN_CPU_H