# path: root/src/CMakeLists.txt
# blob: dc987991113fedbcc94524dda1d559d819adaf6d

##############################################

# Instantiate the platform.h template from this directory into the build tree.
configure_file(
    "${CMAKE_CURRENT_SOURCE_DIR}/platform.h.in"
    "${CMAKE_CURRENT_BINARY_DIR}/platform.h"
)

# Place source files into a named folder of the IDE project view.
#
#   ncnn_src_string - NAME of the list variable holding the files (not the
#                     files themselves); several names may be given separated
#                     by spaces.
#   folder          - folder name; "/" separators are converted to "\".
#
# Example: ncnn_src_group(ncnn_SRCS "sources")
function(ncnn_src_group ncnn_src_string folder)
    # Turn every forward slash of the folder name into a backslash.
    string(REPLACE "/" "\\" _group_name "${folder}")

    # Split the space separated variable names into a CMake list.
    string(REPLACE " " ";" _list_names ${ncnn_src_string})

    foreach(_src IN LISTS ${_list_names})
        source_group("${_group_name}" FILES "${_src}")
    endforeach()
endfunction()

# Base list of source files for the ncnn library target (passed to
# add_library() further down in this file).
set(ncnn_SRCS
    allocator.cpp
    benchmark.cpp
    blob.cpp
    c_api.cpp
    command.cpp
    cpu.cpp
    datareader.cpp
    gpu.cpp
    layer.cpp
    mat.cpp
    mat_pixel.cpp
    mat_pixel_affine.cpp
    mat_pixel_drawing.cpp
    mat_pixel_resize.cpp
    mat_pixel_rotate.cpp
    modelbin.cpp
    net.cpp
    option.cpp
    paramdict.cpp
    pipeline.cpp
    pipelinecache.cpp
    simpleocv.cpp
    simpleomp.cpp
    simplestl.cpp
    simplemath.cpp
    simplevk.cpp
)

# mat_pixel_android.cpp is only added when configuring for Android.
if(ANDROID)
    list(APPEND ncnn_SRCS mat_pixel_android.cpp)
endif()

# Show the files collected so far under the "sources" folder in IDE projects.
# The variable NAME is passed, not its contents.
ncnn_src_group(ncnn_SRCS "sources")

# Directory-scope include path for the architecture specific layer sources
# (layer/<NCNN_TARGET_ARCH>).
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/layer/${NCNN_TARGET_ARCH}")

# Helper script from the project's cmake/ directory.
# NOTE(review): presumably provides the SPIR-V header generation used by the
# shader macro -- confirm in that file.
include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/ncnn_generate_shader_spv_header.cmake)

# ncnn macro
# NOTE(review): these two scripts are expected to define ncnn_add_shader() and
# ncnn_add_layer(), both of which are called below.
include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/ncnn_add_shader.cmake)
include(${CMAKE_CURRENT_SOURCE_DIR}/../cmake/ncnn_add_layer.cmake)

# look for vulkan compute shader and compile
# Start from an empty list; it is later used as the DEPENDS list of the
# ncnn-generate-spirv target.
set(NCNN_SHADER_SPV_HEX_FILES)

# Both counters start at 0.
# NOTE(review): presumably advanced by ncnn_add_layer() / ncnn_add_shader() to
# number the generated enums -- confirm in the included macros.
set(__LAYER_TYPE_ENUM_INDEX 0)
set(__LAYER_SHADER_TYPE_ENUM_INDEX 0)

# layer implementation
# One ncnn_add_layer() call per layer type.
# NOTE(review): the call order is presumably significant (it likely fixes the
# generated layer type enum values through __LAYER_TYPE_ENUM_INDEX), so new
# layers should be appended rather than inserted -- confirm in
# cmake/ncnn_add_layer.cmake.
# NOTE(review): ArgMax and SPP pass an extra OFF argument; presumably this
# makes them disabled by default -- confirm in the macro.
ncnn_add_layer(AbsVal)
ncnn_add_layer(ArgMax OFF)
ncnn_add_layer(BatchNorm)
ncnn_add_layer(Bias)
ncnn_add_layer(BNLL)
ncnn_add_layer(Concat)
ncnn_add_layer(Convolution)
ncnn_add_layer(Crop)
ncnn_add_layer(Deconvolution)
ncnn_add_layer(Dropout)
ncnn_add_layer(Eltwise)
ncnn_add_layer(ELU)
ncnn_add_layer(Embed)
ncnn_add_layer(Exp)
ncnn_add_layer(Flatten)
ncnn_add_layer(InnerProduct)
ncnn_add_layer(Input)
ncnn_add_layer(Log)
ncnn_add_layer(LRN)
ncnn_add_layer(MemoryData)
ncnn_add_layer(MVN)
ncnn_add_layer(Pooling)
ncnn_add_layer(Power)
ncnn_add_layer(PReLU)
ncnn_add_layer(Proposal)
ncnn_add_layer(Reduction)
ncnn_add_layer(ReLU)
ncnn_add_layer(Reshape)
ncnn_add_layer(ROIPooling)
ncnn_add_layer(Scale)
ncnn_add_layer(Sigmoid)
ncnn_add_layer(Slice)
ncnn_add_layer(Softmax)
ncnn_add_layer(Split)
ncnn_add_layer(SPP OFF)
ncnn_add_layer(TanH)
ncnn_add_layer(Threshold)
ncnn_add_layer(Tile)
ncnn_add_layer(RNN)
ncnn_add_layer(LSTM)
ncnn_add_layer(BinaryOp)
ncnn_add_layer(UnaryOp)
ncnn_add_layer(ConvolutionDepthWise)
ncnn_add_layer(Padding)
ncnn_add_layer(Squeeze)
ncnn_add_layer(ExpandDims)
ncnn_add_layer(Normalize)
ncnn_add_layer(Permute)
ncnn_add_layer(PriorBox)
ncnn_add_layer(DetectionOutput)
ncnn_add_layer(Interp)
ncnn_add_layer(DeconvolutionDepthWise)
ncnn_add_layer(ShuffleChannel)
ncnn_add_layer(InstanceNorm)
ncnn_add_layer(Clip)
ncnn_add_layer(Reorg)
ncnn_add_layer(YoloDetectionOutput)
ncnn_add_layer(Quantize)
ncnn_add_layer(Dequantize)
ncnn_add_layer(Yolov3DetectionOutput)
ncnn_add_layer(PSROIPooling)
ncnn_add_layer(ROIAlign)
ncnn_add_layer(Packing)
ncnn_add_layer(Requantize)
ncnn_add_layer(Cast)
ncnn_add_layer(HardSigmoid)
ncnn_add_layer(SELU)
ncnn_add_layer(HardSwish)
ncnn_add_layer(Noop)
ncnn_add_layer(PixelShuffle)
ncnn_add_layer(DeepCopy)
ncnn_add_layer(Mish)
ncnn_add_layer(StatisticsPooling)
ncnn_add_layer(Swish)
ncnn_add_layer(Gemm)
ncnn_add_layer(GroupNorm)
ncnn_add_layer(LayerNorm)
ncnn_add_layer(Softplus)
ncnn_add_layer(GRU)
ncnn_add_layer(MultiHeadAttention)
ncnn_add_layer(GELU)
ncnn_add_layer(Convolution1D)
ncnn_add_layer(Pooling1D)
ncnn_add_layer(ConvolutionDepthWise1D)
ncnn_add_layer(Convolution3D)
ncnn_add_layer(ConvolutionDepthWise3D)
ncnn_add_layer(Pooling3D)
ncnn_add_layer(MatMul)
ncnn_add_layer(Deconvolution1D)
ncnn_add_layer(DeconvolutionDepthWise1D)
ncnn_add_layer(Deconvolution3D)
ncnn_add_layer(DeconvolutionDepthWise3D)
ncnn_add_layer(Einsum)
ncnn_add_layer(DeformableConv2D)
ncnn_add_layer(GLU)
ncnn_add_layer(Fold)
ncnn_add_layer(Unfold)
ncnn_add_layer(GridSample)
ncnn_add_layer(CumulativeSum)
ncnn_add_layer(CopyTo)
ncnn_add_layer(Erf)
ncnn_add_layer(Diag)
ncnn_add_layer(CELU)
ncnn_add_layer(Shrink)

# Extra compute shaders registered directly through ncnn_add_shader() instead
# of through ncnn_add_layer(); only done for Vulkan builds.
if(NCNN_VULKAN)
    ncnn_add_shader(${CMAKE_CURRENT_SOURCE_DIR}/convert_ycbcr.comp)
    ncnn_add_shader(${CMAKE_CURRENT_SOURCE_DIR}/layer/vulkan/shader/vulkan_activation.comp)
endif()

# Custom target depending on every file listed in NCNN_SHADER_SPV_HEX_FILES.
# The ncnn library is made to depend on this target later in this file.
add_custom_target(ncnn-generate-spirv DEPENDS ${NCNN_SHADER_SPV_HEX_FILES})

# Instantiate every generated header from its "<name>.in" template in this
# directory into the build tree, in the order listed.
foreach(_ncnn_generated_header
        layer_declaration.h
        layer_registry.h
        layer_type_enum.h
        layer_shader_registry.h
        layer_shader_spv_data.h
        layer_shader_type_enum.h)
    configure_file(${_ncnn_generated_header}.in ${CMAKE_CURRENT_BINARY_DIR}/${_ncnn_generated_header})
endforeach()

# Library kind is selected by NCNN_SHARED_LIB.
if(NCNN_SHARED_LIB)
    set(_ncnn_library_type SHARED)
else()
    set(_ncnn_library_type STATIC)
endif()
add_library(ncnn ${_ncnn_library_type} ${ncnn_SRCS})

# Debug builds carry a "d" suffix in the library file name.
set_target_properties(ncnn PROPERTIES DEBUG_POSTFIX "d")

# SOVERSION is always the major version; only the VERSION string differs
# between Apple and the other platforms.
if(APPLE OR IOS)
    # macos / ios only accepts a.b.c.d.e where a=24bit b/c/d/e=10bit
    # 20201228 to 20.12.28
    string(SUBSTRING ${NCNN_VERSION} 2 2 NCNN_VERSION_YEAR)
    string(SUBSTRING ${NCNN_VERSION} 4 2 NCNN_VERSION_MONTH)
    string(SUBSTRING ${NCNN_VERSION} 6 2 NCNN_VERSION_DAY)
    set(NCNN_VERSION_APPLE_STRING ${NCNN_VERSION_MAJOR}.${NCNN_VERSION_MINOR}.${NCNN_VERSION_YEAR}.${NCNN_VERSION_MONTH}.${NCNN_VERSION_DAY})
    set(_ncnn_target_version ${NCNN_VERSION_APPLE_STRING})
else()
    set(_ncnn_target_version ${NCNN_VERSION_STRING})
endif()
set_target_properties(ncnn PROPERTIES
    VERSION ${_ncnn_target_version}
    SOVERSION ${NCNN_VERSION_MAJOR})

# Generate the export macro header for the ncnn target; the resulting
# ncnn_export.h in the current binary directory is installed with the SDK.
include(GenerateExportHeader)
generate_export_header(ncnn)

if(NOT NCNN_SHARED_LIB)
    # Static build: compile ncnn with NCNN_STATIC_DEFINE.
    # Added as a target compile definition rather than by assigning the
    # deprecated COMPILE_FLAGS property, which would replace any flags already
    # stored in that property for this target.
    target_compile_definitions(ncnn PRIVATE NCNN_STATIC_DEFINE)
endif()

if(NCNN_SIMPLESTL AND NOT NCNN_SIMPLEMATH)
    # link math lib explicitly
    target_link_libraries(ncnn PUBLIC m)
endif()

# Include paths seen by ncnn and by its consumers: the source and binary
# directories while building, include/ncnn once installed.
target_include_directories(ncnn PUBLIC
    "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>"
    "$<INSTALL_INTERFACE:include/ncnn>"
    "$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>"
)

# The layer/ directory is only needed while compiling ncnn itself.
target_include_directories(ncnn PRIVATE
    "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/layer>"
)

# OpenMP configuration for the ncnn target; skipped unless NCNN_OPENMP is set.
if(NCNN_OPENMP)
    # With NCNN_SIMPLEOMP no OpenMP package is searched for.
    if(NOT NCNN_SIMPLEOMP)
        find_package(OpenMP)
        # OpenMP was found but the OpenMP::OpenMP_CXX imported target does not
        # exist: pass the flags reported by find_package() by hand.
        if(NOT TARGET OpenMP::OpenMP_CXX AND (OpenMP_CXX_FOUND OR OPENMP_FOUND))
            target_compile_options(ncnn PRIVATE ${OpenMP_CXX_FLAGS})
        endif()
    endif()

    if(NCNN_SIMPLEOMP OR OpenMP_CXX_FOUND OR OPENMP_FOUND)
        if(NCNN_CMAKE_VERBOSE)
            message("Building with OpenMP")
        endif()

        if(NCNN_SIMPLEOMP)
            # Only the compiler flag is added in this branch; nothing is linked.
            if(IOS OR APPLE)
                target_compile_options(ncnn PRIVATE -Xpreprocessor -fopenmp)
            else()
                target_compile_options(ncnn PRIVATE -fopenmp)
            endif()
        elseif(ANDROID_NDK_MAJOR AND (ANDROID_NDK_MAJOR GREATER 20))
            # Android NDK newer than major version 20: -static-openmp is
            # passed at link time, and the link flags are PUBLIC so they
            # propagate to targets linking ncnn.
            target_compile_options(ncnn PRIVATE -fopenmp)
            target_link_libraries(ncnn PUBLIC -fopenmp -static-openmp)
            # see cpu.cpp __wrap___kmp_abort_process comment for the linker magic
            target_link_libraries(ncnn PUBLIC -Wl,-wrap,__kmp_affinity_determine_capable)
        elseif(OpenMP_CXX_FOUND)
            # Preferred path: link the imported OpenMP target.
            target_link_libraries(ncnn PUBLIC OpenMP::OpenMP_CXX)
        else()
            # Only the legacy OPENMP_FOUND result is set: pass the OpenMP flags
            # to the linker as well.
            target_link_libraries(ncnn PRIVATE "${OpenMP_CXX_FLAGS}")
        endif()
    endif()
endif()

# Thread library linkage; only when NCNN_THREADS is enabled.
if(NCNN_THREADS)
    # Hints for the Threads find module: prefer pthreads and the -pthread flag.
    set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
    set(THREADS_PREFER_PTHREAD_FLAG TRUE)
    find_package(Threads REQUIRED)

    # Link the imported target when the find module provides one.
    if(TARGET Threads::Threads)
        target_link_libraries(ncnn PUBLIC Threads::Threads)
    endif()
    # With simpleomp or simplestl, pthread is additionally linked by name.
    if(NCNN_SIMPLEOMP OR NCNN_SIMPLESTL)
        target_link_libraries(ncnn PUBLIC pthread)
    endif()
endif()

# Vulkan linkage for the ncnn target; skipped unless NCNN_VULKAN is set.
if(NCNN_VULKAN)
    if(NCNN_SIMPLEVK)
        # simplevk: no find_package(Vulkan) is performed in this branch.
        if(APPLE)
            # simplevk always use static vulkan linkage on apple platform
            if(NOT DEFINED Vulkan_LIBRARY)
                message(WARNING "Vulkan_LIBRARY shall be defined for simplevk static linkage on APPLE platforms")
            endif()
            target_link_libraries(ncnn PUBLIC ${Vulkan_LIBRARY})
        else()
            # Other platforms only link the dynamic loading library (CMAKE_DL_LIBS).
            target_link_libraries(ncnn PRIVATE ${CMAKE_DL_LIBS})
        endif()
    else()
        # First try quietly; on failure extend CMAKE_MODULE_PATH with
        # SDK-relative directories taken from the VULKAN_SDK environment
        # variable and retry as REQUIRED.
        find_package(Vulkan QUIET)
        if(NOT Vulkan_FOUND)
            if(DEFINED ENV{VULKAN_SDK})
                if(CMAKE_SYSTEM_NAME MATCHES "Linux")
                    list(APPEND CMAKE_MODULE_PATH "$ENV{VULKAN_SDK}/../source/VulkanTools/cmake")
                elseif(CMAKE_SYSTEM_NAME MATCHES "Windows")
                    list(APPEND CMAKE_MODULE_PATH "$ENV{VULKAN_SDK}/Samples/cmake")
                elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin")
                    message(WARNING "Failed to find vulkan since cmake is too old\n"
                        "cmake >= 3.7 required. Consider `brew upgrade cmake`")
                endif()
            else()
                # Without VULKAN_SDK there is nothing else to search: stop here.
                message(FATAL_ERROR "Error: CMake didn't find Vulkan. Please set VULKAN_SDK env var, e.g.:\n"
                    "Linux: export VULKAN_SDK=~/soft/vulkansdk/1.2.148.0/x86_64\n"
                    "Windows: set VULKAN_SDK=E:/lib/VulkanSDK/1.2.148.0\n"
                    "MacOS: export VULKAN_SDK=~/soft/vulkansdk/1.2.148.0/macOS\n"
                )
            endif()
            find_package(Vulkan REQUIRED)
        endif()

        target_link_libraries(ncnn PUBLIC Vulkan::Vulkan)
    endif()

    # Support mac platform static library compilation
    # The system frameworks below are located with find_library() and linked
    # privately for static, non-iOS Apple builds.
    if(NOT NCNN_SHARED_LIB AND APPLE AND NOT CMAKE_SYSTEM_NAME STREQUAL "iOS")
        find_library(CoreFoundation NAMES CoreFoundation)
        find_library(Foundation NAMES Foundation)
        find_library(QuartzCore NAMES QuartzCore)
        find_library(CoreGraphics NAMES CoreGraphics)
        find_library(Cocoa NAMES Cocoa)
        find_library(Metal NAMES Metal)
        find_library(IOKit NAMES IOKit)
        find_library(IOSurface NAMES IOSurface)
        list(APPEND vulkan_dependent_LINK_LIBRARIES
            ${Metal}
            ${IOKit}
            ${IOSurface}
            ${QuartzCore}
            ${CoreGraphics}
            ${Cocoa}
            ${Foundation}
            ${CoreFoundation}
        )
        target_link_libraries(ncnn PRIVATE ${vulkan_dependent_LINK_LIBRARIES})
    endif()

    # link in-house glslang
    # The parent of this directory is added as a private include path and the
    # glslang / SPIRV libraries are linked privately.
    target_include_directories(ncnn PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../>)
    target_link_libraries(ncnn PRIVATE glslang SPIRV)
endif()

# Android platform libraries, linked only when the platform API is enabled
# and ANDROID_NDK is set.
if(NCNN_PLATFORM_API AND ANDROID_NDK)
    target_link_libraries(ncnn PUBLIC android jnigraphics log)
endif()

# On Windows NOMINMAX is defined for ncnn and, being PUBLIC, for its consumers.
if(WIN32)
    target_compile_definitions(ncnn PUBLIC NOMINMAX)
endif()

# General compiler flags for the ncnn target.
#
# First branch: MSVC, or Clang simulating MSVC with the MSVC-style command
# line front end. Second branch: every other compiler (GCC-style options).
#
# Release-only options use the $<CONFIG:Release> generator expression instead
# of testing CMAKE_BUILD_TYPE at configure time: CMAKE_BUILD_TYPE is not used
# by multi-config generators (Visual Studio, Xcode, Ninja Multi-Config), so a
# configure-time test never enabled these options there.
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
    target_compile_definitions(ncnn PRIVATE _SCL_SECURE_NO_WARNINGS _CRT_SECURE_NO_DEPRECATE)

    # fast floating point model, Release configuration only
    target_compile_options(ncnn PRIVATE $<$<CONFIG:Release>:/fp:fast>)

    if(NCNN_SHARED_LIB)
        # msvc argues about stl string and vector uses in exported functions
        target_compile_options(ncnn PRIVATE /wd4251)
    endif()
else()
    target_compile_options(ncnn PRIVATE -Wall -Wextra -Wno-unused-function)

    if(NOT NCNN_DISABLE_PIC)
        set_target_properties(ncnn PROPERTIES POSITION_INDEPENDENT_CODE ON INTERFACE_POSITION_INDEPENDENT_CODE ON)
    endif()

    # -Ofast is skipped for Emscripten and for GCC older than 4.6; that part
    # of the decision does not depend on the build configuration.
    if(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND NOT (CMAKE_CXX_COMPILER_ID MATCHES "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.6))
        target_compile_options(ncnn PRIVATE $<$<CONFIG:Release>:-Ofast>)
    endif()

    target_compile_options(ncnn PRIVATE $<$<CONFIG:Release>:-ffast-math>)

    # target_compile_options(ncnn PRIVATE -march=native)
    target_compile_options(ncnn PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
    if(NCNN_SHARED_LIB AND NCNN_ENABLE_LTO)
        set_target_properties(ncnn PROPERTIES INTERPROCEDURAL_OPTIMIZATION ON)
    endif()
endif()

# Disable RTTI: pick the spelling for the compiler front end, then apply it
# PUBLIC so that consumers of ncnn are compiled the same way.
if(NCNN_DISABLE_RTTI)
    if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
        set(_ncnn_no_rtti_options /GR-)
    else()
        set(_ncnn_no_rtti_options -fno-rtti)
    endif()
    target_compile_options(ncnn PUBLIC ${_ncnn_no_rtti_options})
endif()

# Disable C++ exceptions, same scheme as above.
if(NCNN_DISABLE_EXCEPTION)
    if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
        set(_ncnn_no_exception_options /EHsc /D_HAS_EXCEPTIONS=0)
    else()
        set(_ncnn_no_exception_options -fno-exceptions)
    endif()
    target_compile_options(ncnn PUBLIC ${_ncnn_no_exception_options})
endif()

# x86 instruction set options for the ncnn target.
if(NCNN_TARGET_ARCH STREQUAL "x86")
    # Evaluate once whether the compiler takes MSVC-style options: MSVC
    # itself, or Clang simulating MSVC with the MSVC front end variant.
    if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
        set(_ncnn_x86_msvc_style TRUE)
    else()
        set(_ncnn_x86_msvc_style FALSE)
    endif()

    # SSE2 baseline; applied regardless of NCNN_RUNTIME_CPU.
    if(NCNN_SSE2)
        if(_ncnn_x86_msvc_style)
            # /arch:SSE2 is only passed for 32-bit builds.
            if(CMAKE_SIZEOF_VOID_P EQUAL 4)
                target_compile_options(ncnn PRIVATE /arch:SSE2)
            endif()
            target_compile_options(ncnn PRIVATE /D__SSE2__)
        else()
            target_compile_options(ncnn PRIVATE -msse2 -msse)
            if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
                target_compile_options(ncnn PRIVATE -msimd128)
            endif()
        endif()
    endif()

    # The wider instruction sets are only applied to the whole target when
    # NCNN_RUNTIME_CPU is off. Exactly one tier is chosen: AVX512, else FMA,
    # else AVX.
    if(NOT NCNN_RUNTIME_CPU)
        if(NCNN_AVX512)
            if(_ncnn_x86_msvc_style)
                target_compile_options(ncnn PRIVATE /arch:AVX512 /D__SSE4_1__ /D__FMA__ /D__F16C__)
                if(NCNN_AVX512VNNI)
                    target_compile_options(ncnn PRIVATE /D__AVX512VNNI__)
                endif()
            else()
                target_compile_options(ncnn PRIVATE -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl -mfma -mf16c)
                if(NCNN_AVX512VNNI)
                    target_compile_options(ncnn PRIVATE -mavx512vnni)
                endif()
                if(NCNN_AVX512BF16)
                    target_compile_options(ncnn PRIVATE -mavx512bf16)
                endif()
                if(NCNN_AVX512FP16)
                    target_compile_options(ncnn PRIVATE -mavx512fp16)
                endif()
            endif()
        elseif(NCNN_FMA)
            if(_ncnn_x86_msvc_style)
                if(NCNN_AVX2)
                    target_compile_options(ncnn PRIVATE /arch:AVX2 /D__SSE4_1__ /D__FMA__)
                else()
                    target_compile_options(ncnn PRIVATE /arch:AVX /D__SSE4_1__ /D__FMA__)
                endif()
                # AVXVNNI takes precedence over XOP.
                if(NCNN_AVXVNNI)
                    target_compile_options(ncnn PRIVATE /D__AVXVNNI__)
                elseif(NCNN_XOP)
                    target_compile_options(ncnn PRIVATE /D__XOP__)
                endif()
                if(NCNN_F16C)
                    target_compile_options(ncnn PRIVATE /D__F16C__)
                endif()
            else()
                if(NCNN_AVX2)
                    target_compile_options(ncnn PRIVATE -mavx2 -mfma)
                else()
                    target_compile_options(ncnn PRIVATE -mavx -mfma)
                endif()
                # AVXVNNI takes precedence over XOP.
                if(NCNN_AVXVNNI)
                    target_compile_options(ncnn PRIVATE -mavxvnni)
                elseif(NCNN_XOP)
                    target_compile_options(ncnn PRIVATE -mxop)
                endif()
                if(NCNN_F16C)
                    target_compile_options(ncnn PRIVATE -mf16c)
                endif()
            endif()
        elseif(NCNN_AVX)
            if(_ncnn_x86_msvc_style)
                target_compile_options(ncnn PRIVATE /arch:AVX /D__SSE4_1__)
                if(NCNN_XOP)
                    target_compile_options(ncnn PRIVATE /D__XOP__)
                endif()
                if(NCNN_F16C)
                    target_compile_options(ncnn PRIVATE /D__F16C__)
                endif()
            else()
                target_compile_options(ncnn PRIVATE -mavx)
                if(NCNN_XOP)
                    target_compile_options(ncnn PRIVATE -mxop)
                endif()
                if(NCNN_F16C)
                    target_compile_options(ncnn PRIVATE -mf16c)
                endif()
            endif()
        endif()
    endif()
endif()

# 32-bit ARM options for the ncnn target.
if(NCNN_TARGET_ARCH STREQUAL "arm" AND CMAKE_SIZEOF_VOID_P EQUAL 4)
    # Evaluate once whether the compiler takes MSVC-style options: MSVC
    # itself, or Clang simulating MSVC with the MSVC front end variant.
    if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
        set(_ncnn_arm32_msvc_style TRUE)
    else()
        set(_ncnn_arm32_msvc_style FALSE)
    endif()

    if(_ncnn_arm32_msvc_style)
        # always enable neon for msvc arm
        target_compile_options(ncnn PRIVATE /D__arm__ /D__ARM_NEON)
    endif()

    # Target-wide FPU selection only happens when NCNN_RUNTIME_CPU is off.
    if(NOT NCNN_RUNTIME_CPU)
        if(NCNN_VFPV4)
            if(_ncnn_arm32_msvc_style)
                target_compile_options(ncnn PRIVATE /arch:VFPv4 /D__ARM_FP=0x0E)
            elseif(NCNN_COMPILER_SUPPORT_ARM_VFPV4)
                target_compile_options(ncnn PRIVATE -mfpu=neon-vfpv4)
            elseif(NCNN_COMPILER_SUPPORT_ARM_VFPV4_FP16)
                target_compile_options(ncnn PRIVATE -mfpu=neon-vfpv4 -mfp16-format=ieee)
            endif()
        elseif(_ncnn_arm32_msvc_style)
            # Without VFPv4 the MSVC-style build still defines __ARM_FP.
            target_compile_options(ncnn PRIVATE /D__ARM_FP=0x0C)
        endif()
    endif()
endif()

# 64-bit ARM options for the ncnn target.
if(NCNN_TARGET_ARCH STREQUAL "arm" AND CMAKE_SIZEOF_VOID_P EQUAL 8)
    # Evaluate once whether the compiler takes MSVC-style options: MSVC
    # itself, or Clang simulating MSVC with the MSVC front end variant.
    if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC" OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC" AND CMAKE_CXX_COMPILER_FRONTEND_VARIANT MATCHES "MSVC"))
        set(_ncnn_arm64_msvc_style TRUE)
    else()
        set(_ncnn_arm64_msvc_style FALSE)
    endif()

    if(_ncnn_arm64_msvc_style)
        # always enable neon and vfpv4 for msvc arm64
        target_compile_options(ncnn PRIVATE /D__arm__ /D__aarch64__ /D__ARM_NEON /D__ARM_FP=0x0E)
    endif()

    # Target-wide architecture level, chosen only when NCNN_RUNTIME_CPU is
    # off. Exactly one tier applies: armv8.6+sve, else armv8.4, else armv8.2.
    # GCC-style compilers accumulate a single -march string in ARM_MARCH_FLAG.
    if(NOT NCNN_RUNTIME_CPU)
        if(NCNN_ARM86SVE)
            if(_ncnn_arm64_msvc_style)
                # TODO add support for sve family
                target_compile_options(ncnn PRIVATE /arch:armv8.6 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML /D__ARM_FEATURE_BF16_VECTOR_ARITHMETIC /D__ARM_FEATURE_MATMUL_INT8)
                # NCNN_ARM86SVE2, NCNN_ARM86SVEBF16, NCNN_ARM86SVEI8MM and
                # NCNN_ARM86SVEF32MM add nothing for MSVC-style compilers yet.
            else()
                set(ARM_MARCH_FLAG "-march=armv8.6-a+fp16+dotprod+sve")
                if(NCNN_ARM86SVE2)
                    string(APPEND ARM_MARCH_FLAG "+sve2")
                endif()
                if(NCNN_ARM86SVEBF16)
                    string(APPEND ARM_MARCH_FLAG "+bf16")
                endif()
                if(NCNN_ARM86SVEI8MM)
                    string(APPEND ARM_MARCH_FLAG "+i8mm")
                endif()
                if(NCNN_ARM86SVEF32MM)
                    string(APPEND ARM_MARCH_FLAG "+f32mm")
                endif()
            endif()
        elseif(NCNN_ARM84BF16 OR NCNN_ARM84I8MM)
            if(_ncnn_arm64_msvc_style)
                target_compile_options(ncnn PRIVATE /arch:armv8.4 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC /D__ARM_FEATURE_DOTPROD /D__ARM_FEATURE_FP16_FML)
                if(NCNN_ARM84BF16)
                    target_compile_options(ncnn PRIVATE /D__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)
                endif()
                if(NCNN_ARM84I8MM)
                    target_compile_options(ncnn PRIVATE /D__ARM_FEATURE_MATMUL_INT8)
                endif()
            else()
                set(ARM_MARCH_FLAG "-march=armv8.4-a+fp16+dotprod")
                if(NCNN_ARM84BF16)
                    string(APPEND ARM_MARCH_FLAG "+bf16")
                endif()
                if(NCNN_ARM84I8MM)
                    string(APPEND ARM_MARCH_FLAG "+i8mm")
                endif()
            endif()
        elseif(NCNN_ARM82)
            if(_ncnn_arm64_msvc_style)
                target_compile_options(ncnn PRIVATE /arch:armv8.2 /D__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
                if(NCNN_ARM82DOT)
                    target_compile_options(ncnn PRIVATE /D__ARM_FEATURE_DOTPROD)
                endif()
                if(NCNN_ARM82FP16FML)
                    target_compile_options(ncnn PRIVATE /D__ARM_FEATURE_FP16_FML)
                endif()
            else()
                set(ARM_MARCH_FLAG "-march=armv8.2-a+fp16")
                if(NCNN_ARM82DOT)
                    string(APPEND ARM_MARCH_FLAG "+dotprod")
                endif()
                if(NCNN_ARM82FP16FML)
                    string(APPEND ARM_MARCH_FLAG "+fp16fml")
                endif()
            endif()
        endif()
    endif()

    # Expands to nothing when no branch above assigned ARM_MARCH_FLAG.
    target_compile_options(ncnn PRIVATE ${ARM_MARCH_FLAG})

    if(ANDROID_NDK_MAJOR AND (ANDROID_NDK_MAJOR GREATER_EQUAL 23))
        # llvm 12 in ndk-23 enables out-of-line atomics by default
        # disable this feature for fixing linking atomic builtins issue with old ndk
        target_compile_options(ncnn PRIVATE -mno-outline-atomics)
    endif()

    # Link mvec when a library of that name can be found.
    find_library(MVEC_LIB "mvec")
    if(MVEC_LIB)
        target_link_libraries(ncnn PRIVATE mvec)
    endif()
endif()

# MIPS: MSA and Loongson MMI are enabled independently of each other, and
# only when NCNN_RUNTIME_CPU is off.
if(NCNN_TARGET_ARCH STREQUAL "mips" AND NOT NCNN_RUNTIME_CPU)
    if(NCNN_MSA)
        target_compile_options(ncnn PRIVATE -mmsa)
    endif()
    if(NCNN_MMI)
        target_compile_options(ncnn PRIVATE -mloongson-mmi)
    endif()
endif()

# LoongArch: LASX is only considered when LSX is enabled, and both only when
# NCNN_RUNTIME_CPU is off.
if(NCNN_TARGET_ARCH STREQUAL "loongarch" AND NOT NCNN_RUNTIME_CPU AND NCNN_LSX)
    target_compile_options(ncnn PRIVATE -mlsx)
    if(NCNN_LASX)
        target_compile_options(ncnn PRIVATE -mlasx)
    endif()
endif()

# RISC-V vector options: skipped for C906 builds, when NCNN_RUNTIME_CPU is on,
# or when NCNN_RVV is off. The first supported -march variant wins.
if(NCNN_TARGET_ARCH STREQUAL "riscv" AND NOT C906 AND NOT NCNN_RUNTIME_CPU AND NCNN_RVV)
    if(NCNN_COMPILER_SUPPORT_RVV_ZFH)
        target_compile_options(ncnn PRIVATE -march=rv64gcv_zfh)
    elseif(NCNN_COMPILER_SUPPORT_RVV_ZVFH)
        target_compile_options(ncnn PRIVATE -march=rv64gcv_zfh_zvfh0p1 -menable-experimental-extensions -D__fp16=_Float16)
    elseif(NCNN_COMPILER_SUPPORT_RVV)
        target_compile_options(ncnn PRIVATE -march=rv64gcv)
    endif()
endif()

# ppc64le with VSX: the x86 intrinsics macros are defined so that the SSE code
# paths are compiled and translated to VSX.

# Auto-translate SSE2 to VSX if compiler is new enough.
if(NCNN_PPC64LE_VSX AND NCNN_VSX_SSE2)
    target_compile_options(ncnn PRIVATE -DNO_WARN_X86_INTRINSICS -D__SSE2__)
endif()

# Auto-translate SSE4.1 to VSX if compiler is new enough.
if(NCNN_PPC64LE_VSX AND NCNN_VSX_SSE41)
    target_compile_options(ncnn PRIVATE -DNO_WARN_X86_INTRINSICS -D__SSE4_1__)
endif()

# Coverage instrumentation; PUBLIC, so the flags also apply to targets that
# link ncnn.
if(NCNN_COVERAGE)
    target_compile_options(ncnn PUBLIC -coverage -fprofile-arcs -ftest-coverage)
    target_link_libraries(ncnn PUBLIC -coverage -lgcov)
endif()

# AddressSanitizer; the flag is needed both when compiling and when linking.
if(NCNN_ASAN)
    target_compile_options(ncnn PUBLIC -fsanitize=address)
    target_link_libraries(ncnn PUBLIC -fsanitize=address)
endif()

# Build the ncnn-generate-spirv target before the library itself.
add_dependencies(ncnn ncnn-generate-spirv)

# Install rules for the SDK: library, public headers, CMake package files and
# the pkg-config file. Skipped unless NCNN_INSTALL_SDK is set.
if(NCNN_INSTALL_SDK)
    # Provides CMAKE_INSTALL_LIBDIR / BINDIR / INCLUDEDIR.
    include(GNUInstallDirs)

    install(TARGETS ncnn EXPORT ncnn
        ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
        LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
        RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
    )
    # Public headers: the hand written ones from this directory plus the
    # generated ones from the build tree.
    install(FILES
        allocator.h
        benchmark.h
        blob.h
        c_api.h
        command.h
        cpu.h
        datareader.h
        gpu.h
        layer.h
        layer_shader_type.h
        layer_type.h
        mat.h
        modelbin.h
        net.h
        option.h
        paramdict.h
        pipeline.h
        pipelinecache.h
        simpleocv.h
        simpleomp.h
        simplestl.h
        simplemath.h
        simplevk.h
        vulkan_header_fix.h
        ${CMAKE_CURRENT_BINARY_DIR}/ncnn_export.h
        ${CMAKE_CURRENT_BINARY_DIR}/layer_shader_type_enum.h
        ${CMAKE_CURRENT_BINARY_DIR}/layer_type_enum.h
        ${CMAKE_CURRENT_BINARY_DIR}/platform.h
        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/ncnn
    )
    # CMake package: the exported target set and the ncnnConfig.cmake
    # generated from its template (@ONLY: only @VAR@ references are replaced).
    install(EXPORT ncnn DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ncnn)
    configure_file(${CMAKE_CURRENT_LIST_DIR}/../cmake/ncnnConfig.cmake.in ncnnConfig.cmake @ONLY)
    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ncnnConfig.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ncnn)
    # pkgconfig
    configure_file(ncnn.pc.in ${CMAKE_CURRENT_BINARY_DIR}/ncnn.pc @ONLY)
    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ncnn.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
endif()

# Turn on folder support in IDE generators and show both the library and the
# SPIR-V generation target under the "libncnn" folder.
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
set_target_properties(ncnn ncnn-generate-spirv PROPERTIES FOLDER "libncnn")