diff --git a/.github/workflows/android-arm-cpu.yml b/.github/workflows/android-arm-cpu.yml index d7515d45a..15cf7a16d 100644 --- a/.github/workflows/android-arm-cpu.yml +++ b/.github/workflows/android-arm-cpu.yml @@ -13,19 +13,19 @@ jobs: outputs: CONDITION: ${{ steps.preflight.outputs.CONDITION }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Preflight id: preflight run: | - echo ::set-output name=CONDITION::0 - ./scripts/.ci/preflight.sh android || ret=$? && echo $ret && echo ::set-output name=CONDITION::$ret + echo "CONDITION=0" >> $GITHUB_OUTPUT + ./scripts/.ci/preflight.sh android || ret=$? && echo $ret && echo "CONDITION=$ret" >> $GITHUB_OUTPUT android: needs: [setup] if: ${{ needs.setup.outputs.CONDITION != '11' }} runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: configure run: sudo apt-get install attr - name: build diff --git a/.github/workflows/ios-cpu.yml b/.github/workflows/ios-cpu.yml index 3aeb4af4c..da1beae88 100644 --- a/.github/workflows/ios-cpu.yml +++ b/.github/workflows/ios-cpu.yml @@ -13,18 +13,18 @@ jobs: outputs: CONDITION: ${{ steps.preflight.outputs.CONDITION }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Preflight id: preflight run: | - echo ::set-output name=CONDITION::0 - ./scripts/.ci/preflight.sh ios || ret=$? && echo $ret && echo ::set-output name=CONDITION::$ret + echo "CONDITION=0" >> $GITHUB_OUTPUT + ./scripts/.ci/preflight.sh ios || ret=$? 
&& echo $ret && echo "CONDITION=$ret" >> $GITHUB_OUTPUT ios-iphone-os: needs: [setup] if: ${{ needs.setup.outputs.CONDITION != '11' }} runs-on: macos-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: build run: ./scripts/build_framework_ios.sh diff --git a/.github/workflows/linux-x86-cpu-gcc.yml b/.github/workflows/linux-x86-cpu-gcc.yml index 3e8a716d5..b1a5aba6a 100644 --- a/.github/workflows/linux-x86-cpu-gcc.yml +++ b/.github/workflows/linux-x86-cpu-gcc.yml @@ -9,22 +9,22 @@ on: jobs: setup: - runs-on: ubuntu-18.04 + runs-on: ubuntu-22.04 outputs: CONDITION: ${{ steps.preflight.outputs.CONDITION }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Preflight id: preflight run: | - echo ::set-output name=CONDITION::0 - ./scripts/.ci/preflight.sh x86 || ret=$? && echo $ret && echo ::set-output name=CONDITION::$ret + echo "CONDITION=0" >> $GITHUB_OUTPUT + ./scripts/.ci/preflight.sh x86 || ret=$? && echo $ret && echo "CONDITION=$ret" >> $GITHUB_OUTPUT linux-gcc: needs: [setup] if: ${{ needs.setup.outputs.CONDITION != '11' }} - runs-on: ubuntu-18.04 + runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: build run: ./scripts/build_x86_linux.sh diff --git a/.github/workflows/macos-x64-cpu.yml b/.github/workflows/macos-x64-cpu.yml index 1fa1e4924..252f995a8 100644 --- a/.github/workflows/macos-x64-cpu.yml +++ b/.github/workflows/macos-x64-cpu.yml @@ -13,19 +13,19 @@ jobs: outputs: CONDITION: ${{ steps.preflight.outputs.CONDITION }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Preflight id: preflight run: | - echo ::set-output name=CONDITION::0 - ./scripts/.ci/preflight.sh x86 || ret=$? && echo $ret && echo ::set-output name=CONDITION::$ret + echo "CONDITION=0" >> $GITHUB_OUTPUT + ./scripts/.ci/preflight.sh x86 || ret=$? 
&& echo $ret && echo "CONDITION=$ret" >> $GITHUB_OUTPUT macos-clang: needs: [setup] if: ${{ needs.setup.outputs.CONDITION != '11' }} runs-on: macos-11 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: protobuf run: brew install protobuf opencv3 - name: build diff --git a/.gitignore b/.gitignore index b37594314..b1998411b 100644 --- a/.gitignore +++ b/.gitignore @@ -494,6 +494,10 @@ model/ # opencl generated code opencl_program.cc +# cache +*.cache +*.cache~ + # opencl generated code opencl_program.cc platforms/mac/tnn.xcodeproj/project.xcworkspace/xcuserdata/darrenyao.xcuserdatad/UserInterfaceState.xcuserstate diff --git a/CMakeLists.txt b/CMakeLists.txt index e50f270da..5f62c19e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,6 @@ cmake_minimum_required(VERSION 3.1) - +#set(CMAKE_CXX_STANDARD 14) +#set(CMAKE_CXX_STANDARD_REQUIRED ON) # https://cmake.org/cmake/help/latest/policy/CMP0068.html if(POLICY CMP0068) cmake_policy(SET CMP0068 NEW) @@ -15,9 +16,9 @@ project(TNN) ENABLE_LANGUAGE(ASM) set(TNN_MAJOR_VERSION 0) -set(TNN_MINOR_VERSION 3) -set(TNN_PATCH_VERSION 0) -set(TNN_BUILD_VERSION 0) +set(TNN_MINOR_VERSION 4) +set(TNN_PATCH_VERSION 2) +set(TNN_BUILD_VERSION 11) set(TNN_VERSION "${TNN_MAJOR_VERSION}.${TNN_MINOR_VERSION}.${TNN_PATCH_VERSION}.${TNN_BUILD_VERSION}") option(TNN_CPU_ENABLE "Enable Cpu" ON) @@ -27,14 +28,16 @@ option(TNN_ARM82_ENABLE "Enable Arm82" OFF) option(TNN_METAL_ENABLE "Enable Metal" OFF) option(TNN_OPENCL_ENABLE "Enable OpenCL" OFF) option(TNN_CUDA_ENABLE "Enable CUDA" OFF) -option(TNN_DSP_ENABLE "Enable DSP" OFF) +option(TNN_SNPE_ENABLE "Enable Qualcomm SNPE DSP" OFF) option(TNN_ATLAS_ENABLE "Enable Atlas" OFF) option(TNN_TENSORRT_ENABLE "Enable TensorRT" OFF) option(TNN_OPENVINO_ENABLE "Enable OPENVINO" OFF) option(TNN_APPLE_NPU_ENABLE "Enable NPU" OFF) option(TNN_HUAWEI_NPU_ENABLE "Enable NPU" OFF) option(TNN_RK_NPU_ENABLE "Enable RKNPU" OFF) -option(TNN_JETSON_NANO_ENABLE "Enable Jetson Nano" OFF) 
+option(TNN_TNNTORCH_ENABLE "Enable TNNTorch" OFF) +option(TNN_ZIXIAO_ENABLE "Enable ZIXIAO" OFF) +option(TNN_TORCHVISION_ENABLE "Enable TorchVision" OFF) option(TNN_SYMBOL_HIDE "Enable Hide Symbol Visibility" ON) option(TNN_OPENMP_ENABLE "Enable OpenMP" OFF) option(TNN_BUILD_SHARED "Build Shared Library" ON) @@ -52,12 +55,18 @@ option(TNN_ONNX2TNN_ENABLE "Enable ONNX2TNN Converter" OFF) option(TNN_TNN2MEM_ENABLE "Enable tnn2mem" OFF) option(TNN_BUILD_BENCHMARK_TEST_LIB_ENABLE "Enable Build Benchmark Test Lib" OFF) option(TNN_GLIBCXX_USE_CXX11_ABI_ENABLE "Enable Use CXX11 ABI" ON) +option(TNN_PYBIND_ENABLE "Enable Pybind" OFF) option(TNN_METAL_FLOAT32 "Enable Metal Float32" OFF) option(TNN_COREML_FLOAT32 "Enable Float32 CoreML Model" ON) option(TNN_DYNAMIC_RANGE_QUANTIZATION_ENABLE "Enable Dynamic Range Quantization" OFF) +option(TNN_PACK_TORCH_LIB "Enable Torch Lib Pack in release" ON) +option(TNN_CUDA_JETSON_ENABLE "Enable CUDA build for Nvidia Jetson Driving Chips like Orin, Thor etc." 
OFF) set(TNN_USE_GFLAGS OFF) +set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) +set(CMAKE_SKIP_BUILD_RPATH FALSE) + message(${CMAKE_SOURCE_DIR}) message(${CMAKE_CURRENT_SOURCE_DIR}) @@ -76,6 +85,10 @@ if(TNN_PROFILER_ENABLE) set(TNN_SYMBOL_HIDE OFF) endif() +if(TNN_TORCHVISION_ENABLE) + add_definitions(-DTNN_TORCHVISION) +endif() + if(TNN_BENCHMARK_MODE) add_definitions(-DGENERATE_RESOURCE) endif() @@ -131,12 +144,17 @@ if(TNN_UNIT_TEST_ENABLE) add_definitions(-DGENERATE_RESOURCE) endif() +if(TNN_MATCHER_TEST_ENABLE) + set(TNN_SYMBOL_HIDE OFF) +endif() + if(TNN_CONVERTER_ENABLE) set(TNN_ONNX2TNN_ENABLE ON) endif() if(TNN_CONVERTER_ENABLE OR TNN_ONNX2TNN_ENABLE) set(TNN_SYMBOL_HIDE OFF) + set(TNN_PYBIND_ENABLE ON) add_definitions(-DTNN_CONVERTER_RUNTIME) endif() @@ -220,7 +238,11 @@ if(UNIX) endif() endif() -set(CMAKE_CXX_STANDARD 11) +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.0) + set(CMAKE_CXX_STANDARD 11) +else() + set(CMAKE_CXX_STANDARD 17) +endif() set(CMAKE_POSITION_INDEPENDENT_CODE ON) if(TNN_METAL_ENABLE OR TNN_APPLE_NPU_ENABLE) @@ -256,7 +278,7 @@ message(STATUS "\tArm82:\t${TNN_ARM82_ENABLE}") message(STATUS "\tMetal:\t${TNN_METAL_ENABLE}") message(STATUS "\tOpenCL:\t${TNN_OPENCL_ENABLE}") message(STATUS "\tCUDA:\t${TNN_CUDA_ENABLE}") -message(STATUS "\tDSP:\t${TNN_DSP_ENABLE}") +message(STATUS "\tSNPE:\t${TNN_SNPE_ENABLE}") message(STATUS "\tAtlas:\t${TNN_ATLAS_ENABLE}") message(STATUS "\tTensorRT:\t${TNN_TENSORRT_ENABLE}") message(STATUS "\tAppleNPU:\t${TNN_APPLE_NPU_ENABLE}") @@ -264,6 +286,8 @@ message(STATUS "\tHuaweiNPU:\t${TNN_HUAWEI_NPU_ENABLE}") message(STATUS "\tRKNPU:\t${TNN_RK_NPU_ENABLE}") message(STATUS "\tJetson Nano:\t${TNN_JETSON_NANO_ENABLE}") message(STATUS "\tOpenVINO:\t${TNN_OPENVINO_ENABLE}") +message(STATUS "\tTNNTorch:\t${TNN_TNNTORCH_ENABLE}") +message(STATUS "\tZIXIAO:\t${TNN_ZIXIAO_ENABLE}") message(STATUS "\tOpenMP:\t${TNN_OPENMP_ENABLE}") message(STATUS "\tTEST:\t${TNN_TEST_ENABLE}") 
message(STATUS "\t--Unit Test:\t${TNN_UNIT_TEST_ENABLE}") @@ -279,6 +303,7 @@ message(STATUS "\tTNN2MEM:\t${TNN_TNN2MEM_ENABLE}") message(STATUS "\tBENCHMARK Test Lib:\t${TNN_BUILD_BENCHMARK_TEST_LIB_ENABLE}") message(STATUS "\tDynamic Range Quantization:\t${TNN_DYNAMIC_RANGE_QUANTIZATION_ENABLE}") message(STATUS "\tSHARING_MEM_WITH_OPENGL:\t${SHARING_MEM_WITH_OPENGL}") +message(STATUS "\tCuda Build Jetson Chips:\t${TNN_CUDA_JETSON_ENABLE}") include_directories(include) include_directories(source) @@ -385,19 +410,58 @@ if(TNN_CUDA_ENABLE) set(TARGET_OBJECTS ${TARGET_OBJECTS} "$") endif() +if(TNN_SNPE_ENABLE) + if(ANDROID_ABI STREQUAL "armeabi-v7a") + # SNPE 2.11+ no longer support ARMv7 + message(STATUS "TNN SNPE not available on Android ARMv7") + else() + link_directories(third_party/snpe/lib/aarch64-android/) + add_subdirectory(source/tnn/device/snpe) + set(TARGET_OBJECTS ${TARGET_OBJECTS} "$") + endif() +endif() + if(TNN_HUAWEI_NPU_ENABLE) if(ANDROID_ABI STREQUAL "armeabi-v7a") link_directories( - third_party/huawei_npu/hiai_ddk_latest/armeabi-v7a/ + third_party/huawei_npu/hiai_ddk_latest/ddk/ai_ddk_lib/lib/ ) else() link_directories( - third_party/huawei_npu/hiai_ddk_latest/arm64-v8a/ + third_party/huawei_npu/hiai_ddk_latest/ddk/ai_ddk_lib/lib64/ ) endif() add_subdirectory(source/tnn/device/huawei_npu) set(TARGET_OBJECTS ${TARGET_OBJECTS} "$") endif() +if(TNN_ATLAS_ENABLE) + add_definitions(-DGET_NETWORK_ENABLE) + add_subdirectory(source/tnn/device/atlas) + set(TARGET_OBJECTS ${TARGET_OBJECTS} "$") + include_directories(${CMAKE_SOURCE_DIR}/source/tnn/device/atlas) + set(ASCEND_PATH $ENV{DDK_PATH}) + if (NOT DEFINED ENV{DDK_PATH}) + set(ASCEND_PATH "/usr/local/Ascend/ascend-toolkit/latest") + message(STATUS "set default ASCEND_PATH: ${ASCEND_PATH}") + else () + message(STATUS "env ASCEND_PATH: ${ASCEND_PATH}") + endif() + set(ACL_LIB_PATH $ENV{NPU_HOST_LIB}) + if (NOT DEFINED ENV{NPU_HOST_LIB}) + set(ACL_LIB_PATH "/usr/local/Ascend/ascend-toolkit/latest/lib64") 
+ message(STATUS "set default ACL_LIB_PATH: ${ACL_LIB_PATH}") + else () + message(STATUS "env ACL_LIB_PATH: ${ACL_LIB_PATH}") + endif() + # Header path + include_directories( + ${ASCEND_PATH}/acllib/include/ + ) + # add host lib path + link_directories( + ${ACL_LIB_PATH} + ) +endif() if(TNN_RK_NPU_ENABLE) if(CMAKE_SIZEOF_VOID_P EQUAL 8) @@ -413,9 +477,23 @@ if(TNN_RK_NPU_ENABLE) set(TARGET_OBJECTS ${TARGET_OBJECTS} "$") endif() +if(TNN_TNNTORCH_ENABLE) + add_subdirectory(source/tnn/network/torch) + set(TARGET_OBJECTS ${TARGET_OBJECTS} "$") +endif() + +if(TNN_ZIXIAO_ENABLE) + add_subdirectory(source/tnn/device/zixiao) + set(TARGET_OBJECTS ${TARGET_OBJECTS} "$") +endif() + if(TNN_BUILD_SHARED) add_library(TNN SHARED ${SRC} ${TARGET_OBJECTS}) set_target_properties(TNN PROPERTIES VERSION ${TNN_VERSION} SOVERSION ${TNN_MAJOR_VERSION}) + set_target_properties(TNN PROPERTIES LINK_FLAGS "-Wl,-rpath,$ORIGIN") + if (NOT TNN_PACK_TORCH_LIB) + set_target_properties(TNN PROPERTIES LINK_FLAGS "-Wl,-rpath,$ORIGIN -Wl,-rpath,$ORIGIN/../torch/lib") + endif() if(SHARING_MEM_WITH_OPENGL) if(SYSTEM.Windows) target_link_libraries(TNN opengl32) @@ -456,6 +534,19 @@ elseif(SYSTEM.Windows) include(platforms/windows/CMakeLists.txt) endif() +if(TNN_PYBIND_ENABLE) + set(CMAKE_CXX_STANDARD 17) + include_directories(third_party/pybind11/include) + add_subdirectory(third_party/pybind11) + file(GLOB_RECURSE TORCH_SRC "source/pytnn/*.cc") + add_library(_pytnn SHARED ${TORCH_SRC}) + target_link_libraries(_pytnn pybind11::module) + set_target_properties(_pytnn PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}" + SUFFIX "${PYTHON_MODULE_EXTENSION}") + set_target_properties(_pytnn PROPERTIES LINK_FLAGS "-Wl,-rpath,\$ORIGIN") + target_link_libraries(_pytnn TNN) +endif() + if (TNN_TEST_ENABLE OR TNN_CONVERTER_ENABLE OR TNN_MODEL_CHECK_ENABLE OR TNN_DYNAMIC_RANGE_QUANTIZATION_ENABLE) set(TNN_USE_GFLAGS ON) endif () @@ -499,6 +590,7 @@ endif() if(TNN_DYNAMIC_RANGE_QUANTIZATION_ENABLE) 
add_subdirectory(tools/dynamic_range_quantization) endif() +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\"") if (MSVC) target_compile_options(TNN PUBLIC "/Zc:__cplusplus") diff --git a/doc/cn/user/api.md b/doc/cn/user/api.md index 80df1ae80..cf5f1a870 100644 --- a/doc/cn/user/api.md +++ b/doc/cn/user/api.md @@ -63,7 +63,7 @@ TNN_NS::Status error; auto net_instance = tnn.CreateInst(config, error); ``` -TNN网络构建需配置NetworkConfig,device_type可配置`DEVICE_ARM`, `DEVICE_OPENCL`, `DEVICE_METAL`, `DEVICE_X86`, `DEVICE_CUDA`, `DEVICE_HUAWEI_NPU`, `DEVICE_RK_NPU`等多种加速方式,通过CreateInst接口完成网络的构建。 +TNN网络构建需配置NetworkConfig,device_type可配置`DEVICE_ARM`, `DEVICE_OPENCL`, `DEVICE_METAL`, `DEVICE_X86`, `DEVICE_CUDA`, `DEVICE_HUAWEI_NPU`, `DEVICE_RK_NPU`,`DEVICE_ATLAS`等多种加速方式,通过CreateInst接口完成网络的构建。 ### 步骤3. 输入设定 @@ -143,7 +143,7 @@ struct PUBLIC ModelConfig { ModelConfig参数说明: -- `model_type`: TNN当前开源版本仅支持传入`MODEL_TYPE_TNN`, `MODEL_TYPE_NCNN`, `MODEL_TYPE_COREML` 模型格式。 +- `model_type`: TNN当前开源版本仅支持传入`MODEL_TYPE_TNN`, `MODEL_TYPE_NCNN`, `MODEL_TYPE_COREML`, `MODEL_TYPE_ATLAS`模型格式。 - `params`: TNN模型需传入proto文件内容以及model文件路径。NCNN模型需传入param文件内容以及bin文件路径, COREML模型需传入coreml 模型所在目录路径。 @@ -181,7 +181,7 @@ struct PUBLIC NetworkConfig { NetworkConfig参数说明: -- `device_type`: 默认为`DEVICE_ARM`。 当前已支持 `DEVICE_NAIVE`、`DEVICE_ARM`、`DEVICE_X86`、`DEVICE_OPENCL`、`DEVICE_METAL`、`DEVICE_CUDA`、`DEVICE_HUAWEI_NPU`、`DEVICE_RK_NPU`。 +- `device_type`: 默认为`DEVICE_ARM`。 当前已支持 `DEVICE_NAIVE`、`DEVICE_ARM`、`DEVICE_X86`、`DEVICE_OPENCL`、`DEVICE_METAL`、`DEVICE_CUDA`、`DEVICE_HUAWEI_NPU`、`DEVICE_RK_NPU`、`DEVICE_ATLAS`。 - `device_id`: 默认为0,多个设备支持通过`device_id`选择,当前仅`DEVICE_CUDA`需配置此参数指定gpu id。 - `data_format`: 默认为tnn自动选择blob数据排布方式进行加速,可通过此参数设定特定blob数据排布进行加速。 - `network_type`: 默认根据`device_type`自动选择网络类型,可指定构建网络类型。 @@ -556,3 +556,94 @@ struct PUBLIC MatConvertParam { ### 16. 
version.h 构建版本信息 + +# Python API说明 + +Python API 基于pybind 对 C++ Core 相关API进行了封装,所有定义类型均可通过`pytnn`包名引入。相关用法与C++ API基本相同,仅改变了c++传引用参数作为返回值的函数行为,在python对应接口中改为直接作为函数返回值返回。此外,Python API提供了简化的API接口。 + +## 一、模型加载 + +### 1. load + +```python +def load(model_path, config_dict = {}): +``` + +其中`model_path`传递模型路径,对于TNN这种模型结构与权重分开存储的模型,仅需传递tnnproto 文件路径,模型权重路径基于后缀名自动查找。`config_dict`支持字典传入,相关key说明如下: + +* `input_shapes`: 支持list以及dict两种形式传入,其中dict key 可指定输入name。shape可通过两种格式指定: + +```python +{ "input_shapes": [ {"min": [1,3,224,224], "max": [1,3,248,248]} ]} +{ "input_shapes": [ [1,3,224,224] ]} +``` +其中min, max可用来指定支持的最小,最大尺寸,固定尺寸仅需指定一个尺寸即可,尺寸支持tuple和list。 +对于多输入模型,不同输入尺寸可以采用不同的格式指定支持的输入尺寸。 + +```python +{ "input_shapes": [ [1,3,112,112], {"min": [1,3,224,224], "max": [1,3,248,248]} ] } +``` + +其中第一个输入为固定输入尺寸,第二个输入为可变尺寸。 +相同的输入,通过dict传入,key可用于指定输入name: + +```python +{ "input_shapes": { "data_0": [1,3,112,112], "data_1": {"min": [1,3,224,224], "max": [1,3,248,248]} } } +``` +其中`data_0` 为固定输入尺寸,`data_1` 为可变输入尺寸。 + +* `device_type`: 支持DeviceType枚举类型以及字符串传入。 + +```python +{"device_type": DEVICE_NAIVE} +{"device_type": "naive"} +``` +枚举类型同c++,支持 `DEVICE_CUDA`, `DEVICE_X86`, `DEVICE_ARM`, `DEVICE_NAIVE`等。 +字符串类型与枚举类型命名一一对应,如`CUDA`, `cuda`均表示DEVICE_CUDA,支持大小写。 +特别说明:不指定device_type,默认选择`DEVICE_CUDA`。 + +* `data_format`,`network_type`, `precision`, `share_memory_mode`, `data_format` 与 `device_type`类似,均支持枚举类型和字符串类型输入,枚举类型同c++, 字符串类型与枚举类型命名一一对应,支持大小写。 + +* `cache_path`, `library_path` 支持字符串类型传入,`enable_tune_kernel` 支持布尔类型传入。 + +### 2. 
`load_raw`, `load_raw_range` + +```python +def load_raw(model_path, network_config, input_shapes=None): +def load_raw_range(model_path, network_config, min_input_shapes, max_input_shapes): +``` +两接口为TNN对应接口CreateInst的简单封装,其中`model_path`传递模型路径;`network_config`为`NetworkConfig`类实例,与C++类相同;`input_shapes`,`min_input_shapes`以及`max_input_shapes` 对应相关输入尺寸设定,类型为字典,其中key为输入name,value对应输入尺寸list。 + +## 二、网络运行 + +模型加载完成后,会返回pytnn新定义的Module类实例, 其中Module类定义的一重要函数为forward。 + +```python +class Module: +... + def forward(self, *inputs, rtype="list"): +... +``` + +其中`inputs`为不定长参数,每个输入数据存储于`numpy.ndarray`中,排布为NC[D1-D4]。支持多个输入直接传入,也支持list, tuple,dict形式传入。 如一个两输入网络,输入name依次为`data_1`, `data_2`,可支持以下几种方式传入数据。 + +``` +input1=numpy.ones((1,3,224,224), np.float32, 'F') +input2=numpy.ones((1,3,224,224), np.float32, 'F') +# case1 +outputs=module.forward(input1, input2) +#case2 +outputs=module.forward((input1, input2)) +#case3 +outputs=module.forward([input1, input2]) +#case4 +outputs=module.forward({"data_1":input1, "data_2":input2}) +``` +输出`outputs`默认返回类型为list,每个输出存储于`numpy.ndarray`中,排布为NC[D1-D4]。 + +`rtype` 支持 `list` , `dict`, 指定输出返回类型为字典类型时,key为模型输出name,value对应输出数据,存储于`numpy.ndarray`中。 + + + + + diff --git a/doc/cn/user/compile.md b/doc/cn/user/compile.md index b38c6d96a..ec02dd976 100644 --- a/doc/cn/user/compile.md +++ b/doc/cn/user/compile.md @@ -227,7 +227,41 @@ cd /scripts ``` ./build_macos.sh ``` +## 九、ATLAS环境编译 +### 1. 
环境要求 +#### 依赖库 + - cmake(使用3.1及以上版本) + - 交叉编译需要安装编译工具链 + - ubuntu: aarch64: sudo apt-get install g++-aarch64-linux-gnu gcc-aarch64-linux-gnu + - other linux: 下载arm toolchain: https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-a/downloads + - CANN环境依赖: + toolkit软件包: wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/6.0.0.alpha003/Ascend-cann-toolkit_6.0.0.alpha003_linux-aarch64.run + kernel包: wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/6.0.0.alpha003/Ascend-cann-kernels-310p_6.0.0.alpha003_linux.run + chmod a+x Ascend-cann-toolkit_6.0.0.alpha003_linux-aarch64.run && chmod a+x Ascend-cann-kernels-310p_6.0.0.alpha003_linux.run + ./Ascend-cann-toolkit_6.0.0.alpha003_linux-aarch64.run --install # 默认安装路径:/usr/local/Ascend/ascend-toolkit + ./Ascend-cann-kernels-310p_6.0.0.alpha003_linux.run --install + +### 2. 编译步骤 +1)切换到脚本目录 +``` +cd /scripts +``` +2)编辑`build_atlas.sh`修改配置选项 +``` + SHARED_LIB="ON" # ON表示编译动态库,OFF表示编译静态库 + ARM="ON" # ON表示编译带有Arm CPU版本的库 + OPENMP="ON" # ON表示打开OpenMP + #ARM64: + CC=aarch64-linux-gnu-gcc # 指定C编译器 + CXX=aarch64-linux-gnu-g++ # 指定C++编译器 + TARGET_ARCH=aarch64 # 指定指令架构 + +``` +3)执行编译脚本 +``` +./build_atlas.sh +``` ## 编译参数option说明 |Option|默认值|说明| @@ -239,7 +273,7 @@ cd /scripts |TNN_METAL_ENABLE| OFF | 代码source/device/metal编译开关,代码包含metal加速指令。| |TNN_OPENCL_ENABLE| OFF | 代码source/device/opencl编译开关,代码包含opencl加速指令。| |TNN_CUDA_ENABLE| OFF | 代码source/device/cuda编译开关,当前适配TensorRT实现,后续会迁入更多加速代码实现。| -|TNN_DSP_ENABLE| OFF | 代码source/device/dsp编译开关,当前适配snpe实现。| +|TNN_SNPE_ENABLE| OFF | 代码source/device/snpe编译开关,当前适配Qualcomm SNPE DSP实现。| |TNN_ATLAS_ENABLE| OFF | 代码source/device/atlas编译开关,当前适配华为atlas加速框架。| |TNN_HUAWEI_NPU_ENABLE| OFF | 代码source/device/huawei_npu编译开关,当前适配HiAI加速框架。| |TNN_RK_NPU_ENABLE| OFF | 代码source/device/rknpu编译开关,当前适配rknpu_ddk加速框架。| diff --git a/doc/cn/user/demo.md b/doc/cn/user/demo.md index cf30e04e4..28b4c31b1 100644 --- a/doc/cn/user/demo.md +++ 
b/doc/cn/user/demo.md @@ -108,8 +108,8 @@ c) 如果需要执行OCR demo,需要将tnn_sdk_sample.h中的宏HAS_OPENCV设 ### 运行环境要求 -1. Android Studio 3.5 或以上 -2. NDK version >= 18, <= 21 +1. Android Studio 3.5 或以上, Android Studio 2022.2.1 测试可运行 +2. NDK version >= 18, NDK 22和23在链接第三方动态库可能会出错,例如opencv,hiai,不建议使用。 ### 运行步骤 @@ -534,6 +534,48 @@ NDK 22和23在链接第三方动态库可能会出错,例如opencv,hiai, 文本识别 demo ./demo_cuda_ocrdetecor ``` +##### Atlas +* 环境要求 + - Cmake (>= 3.1) + - 交叉编译需要安装编译工具链 + - ubuntu: aarch64: sudo apt-get install g++-aarch64-linux-gnu gcc-aarch64-linux-gnu + - other linux: 下载 arm toolchain: https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-a/downloads + - CANN环境依赖: + toolkit软件包: wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/6.0.0.alpha003/Ascend-cann-toolkit_6.0.0.alpha003_linux-aarch64.run + kernel包: wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/6.0.0.alpha003/Ascend-cann-kernels-310p_6.0.0.alpha003_linux.run + chmod a+x Ascend-cann-toolkit_6.0.0.alpha003_linux-aarch64.run && chmod a+x Ascend-cann-kernels-310p_6.0.0.alpha003_linux.run + ./Ascend-cann-toolkit_6.0.0.alpha003_linux-aarch64.run --install # 默认安装路径:/usr/local/Ascend/ascend-toolkit + ./Ascend-cann-kernels-310p_6.0.0.alpha003_linux.run --install +* 编译 + 进入 `examples/linux/atlas` 目录 + ``` + cd /examples/linux/atlas + ``` + 执行 `build_atlas.sh` + ``` + sh build_aarch64_linux.sh + ``` +* 执行 + 进入 `examples/linux/cross/build_atlas` 目录,当不使用任何参数执行demo文件时,会打印demo用法信息,以图形分类demo为例: + ``` + cd build_atlas + ./demo_atlas_imageclassify + >Parameter -m and -p should be set + >usage: + >./demo_arm_linux_imageclassify [-h] [-p] tnnproto [-m] ommodel [-i] + > -h, print a usage message. + > -p, (required) tnn proto file path + > -m, (required) om model file path + > -i, (required) input file path + > -l,