From 2e3c87afa17bfc9d9d283cfd778fc7c086eb7ca9 Mon Sep 17 00:00:00 2001
From: ealinli
Date: Wed, 14 Dec 2022 15:33:04 +0800
Subject: [PATCH] [UPD] put MatMul's transformations into model conversion

---
 doc/cn/user/convert.md                        | 14 +++++
 doc/en/user/convert_en.md                     | 13 ++++
 source/tnn/core/const_folder.cc               | 27 +-------
 source/tnn/core/default_network.cc            | 14 ++++-
 source/tnn/core/instance.cc                   |  4 +-
 .../net_optimizer_convert_matmul_to_conv.cc   |  3 -
 tools/convert2tnn/converter.py                |  3 +-
 tools/convert2tnn/onnx_converter/onnx2tnn.py  |  7 ++-
 tools/convert2tnn/utils/args_parser.py        |  5 ++
 tools/onnx2tnn/onnx-converter/onnx2tnn.py     | 10 ++-
 .../onnx-converter/onnx2tnn_convert.cc        | 63 ++++++++++++++++++-
 11 files changed, 125 insertions(+), 38 deletions(-)

diff --git a/doc/cn/user/convert.md b/doc/cn/user/convert.md
index 04ac9015f..248a77eb7 100755
--- a/doc/cn/user/convert.md
+++ b/doc/cn/user/convert.md
@@ -403,6 +403,20 @@ python3 converter.py onnx2tnn \
     -ref_file ref.txt
 ```
 
+If you want to convert the MatMul operators in an onnx model to Conv1x1, configure the conversion with -extra_info, for example:
+```shell script
+python3 converter.py onnx2tnn \
+    ./matmul.onnx \
+    -optimize \
+    -v=v3.0 \
+    -o ./ \
+    -align \
+    -input_file in.txt \
+    -ref_file ref.txt \
+    -extra_info "optimize:net_optimizer_convert_matmul_to_conv"
+```
+
+
 - caffe2tnn
 
     Caffe format conversion

diff --git a/doc/en/user/convert_en.md b/doc/en/user/convert_en.md
index bd5327e0a..e574ba23b 100644
--- a/doc/en/user/convert_en.md
+++ b/doc/en/user/convert_en.md
@@ -389,6 +389,19 @@ Example:
 python3 converter.py onnx2tnn ~/mobilenetv3/mobilenetv3-small-c7eb32fe.onnx.opt.onnx -optimize -v=v3.0 -o ~/mobilenetv3/
 ```
 
+If you want to convert the MatMul operators in an onnx model to Conv1x1, configure the conversion with -extra_info, for example:
+```shell script
+python3 converter.py onnx2tnn \
+    ./matmul.onnx \
+    -optimize \
+    -v=v3.0 \
+    -o ./ \
+    -align \
+    -input_file in.txt \
+    -ref_file ref.txt \
+    -extra_info "optimize:net_optimizer_convert_matmul_to_conv"
+```
+
 - caffe2tnn
 
     caffe format conversion
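The documented command can also be driven from a script. A minimal sketch, assuming it is run from tools/convert2tnn and that the model and data files exist (all paths below are placeholders):

```python
# Minimal scripted run of the documented CLI (paths and file names are placeholders).
import subprocess

cmd = [
    "python3", "converter.py", "onnx2tnn", "./matmul.onnx",
    "-optimize", "-v=v3.0", "-o", "./", "-align",
    "-input_file", "in.txt", "-ref_file", "ref.txt",
    "-extra_info", "optimize:net_optimizer_convert_matmul_to_conv",
]
subprocess.run(cmd, check=True)  # raises CalledProcessError if the conversion fails
```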
diff --git a/source/tnn/core/const_folder.cc b/source/tnn/core/const_folder.cc
index 4df4451e4..c7e97b524 100644
--- a/source/tnn/core/const_folder.cc
+++ b/source/tnn/core/const_folder.cc
@@ -57,32 +57,7 @@ Status ConstFolder::Init(NetworkConfig &net_config, ModelConfig &model_config, A
 
     runtime_blob_pool_ = BlobMemoryPoolFactory::CreateBlobMemoryPool(device);
     runtime_model_ = RUNTIME_MODE_CONST_FOLD;
-
-    // Some optimizations need to be done in ConstFolder,
-    // so we moved those optimizations from DefaultNetwork to ConstFolder
-    {
-        auto *default_interpreter = dynamic_cast<DefaultModelInterpreter *>(interpreter);
-        CHECK_PARAM_NULL(default_interpreter);
-
-        NetStructure *net_structure = default_interpreter->GetNetStructure();
-        NetResource *net_resource   = default_interpreter->GetNetResource();
-
-        std::set<std::string> const_fold_optimizers = {
-            "net_optimizer_dynamic_range_dequant",
-            "net_optimizer_convert_matmul_to_conv",
-        };
-
-        if (runtime_model_ == RUNTIME_MODE_CONST_FOLD && net_config.network_type != NETWORK_TYPE_COREML) {
-            std::unique_lock<std::mutex> lck(optimize_mtx_);
-            for (const auto &iter : const_fold_optimizers) {
-                auto optimizer = optimizer::NetOptimizerManager::GetNetOptimizerByName(iter);
-                if (optimizer && optimizer->IsSupported(net_config)) {
-                    RETURN_ON_NEQ(optimizer->Optimize(net_structure, net_resource), TNN_OK);
-                }
-            }
-        }
-    }
-
+
     auto ret = DefaultNetwork::Init(config_, model_config, interpreter, min_inputs_shape, max_inputs_shape);
     if(ret != TNN_OK) {
         return ret;

diff --git a/source/tnn/core/default_network.cc b/source/tnn/core/default_network.cc
index 6294f5be4..500441480 100644
--- a/source/tnn/core/default_network.cc
+++ b/source/tnn/core/default_network.cc
@@ -113,6 +113,18 @@ Status DefaultNetwork::Init(NetworkConfig &net_config, ModelConfig &model_config
         RETURN_ON_NEQ(ret, TNN_OK);
     }
 
+    /*
+     * Decode the dynamic-range quantized model for the const folder.
+     * CoreML cannot run this optimization.
+     */
+    if (runtime_model_ == RUNTIME_MODE_CONST_FOLD && net_config.network_type != NETWORK_TYPE_COREML) {
+        std::unique_lock<std::mutex> lck(optimize_mtx_);
+        auto optimizer = optimizer::NetOptimizerManager::GetNetOptimizerByName("net_optimizer_dynamic_range_dequant");
+        if (optimizer) {
+            RETURN_ON_NEQ(optimizer->Optimize(net_structure, net_resource), TNN_OK);
+        }
+    }
+
     blob_manager_ = new BlobManager(device_);
 
     ret = blob_manager_->Init(net_config, net_structure, max_inputs_shape, GetNetResourceDataType(net_resource));
@@ -201,7 +213,7 @@ Status DefaultNetwork::InitLayers(NetStructure *net_structure, NetResource *net_
             dynamic_cast<ReformatLayerParam *>(layer_info->param.get())->dst_format : input_fmt;
 
 #ifdef GENERATE_RESOURCE
-    if (runtime_model_ == RUNTIME_MODE_NORMAL || runtime_model_ == RUNTIME_MODE_CONST_FOLD) {
+    if (runtime_model_ == RUNTIME_MODE_NORMAL) {
         LayerType type = layer_info->type;
         BaseLayer *cur_layer = CreateLayer(type);
         if (cur_layer == NULL) {

diff --git a/source/tnn/core/instance.cc b/source/tnn/core/instance.cc
index b032a7270..ed0cce2a3 100644
--- a/source/tnn/core/instance.cc
+++ b/source/tnn/core/instance.cc
@@ -94,8 +94,8 @@ Status Instance::Init(std::shared_ptr<AbstractModelInterpreter> interpreter, Inp
     }
 
     if (default_interpreter && default_interpreter->GetNetStructure() &&
-        (NeedDoConstantFolding(default_interpreter->GetNetStructure()) || net_config_.device_type == DEVICE_CUDA ||
-         net_config_.device_type == DEVICE_APPLE_NPU || net_config_.device_type == DEVICE_ARM)) {
+        (NeedDoConstantFolding(default_interpreter->GetNetStructure()) ||
+         net_config_.device_type == DEVICE_CUDA || net_config_.device_type == DEVICE_APPLE_NPU)) {
         auto const_folder = std::make_shared<ConstFolder>();
         auto folder_net_config = net_config_;
         folder_net_config.share_memory_mode = SHARE_MEMORY_MODE_DEFAULT;

diff --git a/source/tnn/optimizer/net_optimizer_convert_matmul_to_conv.cc b/source/tnn/optimizer/net_optimizer_convert_matmul_to_conv.cc
index ab3c14307..ce29a02ac 100644
--- a/source/tnn/optimizer/net_optimizer_convert_matmul_to_conv.cc
+++ b/source/tnn/optimizer/net_optimizer_convert_matmul_to_conv.cc
@@ -38,9 +38,6 @@ namespace optimizer {
     }
 
     bool NetOptimizerConvertMatMulToConv::IsSupported(const NetworkConfig &net_config) {
-        if (net_config.device_type == DEVICE_ARM) {
-            return true;
-        }
         return false;
     }
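For intuition about why this rewrite is safe: a MatMul against a constant weight is exactly a 1x1 convolution applied to a reshaped activation, so the optimizer can swap one for the other. A quick numpy check of that equivalence (an illustration only; the layout TNN actually chooses may differ):

```python
import numpy as np

M, K, N = 4, 8, 5                                    # rows, inner dim, cols
rng = np.random.default_rng(0)
X = rng.standard_normal((M, K)).astype(np.float32)   # activation
W = rng.standard_normal((K, N)).astype(np.float32)   # constant MatMul weight

ref = X @ W                                          # MatMul result, shape [M, N]

# Conv1x1 view: X becomes a [1, K, M, 1] feature map, W becomes [N, K, 1, 1] kernels.
x = X.T.reshape(1, K, M, 1)
w = W.T.reshape(N, K, 1, 1)
# A 1x1 convolution is a per-position channel mix: out[n, h, w] = sum_k w[n, k] * x[k, h, w]
y = np.einsum('bkhw,nkij->bnhw', x, w)

np.testing.assert_allclose(y[0, :, :, 0].T, ref, rtol=1e-5, atol=1e-5)
print("MatMul == Conv1x1 on the reshaped data")
```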
diff --git a/tools/convert2tnn/converter.py b/tools/convert2tnn/converter.py
index 0e135e324..6e01f8c9d 100755
--- a/tools/convert2tnn/converter.py
+++ b/tools/convert2tnn/converter.py
@@ -56,9 +56,10 @@ def main():
         input_names = ""
         for item in args.input_names:
             input_names += (item + " ")
+    extra_info = args.extra_info
     try:
         onnx2tnn.convert(onnx_path, output_dir, version, optimize, half, align, align_batch,
                          input_file, ref_file, input_names,
-                         debug_mode=debug_mode)
+                         debug_mode=debug_mode, extra_info=extra_info)
         if int8:
             dynamic_range_quantization.quantization(onnx_path, "onnx", output_dir, optimize)
     except Exception as err:

diff --git a/tools/convert2tnn/onnx_converter/onnx2tnn.py b/tools/convert2tnn/onnx_converter/onnx2tnn.py
index 07071aaa0..ffa6da7c2 100644
--- a/tools/convert2tnn/onnx_converter/onnx2tnn.py
+++ b/tools/convert2tnn/onnx_converter/onnx2tnn.py
@@ -56,7 +56,8 @@ def check_input_names(input_names: str, onnx_input_info: dict):
 
 def convert(onnx_path, output_dir=None, version="v1.0", optimize=True, half=False, align='', align_batch=False,
-            input_path=None, refer_path=None, input_names: str = None, is_ssd=False, debug_mode: bool = False):
+            input_path=None, refer_path=None, input_names: str = None, is_ssd=False, debug_mode: bool = False,
+            extra_info: str = None):
     """
     Run the command that converts an onnx model to a tnn model
     :parameter:
@@ -111,6 +112,10 @@ def convert(onnx_path, output_dir=None, version="v1.0", optimize=True, half=Fals
     if input_names is not None:
         command = command + " -input_shape " + input_names
 
+    if extra_info is not None:
+        command = command + " -extra_info " + extra_info
+
     logging.debug("The onnx2tnn command:" + command + "\n")
 
     current_file_dir = os.path.dirname(__file__)

diff --git a/tools/convert2tnn/utils/args_parser.py b/tools/convert2tnn/utils/args_parser.py
index 2844a9ad8..1de4bc8cf 100644
--- a/tools/convert2tnn/utils/args_parser.py
+++ b/tools/convert2tnn/utils/args_parser.py
@@ -100,6 +100,11 @@ def parse_args():
                                      action='store_true',
                                      required=False,
                                      help="save model using dynamic range quantization. use int8 save, fp32 interpreting")
+    onnx2tnn_parser.add_argument('-extra_info',
+                                 dest='extra_info',
+                                 action='store',
+                                 required=False,
+                                 help="additional conversion configuration in key:value form, e.g. optimize:net_optimizer_convert_matmul_to_conv")
     # convert caffe2onnx -pp proto_path -mp model_path -o
     caffe2tnn_parser = subparsers.add_parser('caffe2tnn',

diff --git a/tools/onnx2tnn/onnx-converter/onnx2tnn.py b/tools/onnx2tnn/onnx-converter/onnx2tnn.py
index 4cc46b7c9..bfad4d8c3 100644
--- a/tools/onnx2tnn/onnx-converter/onnx2tnn.py
+++ b/tools/onnx2tnn/onnx-converter/onnx2tnn.py
@@ -71,6 +71,10 @@ def main():
                         action='store',
                         nargs='+',
                         help='manually-set static input shape, useful when the input shape is dynamic')
+    parser.add_argument('-extra_info',
+                        required=False,
+                        action='store',
+                        help='additional conversion configuration in key:value form, e.g. optimize:net_optimizer_convert_matmul_to_conv')
     args = parser.parse_args()
     onnx_net_path = args.onnx_model_path
     algo_version = args.version
@@ -83,6 +87,10 @@ def main():
         for item in args.input_shape:
             input_shape += (item + " ")
 
+    extra_info = ""
+    if args.extra_info is not None:
+        extra_info = args.extra_info
+
     if onnx_net_path is None:
         print('Please make sure the onnx model path is correct!')
         exit(-1)
@@ -134,7 +142,7 @@ def main():
         status = onnx2tnn.convert(onnx_net_opt_path, output_dir, algo_version,
                                   file_time, 0 if model_half == '0' else 1,
                                   1 if algo_optimize != '0' else 0,
-                                  input_shape)
+                                  input_shape, extra_info)
     except Exception as err:
         status = -1
         traceback.print_exc()

diff --git a/tools/onnx2tnn/onnx-converter/onnx2tnn_convert.cc b/tools/onnx2tnn/onnx-converter/onnx2tnn_convert.cc
index c64ab3cfa..803061243 100644
--- a/tools/onnx2tnn/onnx-converter/onnx2tnn_convert.cc
+++ b/tools/onnx2tnn/onnx-converter/onnx2tnn_convert.cc
@@ -18,6 +18,7 @@
 
 #include "onnx2tnn.h"
 #include "tnn/core/const_folder.h"
+#include "tnn/optimizer/net_optimizer_manager.h"
 #include "tnn/interpreter/default_model_interpreter.h"
 #include "tnn/interpreter/tnn/model_packer.h"
 #include "tnn/core/blob.h"
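The hunk below adds the parser for the extra_info grammar: ';' separates key:value entries, ':' separates a key from its value, and ',' separates items within a value. A rough Python mirror of its semantics, for reference only (not part of the converter):

```python
# Rough Python mirror of parse_extra_info() added below.
def parse_extra_info(extra_info: str) -> dict:
    extra_map = {}
    for pair in [p for p in extra_info.split(';') if p]:
        info = [s for s in pair.split(':') if s]      # drop empty pieces, as split_string does
        if len(info) != 2:
            raise ValueError(f"sub extra info is invalid: {pair}")
        key, value = info
        extra_map.setdefault(key, set()).update(v for v in value.split(',') if v)
    return extra_map

assert parse_extra_info("optimize:net_optimizer_convert_matmul_to_conv") == {
    "optimize": {"net_optimizer_convert_matmul_to_conv"}
}
```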
@@ -135,11 +136,57 @@ Status parse_input_info(std::string input_info, TNN_NS::InputShapesMap & input_s
     return TNN_NS::TNN_OK;
 }
 
+std::vector<std::string> split_string(const std::string& str, const std::string& delimiter) {
+    size_t pos = 0;
+    std::string s(str);
+    std::string token;
+    std::vector<std::string> sub_strs;
+    while ((pos = s.find(delimiter)) != std::string::npos) {
+        token = s.substr(0, pos);
+        sub_strs.emplace_back(token);
+        s.erase(0, pos + delimiter.length());
+    }
+    if (!s.empty()) {
+        sub_strs.emplace_back(s);
+    }
+
+    return sub_strs;
+}
+
+// Parse the additional configuration information needed for model conversion.
+// The configuration is expressed as ';'-separated key-value pairs, and each value is a
+// ','-separated list. e.g., to convert MatMul to Conv1x1, extra_info can be configured as
+// "optimize:net_optimizer_convert_matmul_to_conv".
+Status parse_extra_info(const std::string& extra_info, std::map<std::string, std::set<std::string>>& extra_map) {
+    extra_map.clear();
+    const auto& extra_list = split_string(extra_info, ";");
+    for (const auto& extra_it : extra_list) {
+        const auto& info = split_string(extra_it, ":");
+        if (info.size() != 2) {
+            LOGE("sub extra info is invalid, %s\n", extra_it.c_str());
+            return Status(TNNERR_INVALID_NETCFG, "invalid extra_info");
+        }
+        const std::string& type = info.at(0);
+        extra_map[type] = {};
+        const auto& config_list = split_string(info.at(1), ",");
+        for (const auto& it : config_list) {
+            if (extra_map.at(type).find(it) == extra_map.at(type).end()) {
+                extra_map.at(type).emplace(it);
+            }
+        }
+    }
+
+    return TNN_OK;
+}
+
 //data_type: 0:float 1:half 2:int8, not supported now
 int onnx2tnn_convert(std::string onnx_model_path, std::string output_dir, std::string algo_version,
-                     std::string file_time, int data_type, int fixed_input_shape, std::string input_info)
-{
+                     std::string file_time, int data_type, int fixed_input_shape, std::string input_info,
+                     std::string extra_info) {
+    std::map<std::string, std::set<std::string>> extra_map;
+    RETURN_ON_NEQ(parse_extra_info(extra_info, extra_map), TNN_OK);
+
     std::string onnx_model_name;
     std::string onnx_suffix = ".onnx";
     std::size_t sep_position = onnx_model_path.rfind('/');
@@ -219,7 +266,17 @@ int onnx2tnn_convert(std::string onnx_model_path, std::string output_dir, std::s
         DLog("GetOptimizedNet Error: %s\n", status.description().c_str());
         return status;
     }
-
+
+    if (extra_map.find("optimize") != extra_map.end()) {
+        const auto& optimizer_string_list = extra_map.at("optimize");
+        for (const auto& it : optimizer_string_list) {
+            auto optimizer = optimizer::NetOptimizerManager::GetNetOptimizerByName(it);
+            if (optimizer) {
+                RETURN_ON_NEQ(optimizer->Optimize(opt_structure.get(), opt_resource.get()), TNN_OK);
+            }
+        }
+    }
+
     auto packer = std::make_shared<TNN_NS::ModelPacker>(opt_structure.get(), opt_resource.get());
     if (packer->Pack(tnn_proto, tnn_model) != 0) {
         DLog("ModelPacker Pack failed!\n");
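End to end, the -extra_info value flows from converter.py through onnx2tnn.py into onnx2tnn_convert(), which applies every optimizer listed under the optimize key after GetOptimizedNet(). For reference, a direct call to the compiled extension, mirroring the call site in tools/onnx2tnn/onnx-converter/onnx2tnn.py (all values are illustrative placeholders):

```python
# Direct call into the compiled onnx2tnn extension module; the argument order mirrors
# the call in tools/onnx2tnn/onnx-converter/onnx2tnn.py (values are placeholders).
import onnx2tnn

status = onnx2tnn.convert(
    "./matmul.onnx",                                  # onnx model path
    "./",                                             # output directory
    "v3.0",                                           # algo_version
    "20221214",                                       # file_time stamp (hypothetical)
    0,                                                # tool passes 0 if model_half == '0' else 1
    1,                                                # tool passes 1 if algo_optimize != '0' else 0
    "",                                               # input shape string; "" keeps the model's shapes
    "optimize:net_optimizer_convert_matmul_to_conv",  # extra_info
)
assert status == 0  # onnx2tnn_convert() returns 0 (TNN_OK) on success
```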