// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
//
// Original source code URL: https://github.com/microsoft/onnxruntime/blob/master/csharp/test/Microsoft.ML.OnnxRuntime.EndToEndTests.Capi/CXX_Api_Sample.cpp
#include <iostream>
#include <fstream>
#include <cassert>
#include <vector>
#include <chrono>
#include <cstdint> // intmax_t for the %jd printf conversions below
#include <algorithm> // std::sort
#include <iomanip> // std::setprecision
#include <opencv2/opencv.hpp>
//OPENVINO
#define USE_OPENVINO
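// NOTE: USE_OPENVINO is defined here but never tested with #ifdef in this file;
// the OpenVINO include and provider registration below are compiled unconditionally.
// Guarding them with #ifdef USE_OPENVINO would make the macro meaningful.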
//OPENVINO
#include <core/session/onnxruntime_cxx_api.h>
//OPENVINO
#include <core/providers/openvino/openvino_provider_factory.h>
//OPENVINO
int main(int argc, char* argv[]) {
// Read the class label text data (one label per line)
std::ifstream label_file("synset_words.txt");
if (!label_file) {
std::cerr << "Failed to open synset_words.txt" << std::endl;
return 1;
}
std::string str;
std::vector<std::string> labels;
while (getline(label_file, str)) labels.push_back(str);
label_file.close();
//*************************************************************************
// initialize environment...one environment per process
// environment maintains thread pools and other state info
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
// initialize session options if needed
Ort::SessionOptions session_options;
session_options.SetIntraOpNumThreads(1);
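// Intra-op threads control parallelism inside a single operator; with the
// OpenVINO EP most of the work runs in OpenVINO's own thread pool, so 1 suffices here.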
// Sets graph optimization level
// Available levels are
// ORT_DISABLE_ALL -> Disable all optimizations
// ORT_ENABLE_BASIC -> Enable basic optimizations (such as redundant node removals)
// ORT_ENABLE_EXTENDED -> Enable extended optimizations (basic + more complex optimizations like node fusions)
// ORT_ENABLE_ALL -> Enable all possible optimizations
//OPENVINO
session_options.SetGraphOptimizationLevel(ORT_DISABLE_ALL); // Disable ONNX Runtime's graph optimizations so the full graph is handed to the OpenVINO backend
//OPENVINO
//OPENVINO
std::string inference_device = "CPU_FP32";
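// "CPU_FP32" selects the OpenVINO CPU plugin with FP32 precision. Depending on
// the OpenVINO EP build and available hardware, other device strings such as
// "GPU_FP32", "GPU_FP16" or "MYRIAD_FP16" may also be accepted.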
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_OpenVINO(session_options, inference_device.c_str()));
std::cout << "[INFO] Using OpenVINO execution provider" << std::endl;
//OPENVINO
//*************************************************************************
// create session and load model into memory
#ifdef _WIN32
const wchar_t* model_path = L"resnet18-v2-7.onnx";
#else
const char* model_path = "resnet18-v2-7.onnx";
#endif
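// On Windows the ONNX Runtime API takes model paths as wide (UTF-16) strings
// (ORTCHAR_T is wchar_t there), hence the L"" literal above.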
printf("Using Onnxruntime C++ API\n");
Ort::Session session(env, model_path, session_options);
//*************************************************************************
// print model input layer (node names, types, shape etc.)
Ort::AllocatorWithDefaultOptions allocator;
// print number of model input nodes
size_t num_input_nodes = session.GetInputCount();
std::vector<const char*> input_node_names(num_input_nodes);
std::vector<int64_t> input_node_dims; // simplify... this model has only 1 input node {1, 3, 224, 224}.
// Otherwise need vector<vector<>>
printf("Number of inputs = %zu\n", num_input_nodes);
// iterate over all input nodes
for (size_t i = 0; i < num_input_nodes; i++) {
// print input node names
char* input_name = session.GetInputName(i, allocator);
printf("Input %zu : name=%s\n", i, input_name);
input_node_names[i] = input_name;
// print input node types
Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
ONNXTensorElementDataType type = tensor_info.GetElementType();
printf("Input %zu : type=%d\n", i, type);
// print input shapes/dims
input_node_dims = tensor_info.GetShape();
printf("Input %zu : num_dims=%zu\n", i, input_node_dims.size());
for (size_t j = 0; j < input_node_dims.size(); j++)
printf("Input %zu : dim %zu=%jd\n", i, j, (intmax_t)input_node_dims[j]);
}
//*************************************************************************
// print model output layer (node names, types, shape etc.)
// (the Ort::AllocatorWithDefaultOptions declared above is reused here)
// print number of model output nodes
size_t num_output_nodes = session.GetOutputCount();
std::vector<const char*> output_node_names(num_output_nodes);
std::vector<int64_t> output_node_dims; // simplify... this model has only 1 output node {1, 1000}.
// Otherwise need vector<vector<>>
printf("Number of outputs = %zu\n", num_output_nodes);
// iterate over all output nodes
for (size_t i = 0; i < num_output_nodes; i++) {
// print output node names
char* output_name = session.GetOutputName(i, allocator);
printf("Output %zu : name=%s\n", i, output_name);
output_node_names[i] = output_name;
// print output node types
Ort::TypeInfo type_info = session.GetOutputTypeInfo(i);
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
ONNXTensorElementDataType type = tensor_info.GetElementType();
printf("Output %zu : type=%d\n", i, type);
// print output shapes/dims
output_node_dims = tensor_info.GetShape();
printf("Output %zu : num_dims=%zu\n", i, output_node_dims.size());
for (size_t j = 0; j < output_node_dims.size(); j++)
printf("Output %zu : dim %zu=%jd\n", i, j, (intmax_t)output_node_dims[j]);
}
//*************************************************************************
// Score the model using sample data, and inspect values
size_t input_tensor_size = 224 * 224 * 3; // simplify... using known dim values to calculate size
// use GetTensorTypeAndShapeInfo().GetElementCount() to query the size instead of hard-coding it
std::vector<float> input_tensor_values(input_tensor_size);
// Load an image and fill the input blob with values in [0.0, 1.0]
cv::Mat image = cv::imread("car.png");
if (image.empty()) {
std::cerr << "Failed to read car.png" << std::endl;
return 1;
}
cv::resize(image, image, cv::Size(224, 224));
cv::cvtColor(image, image, cv::COLOR_BGR2RGB);
uint8_t* buf = image.data;
for(int h=0; h<224; h++) {
for(int w=0; w<224; w++) {
input_tensor_values[0 * (224*224) + h*224 + w] = (float)(*buf++)/255.f;
input_tensor_values[1 * (224*224) + h*224 + w] = (float)(*buf++)/255.f;
input_tensor_values[2 * (224*224) + h*224 + w] = (float)(*buf++)/255.f;
}
}
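// The loop above converts OpenCV's interleaved HWC uint8 layout into the planar
// NCHW float layout the model expects, scaling pixels to [0, 1]. Note: the ONNX
// model zoo preprocessing for ResNet also normalizes with the ImageNet mean/std;
// this sample skips that step, so the scores may differ slightly from reference.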
// create input tensor object from data values
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4);
assert(input_tensor.IsTensor());
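// CreateTensor wraps the existing input_tensor_values buffer without copying,
// so that vector must stay alive (and unmodified) until Run() completes.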
// Benchmark synchronous inference latency
std::vector<Ort::Value> output_tensors;
std::cout << "Starting inference" << std::endl;
auto startTime = std::chrono::steady_clock::now(); // steady_clock is monotonic, preferred over system_clock for timing
size_t niter = 100;
for(size_t i=0; i<niter; i++) {
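// Run() takes parallel arrays of input/output names plus the corresponding
// tensors and counts, and returns the output tensors for each call.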
output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
}
auto endTime = std::chrono::steady_clock::now();
auto execTime = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime).count();
std::cout << "Finished inference" << std::endl;
std::cout << inference_device << std::endl;
std::cout << (double)execTime / niter << " ms/inference (average over " << niter << " runs)" << std::endl;
// Get pointer to output tensor float values
float* output = output_tensors.front().GetTensorMutableData<float>();
std::cout << "\nresults\n------------------" << std::endl;
std::vector<int> idx;
for (int i = 0; i < 1000; i++) idx.push_back(i);
std::sort(idx.begin(), idx.end(), [output](int left, int right) { return output[left] > output[right]; });
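// Sorting all 1000 indices is O(n log n); std::partial_sort over the first five
// elements would be enough for a top-5 ranking. For resnet18-v2-7 the outputs
// are typically un-softmaxed scores rather than probabilities.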
for (size_t id = 0; id < 5; ++id) {
std::cout << id << " : " << idx[id] << " : " <<
std::fixed << std::setprecision(2) << output[idx[id]] << " " << labels[idx[id]] << std::endl;
}
printf("Done!\n");
return 0;
}