Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ats m ivi #25

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
326 changes: 173 additions & 153 deletions BasePreparedModel.cpp

Large diffs are not rendered by default.

11 changes: 6 additions & 5 deletions BasePreparedModel.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,13 @@ namespace android::hardware::neuralnetworks::nnhal {
template <class T>
using vec = std::vector<T>;
typedef uint8_t* memory;
extern bool mRemoteCheck;
extern std::shared_ptr<DetectionClient> mDetectionClient;
class BasePreparedModel : public V1_3::IPreparedModel {
public:
bool mRemoteCheck = false;
BasePreparedModel(const IntelDeviceType device, const Model& model) : mTargetDevice(device) {
mModelInfo = std::make_shared<NnapiModelInfo>(model);
mXmlFile = std::string("/data/vendor/neuralnetworks/") + std::to_string(mFileId) + std::string(".xml");
mBinFile = std::string("/data/vendor/neuralnetworks/") + std::to_string(mFileId) + std::string(".bin");
mXmlFile = MODEL_DIR + std::to_string(mFileId) + std::string(".xml");
mBinFile = MODEL_DIR + std::to_string(mFileId) + std::string(".bin");
mFileId++;
}

Expand Down Expand Up @@ -89,7 +88,8 @@ class BasePreparedModel : public V1_3::IPreparedModel {

virtual bool initialize();
virtual bool checkRemoteConnection();
virtual bool loadRemoteModel(const std::string& ir_xml, const std::string& ir_bin);
virtual void loadRemoteModel(const std::string& ir_xml, const std::string& ir_bin);
virtual void setRemoteEnabled(bool flag);

std::shared_ptr<NnapiModelInfo> getModelInfo() { return mModelInfo; }

Expand All @@ -98,6 +98,7 @@ class BasePreparedModel : public V1_3::IPreparedModel {
std::shared_ptr<IIENetwork> getPlugin() { return mPlugin; }

std::shared_ptr<ov::Model> modelPtr;
std::shared_ptr<DetectionClient> mDetectionClient;

protected:
virtual void deinitialize();
Expand Down
99 changes: 93 additions & 6 deletions DetectionClient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

std::string DetectionClient::prepare(bool& flag) {
RequestString request;
request.set_value("");
request.mutable_token()->set_data(mToken);
ReplyStatus reply;
ClientContext context;
time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(100);
time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(10000);
context.set_deadline(deadline);

Status status = stub_->prepare(&context, request, &reply);
Expand All @@ -21,9 +21,26 @@ std::string DetectionClient::prepare(bool& flag) {
}
}

std::string DetectionClient::release(bool& flag) {
RequestString request;
request.mutable_token()->set_data(mToken);
ReplyStatus reply;
ClientContext context;

Status status = stub_->release(&context, request, &reply);

if (status.ok()) {
flag = reply.status();
return (flag ? "status True" : "status False");
} else {
return std::string(status.error_message());
}
}

Status DetectionClient::sendFile(std::string fileName,
std::unique_ptr<ClientWriter<RequestDataChunks> >& writer) {
RequestDataChunks request;
request.mutable_token()->set_data(mToken);
uint32_t CHUNK_SIZE = 1024 * 1024;
std::ifstream fin(fileName, std::ifstream::binary);
std::vector<char> buffer(CHUNK_SIZE, 0);
Expand All @@ -47,6 +64,22 @@ Status DetectionClient::sendFile(std::string fileName,
return writer->Finish();
}

// Triggers model loading on the remote service for the model identified by
// mToken and reports whether the service loaded it successfully.
// The RPC is bounded by a 20 s deadline; the outcome is recorded in the
// `status` member (shared with other calls on this client).
// NOTE(review): fileName is not sent with the request — the server appears to
// resolve the model from the token; confirm whether the parameter is needed.
bool DetectionClient::isModelLoaded(std::string fileName) {
    RequestString request;
    request.mutable_token()->set_data(mToken);
    ReplyStatus reply;
    ClientContext context;
    context.set_deadline(std::chrono::system_clock::now() + std::chrono::milliseconds(20000));

    status = stub_->loadModel(&context, request, &reply);
    if (!status.ok()) {
        ALOGE("Model Load failure: %s", status.error_message().c_str());
        return false;
    }
    return reply.status();
}

std::string DetectionClient::sendIRs(bool& flag, const std::string& ir_xml, const std::string& ir_bin) {
ReplyStatus reply;
ClientContext context;
Expand All @@ -62,25 +95,75 @@ std::string DetectionClient::sendIRs(bool& flag, const std::string& ir_xml, cons
status = sendFile(ir_bin, writerBin);
if (status.ok()) {
flag = reply.status();
return (flag ? "status True" : "status False");
// If the model files were sent successfully, trigger model loading on the server.
if (flag && isModelLoaded(ir_xml) ) {
flag = true;
return ("status True");
} else {
flag = false;
ALOGE("Model Loading Failed!!!");
return ("status False");
}
} else {
return ("status False");
}
}
return std::string(status.error_message());
}

void DetectionClient::add_input_data(std::string label, const uint8_t* buffer, std::vector<size_t> shape, uint32_t size) {
void DetectionClient::add_input_data(std::string label, const uint8_t* buffer, std::vector<size_t> shape, uint32_t size, android::hardware::neuralnetworks::nnhal::OperandType operandType) {
const float* src;
size_t index;

DataTensor* input = request.add_data_tensors();
input->set_node_name(label);
switch(operandType) {
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_INT32: {
input->set_data_type(DataTensor::i32);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_FLOAT16: {
input->set_data_type(DataTensor::f16);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_FLOAT32: {
input->set_data_type(DataTensor::f32);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_BOOL8: {
input->set_data_type(DataTensor::boolean);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_ASYMM: {
input->set_data_type(DataTensor::u8);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_SYMM:
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL:
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_ASYMM_SIGNED: {
input->set_data_type(DataTensor::i8);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT16_SYMM: {
input->set_data_type(DataTensor::i16);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT16_ASYMM: {
input->set_data_type(DataTensor::u16);
break;
}
default: {
input->set_data_type(DataTensor::u8);
break;
}
}
for (index = 0; index < shape.size(); index++) {
input->add_tensor_shape(shape[index]);
}
input->set_data(buffer, size);
}

void DetectionClient::get_output_data(std::string label, uint8_t* buffer, std::vector<size_t> shape) {
void DetectionClient::get_output_data(std::string label, uint8_t* buffer, std::vector<size_t> shape, uint32_t expectedLength) {
std::string src;
size_t index;
size_t size = 1;
Expand All @@ -91,6 +174,9 @@ void DetectionClient::get_output_data(std::string label, uint8_t* buffer, std::v
for (index = 0; index < reply.data_tensors_size(); index++) {
if (label.compare(reply.data_tensors(index).node_name()) == 0) {
src = reply.data_tensors(index).data();
if(expectedLength != src.length()) {
ALOGE("Length Mismatch error: expected length %d , actual length %d", expectedLength, src.length());
}
memcpy(buffer, src.data(), src.length());
break;
}
Expand All @@ -104,9 +190,10 @@ void DetectionClient::clear_data() {

std::string DetectionClient::remote_infer() {
ClientContext context;
time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(20000);
time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(10000);
context.set_deadline(deadline);

request.mutable_token()->set_data(mToken);
status = stub_->getInferResult(&context, request, &reply);
if (status.ok()) {
if (reply.data_tensors_size() == 0) ALOGE("GRPC reply empty, ovms failure ?");
Expand Down
12 changes: 9 additions & 3 deletions DetectionClient.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <log/log.h>
#include <android-base/logging.h>
#include "nnhal_object_detection.grpc.pb.h"
#include "Driver.h"

using grpc::Channel;
using grpc::ClientContext;
Expand All @@ -22,19 +23,23 @@ using objectDetection::RequestDataTensors;
using objectDetection::RequestString;
using time_point = std::chrono::system_clock::time_point;

#define MODEL_DIR std::string("/data/vendor/neuralnetworks/")

class DetectionClient {
public:
DetectionClient(std::shared_ptr<Channel> channel) : stub_(Detection::NewStub(channel)){}
DetectionClient(std::shared_ptr<Channel> channel, uint32_t token) : stub_(Detection::NewStub(channel)), mToken(token) {}

std::string prepare(bool& flag);
std::string release(bool& flag);

Status sendFile(std::string fileName,
std::unique_ptr<ClientWriter<RequestDataChunks> >& writer);

std::string sendIRs(bool& flag, const std::string& ir_xml, const std::string& ir_bin);
bool isModelLoaded(std::string fileName);

void add_input_data(std::string label, const uint8_t* buffer, std::vector<size_t> shape, uint32_t size);
void get_output_data(std::string label, uint8_t* buffer, std::vector<size_t> shape);
void add_input_data(std::string label, const uint8_t* buffer, std::vector<size_t> shape, uint32_t size, android::hardware::neuralnetworks::nnhal::OperandType operandType);
void get_output_data(std::string label, uint8_t* buffer, std::vector<size_t> shape, uint32_t expectedLength);
void clear_data();
std::string remote_infer();
bool get_status();
Expand All @@ -44,6 +49,7 @@ class DetectionClient {
RequestDataTensors request;
ReplyDataTensors reply;
Status status;
uint32_t mToken;
};

#endif
1 change: 1 addition & 0 deletions Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ Return<V1_3::ErrorStatus> Driver::prepareModel_1_3(

// TODO: make asynchronous later
sp<BasePreparedModel> driverPreparedModel = ModelFactory(mDeviceType, model);
for (auto& opn : model.main.operations) dumpOperation(opn);
if (!driverPreparedModel->initialize()) {
ALOGI("Failed to initialize prepared model");
cb->notify_1_3(convertToV1_3(ErrorStatus::INVALID_ARGUMENT), nullptr);
Expand Down
52 changes: 52 additions & 0 deletions ModelManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,31 @@ void* NnapiModelInfo::getBlobFromMemoryPoolIn(const Request& request, uint32_t i
return (r.buffer + arg.location.offset);
}

// Resolves the buffer backing input `index` of a V1_3 request: updates the
// corresponding runtime operand (dimensions, buffer, length), reports the
// buffer length through rBufferLength, and returns a pointer into the mapped
// request pool at the argument's offset.
void* NnapiModelInfo::getBlobFromMemoryPoolIn(const V1_3::Request& request, uint32_t index,
                                              uint32_t& rBufferLength) {
    const V1_0::RequestArgument& inputArg = request.inputs[index];
    RunTimeOperandInfo& operand = mOperands[mModel.main.inputIndexes[index]];
    const auto pool = inputArg.location.poolIndex;
    nnAssert(pool < mRequestPoolInfos.size());
    auto& poolInfo = mRequestPoolInfos[pool];

    // A request argument may refine dimensions the model left unspecified.
    // The caller is responsible for validating that only unspecified
    // dimensions are overridden (see validateRequest() in SampleDriver.cpp).
    if (!inputArg.dimensions.empty()) {
        operand.dimensions = inputArg.dimensions;
    }

    auto* blob = poolInfo.buffer + inputArg.location.offset;
    operand.buffer = blob;
    operand.length = inputArg.location.length;
    rBufferLength = operand.length;
    ALOGV("%s Operand length:%d pointer:%p offset:%d pool index: %d", __func__, operand.length,
          blob, inputArg.location.offset, pool);

    return blob;
}

void* NnapiModelInfo::getBlobFromMemoryPoolOut(const Request& request, uint32_t index,
uint32_t& rBufferLength) {
RunTimeOperandInfo& operand = mOperands[mModel.main.outputIndexes[index]];
Expand Down Expand Up @@ -260,6 +285,33 @@ void* NnapiModelInfo::getBlobFromMemoryPoolOut(const Request& request, uint32_t
return (r.buffer + arg.location.offset);
}

// Resolves the buffer backing output `index` of a V1_3 request: updates the
// corresponding runtime operand (dimensions, buffer, length), reports the
// buffer length through rBufferLength, and returns a pointer into the mapped
// request pool at the argument's offset.
void* NnapiModelInfo::getBlobFromMemoryPoolOut(const V1_3::Request& request, uint32_t index,
                                               uint32_t& rBufferLength) {
    const V1_0::RequestArgument& outputArg = request.outputs[index];
    RunTimeOperandInfo& operand = mOperands[mModel.main.outputIndexes[index]];
    const auto pool = outputArg.location.poolIndex;
    nnAssert(pool < mRequestPoolInfos.size());
    auto& poolInfo = mRequestPoolInfos[pool];

    ALOGV("%s lifetime:%d location offset:%d length:%d pool index:%d", __func__, operand.lifetime,
          outputArg.location.offset, outputArg.location.length, pool);

    // A request argument may refine dimensions the model left unspecified.
    // The caller is responsible for validating that only unspecified
    // dimensions are overridden (see validateRequest() in SampleDriver.cpp).
    if (!outputArg.dimensions.empty()) {
        operand.dimensions = outputArg.dimensions;
    }

    auto* blob = poolInfo.buffer + outputArg.location.offset;
    operand.buffer = blob;
    operand.length = outputArg.location.length;
    rBufferLength = operand.length;
    ALOGV("%s Operand length:%d pointer:%p", __func__, operand.length, blob);
    return blob;
}

bool NnapiModelInfo::isOmittedInput(int operationIndex, uint32_t index) {
uint32_t inputIndex = mModel.main.operations[operationIndex].inputs[index];
const auto op = mModel.main.operands[inputIndex];
Expand Down
6 changes: 4 additions & 2 deletions ModelManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@ class NnapiModelInfo {
const auto value = GetConstOperand<T>(inputIndex);
ALOGV("Operation input index: %d, operand index: %d", index, inputIndex);
ALOGV("Operation: %s", toString(mModel.main.operations[operationIndex]).c_str());
printHelper<T>::print(value, toString(operand).c_str());

return value;
}
Expand Down Expand Up @@ -167,7 +166,9 @@ class NnapiModelInfo {
T GetConstFromBuffer(const uint8_t* buf, uint32_t len);

void* getBlobFromMemoryPoolIn(const Request& request, uint32_t index, uint32_t& rBufferLength);
void* getBlobFromMemoryPoolIn(const V1_3::Request& request, uint32_t index, uint32_t& rBufferLength);
void* getBlobFromMemoryPoolOut(const Request& request, uint32_t index, uint32_t& rBufferLength);
void* getBlobFromMemoryPoolOut(const V1_3::Request& request, uint32_t index, uint32_t& rBufferLength);

Model getModel() { return mModel; }

Expand All @@ -185,10 +186,11 @@ class NnapiModelInfo {

std::vector<V1_2::OutputShape> getOutputShapes() { return mOutputShapes; }

void unmapRuntimeMemPools() {
// Unmaps every request memory pool that was mapped for the current request.
// Returns true unconditionally.
bool unmapRuntimeMemPools() {
    for (auto& pool : mRequestPoolInfos) {
        pool.unmap_mem();
    }
    return true;
}

bool isOmittedInput(int operationIndex, uint32_t index);
Expand Down
1 change: 1 addition & 0 deletions ngraph_creator/Android.bp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ cc_library_static {
"operations/src/Greater.cpp",
"operations/src/GreaterEqual.cpp",
"operations/src/GroupedConv2d.cpp",
"operations/src/HardSwish.cpp",
"operations/src/InstanceNormalization.cpp",
"operations/src/L2Normalization.cpp",
"operations/src/L2Pooling2D.cpp",
Expand Down
2 changes: 2 additions & 0 deletions ngraph_creator/include/OperationsFactory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <Greater.hpp>
#include <GreaterEqual.hpp>
#include <GroupedConv2d.hpp>
#include <HardSwish.hpp>
#include <InstanceNormalization.hpp>
#include <L2Normalization.hpp>
#include <L2Pooling2D.hpp>
Expand Down Expand Up @@ -89,6 +90,7 @@ namespace nnhal {
class OperationsFactory {
private:
std::shared_ptr<NgraphNodes> mNgraphNodes;
GraphMetadata mGraphMetadata;

public:
OperationsFactory(IntelDeviceType deviceType, std::shared_ptr<NnapiModelInfo> modelInfo,
Expand Down
2 changes: 1 addition & 1 deletion ngraph_creator/operations/include/Abs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace nnhal {

class Abs : public OperationsBase {
public:
Abs(int operationIndex);
Abs(int operationIndex, GraphMetadata graphMetadata);
std::shared_ptr<ov::Node> createNode() override;
};

Expand Down
2 changes: 1 addition & 1 deletion ngraph_creator/operations/include/Add.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace nnhal {

class Add : public OperationsBase {
public:
Add(int operationIndex);
Add(int operationIndex, GraphMetadata graphMetadata);
bool validate() override;
std::shared_ptr<ov::Node> createNode() override;
std::shared_ptr<ov::Node> createNodeForPlugin() override;
Expand Down
2 changes: 1 addition & 1 deletion ngraph_creator/operations/include/Argmax.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace nnhal {

class Argmax : public OperationsBase {
public:
Argmax(int operationIndex);
Argmax(int operationIndex, GraphMetadata graphMetadata);
std::shared_ptr<ov::Node> createNode() override;
};

Expand Down
Loading