Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ats m ivi #25

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
326 changes: 173 additions & 153 deletions BasePreparedModel.cpp

Large diffs are not rendered by default.

11 changes: 6 additions & 5 deletions BasePreparedModel.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,13 @@ namespace android::hardware::neuralnetworks::nnhal {
template <class T>
using vec = std::vector<T>;
typedef uint8_t* memory;
extern bool mRemoteCheck;
extern std::shared_ptr<DetectionClient> mDetectionClient;
class BasePreparedModel : public V1_3::IPreparedModel {
public:
bool mRemoteCheck = false;
BasePreparedModel(const IntelDeviceType device, const Model& model) : mTargetDevice(device) {
mModelInfo = std::make_shared<NnapiModelInfo>(model);
mXmlFile = std::string("/data/vendor/neuralnetworks/") + std::to_string(mFileId) + std::string(".xml");
mBinFile = std::string("/data/vendor/neuralnetworks/") + std::to_string(mFileId) + std::string(".bin");
mXmlFile = MODEL_DIR + std::to_string(mFileId) + std::string(".xml");
mBinFile = MODEL_DIR + std::to_string(mFileId) + std::string(".bin");
mFileId++;
}

Expand Down Expand Up @@ -89,7 +88,8 @@ class BasePreparedModel : public V1_3::IPreparedModel {

virtual bool initialize();
virtual bool checkRemoteConnection();
virtual bool loadRemoteModel(const std::string& ir_xml, const std::string& ir_bin);
virtual void loadRemoteModel(const std::string& ir_xml, const std::string& ir_bin);
virtual void setRemoteEnabled(bool flag);

std::shared_ptr<NnapiModelInfo> getModelInfo() { return mModelInfo; }

Expand All @@ -98,6 +98,7 @@ class BasePreparedModel : public V1_3::IPreparedModel {
std::shared_ptr<IIENetwork> getPlugin() { return mPlugin; }

std::shared_ptr<ov::Model> modelPtr;
std::shared_ptr<DetectionClient> mDetectionClient;

protected:
virtual void deinitialize();
Expand Down
99 changes: 93 additions & 6 deletions DetectionClient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

std::string DetectionClient::prepare(bool& flag) {
RequestString request;
request.set_value("");
request.mutable_token()->set_data(mToken);
ReplyStatus reply;
ClientContext context;
time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(100);
time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(10000);
context.set_deadline(deadline);

Status status = stub_->prepare(&context, request, &reply);
Expand All @@ -21,9 +21,26 @@ std::string DetectionClient::prepare(bool& flag) {
}
}

std::string DetectionClient::release(bool& flag) {
RequestString request;
request.mutable_token()->set_data(mToken);
ReplyStatus reply;
ClientContext context;

Status status = stub_->release(&context, request, &reply);

if (status.ok()) {
flag = reply.status();
return (flag ? "status True" : "status False");
} else {
return std::string(status.error_message());
}
}

Status DetectionClient::sendFile(std::string fileName,
std::unique_ptr<ClientWriter<RequestDataChunks> >& writer) {
RequestDataChunks request;
request.mutable_token()->set_data(mToken);
uint32_t CHUNK_SIZE = 1024 * 1024;
std::ifstream fin(fileName, std::ifstream::binary);
std::vector<char> buffer(CHUNK_SIZE, 0);
Expand All @@ -47,6 +64,22 @@ Status DetectionClient::sendFile(std::string fileName,
return writer->Finish();
}

// Triggers model loading on the remote service for the model identified by
// mToken and reports whether the service loaded it successfully.
// The RPC is bounded by a 20 s deadline; the outcome is recorded in the
// `status` member (shared with other calls on this client).
// NOTE(review): fileName is not sent with the request — the server appears to
// resolve the model from the token; confirm whether the parameter is needed.
bool DetectionClient::isModelLoaded(std::string fileName) {
    RequestString request;
    request.mutable_token()->set_data(mToken);
    ReplyStatus reply;
    ClientContext context;
    context.set_deadline(std::chrono::system_clock::now() + std::chrono::milliseconds(20000));

    status = stub_->loadModel(&context, request, &reply);
    if (!status.ok()) {
        ALOGE("Model Load failure: %s", status.error_message().c_str());
        return false;
    }
    return reply.status();
}

std::string DetectionClient::sendIRs(bool& flag, const std::string& ir_xml, const std::string& ir_bin) {
ReplyStatus reply;
ClientContext context;
Expand All @@ -62,25 +95,75 @@ std::string DetectionClient::sendIRs(bool& flag, const std::string& ir_xml, cons
status = sendFile(ir_bin, writerBin);
if (status.ok()) {
flag = reply.status();
return (flag ? "status True" : "status False");
// If the model files were sent successfully, trigger model loading on the server.
if (flag && isModelLoaded(ir_xml) ) {
flag = true;
return ("status True");
} else {
flag = false;
ALOGE("Model Loading Failed!!!");
return ("status False");
}
} else {
return ("status False");
}
}
return std::string(status.error_message());
}

void DetectionClient::add_input_data(std::string label, const uint8_t* buffer, std::vector<size_t> shape, uint32_t size) {
void DetectionClient::add_input_data(std::string label, const uint8_t* buffer, std::vector<size_t> shape, uint32_t size, android::hardware::neuralnetworks::nnhal::OperandType operandType) {
const float* src;
size_t index;

DataTensor* input = request.add_data_tensors();
input->set_node_name(label);
switch(operandType) {
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_INT32: {
input->set_data_type(DataTensor::i32);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_FLOAT16: {
input->set_data_type(DataTensor::f16);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_FLOAT32: {
input->set_data_type(DataTensor::f32);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_BOOL8: {
input->set_data_type(DataTensor::boolean);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_ASYMM: {
input->set_data_type(DataTensor::u8);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_SYMM:
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL:
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT8_ASYMM_SIGNED: {
input->set_data_type(DataTensor::i8);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT16_SYMM: {
input->set_data_type(DataTensor::i16);
break;
}
case android::hardware::neuralnetworks::nnhal::OperandType::TENSOR_QUANT16_ASYMM: {
input->set_data_type(DataTensor::u16);
break;
}
default: {
input->set_data_type(DataTensor::u8);
break;
}
}
for (index = 0; index < shape.size(); index++) {
input->add_tensor_shape(shape[index]);
}
input->set_data(buffer, size);
}

void DetectionClient::get_output_data(std::string label, uint8_t* buffer, std::vector<size_t> shape) {
void DetectionClient::get_output_data(std::string label, uint8_t* buffer, std::vector<size_t> shape, uint32_t expectedLength) {
std::string src;
size_t index;
size_t size = 1;
Expand All @@ -91,6 +174,9 @@ void DetectionClient::get_output_data(std::string label, uint8_t* buffer, std::v
for (index = 0; index < reply.data_tensors_size(); index++) {
if (label.compare(reply.data_tensors(index).node_name()) == 0) {
src = reply.data_tensors(index).data();
if(expectedLength != src.length()) {
ALOGE("Length Mismatch error: expected length %d , actual length %d", expectedLength, src.length());
}
memcpy(buffer, src.data(), src.length());
break;
}
Expand All @@ -104,9 +190,10 @@ void DetectionClient::clear_data() {

std::string DetectionClient::remote_infer() {
ClientContext context;
time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(20000);
time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(10000);
context.set_deadline(deadline);

request.mutable_token()->set_data(mToken);
status = stub_->getInferResult(&context, request, &reply);
if (status.ok()) {
if (reply.data_tensors_size() == 0) ALOGE("GRPC reply empty, ovms failure ?");
Expand Down
12 changes: 9 additions & 3 deletions DetectionClient.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <log/log.h>
#include <android-base/logging.h>
#include "nnhal_object_detection.grpc.pb.h"
#include "Driver.h"

using grpc::Channel;
using grpc::ClientContext;
Expand All @@ -22,19 +23,23 @@ using objectDetection::RequestDataTensors;
using objectDetection::RequestString;
using time_point = std::chrono::system_clock::time_point;

#define MODEL_DIR std::string("/data/vendor/neuralnetworks/")

class DetectionClient {
public:
DetectionClient(std::shared_ptr<Channel> channel) : stub_(Detection::NewStub(channel)){}
DetectionClient(std::shared_ptr<Channel> channel, uint32_t token) : stub_(Detection::NewStub(channel)), mToken(token) {}

std::string prepare(bool& flag);
std::string release(bool& flag);

Status sendFile(std::string fileName,
std::unique_ptr<ClientWriter<RequestDataChunks> >& writer);

std::string sendIRs(bool& flag, const std::string& ir_xml, const std::string& ir_bin);
bool isModelLoaded(std::string fileName);

void add_input_data(std::string label, const uint8_t* buffer, std::vector<size_t> shape, uint32_t size);
void get_output_data(std::string label, uint8_t* buffer, std::vector<size_t> shape);
void add_input_data(std::string label, const uint8_t* buffer, std::vector<size_t> shape, uint32_t size, android::hardware::neuralnetworks::nnhal::OperandType operandType);
void get_output_data(std::string label, uint8_t* buffer, std::vector<size_t> shape, uint32_t expectedLength);
void clear_data();
std::string remote_infer();
bool get_status();
Expand All @@ -44,6 +49,7 @@ class DetectionClient {
RequestDataTensors request;
ReplyDataTensors reply;
Status status;
uint32_t mToken;
};

#endif
1 change: 1 addition & 0 deletions Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ Return<V1_3::ErrorStatus> Driver::prepareModel_1_3(

// TODO: make asynchronous later
sp<BasePreparedModel> driverPreparedModel = ModelFactory(mDeviceType, model);
for (auto& opn : model.main.operations) dumpOperation(opn);
if (!driverPreparedModel->initialize()) {
ALOGI("Failed to initialize prepared model");
cb->notify_1_3(convertToV1_3(ErrorStatus::INVALID_ARGUMENT), nullptr);
Expand Down
52 changes: 52 additions & 0 deletions ModelManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,31 @@ void* NnapiModelInfo::getBlobFromMemoryPoolIn(const Request& request, uint32_t i
return (r.buffer + arg.location.offset);
}

// Resolves the buffer backing input `index` of a V1_3 request: updates the
// corresponding runtime operand (dimensions, buffer, length), reports the
// buffer length through rBufferLength, and returns a pointer into the mapped
// request pool at the argument's offset.
void* NnapiModelInfo::getBlobFromMemoryPoolIn(const V1_3::Request& request, uint32_t index,
                                              uint32_t& rBufferLength) {
    const V1_0::RequestArgument& inputArg = request.inputs[index];
    RunTimeOperandInfo& operand = mOperands[mModel.main.inputIndexes[index]];
    const auto pool = inputArg.location.poolIndex;
    nnAssert(pool < mRequestPoolInfos.size());
    auto& poolInfo = mRequestPoolInfos[pool];

    // A request argument may refine dimensions the model left unspecified.
    // The caller is responsible for validating that only unspecified
    // dimensions are overridden (see validateRequest() in SampleDriver.cpp).
    if (!inputArg.dimensions.empty()) {
        operand.dimensions = inputArg.dimensions;
    }

    auto* blob = poolInfo.buffer + inputArg.location.offset;
    operand.buffer = blob;
    operand.length = inputArg.location.length;
    rBufferLength = operand.length;
    ALOGV("%s Operand length:%d pointer:%p offset:%d pool index: %d", __func__, operand.length,
          blob, inputArg.location.offset, pool);

    return blob;
}

void* NnapiModelInfo::getBlobFromMemoryPoolOut(const Request& request, uint32_t index,
uint32_t& rBufferLength) {
RunTimeOperandInfo& operand = mOperands[mModel.main.outputIndexes[index]];
Expand Down Expand Up @@ -260,6 +285,33 @@ void* NnapiModelInfo::getBlobFromMemoryPoolOut(const Request& request, uint32_t
return (r.buffer + arg.location.offset);
}

// Resolves the buffer backing output `index` of a V1_3 request: updates the
// corresponding runtime operand (dimensions, buffer, length), reports the
// buffer length through rBufferLength, and returns a pointer into the mapped
// request pool at the argument's offset.
void* NnapiModelInfo::getBlobFromMemoryPoolOut(const V1_3::Request& request, uint32_t index,
                                               uint32_t& rBufferLength) {
    const V1_0::RequestArgument& outputArg = request.outputs[index];
    RunTimeOperandInfo& operand = mOperands[mModel.main.outputIndexes[index]];
    const auto pool = outputArg.location.poolIndex;
    nnAssert(pool < mRequestPoolInfos.size());
    auto& poolInfo = mRequestPoolInfos[pool];

    ALOGV("%s lifetime:%d location offset:%d length:%d pool index:%d", __func__, operand.lifetime,
          outputArg.location.offset, outputArg.location.length, pool);

    // A request argument may refine dimensions the model left unspecified.
    // The caller is responsible for validating that only unspecified
    // dimensions are overridden (see validateRequest() in SampleDriver.cpp).
    if (!outputArg.dimensions.empty()) {
        operand.dimensions = outputArg.dimensions;
    }

    auto* blob = poolInfo.buffer + outputArg.location.offset;
    operand.buffer = blob;
    operand.length = outputArg.location.length;
    rBufferLength = operand.length;
    ALOGV("%s Operand length:%d pointer:%p", __func__, operand.length, blob);
    return blob;
}

bool NnapiModelInfo::isOmittedInput(int operationIndex, uint32_t index) {
uint32_t inputIndex = mModel.main.operations[operationIndex].inputs[index];
const auto op = mModel.main.operands[inputIndex];
Expand Down
6 changes: 4 additions & 2 deletions ModelManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@ class NnapiModelInfo {
const auto value = GetConstOperand<T>(inputIndex);
ALOGV("Operation input index: %d, operand index: %d", index, inputIndex);
ALOGV("Operation: %s", toString(mModel.main.operations[operationIndex]).c_str());
printHelper<T>::print(value, toString(operand).c_str());

return value;
}
Expand Down Expand Up @@ -167,7 +166,9 @@ class NnapiModelInfo {
T GetConstFromBuffer(const uint8_t* buf, uint32_t len);

void* getBlobFromMemoryPoolIn(const Request& request, uint32_t index, uint32_t& rBufferLength);
void* getBlobFromMemoryPoolIn(const V1_3::Request& request, uint32_t index, uint32_t& rBufferLength);
void* getBlobFromMemoryPoolOut(const Request& request, uint32_t index, uint32_t& rBufferLength);
void* getBlobFromMemoryPoolOut(const V1_3::Request& request, uint32_t index, uint32_t& rBufferLength);

Model getModel() { return mModel; }

Expand All @@ -185,10 +186,11 @@ class NnapiModelInfo {

std::vector<V1_2::OutputShape> getOutputShapes() { return mOutputShapes; }

void unmapRuntimeMemPools() {
// Unmaps every request memory pool that was mapped for the current request.
// Returns true unconditionally.
bool unmapRuntimeMemPools() {
    for (auto& pool : mRequestPoolInfos) {
        pool.unmap_mem();
    }
    return true;
}

bool isOmittedInput(int operationIndex, uint32_t index);
Expand Down
1 change: 1 addition & 0 deletions ngraph_creator/Android.bp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ cc_library_static {
"operations/src/Greater.cpp",
"operations/src/GreaterEqual.cpp",
"operations/src/GroupedConv2d.cpp",
"operations/src/HardSwish.cpp",
"operations/src/InstanceNormalization.cpp",
"operations/src/L2Normalization.cpp",
"operations/src/L2Pooling2D.cpp",
Expand Down
2 changes: 2 additions & 0 deletions ngraph_creator/include/OperationsFactory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <Greater.hpp>
#include <GreaterEqual.hpp>
#include <GroupedConv2d.hpp>
#include <HardSwish.hpp>
#include <InstanceNormalization.hpp>
#include <L2Normalization.hpp>
#include <L2Pooling2D.hpp>
Expand Down Expand Up @@ -89,6 +90,7 @@ namespace nnhal {
class OperationsFactory {
private:
std::shared_ptr<NgraphNodes> mNgraphNodes;
GraphMetadata mGraphMetadata;

public:
OperationsFactory(IntelDeviceType deviceType, std::shared_ptr<NnapiModelInfo> modelInfo,
Expand Down
2 changes: 1 addition & 1 deletion ngraph_creator/operations/include/Abs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace nnhal {

class Abs : public OperationsBase {
public:
Abs(int operationIndex);
Abs(int operationIndex, GraphMetadata graphMetadata);
std::shared_ptr<ov::Node> createNode() override;
};

Expand Down
2 changes: 1 addition & 1 deletion ngraph_creator/operations/include/Add.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace nnhal {

class Add : public OperationsBase {
public:
Add(int operationIndex);
Add(int operationIndex, GraphMetadata graphMetadata);
bool validate() override;
std::shared_ptr<ov::Node> createNode() override;
std::shared_ptr<ov::Node> createNodeForPlugin() override;
Expand Down
2 changes: 1 addition & 1 deletion ngraph_creator/operations/include/Argmax.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace nnhal {

class Argmax : public OperationsBase {
public:
Argmax(int operationIndex);
Argmax(int operationIndex, GraphMetadata graphMetadata);
std::shared_ptr<ov::Node> createNode() override;
};

Expand Down
Loading