12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733 |
- #ifndef INFER_SERVER_API_H_
- #define INFER_SERVER_API_H_
- #include <functional>
- #include <limits>
- #include <map>
- #include <memory>
- #include <mutex>
- #include <string>
- #include <utility>
- #include <vector>
- #include "buffer.h"
- #include "shape.h"
- #include "util/any.h"
- #include "util/base_object.h"
- #include "config.h"
/// Packs a three-part version number into a single integer:
/// `major` is shifted left by 20 bits, `minor` by 10 bits, `patch` occupies the low bits.
/// The components are not masked, so a minor/patch value of 1024 or more spills into the next field.
#define CNIS_GET_VERSION(major, minor, patch) ((patch) | ((minor) << 10) | ((major) << 20))
/// Packed version of this library, built from the CNIS_VERSION_MAJOR/MINOR/PATCH macros
/// (not defined in this file; presumably provided by "config.h").
#define CNIS_VERSION CNIS_GET_VERSION(CNIS_VERSION_MAJOR, CNIS_VERSION_MINOR, CNIS_VERSION_PATCH)
- namespace infer_server {
/// Element type tag for tensor data.
enum class DataType {
  UINT8,
  FLOAT32,
  FLOAT16,
  INT16,
  INT32,
  INVALID  ///< last enumerator; presumably marks an unknown/unsupported type
};

/// Dimension ordering tag.
/// NOTE(review): the letters presumably follow the usual convention
/// (N = batch, C = channel, H = height, W = width, T = sequence step) -- confirm.
enum class DimOrder {
  NCHW,
  NHWC,
  HWCN,
  TNC,
  NTC
};

/// Pairs an element type with a dimension order.
/// Plain aggregate with no default member initializers, so it can be brace-initialized
/// as `DataLayout{dtype, order}`; a default-constructed instance leaves both fields unset.
struct DataLayout {
  DataType dtype;
  DimOrder order;
};

/// Returns the size associated with `type`.
/// NOTE(review): presumably the element size in bytes; the definition is not in this header.
size_t GetTypeSize(DataType type) noexcept;
/// Result code used throughout the API (returned by Processor::Init/Process and
/// delivered through Observer::Response / InferServer::RequestSync).
/// Enumerators are numbered consecutively starting from 0.
enum class Status {
  SUCCESS = 0,      ///< 0
  ERROR_READWRITE,  ///< 1
  ERROR_MEMORY,     ///< 2
  INVALID_PARAM,    ///< 3
  WRONG_TYPE,       ///< 4
  ERROR_BACKEND,    ///< 5
  NOT_IMPLEMENTED,  ///< 6
  TIMEOUT,          ///< 7
  STATUS_COUNT,     ///< 8 -- number of real status codes above, not a status itself
};
/// Batching policy selectable per session (see SessionDesc::strategy).
/// Enumerators are numbered consecutively starting from 0.
enum class BatchStrategy {
  DYNAMIC = 0,     ///< 0
  STATIC,          ///< 1
  SEQUENCE,        ///< 2
  STRATEGY_COUNT,  ///< 3 -- number of strategies above, not a strategy itself
};

/// Returns a textual name for `strategy` (defined outside this header).
std::string ToString(BatchStrategy strategy) noexcept;

/// Streams the textual name produced by ToString() and returns the stream for chaining.
inline std::ostream& operator<<(std::ostream& os, BatchStrategy strategy) {
  os << ToString(strategy);
  return os;
}
- inline std::string Version() {
-
- return std::to_string(CNIS_VERSION_MAJOR) + "." +
- std::to_string(CNIS_VERSION_MINOR) + "." +
- std::to_string(CNIS_VERSION_PATCH);
-
- }
/// Selects the device identified by `device_id` as the current one.
/// NOTE(review): presumably returns true on success and binds the device for the calling
/// thread -- the definition is not in this header, confirm.
bool SetCurrentDevice(int device_id) noexcept;

/// Checks the device identified by `device_id`.
/// NOTE(review): presumably returns true when the id refers to a usable device -- confirm.
bool CheckDevice(int device_id) noexcept;

/// Returns the total number of devices.
uint32_t TotalDeviceCount() noexcept;
- class ModelInfo {
- public:
- virtual ~ModelInfo() = default;
-
-
- virtual const Shape& InputShape(int index) const noexcept = 0;
-
- virtual const Shape& OutputShape(int index) const noexcept = 0;
-
- virtual const DataLayout& InputLayout(int index) const noexcept = 0;
-
- virtual const DataLayout& OutputLayout(int index) const noexcept = 0;
-
- virtual uint32_t InputNum() const noexcept = 0;
-
- virtual uint32_t OutputNum() const noexcept = 0;
-
- virtual uint32_t BatchSize() const noexcept = 0;
-
- virtual std::string GetKey() const noexcept = 0;
-
- };
- using ModelPtr = std::shared_ptr<ModelInfo>;
- class RequestControl;
- struct InferData {
-
- template <typename T>
- void Set(T&& v) {
- data = std::forward<T>(v);
- }
-
- template <typename T>
- typename std::remove_reference<T>::type Get() const {
- return any_cast<typename std::remove_reference<T>::type>(data);
- }
-
- template <typename T>
- typename std::add_lvalue_reference<T>::type GetLref() & {
- return any_cast<typename std::add_lvalue_reference<T>::type>(data);
- }
-
- template <typename T>
- typename std::add_lvalue_reference<typename std::add_const<T>::type>::type GetLref() const& {
- return any_cast<typename std::add_lvalue_reference<typename std::add_const<T>::type>::type>(data);
- }
-
- bool HasValue() noexcept {
- return data.has_value();
- }
-
- template <typename T>
- void SetUserData(T&& v) {
- user_data = std::forward<T>(v);
- }
-
- template <typename T>
- T GetUserData() const {
- return any_cast<T>(user_data);
- }
-
- any data;
-
- any user_data;
-
- RequestControl* ctrl{nullptr};
-
- uint32_t index{0};
- };
- using InferDataPtr = std::shared_ptr<InferData>;
- using BatchData = std::vector<InferDataPtr>;
- struct Package {
-
- BatchData data;
-
- InferDataPtr predict_io{nullptr};
-
- std::string tag;
-
- std::map<std::string, float> perf;
-
- int64_t priority;
- static std::shared_ptr<Package> Create(uint32_t data_num, const std::string& tag = "") noexcept {
- auto ret = std::make_shared<Package>();
- ret->data.reserve(data_num);
- for (uint32_t idx = 0; idx < data_num; ++idx) {
- ret->data.emplace_back(new InferData);
- }
- ret->tag = tag;
- return ret;
- }
- };
- using PackagePtr = std::shared_ptr<Package>;
- class Processor : public BaseObject {
- public:
-
- explicit Processor(const std::string& type_name) noexcept : type_name_(type_name) {}
-
- const std::string& TypeName() const noexcept { return type_name_; }
-
- virtual ~Processor() = default;
-
- virtual Status Init() noexcept = 0;
-
- virtual Status Process(PackagePtr data) noexcept = 0;
-
- virtual std::shared_ptr<Processor> Fork() = 0;
- private:
- Processor() = delete;
- friend class TaskNode;
- std::unique_lock<std::mutex> Lock() noexcept { return std::unique_lock<std::mutex>(process_lock_); }
- std::string type_name_;
- std::mutex process_lock_;
- };
- template <typename T>
- class ProcessorForkable : public Processor {
- public:
-
- explicit ProcessorForkable(const std::string& type_name) noexcept : Processor(type_name) {}
-
- virtual ~ProcessorForkable() = default;
-
- std::shared_ptr<Processor> Fork() noexcept(std::is_nothrow_default_constructible<T>::value) final {
- auto p = std::make_shared<T>();
- p->CopyParamsFrom(*this);
- if (p->Init() != Status::SUCCESS) return nullptr;
- return p;
- }
-
- static std::shared_ptr<T> Create() noexcept(std::is_nothrow_default_constructible<T>::value) {
- return std::make_shared<T>();
- }
- };
- class Observer {
- public:
-
- virtual void Response(Status status, PackagePtr data, any user_data) noexcept = 0;
-
- virtual ~Observer() = default;
- };
- struct SessionDesc {
-
- std::string name{};
-
- ModelPtr model{nullptr};
-
- BatchStrategy strategy{BatchStrategy::DYNAMIC};
-
- DataLayout host_input_layout{DataType::UINT8, DimOrder::NHWC};
-
- DataLayout host_output_layout{DataType::FLOAT32, DimOrder::NHWC};
-
- std::shared_ptr<Processor> preproc{nullptr};
-
- std::shared_ptr<Processor> postproc{nullptr};
-
- uint32_t batch_timeout{100};
-
- int priority{0};
-
- uint32_t engine_num{1};
-
- bool show_perf{true};
- };
/**
 * @brief Accumulated latency figures (returned per name by InferServer::GetLatency).
 *
 * A fresh instance is in the "no samples yet" state: count and total are zero, `max` is 0
 * and `min` is the largest finite float, so the first recorded sample replaces both.
 * NOTE(review): time unit is presumably milliseconds -- confirm.
 */
struct LatencyStatistic {
  /// Number of units counted so far.
  uint32_t unit_cnt{0};

  /// Sum of all recorded latencies.
  double total{0.0};

  /// Largest recorded latency.
  float max{0.0f};

  /// Smallest recorded latency; starts at the largest finite float.
  float min{std::numeric_limits<float>::max()};
};
/**
 * @brief Throughput counters and rates (returned by InferServer::GetThroughout).
 *
 * All fields start at zero.
 * NOTE(review): "rps"/"ups" presumably mean requests/units per second and the "_rt"
 * variants the real-time (recent-window) rates -- confirm against the implementation.
 */
struct ThroughoutStatistic {
  /// Number of requests counted.
  uint32_t request_cnt{0};

  /// Number of units counted.
  uint32_t unit_cnt{0};

  /// Request rate.
  float rps{0.0f};

  /// Unit rate.
  float ups{0.0f};

  /// Request rate, "_rt" variant.
  float rps_rt{0.0f};

  /// Unit rate, "_rt" variant.
  float ups_rt{0.0f};
};
- class Session;
- using Session_t = Session*;
- class InferServerPrivate;
- class InferServer {
- public:
-
- explicit InferServer(int device_id) noexcept;
-
-
- Session_t CreateSession(SessionDesc desc, std::shared_ptr<Observer> observer) noexcept;
-
- Session_t CreateSyncSession(SessionDesc desc) noexcept { return CreateSession(desc, nullptr); }
-
- bool DestroySession(Session_t session) noexcept;
-
- bool Request(Session_t session, PackagePtr input, any user_data, int timeout = -1) noexcept;
-
- bool RequestSync(Session_t session, PackagePtr input, Status* status, PackagePtr response, int timeout = -1) noexcept;
-
- void WaitTaskDone(Session_t session, const std::string& tag) noexcept;
-
- void DiscardTask(Session_t session, const std::string& tag) noexcept;
-
- ModelPtr GetModel(Session_t session) noexcept;
-
-
- static bool SetModelDir(const std::string& model_dir) noexcept;
-
- static ModelPtr LoadModel(const std::string& pattern1, const std::string& pattern2 = "subnet0") noexcept;
- #ifdef CNIS_USE_MAGICMIND
-
- static ModelPtr LoadModel(void* ptr, size_t size) noexcept;
- #else
-
- static ModelPtr LoadModel(void* ptr, const std::string& func_name = "subnet0") noexcept;
- #endif
-
- static bool UnloadModel(ModelPtr model) noexcept;
-
- static void ClearModelCache() noexcept;
-
-
- std::map<std::string, LatencyStatistic> GetLatency(Session_t session) const noexcept;
-
- ThroughoutStatistic GetThroughout(Session_t session) const noexcept;
-
- ThroughoutStatistic GetThroughout(Session_t session, const std::string& tag) const noexcept;
- private:
- InferServer() = delete;
- InferServerPrivate* priv_;
- };
- }
- #endif
|