12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733 |
- /*************************************************************************
- * Copyright (C) [2020] by Cambricon, Inc. All rights reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- *************************************************************************/
- #ifndef INFER_SERVER_API_H_
- #define INFER_SERVER_API_H_
- #include <functional>
- #include <limits>
- #include <map>
- #include <memory>
- #include <mutex>
- #include <string>
- #include <utility>
- #include <vector>
- #include "buffer.h"
- #include "shape.h"
- #include "util/any.h"
- #include "util/base_object.h"
- #include "config.h"
- #define CNIS_GET_VERSION(major, minor, patch) (((major) << 20) | ((minor) << 10) | (patch))
- #define CNIS_VERSION CNIS_GET_VERSION(CNIS_VERSION_MAJOR, CNIS_VERSION_MINOR, CNIS_VERSION_PATCH)
- namespace infer_server {
/**
 * @brief Enumeration to specify data type of model input and output
 *
 * @note INVALID is used as a sentinel for an unrecognized/unset type.
 */
enum class DataType { UINT8, FLOAT32, FLOAT16, INT16, INT32, INVALID };
/**
 * @brief Enumeration to specify dim order of model input and output
 *
 * N: batch, C: channel, H: height, W: width, T: time step
 */
enum class DimOrder { NCHW, NHWC, HWCN, TNC, NTC };
/**
 * @brief Describe data layout on MLU or CPU
 *
 * Combines the element type and the dimension order of a tensor buffer.
 */
struct DataLayout {
  DataType dtype;  ///< element data type, @see DataType
  DimOrder order;  ///< dimension order, @see DimOrder
};
/**
 * @brief Get size in bytes of one element of the given type
 *
 * @param type Data type enumeration
 * @return size_t size of specified type in bytes
 */
size_t GetTypeSize(DataType type) noexcept;
/**
 * @brief An enum describes InferServer request return values.
 */
enum class Status {
  SUCCESS = 0,          ///< The operation was successful
  ERROR_READWRITE = 1,  ///< Read / Write file failed
  ERROR_MEMORY = 2,     ///< Memory error, such as out of memory, memcpy failed
  INVALID_PARAM = 3,    ///< Invalid parameters
  WRONG_TYPE = 4,       ///< Invalid data type in `any`
  ERROR_BACKEND = 5,    ///< Error occurred in processor
  NOT_IMPLEMENTED = 6,  ///< Function not implemented
  TIMEOUT = 7,          ///< Time expired
  STATUS_COUNT = 8,     ///< Number of status codes, keep as the last entry
};
/**
 * @brief An enum describes batch strategy
 */
enum class BatchStrategy {
  DYNAMIC = 0,         ///< Cross-request batch: data from different requests may share one batch
  STATIC = 1,          ///< In-request batch: one request is batched on its own
  SEQUENCE = 2,        ///< Sequence model, unsupported for now
  STRATEGY_COUNT = 3,  ///< Number of strategies, keep as the last entry
};
/**
 * @brief Convert BatchStrategy to string
 *
 * @param strategy batch strategy
 * @return std::string Stringified batch strategy
 */
std::string ToString(BatchStrategy strategy) noexcept;
- /**
- * @brief Put BatchStrategy into ostream
- *
- * @param os ostream
- * @param s BatchStrategy
- * @return std::ostream& ostream
- */
- inline std::ostream& operator<<(std::ostream& os, BatchStrategy s) { return os << ToString(s); }
- /**
- * @brief Get CNIS version string
- *
- * @return std::string version string
- */
- inline std::string Version() {
- // clang-format off
- return std::to_string(CNIS_VERSION_MAJOR) + "." +
- std::to_string(CNIS_VERSION_MINOR) + "." +
- std::to_string(CNIS_VERSION_PATCH);
- // clang-format on
- }
/**
 * @brief Set current device for this thread
 *
 * @param device_id device id
 *
 * @retval true success
 * @retval false set device failed
 */
bool SetCurrentDevice(int device_id) noexcept;
/**
 * @brief Check whether device is accessible
 *
 * @param device_id device id
 *
 * @retval true device is accessible
 * @retval false no such device
 */
bool CheckDevice(int device_id) noexcept;
/**
 * @brief Get total device count
 *
 * @return uint32_t number of devices on this machine
 */
uint32_t TotalDeviceCount() noexcept;
/**
 * @brief Model interface
 *
 * Read-only description of a loaded model: shapes, layouts, IO counts,
 * batch size and cache key. Implemented by the backend-specific model class.
 */
class ModelInfo {
 public:
  virtual ~ModelInfo() = default;
  // ----------- Observers -----------
  /**
   * @brief Get input shape
   *
   * @param index index of input
   * @return const Shape& shape of specified input
   */
  virtual const Shape& InputShape(int index) const noexcept = 0;
  /**
   * @brief Get output shape
   *
   * @param index index of output
   * @return const Shape& shape of specified output
   */
  virtual const Shape& OutputShape(int index) const noexcept = 0;
  /**
   * @brief Get input layout on MLU
   *
   * @param index index of input
   * @return const DataLayout& data layout of specified input
   */
  virtual const DataLayout& InputLayout(int index) const noexcept = 0;
  /**
   * @brief Get output layout on MLU
   *
   * @param index index of output
   * @return const DataLayout& data layout of specified output
   */
  virtual const DataLayout& OutputLayout(int index) const noexcept = 0;
  /**
   * @brief Get number of input
   *
   * @return uint32_t number of input
   */
  virtual uint32_t InputNum() const noexcept = 0;
  /**
   * @brief Get number of output
   *
   * @return uint32_t number of output
   */
  virtual uint32_t OutputNum() const noexcept = 0;
  /**
   * @brief Get model batch size
   *
   * @return uint32_t batch size
   */
  virtual uint32_t BatchSize() const noexcept = 0;
  /**
   * @brief Get model key (identifies the model in the model cache)
   *
   * @return std::string model key
   */
  virtual std::string GetKey() const noexcept = 0;
  // ----------- Observers End -----------
};  // class ModelInfo
- using ModelPtr = std::shared_ptr<ModelInfo>;
- class RequestControl;
- /**
- * @brief Inference data unit
- */
- struct InferData {
- /**
- * @brief Set any data into inference data
- *
- * @tparam T data type
- * @param v data value
- */
- template <typename T>
- void Set(T&& v) {
- data = std::forward<T>(v);
- }
- /**
- * @brief Get data by value
- *
- * @tparam T data type
- * @return std::remove_reference<T>::type a copy of data
- */
- template <typename T>
- typename std::remove_reference<T>::type Get() const {
- return any_cast<typename std::remove_reference<T>::type>(data);
- }
- /**
- * @brief Get data by lvalue reference
- *
- * @tparam T data type
- * @return std::add_lvalue_reference<T>::type lvalue reference to data
- */
- template <typename T>
- typename std::add_lvalue_reference<T>::type GetLref() & {
- return any_cast<typename std::add_lvalue_reference<T>::type>(data);
- }
- /**
- * @brief Get data by const lvalue reference
- *
- * @tparam T data type
- * @return std::add_lvalue_reference<typename std::add_const<T>::type>::type const lvalue reference to data
- */
- template <typename T>
- typename std::add_lvalue_reference<typename std::add_const<T>::type>::type GetLref() const& {
- return any_cast<typename std::add_lvalue_reference<typename std::add_const<T>::type>::type>(data);
- }
- /**
- * @brief Check if InferData has value
- *
- * @retval true InferData has value
- * @retval false InferData does not have value
- */
- bool HasValue() noexcept {
- return data.has_value();
- }
- /**
- * @brief Set user data for postprocess
- *
- * @tparam T data type
- * @param v data value
- */
- template <typename T>
- void SetUserData(T&& v) {
- user_data = std::forward<T>(v);
- }
- /**
- * @brief Get user data by value
- *
- * @note if T is lvalue reference, data is returned by lvalue reference.
- * if T is bare type, data is returned by value.
- * @tparam T data type
- * @return data
- */
- template <typename T>
- T GetUserData() const {
- return any_cast<T>(user_data);
- }
- /// stored data
- any data;
- /// user data passed to postprocessor
- any user_data;
- /// private member
- RequestControl* ctrl{nullptr};
- /// private member
- uint32_t index{0};
- };
- using InferDataPtr = std::shared_ptr<InferData>;
- using BatchData = std::vector<InferDataPtr>;
- /**
- * @brief Data package, used in request and response
- */
- struct Package {
- /// a batch of data
- BatchData data;
- /// private member, intermediate storage
- InferDataPtr predict_io{nullptr};
- /// tag of this package (such as stream_id, client ip, etc.)
- std::string tag;
- /// perf statistics of one request
- std::map<std::string, float> perf;
- /// private member
- int64_t priority;
- static std::shared_ptr<Package> Create(uint32_t data_num, const std::string& tag = "") noexcept {
- auto ret = std::make_shared<Package>();
- ret->data.reserve(data_num);
- for (uint32_t idx = 0; idx < data_num; ++idx) {
- ret->data.emplace_back(new InferData);
- }
- ret->tag = tag;
- return ret;
- }
- };
- using PackagePtr = std::shared_ptr<Package>;
/**
 * @brief Processor interface
 *
 * Base class for a pipeline stage (e.g. preprocess / predict / postprocess).
 * Concrete processors implement Init/Process/Fork; the framework invokes
 * Process under the per-processor lock via TaskNode.
 */
class Processor : public BaseObject {
 public:
  /**
   * @brief Construct a new Processor object
   *
   * @param type_name type name of derived processor
   */
  explicit Processor(const std::string& type_name) noexcept : type_name_(type_name) {}
  /**
   * @brief Get type name of processor
   *
   * @return const std::string& type name
   */
  const std::string& TypeName() const noexcept { return type_name_; }
  /**
   * @brief Destroy the Processor object
   */
  virtual ~Processor() = default;
  /**
   * @brief Initialize processor
   *
   * @retval Status::SUCCESS Init succeeded
   * @retval other Init failed
   */
  virtual Status Init() noexcept = 0;
  /**
   * @brief Process data in package
   *
   * @param data Processed data
   * @retval Status::SUCCESS Process succeeded
   * @retval other Process failed
   */
  virtual Status Process(PackagePtr data) noexcept = 0;
  /**
   * @brief Fork an initialized processor which has the same params as this
   *
   * @return std::shared_ptr<Processor> A new processor
   */
  virtual std::shared_ptr<Processor> Fork() = 0;

 private:
  Processor() = delete;
  // TaskNode serializes Process() calls through Lock()
  friend class TaskNode;
  std::unique_lock<std::mutex> Lock() noexcept { return std::unique_lock<std::mutex>(process_lock_); }
  std::string type_name_;
  std::mutex process_lock_;
};  // class Processor
- /**
- * @brief A convenient CRTP template provided `Fork` and `Create` function
- *
- * @tparam T Type of derived class
- */
- template <typename T>
- class ProcessorForkable : public Processor {
- public:
- /**
- * @brief Construct a new Processor Forkable object
- *
- * @param type_name type name of derived processor
- */
- explicit ProcessorForkable(const std::string& type_name) noexcept : Processor(type_name) {}
- /**
- * @brief Destroy the Processor Forkable object
- */
- virtual ~ProcessorForkable() = default;
- /**
- * @brief Fork an initialized processor which have the same params as this
- *
- * @return std::shared_ptr<Processor> A new processor
- */
- std::shared_ptr<Processor> Fork() noexcept(std::is_nothrow_default_constructible<T>::value) final {
- auto p = std::make_shared<T>();
- p->CopyParamsFrom(*this);
- if (p->Init() != Status::SUCCESS) return nullptr;
- return p;
- }
- /**
- * @brief Create a processor
- *
- * @return std::shared_ptr<T> A new processor
- */
- static std::shared_ptr<T> Create() noexcept(std::is_nothrow_default_constructible<T>::value) {
- return std::make_shared<T>();
- }
- };
/**
 * @brief Base class of response observer, only used for async Session
 *
 * Implement Response to receive results pushed by the server.
 */
class Observer {
 public:
  /**
   * @brief Notify the observer of one response
   *
   * @param status Request status code
   * @param data Response data
   * @param user_data User data passed along with the request
   */
  virtual void Response(Status status, PackagePtr data, any user_data) noexcept = 0;
  /**
   * @brief Destroy the Observer object
   */
  virtual ~Observer() = default;
};
/**
 * @brief A struct to describe execution graph
 *
 * Aggregates everything needed to create a Session: model, batch strategy,
 * host IO layouts, pre/post processors and scheduling knobs.
 */
struct SessionDesc {
  /// session name, distinct session in log
  std::string name{};
  /// model pointer
  ModelPtr model{nullptr};
  /// batch strategy
  BatchStrategy strategy{BatchStrategy::DYNAMIC};
  /**
   * @brief host input data layout, work when input data is on cpu
   *
   * @note built-in processor will transform data from host input layout into MLU input layout
   * ( @see ModelInfo::InputLayout(int index) ) automatically before infer
   */
  DataLayout host_input_layout{DataType::UINT8, DimOrder::NHWC};
  /**
   * @brief host output data layout
   *
   * @note built-in processor will transform from MLU output layout ( @see ModelInfo::OutputLayout(int index) )
   * into host output layout automatically after infer
   */
  DataLayout host_output_layout{DataType::FLOAT32, DimOrder::NHWC};
  /// preprocessor
  std::shared_ptr<Processor> preproc{nullptr};
  /// postprocessor
  std::shared_ptr<Processor> postproc{nullptr};
  /// timeout in milliseconds, only work for BatchStrategy::DYNAMIC
  uint32_t batch_timeout{100};
  /// Session request priority
  int priority{0};
  /**
   * @brief engine number
   *
   * @note multi engine can boost process, but will take more MLU resources
   */
  uint32_t engine_num{1};
  /// whether print performance
  bool show_perf{true};
};
/**
 * @brief Latency statistics
 *
 * Accumulated per processing stage; average latency is total / unit_cnt.
 */
struct LatencyStatistic {
  /// Total processed unit count
  uint32_t unit_cnt{0};
  /// Total recorded value
  double total{0};
  /// Maximum value of one unit
  float max{0};
  /// Minimum value of one unit (starts at float max so the first sample wins)
  float min{std::numeric_limits<float>::max()};
};
/**
 * @brief Throughput statistics
 *
 * @note the identifier spelling "Throughout" is kept for API compatibility
 */
struct ThroughoutStatistic {
  /// total request count
  uint32_t request_cnt{0};
  /// total unit cnt
  uint32_t unit_cnt{0};
  /// request per second
  float rps{0};
  /// unit per second
  float ups{0};
  /// real time rps
  float rps_rt{0};
  /// real time ups
  float ups_rt{0};
};
- /// A structure describes linked session of server
- class Session;
- /// pointer to Session
- using Session_t = Session*;
- class InferServerPrivate;
- /**
- * @brief Inference server api class
- */
- class InferServer {
- public:
- /**
- * @brief Construct a new Infer Server object
- *
- * @param device_id Specified MLU device ID
- */
- explicit InferServer(int device_id) noexcept;
- /* ------------------------- Request API -------------------------- */
- /**
- * @brief Create a Session
- *
- * @param desc Session description
- * @param observer Response observer
- * @return Session_t a Session
- */
- Session_t CreateSession(SessionDesc desc, std::shared_ptr<Observer> observer) noexcept;
- /**
- * @brief Create a synchronous Session
- *
- * @param desc Session description
- * @return Session_t a Session
- */
- Session_t CreateSyncSession(SessionDesc desc) noexcept { return CreateSession(desc, nullptr); }
- /**
- * @brief Destroy session
- *
- * @param session a Session
- * @retval true Destroy succeeded
- * @retval false session does not belong to this server
- */
- bool DestroySession(Session_t session) noexcept;
- /**
- * @brief send a inference request
- *
- * @warning async api, can be invoked with async Session only.
- *
- * @param session link handle
- * @param input input package
- * @param user_data user data
- * @param timeout timeout threshold (milliseconds), -1 for endless
- */
- bool Request(Session_t session, PackagePtr input, any user_data, int timeout = -1) noexcept;
- /**
- * @brief send a inference request and wait for response
- *
- * @warning synchronous api, can be invoked with synchronous Session only.
- *
- * @param session session
- * @param input input package
- * @param status execute status
- * @param response output result
- * @param timeout timeout threshold (milliseconds), -1 for endless
- */
- bool RequestSync(Session_t session, PackagePtr input, Status* status, PackagePtr response, int timeout = -1) noexcept;
- /**
- * @brief Wait task with specified tag done, @see Package::tag
- *
- * @note Usually used at EOS
- *
- * @param session a Session
- * @param tag specified tag
- */
- void WaitTaskDone(Session_t session, const std::string& tag) noexcept;
- /**
- * @brief Discard task with specified tag done, @see Package::tag
- *
- * @note Usually used when you need to stop the process as soon as possible
- * @param session a Session
- * @param tag specified tag
- */
- void DiscardTask(Session_t session, const std::string& tag) noexcept;
- /**
- * @brief Get model from session
- *
- * @param session a Session
- * @return ModelPtr A model
- */
- ModelPtr GetModel(Session_t session) noexcept;
- /* --------------------- Model API ----------------------------- */
- /**
- * @brief Set directory to save downloaded model file
- *
- * @param model_dir model directory
- * @retval true Succeeded
- * @retval false Model not exist
- */
- static bool SetModelDir(const std::string& model_dir) noexcept;
- /**
- * @brief Load model from uri, model won't be loaded again if it is already in cache
- *
- * @note support download model from remote by HTTP, HTTPS, FTP, while compiled with flag `WITH_CURL`,
- * use uri such as `../../model_file`, or "https://someweb/model_file"
- * @param pattern1 offline model uri
- * @param pattern2 extracted function name, work only if backend is cnrt
- * @return ModelPtr A model
- */
- static ModelPtr LoadModel(const std::string& pattern1, const std::string& pattern2 = "subnet0") noexcept;
- #ifdef CNIS_USE_MAGICMIND
- /**
- * @brief Load model from memory, model won't be loaded again if it is already in cache
- *
- * @param ptr serialized model data in memory
- * @param size size of model data in memory
- * @return ModelPtr A model
- */
- static ModelPtr LoadModel(void* ptr, size_t size) noexcept;
- #else
- /**
- * @brief Load model from memory, model won't be loaded again if it is already in cache
- *
- * @param ptr serialized model data in memory
- * @param func_name name of function to be extracted
- * @return ModelPtr A model
- */
- static ModelPtr LoadModel(void* ptr, const std::string& func_name = "subnet0") noexcept;
- #endif
- /**
- * @brief Remove model from cache, model won't be destroyed if still in use
- *
- * @param model a model
- * @return true Succeed
- * @return false Model is not in cache
- */
- static bool UnloadModel(ModelPtr model) noexcept;
- /**
- * @brief Clear all the models in cache, model won't be destroyed if still in use
- */
- static void ClearModelCache() noexcept;
- /* ----------------------- Perf API ---------------------------- */
- /**
- * @brief Get the latency statistics
- *
- * @param session a session
- * @return std::map<std::string, PerfStatistic> latency statistics
- */
- std::map<std::string, LatencyStatistic> GetLatency(Session_t session) const noexcept;
- /**
- * @brief Get the performance statistics
- *
- * @param session a session
- * @return ThroughoutStatistic throughout statistic
- */
- ThroughoutStatistic GetThroughout(Session_t session) const noexcept;
- /**
- * @brief Get the throughout statistics of specified tag
- *
- * @param session a session
- * @param tag tag
- * @return ThroughoutStatistic throughout statistic
- */
- ThroughoutStatistic GetThroughout(Session_t session, const std::string& tag) const noexcept;
- private:
- InferServer() = delete;
- InferServerPrivate* priv_;
- }; // class InferServer
- } // namespace infer_server
- #endif // INFER_SERVER_API_H_
|