furiosa.server package

Submodules

furiosa.server.errors module

exception furiosa.server.errors.ModelNotFound(name: str, version: Optional[str] = None)

Bases: furiosa.server.errors.ModelServerError

exception furiosa.server.errors.ModelServerError(msg)

Bases: Exception

furiosa.server.model module

Model class for prediction/explanation.

class furiosa.server.model.AsyncNuxModel(config: furiosa.server.settings.NuxModelConfig)

Bases: furiosa.server.model.NuxModel

Model for the Nux runtime, based on AsyncSession.

completed(receiver: furiosa.runtime.session.CompletionQueue, id: uuid.UUID, future: _asyncio.Future)
async create_sessions(devices: str) List[Union[furiosa.runtime.session.Session, furiosa.runtime.session.AsyncSession]]
async run(inputs: Sequence[numpy.ndarray]) furiosa.runtime.tensor.TensorArray
class furiosa.server.model.CPUModel(config: furiosa.server.settings.ModelConfig, *, predict: Callable[[Any, Any], Awaitable[Any]])

Bases: furiosa.server.model.Model

Model running on CPU.

async predict(*args: Any, **kwargs: Any) Any
class furiosa.server.model.Model(config: furiosa.server.settings.ModelConfig)

Bases: abc.ABC

Base model class for every runtime.

async load() bool
async metadata() furiosa.server.types.predict.MetadataModelResponse
property name: str
abstract async predict(payload: furiosa.server.types.predict.InferenceRequest) furiosa.server.types.predict.InferenceResponse
abstract async predict(payload: List[numpy.ndarray]) List[numpy.ndarray]
async unload()
property version: Optional[str]
class furiosa.server.model.NuxModel(config: furiosa.server.settings.NuxModelConfig)

Bases: furiosa.server.model.Model

Model for the Nux runtime.

async create_sessions(devices: str) List[Union[furiosa.runtime.session.Session, furiosa.runtime.session.AsyncSession]]
decode(tensor: furiosa.runtime.tensor.TensorDesc, request_input: furiosa.server.types.predict.RequestInput) numpy.ndarray
encode(name: str, payload: numpy.ndarray) furiosa.server.types.predict.ResponseOutput
async load() bool
async predict(payload)
async run(inputs: Sequence[numpy.ndarray]) furiosa.runtime.tensor.TensorArray
property session: furiosa.runtime.session.Session
async unload()
class furiosa.server.model.OpenVINOModel(config: furiosa.server.settings.OpenVINOModelConfig)

Bases: furiosa.server.model.Model

Model running on the OpenVINO runtime.

property inner: CompiledModel
async load() bool
async predict(payload)

Inference via OpenVINO runtime.

Note that this is not a thread-safe API, as the OpenVINO API does not support thread safety.

property session: InferRequest

furiosa.server.registry module

class furiosa.server.registry.FileRegistry(config: furiosa.server.settings.FileRegistryConfig)

Bases: furiosa.server.registry.Registry

async list() List[furiosa.server.settings.ModelConfig]

Get the model configs from file with yaml suffix in the specified directory

class furiosa.server.registry.InMemoryRegistry(model_configs: List[furiosa.server.settings.ModelConfig] = [])

Bases: furiosa.server.registry.Registry

async list() List[furiosa.server.settings.ModelConfig]

Get the model configs in-memory

register(model_config: furiosa.server.settings.ModelConfig) bool

Register a model config in this in-memory registry

Returns

True for success, False if the config already exists

Return type

bool

unregister(model_config: furiosa.server.settings.ModelConfig) bool

Unregister a model config from this in-memory registry

Returns

True for success, False if the config does not exist

Return type

bool

class furiosa.server.registry.Registry

Bases: abc.ABC

async find(name: str) furiosa.server.settings.ModelConfig

Find a (loaded or unloaded) model config

abstract async list() List[furiosa.server.settings.ModelConfig]

Get the (loaded + unloaded) model configs

furiosa.server.repository module

class furiosa.server.repository.Repository(registries: List[furiosa.server.registry.Registry], on_load: Optional[Callable[[furiosa.server.model.Model], None]] = None, on_unload: Optional[Callable[[furiosa.server.model.Model], None]] = None)

Bases: object

async find(name: str) furiosa.server.settings.ModelConfig

Find a (loaded or unloaded) model config from several registries

async get_model(name: str, version: Optional[str] = None) furiosa.server.model.Model

Get a specified loaded model with name and version

async get_models() List[furiosa.server.model.Model]

Get the specified loaded models

async list() List[furiosa.server.settings.ModelConfig]

Get the (loaded + unloaded) model configs from several registries

async load(model: furiosa.server.model.Model) bool

Load a specified model

Register a model in the internal directory to maintain loaded models.

property registries: List[furiosa.server.registry.Registry]
async unload(name: str) bool

Unload models with a specified name

Unregister models in the internal directory to maintain loaded models.

furiosa.server.server module

class furiosa.server.server.ModelServer(config: furiosa.server.settings.ServerConfig, model_configs: List[furiosa.server.settings.ModelConfig])

Bases: object

async load()
async start()
async stop()

furiosa.server.settings module

Configuration settings via Pydantic

class furiosa.server.settings.FileRegistryConfig(_env_file: Optional[Union[str, os.PathLike, List[Union[str, os.PathLike]], Tuple[Union[str, os.PathLike], ...]]] = '<object object>', _env_file_encoding: Optional[str] = None, _env_nested_delimiter: Optional[str] = None, _secrets_dir: Optional[Union[str, os.PathLike]] = None, *, repository_root: str = '.')

Bases: pydantic.env_settings.BaseSettings

File based registry configs.

repository_root: str
class furiosa.server.settings.GRPCServerConfig(_env_file: Optional[Union[str, os.PathLike, List[Union[str, os.PathLike]], Tuple[Union[str, os.PathLike], ...]]] = '<object object>', _env_file_encoding: Optional[str] = None, _env_nested_delimiter: Optional[str] = None, _secrets_dir: Optional[Union[str, os.PathLike]] = None, *, host: str = '0.0.0.0', port: int = 8081, workers: int = 1)

Bases: pydantic.env_settings.BaseSettings

GRPC server configuration.

host: str
port: int
workers: int
class furiosa.server.settings.ModelConfig(_env_file: Optional[Union[str, os.PathLike, List[Union[str, os.PathLike]], Tuple[Union[str, os.PathLike], ...]]] = '<object object>', _env_file_encoding: Optional[str] = None, _env_nested_delimiter: Optional[str] = None, _secrets_dir: Optional[Union[str, os.PathLike]] = None, *, name: str, version: Optional[str] = None, description: Optional[str] = None, platform: str = 'Unknown', versions: Optional[List[str]] = [], inputs: Optional[List[furiosa.server.types.predict.MetadataTensor]] = [], outputs: Optional[List[furiosa.server.types.predict.MetadataTensor]] = [])

Bases: pydantic.env_settings.BaseSettings

Base Model configuration.

description: Optional[str]
inputs: Optional[List[furiosa.server.types.predict.MetadataTensor]]
name: str
outputs: Optional[List[furiosa.server.types.predict.MetadataTensor]]
platform: str
version: Optional[str]
versions: Optional[List[str]]
class furiosa.server.settings.NuxModelConfig(_env_file: Optional[Union[str, os.PathLike, List[Union[str, os.PathLike]], Tuple[Union[str, os.PathLike], ...]]] = '<object object>', _env_file_encoding: Optional[str] = None, _env_nested_delimiter: Optional[str] = None, _secrets_dir: Optional[Union[str, os.PathLike]] = None, *, name: str, version: Optional[str] = None, description: Optional[str] = None, platform: str = 'nux', versions: Optional[List[str]] = [], inputs: Optional[List[furiosa.server.types.predict.MetadataTensor]] = [], outputs: Optional[List[furiosa.server.types.predict.MetadataTensor]] = [], model: Union[pydantic.types.StrictStr, pydantic.types.StrictBytes], npu_device: Optional[str] = None, batch_size: Optional[int] = None, worker_num: Optional[int] = None, compiler_config: Optional[Dict] = None)

Bases: furiosa.server.settings.ModelConfig

Model configuration for a Nux model.

batch_size: Optional[int]
compiler_config: Optional[Dict]
model: Union[pydantic.types.StrictStr, pydantic.types.StrictBytes]
npu_device: Optional[str]
platform: str
worker_num: Optional[int]
class furiosa.server.settings.OpenVINOModelConfig(_env_file: Optional[Union[str, os.PathLike, List[Union[str, os.PathLike]], Tuple[Union[str, os.PathLike], ...]]] = '<object object>', _env_file_encoding: Optional[str] = None, _env_nested_delimiter: Optional[str] = None, _secrets_dir: Optional[Union[str, os.PathLike]] = None, *, name: str, version: Optional[str] = None, description: Optional[str] = None, platform: str = 'openvino', versions: Optional[List[str]] = [], inputs: Optional[List[furiosa.server.types.predict.MetadataTensor]] = [], outputs: Optional[List[furiosa.server.types.predict.MetadataTensor]] = [], model: Union[pydantic.types.StrictStr, pydantic.types.StrictBytes], compiler_config: Optional[Dict] = None)

Bases: furiosa.server.settings.ModelConfig

Model configuration for an OpenVINO model.

compiler_config: Optional[Dict]
model: Union[pydantic.types.StrictStr, pydantic.types.StrictBytes]
platform: str
class furiosa.server.settings.RESTServerConfig(_env_file: Optional[Union[str, os.PathLike, List[Union[str, os.PathLike]], Tuple[Union[str, os.PathLike], ...]]] = '<object object>', _env_file_encoding: Optional[str] = None, _env_nested_delimiter: Optional[str] = None, _secrets_dir: Optional[Union[str, os.PathLike]] = None, *, host: str = '0.0.0.0', port: int = 8080, workers: int = 1, debug: bool = False)

Bases: pydantic.env_settings.BaseSettings

Rest server configuration.

debug: bool
host: str
port: int
workers: int
class furiosa.server.settings.ServerConfig(_env_file: Optional[Union[str, os.PathLike, List[Union[str, os.PathLike]], Tuple[Union[str, os.PathLike], ...]]] = '<object object>', _env_file_encoding: Optional[str] = None, _env_nested_delimiter: Optional[str] = None, _secrets_dir: Optional[Union[str, os.PathLike]] = None, *, server_name: str = 'furiosa-server', server_version: str = '0.8.3-dev (rev: c85fad2)', extensions: List[str] = [], intial_model_autoload: bool = True, file_registry_config: furiosa.server.settings.FileRegistryConfig = FileRegistryConfig(repository_root='.'), grpc_server_config: furiosa.server.settings.GRPCServerConfig = GRPCServerConfig(host='0.0.0.0', port=8081, workers=1), rest_server_config: furiosa.server.settings.RESTServerConfig = RESTServerConfig(host='0.0.0.0', port=8080, workers=1, debug=False))

Bases: pydantic.env_settings.BaseSettings

Server (GRPC server + REST server) configuration.

extensions: List[str]
file_registry_config: furiosa.server.settings.FileRegistryConfig
grpc_server_config: furiosa.server.settings.GRPCServerConfig
intial_model_autoload: bool
rest_server_config: furiosa.server.settings.RESTServerConfig
server_name: str
server_version: str

Module contents

FuriosaAI model server interacting Furiosa NPU.

class furiosa.server.AsyncNuxModel(config: furiosa.server.settings.NuxModelConfig)

Bases: furiosa.server.model.NuxModel

Model for the Nux runtime, based on AsyncSession.

completed(receiver: furiosa.runtime.session.CompletionQueue, id: uuid.UUID, future: _asyncio.Future)
async create_sessions(devices: str) List[Union[furiosa.runtime.session.Session, furiosa.runtime.session.AsyncSession]]
async run(inputs: Sequence[numpy.ndarray]) furiosa.runtime.tensor.TensorArray
class furiosa.server.CPUModel(config: furiosa.server.settings.ModelConfig, *, predict: Callable[[Any, Any], Awaitable[Any]])

Bases: furiosa.server.model.Model

Model running on CPU.

async predict(*args: Any, **kwargs: Any) Any
class furiosa.server.Model(config: furiosa.server.settings.ModelConfig)

Bases: abc.ABC

Base model class for every runtime.

async load() bool
async metadata() furiosa.server.types.predict.MetadataModelResponse
property name: str
abstract async predict(payload: furiosa.server.types.predict.InferenceRequest) furiosa.server.types.predict.InferenceResponse
abstract async predict(payload: List[numpy.ndarray]) List[numpy.ndarray]
async unload()
property version: Optional[str]
class furiosa.server.ModelConfig(_env_file: Optional[Union[str, os.PathLike, List[Union[str, os.PathLike]], Tuple[Union[str, os.PathLike], ...]]] = '<object object>', _env_file_encoding: Optional[str] = None, _env_nested_delimiter: Optional[str] = None, _secrets_dir: Optional[Union[str, os.PathLike]] = None, *, name: str, version: Optional[str] = None, description: Optional[str] = None, platform: str = 'Unknown', versions: Optional[List[str]] = [], inputs: Optional[List[furiosa.server.types.predict.MetadataTensor]] = [], outputs: Optional[List[furiosa.server.types.predict.MetadataTensor]] = [])

Bases: pydantic.env_settings.BaseSettings

Base Model configuration.

description: Optional[str]
inputs: Optional[List[furiosa.server.types.predict.MetadataTensor]]
name: str
outputs: Optional[List[furiosa.server.types.predict.MetadataTensor]]
platform: str
version: Optional[str]
versions: Optional[List[str]]
exception furiosa.server.ModelNotFound(name: str, version: Optional[str] = None)

Bases: furiosa.server.errors.ModelServerError

class furiosa.server.ModelServer(config: furiosa.server.settings.ServerConfig, model_configs: List[furiosa.server.settings.ModelConfig])

Bases: object

async load()
async start()
async stop()
exception furiosa.server.ModelServerError(msg)

Bases: Exception

class furiosa.server.NuxModel(config: furiosa.server.settings.NuxModelConfig)

Bases: furiosa.server.model.Model

Model for the Nux runtime.

async create_sessions(devices: str) List[Union[furiosa.runtime.session.Session, furiosa.runtime.session.AsyncSession]]
decode(tensor: furiosa.runtime.tensor.TensorDesc, request_input: furiosa.server.types.predict.RequestInput) numpy.ndarray
encode(name: str, payload: numpy.ndarray) furiosa.server.types.predict.ResponseOutput
async load() bool
async predict(payload)
async run(inputs: Sequence[numpy.ndarray]) furiosa.runtime.tensor.TensorArray
property session: furiosa.runtime.session.Session
async unload()
class furiosa.server.NuxModelConfig(_env_file: Optional[Union[str, os.PathLike, List[Union[str, os.PathLike]], Tuple[Union[str, os.PathLike], ...]]] = '<object object>', _env_file_encoding: Optional[str] = None, _env_nested_delimiter: Optional[str] = None, _secrets_dir: Optional[Union[str, os.PathLike]] = None, *, name: str, version: Optional[str] = None, description: Optional[str] = None, platform: str = 'nux', versions: Optional[List[str]] = [], inputs: Optional[List[furiosa.server.types.predict.MetadataTensor]] = [], outputs: Optional[List[furiosa.server.types.predict.MetadataTensor]] = [], model: Union[pydantic.types.StrictStr, pydantic.types.StrictBytes], npu_device: Optional[str] = None, batch_size: Optional[int] = None, worker_num: Optional[int] = None, compiler_config: Optional[Dict] = None)

Bases: furiosa.server.settings.ModelConfig

Model configuration for a Nux model.

batch_size: Optional[int]
compiler_config: Optional[Dict]
description: Optional[str]
inputs: Optional[List[furiosa.server.types.predict.MetadataTensor]]
model: Union[pydantic.types.StrictStr, pydantic.types.StrictBytes]
name: str
npu_device: Optional[str]
outputs: Optional[List[furiosa.server.types.predict.MetadataTensor]]
platform: str
version: Optional[str]
versions: Optional[List[str]]
worker_num: Optional[int]
class furiosa.server.OpenVINOModel(config: furiosa.server.settings.OpenVINOModelConfig)

Bases: furiosa.server.model.Model

Model running on the OpenVINO runtime.

property inner: CompiledModel
async load() bool
async predict(payload)

Inference via OpenVINO runtime.

Note that this is not a thread-safe API, as the OpenVINO API does not support thread safety.

property session: InferRequest
class furiosa.server.OpenVINOModelConfig(_env_file: Optional[Union[str, os.PathLike, List[Union[str, os.PathLike]], Tuple[Union[str, os.PathLike], ...]]] = '<object object>', _env_file_encoding: Optional[str] = None, _env_nested_delimiter: Optional[str] = None, _secrets_dir: Optional[Union[str, os.PathLike]] = None, *, name: str, version: Optional[str] = None, description: Optional[str] = None, platform: str = 'openvino', versions: Optional[List[str]] = [], inputs: Optional[List[furiosa.server.types.predict.MetadataTensor]] = [], outputs: Optional[List[furiosa.server.types.predict.MetadataTensor]] = [], model: Union[pydantic.types.StrictStr, pydantic.types.StrictBytes], compiler_config: Optional[Dict] = None)

Bases: furiosa.server.settings.ModelConfig

Model configuration for an OpenVINO model.

compiler_config: Optional[Dict]
description: Optional[str]
inputs: Optional[List[furiosa.server.types.predict.MetadataTensor]]
model: Union[pydantic.types.StrictStr, pydantic.types.StrictBytes]
name: str
outputs: Optional[List[furiosa.server.types.predict.MetadataTensor]]
platform: str
version: Optional[str]
versions: Optional[List[str]]
class furiosa.server.ServerConfig(_env_file: Optional[Union[str, os.PathLike, List[Union[str, os.PathLike]], Tuple[Union[str, os.PathLike], ...]]] = '<object object>', _env_file_encoding: Optional[str] = None, _env_nested_delimiter: Optional[str] = None, _secrets_dir: Optional[Union[str, os.PathLike]] = None, *, server_name: str = 'furiosa-server', server_version: str = '0.8.3-dev (rev: c85fad2)', extensions: List[str] = [], intial_model_autoload: bool = True, file_registry_config: furiosa.server.settings.FileRegistryConfig = FileRegistryConfig(repository_root='.'), grpc_server_config: furiosa.server.settings.GRPCServerConfig = GRPCServerConfig(host='0.0.0.0', port=8081, workers=1), rest_server_config: furiosa.server.settings.RESTServerConfig = RESTServerConfig(host='0.0.0.0', port=8080, workers=1, debug=False))

Bases: pydantic.env_settings.BaseSettings

Server (GRPC server + REST server) configuration.

extensions: List[str]
file_registry_config: furiosa.server.settings.FileRegistryConfig
grpc_server_config: furiosa.server.settings.GRPCServerConfig
intial_model_autoload: bool
rest_server_config: furiosa.server.settings.RESTServerConfig
server_name: str
server_version: str