furiosa.serving package

Submodules

furiosa.serving.application module

class furiosa.serving.application.ServeAPI(repository: furiosa.server.repository.Repository = <furiosa.server.repository.Repository object>, **kwargs: typing.Any)

Bases: object

property app: fastapi.applications.FastAPI
async load()
model(device: str) → Callable[[Any], Awaitable[furiosa.serving.model.ServeModel]]
setup_telemetry()

furiosa.serving.application.fallback(location: str) → str
async furiosa.serving.application.nux(name: str, location: str, *, blocking: bool = True, app: fastapi.applications.FastAPI, on_create: Callable[[furiosa.serving.model.ServeModel], None], version: Optional[str] = None, description: Optional[str] = None, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, npu_device: Optional[str] = None, batch_size: Optional[int] = None, worker_num: Optional[int] = None, compiler_config: Optional[Dict] = None) → furiosa.serving.model.NPUServeModel
async furiosa.serving.application.openvino(name: str, location: str, *, app: fastapi.applications.FastAPI, on_create: Callable[[furiosa.serving.model.ServeModel], None], version: Optional[str] = None, description: Optional[str] = None, compiler_config: Optional[Dict] = None, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None) → furiosa.serving.model.OpenVINOServeModel
async furiosa.serving.application.python(name: str, predict: Callable[[Any, Any], Awaitable[Any]], *, app: fastapi.applications.FastAPI, on_create: Callable[[furiosa.serving.model.ServeModel], None], version: Optional[str] = None, description: Optional[str] = None, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None) → furiosa.serving.model.CPUServeModel
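
A minimal usage sketch for this module. ServeAPI wraps a FastAPI application, and ServeAPI.model(device) returns an async factory whose arguments correspond to the nux(), openvino(), and python() helpers above; the device key "nux", the model name, and the ONNX path below are assumptions for illustration only.

    from fastapi import FastAPI

    from furiosa.serving import ServeAPI, ServeModel

    serve = ServeAPI()
    app: FastAPI = serve.app  # a plain FastAPI application, served by any ASGI server

    async def load_model() -> ServeModel:
        # "nux" as the device key and the model path are assumptions for this sketch;
        # the name and keyword arguments are forwarded to the nux() factory above.
        return await serve.model("nux")(
            "mnist",
            location="models/mnist.onnx",
        )

In practice the returned coroutine is awaited before the routes that use the model are registered, for example during application startup.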

furiosa.serving.model module

class furiosa.serving.model.CPUServeModel(app: fastapi.applications.FastAPI, name: str, *, predict: Callable[[Any, Any], Union[Awaitable[Any], Any]], version: Optional[str] = None, description: Optional[str] = None, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None)

Bases: furiosa.serving.model.ServeModel

property config: furiosa.server.settings.ModelConfig
property inner: furiosa.server.model.Model
property inputs: List[furiosa.runtime.tensor.TensorDesc]
property outputs: List[furiosa.runtime.tensor.TensorDesc]
async predict(*args: Any, **kwargs: Any) → Any

class furiosa.serving.model.NPUServeModel(app: fastapi.applications.FastAPI, name: str, *, blocking: bool = True, model: Union[str, bytes], version: Optional[str] = None, description: Optional[str] = None, npu_device: Optional[str] = None, batch_size: Optional[int] = None, worker_num: Optional[int] = None, compiler_config: Optional[Dict] = None, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None)

Bases: furiosa.serving.model.ServeModel

property config: furiosa.server.settings.NuxModelConfig
property inner: furiosa.server.model.NuxModel
property inputs: List[furiosa.runtime.tensor.TensorDesc]
property outputs: List[furiosa.runtime.tensor.TensorDesc]
async predict(payload: List[numpy.ndarray]) → List[numpy.ndarray]
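
NPUServeModel.predict exchanges one numpy array per input and output tensor; a minimal sketch (the model instance and the input array are assumed to be prepared elsewhere):

    from typing import List

    import numpy as np

    from furiosa.serving.model import NPUServeModel

    async def infer_once(model: NPUServeModel, image: np.ndarray) -> List[np.ndarray]:
        # One ndarray per input tensor in, one ndarray per output tensor out.
        return await model.predict([image])
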
class furiosa.serving.model.OpenVINOServeModel(app: fastapi.applications.FastAPI, name: str, *, model: bytes, version: Optional[str] = None, description: Optional[str] = None, compiler_config: Optional[Dict] = None, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None)

Bases: furiosa.serving.model.ServeModel

property config: furiosa.server.settings.ModelConfig
property inner: furiosa.server.model.Model
input(name: str) → ConstOutput
property inputs: List[furiosa.runtime.tensor.TensorDesc]
output(name: str) → ConstOutput
property outputs: List[furiosa.runtime.tensor.TensorDesc]
async predict(payload: numpy.ndarray) → numpy.ndarray
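
OpenVINOServeModel, by contrast, exchanges a single numpy array, and input()/output() look up the compiled network's ports by tensor name; a sketch in which the name "data" is a placeholder:

    import numpy as np

    from furiosa.serving.model import OpenVINOServeModel

    async def infer_once(model: OpenVINOServeModel, batch: np.ndarray) -> np.ndarray:
        port = model.input("data")  # "data" is a placeholder tensor name; returns a ConstOutput
        return await model.predict(batch)
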
class furiosa.serving.model.ServeModel(app: fastapi.applications.FastAPI, name: str, *, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None)

Bases: abc.ABC

abstract property config: furiosa.server.settings.ModelConfig
delete(*args, **kwargs) → Callable
expose()

Expose the FastAPI endpoints registered for this model.

get(*args, **kwargs) → Callable
head(*args, **kwargs) → Callable
hide()

Hide the FastAPI endpoints registered for this model.

abstract property inner: furiosa.server.model.Model
abstract property inputs: List[furiosa.runtime.tensor.TensorDesc]
abstract property outputs: List[furiosa.runtime.tensor.TensorDesc]
patch(*args, **kwargs) → Callable
post(*args, **kwargs) → Callable
async postprocess(*args: Any, **kwargs: Any) → Any
abstract async predict(*args: Any, **kwargs: Any) → Any
async preprocess(*args: Any, **kwargs: Any) → Any
put(*args, **kwargs) → Callable
trace(*args, **kwargs) → Callable
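
The HTTP verb helpers (get, post, put, ...) mirror FastAPI's route decorators, while expose() and hide() add or remove the registered routes from the application. A hedged sketch, assuming `model` is a concrete ServeModel created as in the application module above and `decode_image` is a hypothetical preprocessing helper:

    from fastapi import File, UploadFile

    @model.post("/models/mnist/infer")
    async def infer(image: UploadFile = File(...)):
        tensor = decode_image(await image.read())  # hypothetical helper
        outputs = await model.predict([tensor])
        return {"result": outputs[0].tolist()}

    model.hide()    # withdraw the route, e.g. while the model reloads
    model.expose()  # publish it again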

furiosa.serving.telemetry module

class furiosa.serving.telemetry.PrometheusMiddleware(app: Callable[[MutableMapping[str, Any], Callable[[], Awaitable[MutableMapping[str, Any]]], Callable[[MutableMapping[str, Any]], Awaitable[None]]], Awaitable[None]], app_name: str = 'fastapi-app')

Bases: starlette.middleware.base.BaseHTTPMiddleware

async dispatch(request: starlette.requests.Request, call_next: Callable[[starlette.requests.Request], Awaitable[starlette.responses.Response]]) → starlette.responses.Response
static get_path(request: starlette.requests.Request) → Tuple[str, bool]

furiosa.serving.telemetry.metrics(request: starlette.requests.Request) → starlette.responses.Response
furiosa.serving.telemetry.setup_logger(otlp_enabled: bool) → None
furiosa.serving.telemetry.setup_metrics(app: Callable[[MutableMapping[str, Any], Callable[[], Awaitable[MutableMapping[str, Any]]], Callable[[MutableMapping[str, Any]], Awaitable[None]]], Awaitable[None]], app_name: str, metric_path: str = '/metrics') → None
furiosa.serving.telemetry.setup_otlp(app: Callable[[MutableMapping[str, Any], Callable[[], Awaitable[MutableMapping[str, Any]]], Callable[[MutableMapping[str, Any]], Awaitable[None]]], Awaitable[None]], app_name: str, endpoint: str, log_correlation: bool = True) → None
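
A sketch of wiring the telemetry helpers into a FastAPI application by hand; the app name and the OTLP collector endpoint below are placeholders.

    from fastapi import FastAPI

    from furiosa.serving.telemetry import setup_logger, setup_metrics, setup_otlp

    app = FastAPI()

    setup_logger(otlp_enabled=True)
    setup_metrics(app, app_name="my-serving-app")  # metric_path defaults to "/metrics"
    setup_otlp(app, app_name="my-serving-app", endpoint="http://localhost:4317")  # placeholder endpoint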

Module contents

Furiosa serving framework: an easy-to-use inference server.

class furiosa.serving.CPUServeModel(app: fastapi.applications.FastAPI, name: str, *, predict: Callable[[Any, Any], Union[Awaitable[Any], Any]], version: Optional[str] = None, description: Optional[str] = None, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None)

Bases: furiosa.serving.model.ServeModel

property config: furiosa.server.settings.ModelConfig
property inner: furiosa.server.model.Model
property inputs: List[furiosa.runtime.tensor.TensorDesc]
property outputs: List[furiosa.runtime.tensor.TensorDesc]
async predict(*args: Any, **kwargs: Any) → Any

class furiosa.serving.NPUServeModel(app: fastapi.applications.FastAPI, name: str, *, blocking: bool = True, model: Union[str, bytes], version: Optional[str] = None, description: Optional[str] = None, npu_device: Optional[str] = None, batch_size: Optional[int] = None, worker_num: Optional[int] = None, compiler_config: Optional[Dict] = None, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None)

Bases: furiosa.serving.model.ServeModel

property config: furiosa.server.settings.NuxModelConfig
property inner: furiosa.server.model.NuxModel
property inputs: List[furiosa.runtime.tensor.TensorDesc]
property outputs: List[furiosa.runtime.tensor.TensorDesc]
async predict(payload: List[numpy.ndarray]) → List[numpy.ndarray]

class furiosa.serving.OpenVINOServeModel(app: fastapi.applications.FastAPI, name: str, *, model: bytes, version: Optional[str] = None, description: Optional[str] = None, compiler_config: Optional[Dict] = None, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None)

Bases: furiosa.serving.model.ServeModel

property config: furiosa.server.settings.ModelConfig
property inner: furiosa.server.model.Model
input(name: str) → ConstOutput
property inputs: List[furiosa.runtime.tensor.TensorDesc]
output(name: str) → ConstOutput
property outputs: List[furiosa.runtime.tensor.TensorDesc]
async predict(payload: numpy.ndarray) → numpy.ndarray

class furiosa.serving.Processor

Bases: abc.ABC

abstract async postprocess(*args: Any, **kwargs: Any) → Any
abstract async preprocess(*args: Any, **kwargs: Any) → Any
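
Processor is an abstract pre/post-processing pair; a minimal sketch of a concrete subclass (the scaling and argmax steps are purely illustrative):

    from typing import Any

    import numpy as np

    from furiosa.serving import Processor

    class ImageProcessor(Processor):
        async def preprocess(self, image: np.ndarray, *args: Any, **kwargs: Any) -> np.ndarray:
            # Illustrative: scale uint8 pixels into [0, 1].
            return image.astype(np.float32) / 255.0

        async def postprocess(self, scores: np.ndarray, *args: Any, **kwargs: Any) -> int:
            # Illustrative: pick the class with the highest score.
            return int(np.argmax(scores))
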
class furiosa.serving.ServeAPI(repository: furiosa.server.repository.Repository = <furiosa.server.repository.Repository object>, **kwargs: typing.Any)

Bases: object

property app: fastapi.applications.FastAPI
async load()
model(device: str) → Callable[[Any], Awaitable[furiosa.serving.model.ServeModel]]
setup_telemetry()

class furiosa.serving.ServeModel(app: fastapi.applications.FastAPI, name: str, *, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None)

Bases: abc.ABC

abstract property config: furiosa.server.settings.ModelConfig
delete(*args, **kwargs) → Callable
expose()

Expose the FastAPI endpoints registered for this model.

get(*args, **kwargs) → Callable
head(*args, **kwargs) → Callable
hide()

Hide the FastAPI endpoints registered for this model.

abstract property inner: furiosa.server.model.Model
abstract property inputs: List[furiosa.runtime.tensor.TensorDesc]
abstract property outputs: List[furiosa.runtime.tensor.TensorDesc]
patch(*args, **kwargs) → Callable
post(*args, **kwargs) → Callable
async postprocess(*args: Any, **kwargs: Any) → Any
abstract async predict(*args: Any, **kwargs: Any) → Any
async preprocess(*args: Any, **kwargs: Any) → Any
put(*args, **kwargs) → Callable
trace(*args, **kwargs) → Callable
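
Because ServeAPI.app is an ordinary FastAPI application, the resulting service runs under any ASGI server; for example, assuming the ServeAPI sketch above lives in a module named main that exposes app:

    # "main:app" is a placeholder module path for the sketch above.
    import uvicorn

    if __name__ == "__main__":
        uvicorn.run("main:app", host="0.0.0.0", port=8000)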