furiosa.serving package
Submodules
furiosa.serving.application module
- class furiosa.serving.application.ServeAPI(repository: furiosa.server.repository.Repository = <furiosa.server.repository.Repository object>, **kwargs: typing.Any)
Bases: object
- property app: fastapi.applications.FastAPI
- async load()
- model(device: str) → Callable[[Any], Awaitable[furiosa.serving.model.ServeModel]]
- setup_telemetry()
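A minimal sketch of how the ServeAPI class above is typically wired into a FastAPI application. The host, port, and use of uvicorn are illustrative assumptions, not taken from this reference:

```python
import uvicorn
from fastapi import FastAPI

from furiosa.serving import ServeAPI

serve = ServeAPI()        # uses the default Repository instance
app: FastAPI = serve.app  # a regular FastAPI app; add routers/middleware as usual

serve.setup_telemetry()   # optional: enable the telemetry helpers documented below

if __name__ == "__main__":
    # load() is expected to run around application startup;
    # it can also be awaited manually before serving traffic.
    uvicorn.run(app, host="0.0.0.0", port=8000)
```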
- furiosa.serving.application.fallback(location: str) → str
- async furiosa.serving.application.nux(name: str, location: str, *, blocking: bool = True, app: fastapi.applications.FastAPI, on_create: Callable[[furiosa.serving.model.ServeModel], None], version: Optional[str] = None, description: Optional[str] = None, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, npu_device: Optional[str] = None, batch_size: Optional[int] = None, worker_num: Optional[int] = None, compiler_config: Optional[Dict] = None) → furiosa.serving.model.NPUServeModel
- async furiosa.serving.application.openvino(name: str, location: str, *, app: fastapi.applications.FastAPI, on_create: Callable[[furiosa.serving.model.ServeModel], None], version: Optional[str] = None, description: Optional[str] = None, compiler_config: Optional[Dict] = None, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None) → furiosa.serving.model.OpenVINOServeModel
- async furiosa.serving.application.python(name: str, predict: Callable[[Any, Any], Awaitable[Any]], *, app: fastapi.applications.FastAPI, on_create: Callable[[furiosa.serving.model.ServeModel], None], version: Optional[str] = None, description: Optional[str] = None, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None) → furiosa.serving.model.CPUServeModel
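The nux, openvino, and python coroutines above are the per-device model factories that ServeAPI.model(device) hands back with app and on_create already bound. A sketch of registering an NPU model this way, assuming the device key matches the factory name ("nux") and using a hypothetical model path and NPU device string:

```python
from furiosa.serving import NPUServeModel, ServeAPI

serve = ServeAPI()


async def register_mnist() -> NPUServeModel:
    # serve.model("nux") returns an async factory; calling it with the
    # keyword arguments of nux() above yields an NPUServeModel.
    model = await serve.model("nux")(
        "mnist",                          # name
        location="models/mnist.tflite",   # hypothetical model location
        npu_device="npu0pe0",             # hypothetical NPU device string
        worker_num=2,
        version="1",
        description="MNIST classifier",
    )
    return model
```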
furiosa.serving.model module
- class furiosa.serving.model.CPUServeModel(app: fastapi.applications.FastAPI, name: str, *, predict: Callable[[Any, Any], Union[Awaitable[Any], Any]], version: Optional[str] = None, description: Optional[str] = None, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None)
Bases: furiosa.serving.model.ServeModel
- property config: furiosa.server.settings.ModelConfig
- property inner: furiosa.server.model.Model
- property inputs: List[furiosa.runtime.tensor.TensorDesc]
- property outputs: List[furiosa.runtime.tensor.TensorDesc]
- async predict(*args: Any, **kwargs: Any) → Any
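CPUServeModel runs an ordinary Python callable as the predict step (it is what the python() factory above constructs). A direct-construction sketch based only on the signature shown; the FastAPI app and the callable are illustrative:

```python
import numpy as np
from fastapi import FastAPI

from furiosa.serving import CPUServeModel

app = FastAPI()


async def echo(payload: np.ndarray) -> np.ndarray:
    # Stand-in for a real CPU-side model; predict() delegates to this callable.
    return payload


model = CPUServeModel(
    app,
    "echo",
    predict=echo,  # a coroutine or a plain function, per the signature
    version="1",
    description="identity model used for testing",
)
```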
- class furiosa.serving.model.NPUServeModel(app: fastapi.applications.FastAPI, name: str, *, blocking: bool = True, model: Union[str, bytes], version: Optional[str] = None, description: Optional[str] = None, npu_device: Optional[str] = None, batch_size: Optional[int] = None, worker_num: Optional[int] = None, compiler_config: Optional[Dict] = None, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None)
Bases: furiosa.serving.model.ServeModel
- property config: furiosa.server.settings.NuxModelConfig
- property inner: furiosa.server.model.NuxModel
- property inputs: List[furiosa.runtime.tensor.TensorDesc]
- property outputs: List[furiosa.runtime.tensor.TensorDesc]
- async predict(payload: List[numpy.ndarray]) → List[numpy.ndarray]
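NPUServeModel.predict maps a list of numpy arrays (one per tensor described by inputs) to a list of output arrays. A minimal sketch, assuming a single uint8 image input of a hypothetical shape:

```python
from typing import List

import numpy as np

from furiosa.serving import NPUServeModel


async def infer(model: NPUServeModel) -> List[np.ndarray]:
    image = np.zeros((1, 28, 28, 1), dtype=np.uint8)  # hypothetical input shape
    outputs: List[np.ndarray] = await model.predict([image])
    return outputs
```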
- class furiosa.serving.model.OpenVINOServeModel(app: fastapi.applications.FastAPI, name: str, *, model: bytes, version: Optional[str] = None, description: Optional[str] = None, compiler_config: Optional[Dict] = None, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None)
Bases: furiosa.serving.model.ServeModel
- property config: furiosa.server.settings.ModelConfig
- property inner: furiosa.server.model.Model
- input(name: str) → ConstOutput
- property inputs: List[furiosa.runtime.tensor.TensorDesc]
- output(name: str) → ConstOutput
- property outputs: List[furiosa.runtime.tensor.TensorDesc]
- async predict(payload: numpy.ndarray) → numpy.ndarray
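Unlike the NPU model, OpenVINOServeModel.predict takes and returns a single numpy array, and input(name)/output(name) look up the underlying OpenVINO ports (ConstOutput). A sketch with hypothetical tensor names and shapes:

```python
import numpy as np

from furiosa.serving import OpenVINOServeModel


async def infer_cpu(model: OpenVINOServeModel) -> np.ndarray:
    port = model.input("data")  # hypothetical input tensor name
    print(port.get_shape())     # ConstOutput exposes shape/type metadata

    batch = np.zeros((1, 3, 224, 224), dtype=np.float32)  # hypothetical shape
    return await model.predict(batch)
```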
- class furiosa.serving.model.ServeModel(app: fastapi.applications.FastAPI, name: str, *, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None)
Bases: abc.ABC
- abstract property config: furiosa.server.settings.ModelConfig
- delete(*args, **kwargs) → Callable
- expose()
Expose this model's FastAPI route endpoints.
- get(*args, **kwargs) → Callable
- head(*args, **kwargs) → Callable
- hide()
Hide this model's FastAPI route endpoints.
- abstract property inner: furiosa.server.model.Model
- abstract property inputs: List[furiosa.runtime.tensor.TensorDesc]
- abstract property outputs: List[furiosa.runtime.tensor.TensorDesc]
- patch(*args, **kwargs) → Callable
- post(*args, **kwargs) → Callable
- async postprocess(*args: Any, **kwargs: Any) → Any
- abstract async predict(*args: Any, **kwargs: Any) → Any
- async preprocess(*args: Any, **kwargs: Any) → Any
- put(*args, **kwargs) → Callable
- trace(*args, **kwargs) → Callable
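The decorator-style helpers (get, post, put, ...) register FastAPI routes tied to the model, and expose()/hide() toggle those routes, for example around a model swap. A sketch assuming a concrete ServeModel created elsewhere; the endpoint path and request handling are illustrative:

```python
from typing import List

import numpy as np
from fastapi import File, UploadFile

from furiosa.serving import ServeModel


def register_routes(model: ServeModel) -> None:
    # Attach an inference endpoint to the model's FastAPI app.
    @model.post("/models/mnist/infer")
    async def infer(image: UploadFile = File(...)):
        # Simplified decoding: treat the uploaded bytes as a raw uint8 tensor.
        tensor = np.frombuffer(await image.read(), dtype=np.uint8)
        outputs: List[np.ndarray] = await model.predict([tensor])
        return {"result": outputs[0].tolist()}

    # hide()/expose() toggle the registered routes, e.g. during a reload.
    model.hide()
    model.expose()
```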
furiosa.serving.telemetry module
- class furiosa.serving.telemetry.PrometheusMiddleware(app: Callable[[MutableMapping[str, Any], Callable[[], Awaitable[MutableMapping[str, Any]]], Callable[[MutableMapping[str, Any]], Awaitable[None]]], Awaitable[None]], app_name: str = 'fastapi-app')
Bases: starlette.middleware.base.BaseHTTPMiddleware
- async dispatch(request: starlette.requests.Request, call_next: Callable[[starlette.requests.Request], Awaitable[starlette.responses.Response]]) → starlette.responses.Response
- static get_path(request: starlette.requests.Request) → Tuple[str, bool]
- furiosa.serving.telemetry.metrics(request: starlette.requests.Request) → starlette.responses.Response
- furiosa.serving.telemetry.setup_logger(otlp_enabled: bool) → None
- furiosa.serving.telemetry.setup_metrics(app: Callable[[MutableMapping[str, Any], Callable[[], Awaitable[MutableMapping[str, Any]]], Callable[[MutableMapping[str, Any]], Awaitable[None]]], Awaitable[None]], app_name: str, metric_path: str = '/metrics') → None
- furiosa.serving.telemetry.setup_otlp(app: Callable[[MutableMapping[str, Any], Callable[[], Awaitable[MutableMapping[str, Any]]], Callable[[MutableMapping[str, Any]], Awaitable[None]]], Awaitable[None]], app_name: str, endpoint: str, log_correlation: bool = True) → None
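Based only on the signatures above, setup_metrics attaches the Prometheus middleware plus a scrape endpoint to an ASGI app, and setup_otlp wires OTLP trace export. A sketch; the app name and collector endpoint are placeholders:

```python
from fastapi import FastAPI

from furiosa.serving.telemetry import setup_metrics, setup_otlp

app = FastAPI()

# Expose Prometheus metrics for this app (default scrape path: /metrics).
setup_metrics(app, app_name="my-serving-app")

# Export traces to an OpenTelemetry collector (placeholder endpoint).
setup_otlp(app, app_name="my-serving-app", endpoint="http://localhost:4317")
```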
Module contents
Furiosa serving framework: an easy-to-use inference server.
- class furiosa.serving.CPUServeModel(app: fastapi.applications.FastAPI, name: str, *, predict: Callable[[Any, Any], Union[Awaitable[Any], Any]], version: Optional[str] = None, description: Optional[str] = None, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None)
Bases: furiosa.serving.model.ServeModel
- property config: furiosa.server.settings.ModelConfig
- property inner: furiosa.server.model.Model
- property inputs: List[furiosa.runtime.tensor.TensorDesc]
- property outputs: List[furiosa.runtime.tensor.TensorDesc]
- async predict(*args: Any, **kwargs: Any) → Any
- class furiosa.serving.NPUServeModel(app: fastapi.applications.FastAPI, name: str, *, blocking: bool = True, model: Union[str, bytes], version: Optional[str] = None, description: Optional[str] = None, npu_device: Optional[str] = None, batch_size: Optional[int] = None, worker_num: Optional[int] = None, compiler_config: Optional[Dict] = None, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None)
Bases: furiosa.serving.model.ServeModel
- property config: furiosa.server.settings.NuxModelConfig
- property inner: furiosa.server.model.NuxModel
- property inputs: List[furiosa.runtime.tensor.TensorDesc]
- property outputs: List[furiosa.runtime.tensor.TensorDesc]
- async predict(payload: List[numpy.ndarray]) → List[numpy.ndarray]
- class furiosa.serving.OpenVINOServeModel(app: fastapi.applications.FastAPI, name: str, *, model: bytes, version: Optional[str] = None, description: Optional[str] = None, compiler_config: Optional[Dict] = None, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None)
Bases: furiosa.serving.model.ServeModel
- property config: furiosa.server.settings.ModelConfig
- property inner: furiosa.server.model.Model
- input(name: str) → ConstOutput
- property inputs: List[furiosa.runtime.tensor.TensorDesc]
- output(name: str) → ConstOutput
- property outputs: List[furiosa.runtime.tensor.TensorDesc]
- async predict(payload: numpy.ndarray) → numpy.ndarray
- class furiosa.serving.Processor
Bases: abc.ABC
- abstract async postprocess(*args: Any, **kwargs: Any) → Any
- abstract async preprocess(*args: Any, **kwargs: Any) → Any
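Processor is a small ABC pairing a preprocess and a postprocess step. A subclass sketch; the normalization and argmax logic is purely illustrative:

```python
import numpy as np

from furiosa.serving import Processor


class ImageClassifierProcessor(Processor):
    # Hypothetical processor: scale the input image, return the argmax label.
    async def preprocess(self, image: np.ndarray) -> np.ndarray:
        return (image.astype(np.float32) / 255.0)[np.newaxis, ...]

    async def postprocess(self, logits: np.ndarray) -> int:
        return int(np.argmax(logits))


processor = ImageClassifierProcessor()
```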
- class furiosa.serving.ServeAPI(repository: furiosa.server.repository.Repository = <furiosa.server.repository.Repository object>, **kwargs: typing.Any)
Bases: object
- property app: fastapi.applications.FastAPI
- async load()
- model(device: str) → Callable[[Any], Awaitable[furiosa.serving.model.ServeModel]]
- setup_telemetry()
- class furiosa.serving.ServeModel(app: fastapi.applications.FastAPI, name: str, *, preprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None, postprocess: Optional[Callable[[Any, Any], Awaitable[Any]]] = None)
Bases: abc.ABC
- abstract property config: furiosa.server.settings.ModelConfig
- delete(*args, **kwargs) → Callable
- expose()
Expose this model's FastAPI route endpoints.
- get(*args, **kwargs) → Callable
- head(*args, **kwargs) → Callable
- hide()
Hide this model's FastAPI route endpoints.
- abstract property inner: furiosa.server.model.Model
- abstract property inputs: List[furiosa.runtime.tensor.TensorDesc]
- abstract property outputs: List[furiosa.runtime.tensor.TensorDesc]
- patch(*args, **kwargs) → Callable
- post(*args, **kwargs) → Callable
- async postprocess(*args: Any, **kwargs: Any) → Any
- abstract async predict(*args: Any, **kwargs: Any) → Any
- async preprocess(*args: Any, **kwargs: Any) → Any
- put(*args, **kwargs) → Callable
- trace(*args, **kwargs) → Callable