vllm.entrypoints.openai.serving_embedding
 
EmbeddingMixin

Bases: OpenAIServing
  
 _build_response(
    ctx: ServeContext,
) -> Union[EmbeddingResponse, ErrorResponse]

async _preprocess(ctx: ServeContext) -> Optional[ErrorResponse]
  
OpenAIServingEmbedding

Bases: EmbeddingMixin

 chat_template_content_format: Final = (
    chat_template_content_format
)
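
Records the chat template content format passed to __init__; ChatTemplateContentFormatOption is a literal choice such as "auto", "string", or "openai".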
 
 __init__(
    engine_client: EngineClient,
    model_config: ModelConfig,
    models: OpenAIServingModels,
    *,
    request_logger: Optional[RequestLogger],
    chat_template: Optional[str],
    chat_template_content_format: ChatTemplateContentFormatOption,
) -> None
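
A minimal wiring sketch, assuming engine_client, model_config, and serving_models already exist from vLLM's normal server startup; build_embedding_handler is a hypothetical helper, not part of vLLM:

    from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding

    def build_embedding_handler(engine_client, model_config, serving_models):
        # The three positional arguments come out of vLLM's server bootstrap;
        # the keyword-only arguments mirror the __init__ signature above.
        return OpenAIServingEmbedding(
            engine_client,
            model_config,
            serving_models,
            request_logger=None,                  # disable per-request logging
            chat_template=None,                   # fall back to the model's template
            chat_template_content_format="auto",  # assumed default option
        )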
  
 _create_pooling_params(
    ctx: ServeContext[EmbeddingRequest],
) -> Union[PoolingParams, ErrorResponse]
  
 _validate_request(
    ctx: ServeContext[EmbeddingRequest],
) -> Optional[ErrorResponse]
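
A request that fails validation is returned to the client as an ErrorResponse, typically with HTTP status 400. A hedged client-side sketch; the model name is a placeholder and the input length that trips validation depends on the served model:

    import openai

    client = openai.OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
    try:
        client.embeddings.create(
            model="intfloat/e5-mistral-7b-instruct",  # placeholder model name
            input="token " * 1_000_000,  # presumably exceeds the model's max length
        )
    except openai.BadRequestError as err:
        # vLLM's ErrorResponse arrives as the body of the HTTP error.
        print("request rejected:", err)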

async create_embedding(
    request: EmbeddingRequest,
    raw_request: Optional[Request] = None,
) -> Union[EmbeddingResponse, ErrorResponse]
Embedding API that mirrors OpenAI's Embeddings API.
See https://platform.openai.com/docs/api-reference/embeddings/create for the API specification.
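
A client-side usage sketch, assuming an OpenAI-compatible vLLM server is already running (for example via vllm serve); the model name is a placeholder:

    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

    resp = client.embeddings.create(
        model="intfloat/e5-mistral-7b-instruct",  # placeholder embedding model
        input=["vLLM can serve embedding models too."],
    )
    print(len(resp.data[0].embedding))  # dimensionality of the returned vector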
  
 _get_embedding(
    output: EmbeddingOutput,
    encoding_format: Literal["float", "base64"],
) -> Union[list[float], str]
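
A sketch of the two encoding formats, assuming (as in the OpenAI API) that "base64" denotes the base64 encoding of the vector's raw little-endian float32 bytes:

    import base64

    import numpy as np

    vec = np.asarray([0.1, -0.2, 0.3], dtype=np.float32)

    as_float = vec.tolist()                               # encoding_format="float"
    as_base64 = base64.b64encode(vec.tobytes()).decode()  # encoding_format="base64"

    decoded = np.frombuffer(base64.b64decode(as_base64), dtype=np.float32)
    assert np.allclose(decoded, vec)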