vllm.model_executor.models.bert
 
BertAttention ¶

Bases: Module
Source code in vllm/model_executor/models/bert.py
output instance-attribute ¶
 output = BertSelfOutput(
    hidden_size=hidden_size,
    layer_norm_eps=layer_norm_eps,
    quant_config=quant_config,
    prefix=f"{prefix}.output",
)
self instance-attribute ¶
 self = BertSelfAttention(
    hidden_size=hidden_size,
    num_attention_heads=num_attention_heads,
    cache_config=cache_config,
    quant_config=quant_config,
    prefix=f"{prefix}.output",
)
 
 __init__(
    hidden_size: int,
    num_attention_heads: int,
    layer_norm_eps: float,
    cache_config: Optional[CacheConfig] = None,
    quant_config: Optional[QuantizationConfig] = None,
    prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
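
Taken together, these two sub-modules form the standard BERT attention block: `self` computes the attention output and `output` applies a dense projection, a residual connection, and LayerNorm. A minimal PyTorch sketch of that composition (plain `nn` layers stand in for vLLM's parallel and paged-attention layers; all names are illustrative, not vLLM APIs):

```python
import torch
from torch import nn


class AttentionBlockSketch(nn.Module):
    """Simplified stand-in for BertAttention: self-attention + output block."""

    def __init__(self, hidden_size: int, num_attention_heads: int,
                 layer_norm_eps: float = 1e-12) -> None:
        super().__init__()
        # nn.MultiheadAttention stands in for BertSelfAttention.
        self.self_attn = nn.MultiheadAttention(hidden_size,
                                               num_attention_heads,
                                               batch_first=True)
        # Dense + residual + LayerNorm stands in for BertSelfOutput.
        self.dense = nn.Linear(hidden_size, hidden_size)
        self.norm = nn.LayerNorm(hidden_size, eps=layer_norm_eps)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        attn_out, _ = self.self_attn(hidden_states, hidden_states,
                                     hidden_states)
        return self.norm(self.dense(attn_out) + hidden_states)
```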
  
BertEmbedding ¶

Bases: Module
Source code in vllm/model_executor/models/bert.py
position_embeddings instance-attribute ¶
 position_embeddings = VocabParallelEmbedding(
    max_position_embeddings, hidden_size
)
token_type_embeddings instance-attribute ¶
 token_type_embeddings = VocabParallelEmbedding(
    type_vocab_size, hidden_size
)
word_embeddings instance-attribute ¶
 word_embeddings = VocabParallelEmbedding(
    vocab_size, hidden_size
)
 
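The three `VocabParallelEmbedding` tables above are combined in the usual BERT way: word, position, and token-type embeddings are summed and then layer-normalized. A simplified sketch using plain `nn.Embedding` (default sizes follow the standard bert-base configuration and are only illustrative):

```python
import torch
from torch import nn


class BertEmbeddingSketch(nn.Module):
    """Word + position + token-type embeddings, summed and layer-normalized."""

    def __init__(self, vocab_size: int = 30522, hidden_size: int = 768,
                 max_position_embeddings: int = 512, type_vocab_size: int = 2,
                 layer_norm_eps: float = 1e-12) -> None:
        super().__init__()
        self.word_embeddings = nn.Embedding(vocab_size, hidden_size)
        self.position_embeddings = nn.Embedding(max_position_embeddings,
                                                hidden_size)
        self.token_type_embeddings = nn.Embedding(type_vocab_size, hidden_size)
        self.norm = nn.LayerNorm(hidden_size, eps=layer_norm_eps)

    def forward(self, input_ids: torch.Tensor, positions: torch.Tensor,
                token_type_ids: torch.Tensor) -> torch.Tensor:
        embeds = (self.word_embeddings(input_ids) +
                  self.position_embeddings(positions) +
                  self.token_type_embeddings(token_type_ids))
        return self.norm(embeds)
```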
  
BertEmbeddingModel ¶

Bases: Module, SupportsQuant
A model that uses Bert to provide embedding functionalities.
This class encapsulates the BertModel and provides an interface for embedding operations and customized pooling functions.
Attributes:
| Name | Type | Description |
|---|---|---|
| model | | An instance of BertModel used for forward operations. |
| _pooler | | An instance of Pooler used for pooling operations. |
Source code in vllm/model_executor/models/bert.py
model instance-attribute ¶
 model = _build_model(
    vllm_config=vllm_config,
    prefix=maybe_prefix(prefix, "model"),
)
 
 __init__(*, vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert.py
  
 _build_model(
    vllm_config: VllmConfig, prefix: str = ""
) -> BertModel
 
 _build_pooler(pooler_config: PoolerConfig) -> Pooler
 
 forward(
    input_ids: Tensor,
    positions: Tensor,
    intermediate_tensors: Optional[
        IntermediateTensors
    ] = None,
    inputs_embeds: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
  
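`BertEmbeddingModel` is normally not constructed by hand; vLLM instantiates it when a BERT-style checkpoint is loaded for embedding. A hedged usage sketch (the checkpoint name is only an example, and the `task` argument and output layout may differ between vLLM versions):

```python
from vllm import LLM

# Any BERT-based embedding checkpoint; this model name is only an example.
llm = LLM(model="BAAI/bge-base-en-v1.5", task="embed")

outputs = llm.embed(["vLLM also serves encoder-only models."])
print(len(outputs[0].outputs.embedding))  # embedding dimensionality
```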
  
BertEncoder ¶

Bases: Module
Source code in vllm/model_executor/models/bert.py
layer instance-attribute ¶
 layer = ModuleList(
    [
        (
            BertLayer(
                config=config,
                cache_config=cache_config,
                quant_config=quant_config,
                prefix=f"{prefix}.layer.{layer_idx}",
            )
        )
        for layer_idx in (range(num_hidden_layers))
    ]
)
 
 __init__(vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert.py
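
The encoder is just this stack of `BertLayer` modules applied in order. A minimal sketch of the same loop, with `nn.TransformerEncoderLayer` standing in for `BertLayer` (sizes follow bert-base and are only illustrative):

```python
import torch
from torch import nn

# nn.TransformerEncoderLayer stands in for vLLM's BertLayer in this sketch.
layers = nn.ModuleList([
    nn.TransformerEncoderLayer(d_model=768, nhead=12, dim_feedforward=3072,
                               activation="gelu", batch_first=True)
    for _ in range(12)
])


def encode(hidden_states: torch.Tensor) -> torch.Tensor:
    """Apply each encoder layer in sequence, mirroring BertEncoder.forward."""
    for layer in layers:
        hidden_states = layer(hidden_states)
    return hidden_states
```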
  
BertForSequenceClassification ¶

Bases: Module, SupportsCrossEncoding, SupportsQuant
A model that uses Bert to provide embedding functionalities.
This class encapsulates the BertModel and provides an interface for embedding operations and customized pooling functions.
Attributes:
| Name | Type | Description |
|---|---|---|
| model | | An instance of BertModel used for forward operations. |
| _pooler | | An instance of Pooler used for pooling operations. |
Source code in vllm/model_executor/models/bert.py
bert instance-attribute ¶
 bert = BertPoolingModel(
    vllm_config=vllm_config,
    prefix=maybe_prefix(prefix, "bert"),
    embedding_class=BertEmbedding,
)
pooler instance-attribute ¶
 pooler = DispatchPooler(
    {
        "encode": for_encode(pooler_config),
        "classify": ClassifierPooler(
            pooling=pooler,
            classifier=classifier,
            act_fn=act_fn_for_seq_cls(model_config),
        ),
        "score": ClassifierPooler(
            pooling=pooler,
            classifier=classifier,
            act_fn=act_fn_for_cross_encoder(model_config),
        ),
    }
)
 
 __init__(*, vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert.py
  
 forward(
    input_ids: Optional[Tensor],
    positions: Tensor,
    intermediate_tensors: Optional[
        IntermediateTensors
    ] = None,
    inputs_embeds: Optional[Tensor] = None,
    token_type_ids: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
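
This class backs vLLM's cross-encoder scoring and classification paths. A hedged usage sketch (the checkpoint name is only an example, and the scoring API may differ between vLLM versions):

```python
from vllm import LLM

# Any BERT cross-encoder checkpoint; this model name is only an example.
llm = LLM(model="cross-encoder/ms-marco-MiniLM-L-6-v2", task="score")

outputs = llm.score(
    "What is the capital of France?",
    ["Paris is the capital of France.", "Berlin is the capital of Germany."],
)
for output in outputs:
    print(output.outputs.score)
```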
  
BertIntermediate ¶

Bases: Module
Source code in vllm/model_executor/models/bert.py
dense instance-attribute ¶
 dense = ColumnParallelLinear(
    input_size=hidden_size,
    output_size=intermediate_size,
    bias=True,
    quant_config=quant_config,
    prefix=f"{prefix}.dense",
)
 
 __init__(
    hidden_size: int,
    intermediate_size: int,
    hidden_act: str,
    quant_config: Optional[QuantizationConfig] = None,
    prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
  
    
BertLayer ¶

Bases: Module
Source code in vllm/model_executor/models/bert.py
attention instance-attribute ¶
 attention = BertAttention(
    hidden_size=hidden_size,
    num_attention_heads=num_attention_heads,
    layer_norm_eps=layer_norm_eps,
    cache_config=cache_config,
    quant_config=quant_config,
    prefix=f"{prefix}.attention",
)
intermediate instance-attribute ¶
 intermediate = BertIntermediate(
    hidden_size=hidden_size,
    intermediate_size=intermediate_size,
    hidden_act=hidden_act,
    quant_config=quant_config,
    prefix=f"{prefix}.intermediate",
)
output instance-attribute ¶
 output = BertOutput(
    hidden_size=hidden_size,
    intermediate_size=intermediate_size,
    layer_norm_eps=layer_norm_eps,
    quant_config=quant_config,
    prefix=f"{prefix}.output",
)
 
 __init__(
    config: BertConfig,
    cache_config: Optional[CacheConfig] = None,
    quant_config: Optional[QuantizationConfig] = None,
    prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
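
A `BertLayer` chains the three sub-modules above: attention, the widening `intermediate` projection, and the `output` projection with residual connection and LayerNorm. A simplified PyTorch sketch of that flow (post-norm, as in standard BERT; sizes default to bert-base; vLLM's real implementation uses tensor-parallel layers and paged attention):

```python
import torch
from torch import nn


class BertLayerSketch(nn.Module):
    """Attention -> intermediate -> output, as in one BERT encoder layer."""

    def __init__(self, hidden_size: int = 768, num_heads: int = 12,
                 intermediate_size: int = 3072,
                 layer_norm_eps: float = 1e-12) -> None:
        super().__init__()
        self.attention = nn.MultiheadAttention(hidden_size, num_heads,
                                               batch_first=True)
        self.attn_norm = nn.LayerNorm(hidden_size, eps=layer_norm_eps)
        self.intermediate = nn.Linear(hidden_size, intermediate_size)
        self.act = nn.GELU()  # standard BERT uses gelu as hidden_act
        self.output = nn.Linear(intermediate_size, hidden_size)
        self.out_norm = nn.LayerNorm(hidden_size, eps=layer_norm_eps)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        attn_out, _ = self.attention(hidden_states, hidden_states,
                                     hidden_states)
        attn_out = self.attn_norm(attn_out + hidden_states)
        ffn_out = self.output(self.act(self.intermediate(attn_out)))
        return self.out_norm(ffn_out + attn_out)
```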
  
BertModel ¶

Bases: Module, SupportsQuant
Source code in vllm/model_executor/models/bert.py
encoder instance-attribute ¶
 encoder = BertEncoder(
    vllm_config=vllm_config, prefix=f"{prefix}.encoder"
)
 __init__(
    *,
    vllm_config: VllmConfig,
    prefix: str = "",
    embedding_class: type[Module] = BertEmbedding,
) -> None
Source code in vllm/model_executor/models/bert.py
  
  
 forward(
    input_ids: Tensor,
    positions: Tensor,
    intermediate_tensors: Optional[
        IntermediateTensors
    ] = None,
    inputs_embeds: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
  
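`BertModel` composes the pieces documented above: the embedding module produces the initial hidden states (or `inputs_embeds` bypasses the lookup), and the encoder stack transforms them. A minimal sketch of that composition (the embedding and encoder arguments are placeholders for modules like the sketches shown earlier):

```python
from typing import Optional

import torch
from torch import nn


class BertModelSketch(nn.Module):
    """Embeddings followed by the encoder stack, mirroring BertModel.forward."""

    def __init__(self, embeddings: nn.Module, encoder: nn.Module) -> None:
        super().__init__()
        self.embeddings = embeddings
        self.encoder = encoder

    def forward(self, input_ids: torch.Tensor, positions: torch.Tensor,
                token_type_ids: torch.Tensor,
                inputs_embeds: Optional[torch.Tensor] = None) -> torch.Tensor:
        if inputs_embeds is not None:
            hidden_states = inputs_embeds  # skip the embedding lookup
        else:
            hidden_states = self.embeddings(input_ids, positions,
                                            token_type_ids)
        return self.encoder(hidden_states)
```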
  
BertOutput ¶

Bases: Module
Source code in vllm/model_executor/models/bert.py
dense instance-attribute ¶
 dense = RowParallelLinear(
    input_size=intermediate_size,
    output_size=hidden_size,
    bias=True,
    quant_config=quant_config,
    prefix=f"{prefix}.dense",
)
 
 __init__(
    hidden_size: int,
    intermediate_size: int,
    layer_norm_eps: float,
    quant_config: Optional[QuantizationConfig] = None,
    prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
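
`BertOutput` (and the structurally identical `BertSelfOutput` further below, which keeps the hidden size unchanged) implements the projection-plus-residual pattern: a dense layer, then LayerNorm over the sum with the block's input. A minimal sketch:

```python
import torch
from torch import nn


class OutputSketch(nn.Module):
    """Dense projection, then LayerNorm over (projection + residual input)."""

    def __init__(self, in_size: int, out_size: int,
                 layer_norm_eps: float = 1e-12) -> None:
        super().__init__()
        self.dense = nn.Linear(in_size, out_size)
        self.norm = nn.LayerNorm(out_size, eps=layer_norm_eps)

    def forward(self, hidden_states: torch.Tensor,
                residual: torch.Tensor) -> torch.Tensor:
        return self.norm(self.dense(hidden_states) + residual)
```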
  
    
BertPooler ¶

Bases: Pooler
Source code in vllm/model_executor/models/bert.py
  
    
 forward(
    hidden_states: Union[Tensor, list[Tensor]],
    pooling_metadata: PoolingMetadata,
) -> Union[Tensor, list[Tensor]]
Source code in vllm/model_executor/models/bert.py
  
 get_pooling_updates(
    task: PoolingTask,
) -> PoolingParamsUpdate
 
 get_supported_tasks() -> Set[PoolingTask]
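
`BertPooler` follows the classic BERT pooling scheme: take the hidden state of each sequence's first token ([CLS]), project it, and apply tanh; vLLM's version additionally slices the first tokens out of the flattened batch using `PoolingMetadata`. The sketch below shows only the per-sequence math (names are illustrative, not vLLM APIs):

```python
import torch
from torch import nn


class CLSPoolerSketch(nn.Module):
    """First-token ([CLS]) pooling: dense projection + tanh, as in BERT."""

    def __init__(self, hidden_size: int = 768) -> None:
        super().__init__()
        self.dense = nn.Linear(hidden_size, hidden_size)
        self.activation = nn.Tanh()

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        # hidden_states: (seq_len, hidden_size) for a single sequence.
        first_token = hidden_states[0]
        return self.activation(self.dense(first_token))
```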
 
BertPoolingModel ¶

Bases: BertModel
Source code in vllm/model_executor/models/bert.py
  
 __init__(
    *,
    vllm_config: VllmConfig,
    prefix: str = "",
    embedding_class: type[Module] = BertEmbedding,
) -> None
Source code in vllm/model_executor/models/bert.py
  
  
BertSelfAttention ¶

Bases: Module
Source code in vllm/model_executor/models/bert.py
attn instance-attribute ¶
 attn = Attention(
    num_heads=num_heads,
    head_size=head_dim,
    scale=scaling,
    num_kv_heads=num_kv_heads,
    cache_config=cache_config,
    quant_config=quant_config,
    prefix=f"{prefix}.attn",
    attn_type=ENCODER_ONLY,
)
qkv_proj instance-attribute ¶
 qkv_proj = QKVParallelLinear(
    hidden_size=hidden_size,
    head_size=head_dim,
    total_num_heads=total_num_heads,
    total_num_kv_heads=total_num_kv_heads,
    bias=True,
    quant_config=quant_config,
    prefix=f"{prefix}.qkv_proj",
)
 
 __init__(
    hidden_size: int,
    num_attention_heads: int,
    cache_config: Optional[CacheConfig] = None,
    quant_config: Optional[QuantizationConfig] = None,
    prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
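
`BertSelfAttention` projects the hidden states to packed Q/K/V with a single fused `QKVParallelLinear`, splits the result, and passes the three tensors to vLLM's `Attention` layer configured as encoder-only. A simplified single-GPU sketch of the same flow in plain PyTorch (no tensor parallelism, no paged attention; names are illustrative):

```python
import torch
import torch.nn.functional as F
from torch import nn


class SelfAttentionSketch(nn.Module):
    """Fused QKV projection, head split, scaled dot-product attention."""

    def __init__(self, hidden_size: int = 768, num_heads: int = 12) -> None:
        super().__init__()
        self.num_heads = num_heads
        self.head_dim = hidden_size // num_heads
        # One fused projection produces Q, K and V, like QKVParallelLinear.
        self.qkv_proj = nn.Linear(hidden_size, 3 * hidden_size, bias=True)

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        # hidden_states: (batch, seq_len, hidden_size)
        bsz, seq_len, hidden = hidden_states.shape
        q, k, v = self.qkv_proj(hidden_states).chunk(3, dim=-1)
        # Reshape to (batch, num_heads, seq_len, head_dim) for attention.
        shape = (bsz, seq_len, self.num_heads, self.head_dim)
        q, k, v = (t.view(shape).transpose(1, 2) for t in (q, k, v))
        out = F.scaled_dot_product_attention(q, k, v)
        return out.transpose(1, 2).reshape(bsz, seq_len, hidden)
```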
  
    
BertSelfOutput ¶

Bases: Module
Source code in vllm/model_executor/models/bert.py
dense instance-attribute ¶
 dense = RowParallelLinear(
    input_size=hidden_size,
    output_size=hidden_size,
    bias=True,
    quant_config=quant_config,
    prefix=f"{prefix}.dense",
)
 
 __init__(
    hidden_size: int,
    layer_norm_eps: float,
    quant_config: Optional[QuantizationConfig] = None,
    prefix: str = "",
)