vllm.transformers_utils.configs.step3_vl
 
Step3TextConfig

  Bases: PretrainedConfig
Source code in vllm/transformers_utils/configs/step3_vl.py
  
 __init__(
    hidden_size: int = 7168,
    intermediate_size: int = 18432,
    num_attention_heads: int = 64,
    num_attention_groups: int = 1,
    num_hidden_layers: int = 61,
    max_seq_len: int = 65536,
    vocab_size: int = 128815,
    rms_norm_eps: float = 1e-05,
    moe_intermediate_size: int = 5120,
    moe_num_experts: int = 48,
    moe_top_k: int = 3,
    rope_theta: float = 500000,
    rope_scaling: Optional[dict[str, Any]] = None,
    max_position_embedding: int = 65536,
    share_expert_dim: int = 5120,
    share_q_dim: int = 2048,
    head_dim: int = 256,
    norm_expert_weight: bool = False,
    moe_layers_enum: tuple[int, ...] = (
        4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
        46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
    ),
    **kwargs,
) -> None
Source code in vllm/transformers_utils/configs/step3_vl.py
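A minimal construction sketch for the text config above, assuming the class is importable from this module as Step3TextConfig (the name referenced by the composite config below). Any argument left out falls back to the defaults listed in the signature; the override values here are illustrative only.

from vllm.transformers_utils.configs.step3_vl import Step3TextConfig

# Override a few fields; everything else keeps the defaults shown above.
text_config = Step3TextConfig(
    max_seq_len=32768,
    max_position_embedding=32768,
    moe_top_k=2,
)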
  
Step3VLConfig

  Bases: PretrainedConfig
Source code in vllm/transformers_utils/configs/step3_vl.py
   
 __init__(
    vision_config: Optional[
        Union[dict, Step3VisionEncoderConfig]
    ] = None,
    text_config: Optional[
        Union[dict, Step3TextConfig]
    ] = None,
    understand_projector_stride: int = 1,
    projector_bias: bool = True,
    image_token_id: int = 128001,
    **kwargs,
) -> None
Source code in vllm/transformers_utils/configs/step3_vl.py
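A hedged usage sketch for the composite config. Per the Union types in the signature, the nested vision and text configs may be passed either as config objects or as plain dicts. The name Step3VLConfig is an assumption based on the module name; the nested class names come directly from the signature above.

from vllm.transformers_utils.configs.step3_vl import (
    Step3TextConfig,
    Step3VisionEncoderConfig,
    Step3VLConfig,  # assumed name for the class documented here
)

# Nested configs can be objects or dicts (both forms appear in the Union types).
vl_config = Step3VLConfig(
    vision_config=Step3VisionEncoderConfig(),            # all-default vision tower
    text_config={"hidden_size": 7168, "moe_top_k": 3},   # dict form also accepted
    understand_projector_stride=2,
    projector_bias=True,
    image_token_id=128001,
)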
  
Step3VisionEncoderConfig

  Bases: PretrainedConfig
Source code in vllm/transformers_utils/configs/step3_vl.py
  
 __init__(
    hidden_size=1792,
    intermediate_size=3072,
    output_hidden_size=4096,
    num_hidden_layers=63,
    num_attention_heads=16,
    num_channels=3,
    image_size=728,
    patch_size=14,
    hidden_act="quick_gelu",
    layer_norm_eps=1e-05,
    **kwargs,
)
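A short sketch constructing the vision encoder config with its default geometry. The patch-count arithmetic is only a sanity check on the image_size/patch_size pair and is not part of the API.

from vllm.transformers_utils.configs.step3_vl import Step3VisionEncoderConfig

image_size, patch_size = 728, 14
assert image_size % patch_size == 0  # 728 / 14 = 52 patches per side
vision_config = Step3VisionEncoderConfig(
    image_size=image_size,
    patch_size=patch_size,
    hidden_act="quick_gelu",
)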