Bases: AttentionMetadataBuilder[Mamba1AttentionMetadata]
Source code in vllm/v1/attention/backends/mamba1_attn.py

class Mamba1AttentionMetadataBuilder(
        AttentionMetadataBuilder[Mamba1AttentionMetadata]):

    reorder_batch_threshold: ClassVar[int] = 1

    def __init__(
        self,
        kv_cache_spec: AttentionSpec,
        vllm_config: VllmConfig,
        device: torch.device,
        layer_names: list[str],
    ):
        assert isinstance(kv_cache_spec, MambaSpec)
        self.kv_cache_spec = kv_cache_spec
        self.device = device
        self.vllm_config = vllm_config
        self.layer_names = layer_names

    def build(
        self,
        common_prefix_len: int,
        common_attn_metadata: CommonAttentionMetadata,
        fast_build: bool = False,
    ) -> Mamba1AttentionMetadata:
        query_start_loc = common_attn_metadata.query_start_loc
        state_indices_tensor = common_attn_metadata.block_table_tensor[:, 0]
        context_lens_tensor = common_attn_metadata.num_computed_tokens_cpu.to(
            query_start_loc.device)
        has_initial_states = (context_lens_tensor > 0)
        return Mamba1AttentionMetadata(
            query_start_loc=query_start_loc,
            context_lens_tensor=context_lens_tensor,
            has_initial_states=has_initial_states,
            state_indices_tensor=state_indices_tensor,
        )
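To make the build() logic concrete, here is a minimal sketch of the same tensor manipulation using plain torch tensors in place of vLLM's CommonAttentionMetadata. The batch contents are hypothetical example values, not taken from vLLM.

import torch

# Three requests: cumulative offsets [0, 4, 5, 6] mean request 0
# contributes 4 new tokens (a prefill) and requests 1 and 2 contribute
# 1 token each (decodes).
query_start_loc = torch.tensor([0, 4, 5, 6], dtype=torch.int32)

# One block-table row per request; only the first column is read,
# since each request owns a single Mamba state slot.
block_table_tensor = torch.tensor([[7, 0], [3, 0], [9, 0]],
                                  dtype=torch.int32)
state_indices_tensor = block_table_tensor[:, 0]   # tensor([7, 3, 9])

# num_computed_tokens is kept on the CPU and moved to the device of
# query_start_loc; a request with computed tokens resumes from a
# previously stored state.
context_lens_tensor = torch.tensor([0, 12, 37], dtype=torch.int32)
has_initial_states = context_lens_tensor > 0      # [False, True, True]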
device instance-attribute
device = device

kv_cache_spec instance-attribute
kv_cache_spec = kv_cache_spec

layer_names instance-attribute
layer_names = layer_names

reorder_batch_threshold class-attribute
reorder_batch_threshold: ClassVar[int] = 1

vllm_config instance-attribute
vllm_config = vllm_config
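The class attribute reorder_batch_threshold = 1 signals that requests with at most one new token (pure decodes) should be grouped ahead of prefills before metadata is built. A rough sketch of that ordering under this assumption, using a hypothetical helper rather than vLLM's internal routine:

import torch

def decodes_first(query_lens: torch.Tensor,
                  threshold: int = 1) -> torch.Tensor:
    """Hypothetical helper: indices of requests with at most
    `threshold` new tokens, followed by the remaining (prefill)
    request indices."""
    is_decode = query_lens <= threshold
    return torch.cat([is_decode.nonzero().flatten(),
                      (~is_decode).nonzero().flatten()])

query_lens = torch.tensor([4, 1, 1])   # one prefill, two decodes
print(decodes_first(query_lens))       # tensor([1, 2, 0])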
__init__

Source code in vllm/v1/attention/backends/mamba1_attn.py

def __init__(
    self,
    kv_cache_spec: AttentionSpec,
    vllm_config: VllmConfig,
    device: torch.device,
    layer_names: list[str],
):
    assert isinstance(kv_cache_spec, MambaSpec)
    self.kv_cache_spec = kv_cache_spec
    self.device = device
    self.vllm_config = vllm_config
    self.layer_names = layer_names
build

Source code in vllm/v1/attention/backends/mamba1_attn.py

def build(
    self,
    common_prefix_len: int,
    common_attn_metadata: CommonAttentionMetadata,
    fast_build: bool = False,
) -> Mamba1AttentionMetadata:
    query_start_loc = common_attn_metadata.query_start_loc
    state_indices_tensor = common_attn_metadata.block_table_tensor[:, 0]
    context_lens_tensor = common_attn_metadata.num_computed_tokens_cpu.to(
        query_start_loc.device)
    has_initial_states = (context_lens_tensor > 0)
    return Mamba1AttentionMetadata(
        query_start_loc=query_start_loc,
        context_lens_tensor=context_lens_tensor,
        has_initial_states=has_initial_states,
        state_indices_tensor=state_indices_tensor,
    )
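For context, a hedged sketch of how a Mamba layer might consume the returned metadata: gather each request's state slot via state_indices_tensor and zero it for fresh prefills. The cache shape and gather pattern here are illustrative assumptions, not vLLM's actual kernels.

import torch

num_slots, d_state = 16, 8
ssm_state_cache = torch.randn(num_slots, d_state)  # assumed cache layout

state_indices_tensor = torch.tensor([7, 3, 9])
has_initial_states = torch.tensor([False, True, True])

# Pick each request's state slot; fresh prefills (no initial state)
# start from zeros instead of whatever the slot currently holds.
states = ssm_state_cache[state_indices_tensor.long()]
states = torch.where(has_initial_states.unsqueeze(-1),
                     states, torch.zeros_like(states))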