vllm.model_executor.layers.quantization.utils.marlin_utils
 
 _check_marlin_supported(
    quant_type: ScalarType,
    group_size: Optional[int],
    has_zp: bool,
    device_capability: Optional[int] = None,
) -> tuple[bool, Optional[str]]
Source code in vllm/model_executor/layers/quantization/utils/marlin_utils.py
  
 apply_awq_marlin_linear(
    input: Tensor,
    weight: Tensor,
    weight_scale: Tensor,
    weight_zp: Tensor,
    g_idx: Tensor,
    g_idx_sort_indices: Tensor,
    workspace: Tensor,
    quant_type: ScalarType,
    output_size_per_partition: int,
    input_size_per_partition: int,
    bias: Optional[Tensor] = None,
    use_fp32_reduce: bool = USE_FP32_REDUCE_DEFAULT,
) -> Tensor
Source code in vllm/model_executor/layers/quantization/utils/marlin_utils.py
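
The sketch below shows how a quantization method's forward hook might route a pass through this helper once AWQ weights have been repacked for Marlin. It is illustrative only: the attribute names on `layer` (`qweight`, `scales`, `qzeros`, `g_idx`, `g_idx_sort_indices`, `workspace`, `quant_type`) are assumptions, not names this module guarantees.

```python
from typing import Optional

import torch

from vllm.model_executor.layers.quantization.utils.marlin_utils import (
    apply_awq_marlin_linear)


def forward_awq_marlin(layer, x: torch.Tensor,
                       bias: Optional[torch.Tensor] = None) -> torch.Tensor:
    # Illustrative sketch: attribute names on `layer` are assumptions.
    return apply_awq_marlin_linear(
        input=x,
        weight=layer.qweight,                  # Marlin-repacked quantized weight
        weight_scale=layer.scales,             # per-group scales
        weight_zp=layer.qzeros,                # zero points in Marlin layout
        g_idx=layer.g_idx,
        g_idx_sort_indices=layer.g_idx_sort_indices,
        workspace=layer.workspace,             # scratch buffer for the kernel
        quant_type=layer.quant_type,           # e.g. a 4-bit ScalarType with zero points
        output_size_per_partition=layer.output_size_per_partition,
        input_size_per_partition=layer.input_size_per_partition,
        bias=bias,
    )
```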
  
 apply_gptq_marlin_linear(
    input: Tensor,
    weight: Tensor,
    weight_scale: Tensor,
    weight_zp: Tensor,
    g_idx: Tensor,
    g_idx_sort_indices: Tensor,
    workspace: Tensor,
    wtype: ScalarType,
    output_size_per_partition: int,
    input_size_per_partition: int,
    is_k_full: bool,
    bias: Optional[Tensor] = None,
    use_fp32_reduce: bool = USE_FP32_REDUCE_DEFAULT,
) -> Tensor
Source code in vllm/model_executor/layers/quantization/utils/marlin_utils.py
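
A minimal sketch of the GPTQ counterpart. The call mirrors the AWQ path above but takes `wtype` instead of `quant_type` plus an `is_k_full` flag, which indicates whether this tensor-parallel rank sees the full reduction (K) dimension. Attribute names are again illustrative assumptions.

```python
from typing import Optional

import torch

from vllm.model_executor.layers.quantization.utils.marlin_utils import (
    apply_gptq_marlin_linear)


def forward_gptq_marlin(layer, x: torch.Tensor,
                        bias: Optional[torch.Tensor] = None) -> torch.Tensor:
    # Illustrative sketch: same structure as the AWQ path above.
    return apply_gptq_marlin_linear(
        input=x,
        weight=layer.qweight,
        weight_scale=layer.scales,
        weight_zp=layer.zp,                    # may be empty for symmetric GPTQ
        g_idx=layer.g_idx,
        g_idx_sort_indices=layer.g_idx_sort_indices,
        workspace=layer.workspace,
        wtype=layer.quant_type,
        output_size_per_partition=layer.output_size_per_partition,
        input_size_per_partition=layer.input_size_per_partition,
        is_k_full=layer.is_k_full,             # False when act_order splits K across ranks
        bias=bias,
    )
```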
  
 awq_to_marlin_zero_points(
    q_zp_packed: Tensor,
    size_k: int,
    size_n: int,
    num_bits: int,
) -> Tensor
Source code in vllm/model_executor/layers/quantization/utils/marlin_utils.py
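
A small sketch of repacking AWQ zero points into Marlin's layout. In this usage `size_k` is taken to be the number of quantization groups along K (an assumption on my part), and the packed input holds 32/num_bits values per int32 column; the concrete sizes below are made up for illustration.

```python
import torch

from vllm.model_executor.layers.quantization.utils.marlin_utils import (
    awq_to_marlin_zero_points)

num_groups, size_n, num_bits = 32, 4096, 4
pack_factor = 32 // num_bits          # 8 4-bit zero points per int32

# AWQ-style packed zero points: one row per group, columns packed along N.
q_zp_packed = torch.randint(0, 2**31 - 1,
                            (num_groups, size_n // pack_factor),
                            dtype=torch.int32)

marlin_zp = awq_to_marlin_zero_points(q_zp_packed,
                                      size_k=num_groups,
                                      size_n=size_n,
                                      num_bits=num_bits)
```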
  
 check_marlin_supported(
    quant_type: ScalarType,
    group_size: int,
    has_zp: bool = False,
    device_capability: Optional[int] = None,
) -> bool
Source code in vllm/model_executor/layers/quantization/utils/marlin_utils.py
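
A minimal sketch of a capability probe before choosing the Marlin path; `scalar_types.uint4b8` (4-bit unsigned with a bias of 8, the GPTQ-style type) is assumed to be importable from `vllm.scalar_type`.

```python
from vllm.scalar_type import scalar_types

from vllm.model_executor.layers.quantization.utils.marlin_utils import (
    check_marlin_supported)

use_marlin = check_marlin_supported(scalar_types.uint4b8,
                                    group_size=128,
                                    has_zp=False)
print("Marlin kernels available:", use_marlin)
```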
   
 check_marlin_supports_layer(
    layer: LinearBase, group_size: int
) -> bool
Source code in vllm/model_executor/layers/quantization/utils/marlin_utils.py
  
 check_marlin_supports_shape(
    output_size_per_partition: int,
    input_size_per_partition: int,
    input_size: int,
    group_size: int,
) -> tuple[bool, Optional[str]]
Source code in vllm/model_executor/layers/quantization/utils/marlin_utils.py
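
Marlin's kernels impose tile-size constraints on the partitioned N and K dimensions, so a quantization method can probe the shapes and fall back to another kernel when they do not fit. A sketch with arbitrary example sizes:

```python
from vllm.model_executor.layers.quantization.utils.marlin_utils import (
    check_marlin_supports_shape)

ok, reason = check_marlin_supports_shape(output_size_per_partition=4096,
                                         input_size_per_partition=11008,
                                         input_size=11008,   # unsharded K
                                         group_size=128)
if not ok:
    print(f"Falling back from Marlin: {reason}")
```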
  
 check_moe_marlin_supports_layer(
    layer: LinearBase, group_size: int
) -> bool
Source code in vllm/model_executor/layers/quantization/utils/marlin_utils.py
  
 marlin_repeat_scales_on_all_ranks(
    act_order: bool, group_size: int, is_row_parallel: bool
) -> bool
Source code in vllm/model_executor/layers/quantization/utils/marlin_utils.py
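
A sketch of how this predicate is typically consulted when sharding scales across tensor-parallel ranks: with activation reordering, or with channelwise scales (`group_size == -1`) on a row-parallel layer, each rank keeps the full scale tensor. The expected outputs in the comments reflect my reading of the helper, not guaranteed behavior.

```python
from vllm.model_executor.layers.quantization.utils.marlin_utils import (
    marlin_repeat_scales_on_all_ranks)

# Channelwise scales on a RowParallelLinear: expected True (keep full scales).
print(marlin_repeat_scales_on_all_ranks(act_order=False,
                                        group_size=-1,
                                        is_row_parallel=True))

# Grouped scales, no act_order: expected False (scales can be sharded).
print(marlin_repeat_scales_on_all_ranks(act_order=False,
                                        group_size=128,
                                        is_row_parallel=True))
```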
  
    
 query_marlin_supported_quant_types(
    has_zp: Optional[bool] = None,
    include_fp_type: bool = True,
    device_capability: Optional[int] = None,
)
Source code in vllm/model_executor/layers/quantization/utils/marlin_utils.py
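
A brief sketch of enumerating the weight types Marlin accepts, e.g. when validating a checkpoint's quantization config; when `device_capability` is omitted, the current GPU is presumably queried.

```python
from vllm.model_executor.layers.quantization.utils.marlin_utils import (
    query_marlin_supported_quant_types)

# Types that carry zero points (AWQ-style):
print(query_marlin_supported_quant_types(has_zp=True))

# Everything supported on this device, including FP types:
print(query_marlin_supported_quant_types(include_fp_type=True))
```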
  
 verify_marlin_supported(
    quant_type: ScalarType,
    group_size: int,
    has_zp: bool = False,
) -> None
Source code in vllm/model_executor/layers/quantization/utils/marlin_utils.py
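
Unlike `check_marlin_supported`, the `verify_*` form raises instead of returning a flag, which suits config-parsing time. A minimal sketch; the exact exception type is an assumption:

```python
from vllm.scalar_type import scalar_types

from vllm.model_executor.layers.quantization.utils.marlin_utils import (
    verify_marlin_supported)

try:
    verify_marlin_supported(scalar_types.uint4b8, group_size=64, has_zp=False)
except ValueError as err:   # assumed exception type
    print(f"Cannot use Marlin: {err}")
```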
   
 verify_marlin_supports_shape(
    output_size_per_partition: int,
    input_size_per_partition: int,
    input_size: int,
    group_size: int,
) -> None
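Source code in vllm/model_executor/layers/quantization/utils/marlin_utils.py

The raising counterpart of `check_marlin_supports_shape`, typically called while creating the layer's weights. A one-call sketch with arbitrary sizes:

```python
from vllm.model_executor.layers.quantization.utils.marlin_utils import (
    verify_marlin_supports_shape)

verify_marlin_supports_shape(output_size_per_partition=4096,
                             input_size_per_partition=4096,
                             input_size=8192,   # full K before TP sharding
                             group_size=128)
```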