from dataclasses import dataclass, field
from enum import Enum
from typing import List, Optional, Union
from netspresso.np_qai.options.common import CommonOptions
class TfliteDelegates(str, Enum):
QNN = "qnn"
QNN_GPU = "qnn-gpu"
NNAPI = "nnapi"
NNAPI_GPU = "nnapi-gpu"
GPU = "gpu"
XNNPACK = "xnnpack"
class ExecutionMode(str, Enum):
SEQUENTIAL = "SEQUENTIAL"
PARALLEL = "PARALLEL"
class GraphOptimizationLevel(str, Enum):
DISABLE_ALL = "DISABLE_ALL"
ENABLE_BASIC = "ENABLE_BASIC"
ENABLE_EXTENDED = "ENABLE_EXTENDED"
ENABLE_ALL = "ENABLE_ALL"
class OnnxExecutionProviders(str, Enum):
QNN = "qnn"
QNN_GPU = "qnn-gpu"
DIRECTML = "directml"
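# NOTE: OnnxQnnHtpPerformanceMode is referenced by OnnxQnnOptions below but its
# definition was missing from this listing. This is a minimal reconstruction,
# assuming the member values mirror the ONNX Runtime QNN execution provider's
# htp_performance_mode spellings; verify against the installed package.
class OnnxQnnHtpPerformanceMode(str, Enum):
    DEFAULT = "default"
    BURST = "burst"
    BALANCED = "balanced"
    HIGH_PERFORMANCE = "high_performance"
    SUSTAINED_HIGH_PERFORMANCE = "sustained_high_performance"
    POWER_SAVER = "power_saver"
    LOW_POWER_SAVER = "low_power_saver"
    HIGH_POWER_SAVER = "high_power_saver"
    EXTREME_POWER_SAVER = "extreme_power_saver"
    LOW_BALANCED = "low_balanced"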
class QnnLogLevel(str, Enum):
K_LOG_OFF = "kLogOff"
K_LOG_LEVEL_ERROR = "kLogLevelError"
K_LOG_LEVEL_WARN = "kLogLevelWarn"
K_LOG_LEVEL_INFO = "kLogLevelInfo"
K_LOG_LEVEL_VERBOSE = "kLogLevelVerbose"
K_LOG_LEVEL_DEBUG = "kLogLevelDebug"
class QnnGraphPriority(str, Enum):
K_QNN_PRIORITY_DEFAULT = "kQnnPriorityDefault"
K_QNN_PRIORITY_LOW = "kQnnPriorityLow"
K_QNN_PRIORITY_NORMAL = "kQnnPriorityNormal"
K_QNN_PRIORITY_NORMAL_HIGH = "kQnnPriorityNormalHigh"
K_QNN_PRIORITY_HIGH = "kQnnPriorityHigh"
K_QNN_PRIORITY_UNDEFINED = "kQnnPriorityUndefined"
class QnnGpuPrecision(str, Enum):
K_GPU_USER_PROVIDED = "kGpuUserProvided"
K_GPU_FP32 = "kGpuFp32"
K_GPU_FP16 = "kGpuFp16"
K_GPU_HYBRID = "kGpuHybrid"
class QnnDspEncoding(str, Enum):
K_DSP_STATIC = "kDspStatic"
K_DSP_DYNAMIC = "kDspDynamic"
class QnnHtpPrecision(str, Enum):
K_HTP_QUANTIZED = "kHtpQuantized"
K_HTP_FP16 = "kHtpFp16"
class QnnHtpOptimizationStrategy(str, Enum):
K_HTP_OPTIMIZE_FOR_INFERENCE = "kHtpOptimizeForInference"
K_HTP_OPTIMIZE_FOR_PREPARE = "kHtpOptimizeForPrepare"
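# NOTE: The three performance-mode enums below are referenced by TfliteQnnOptions
# but were missing from this listing. They are minimal reconstructions, assuming
# the member values follow the TFLite QNN delegate's option spellings
# (kGpu*/kDsp*/kHtp*); verify against the installed package.
class QnnGpuPerformanceMode(str, Enum):
    K_GPU_DEFAULT = "kGpuDefault"
    K_GPU_HIGH = "kGpuHigh"
    K_GPU_NORMAL = "kGpuNormal"
    K_GPU_LOW = "kGpuLow"
class QnnDspPerformanceMode(str, Enum):
    K_DSP_BURST = "kDspBurst"
    K_DSP_SUSTAINED_HIGH_PERFORMANCE = "kDspSustainedHighPerformance"
    K_DSP_HIGH_PERFORMANCE = "kDspHighPerformance"
    K_DSP_BALANCED = "kDspBalanced"
    K_DSP_LOW_BALANCED = "kDspLowBalanced"
    K_DSP_HIGH_POWER_SAVER = "kDspHighPowerSaver"
    K_DSP_POWER_SAVER = "kDspPowerSaver"
    K_DSP_LOW_POWER_SAVER = "kDspLowPowerSaver"
class TfliteQnnHtpPerformanceMode(str, Enum):
    K_HTP_BURST = "kHtpBurst"
    K_HTP_SUSTAINED_HIGH_PERFORMANCE = "kHtpSustainedHighPerformance"
    K_HTP_HIGH_PERFORMANCE = "kHtpHighPerformance"
    K_HTP_BALANCED = "kHtpBalanced"
    K_HTP_LOW_BALANCED = "kHtpLowBalanced"
    K_HTP_HIGH_POWER_SAVER = "kHtpHighPowerSaver"
    K_HTP_POWER_SAVER = "kHtpPowerSaver"
    K_HTP_LOW_POWER_SAVER = "kHtpLowPowerSaver"
    K_HTP_DEFAULT = "kHtpDefault"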
class GpuInferencePreference(str, Enum):
TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER = "TFLITE_GPU_INFERENCE_PREFERENCE_FAST_SINGLE_ANSWER"
TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED = "TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED"
TFLITE_GPU_INFERENCE_PREFERENCE_BALANCED = "TFLITE_GPU_INFERENCE_PREFERENCE_BALANCED"
class GpuInferencePriority(str, Enum):
TFLITE_GPU_INFERENCE_PREFERENCE_BALANCED = "TFLITE_GPU_INFERENCE_PREFERENCE_BALANCED"
TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION = "TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION"
TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY = "TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY"
TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE = "TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE"
class NnapiExecutionPreference(str, Enum):
K_LOW_POWER = "kLowPower"
K_FAST_SINGLE_ANSWER = "kFastSingleAnswer"
K_SUSTAINED_SPEED = "kSustainedSpeed"
class ContextErrorReportingOptionsLevel(str, Enum):
BRIEF = "BRIEF"
DETAILED = "DETAILED"
class Priority(str, Enum):
LOW = "LOW"
NORMAL = "NORMAL"
NORMAL_HIGH = "NORMAL_HIGH"
HIGH = "HIGH"
class ContextGpuPerformanceHint(str, Enum):
LOW = "LOW"
NORMAL = "NORMAL"
HIGH = "HIGH"
class ContextHtpPerformanceMode(str, Enum):
EXTREME_POWER_SAVER = "EXTREME_POWER_SAVER"
LOW_POWER_SAVER = "LOW_POWER_SAVER"
POWER_SAVER = "POWER_SAVER"
HIGH_POWER_SAVER = "HIGH_POWER_SAVER"
LOW_BALANCED = "LOW_BALANCED"
BALANCED = "BALANCED"
HIGH_PERFORMANCE = "HIGH_PERFORMANCE"
SUSTAINED_HIGH_PERFORMANCE = "SUSTAINED_HIGH_PERFORMANCE"
BURST = "BURST"
class DefaultGraphGpuPrecision(str, Enum):
FLOAT32 = "FLOAT32"
FLOAT16 = "FLOAT16"
HYBRID = "HYBRID"
USER_PROVIDED = "USER_PROVIDED"
class DefaultGraphHtpOptimizationType(str, Enum):
FINALIZE_OPTIMIZATION_FLAG = "FINALIZE_OPTIMIZATION_FLAG"
class DefaultGraphHtpPrecision(str, Enum):
FLOAT16 = "FLOAT16"
@dataclass
class OnnxOptions:
execution_mode: Optional[ExecutionMode] = ExecutionMode.SEQUENTIAL
intra_op_num_threads: Optional[int] = 0
inter_op_num_threads: Optional[int] = 0
enable_memory_pattern: Optional[bool] = False
enable_cpu_memory_arena: Optional[bool] = False
graph_optimization_level: Optional[GraphOptimizationLevel] = GraphOptimizationLevel.ENABLE_ALL
    def to_cli_string(self) -> str:
args = []
if self.execution_mode is not None:
args.append(f"execution_mode={self.execution_mode}")
if self.intra_op_num_threads is not None:
args.append(f"intra_op_num_threads={self.intra_op_num_threads}")
if self.inter_op_num_threads is not None:
args.append(f"inter_op_num_threads={self.inter_op_num_threads}")
if self.enable_memory_pattern is not None:
args.append(f"enable_memory_pattern={'true' if self.enable_memory_pattern else 'false'}")
if self.enable_cpu_memory_arena is not None:
args.append(f"enable_cpu_memory_arena={'true' if self.enable_cpu_memory_arena else 'false'}")
if self.graph_optimization_level is not None:
args.append(f"graph_optimization_level={self.graph_optimization_level}")
return f"--onnx_options {';'.join(args)}"
@dataclass
class OnnxQnnOptions(OnnxOptions):
qnn_htp_performance_mode: Optional[OnnxQnnHtpPerformanceMode] = OnnxQnnHtpPerformanceMode.BURST
    qnn_htp_graph_optimization_mode: Optional[int] = 3
    qnn_enable_htp_fp16_precision: Optional[int] = 1
    def to_cli_string(self) -> str:
        base_string = super().to_cli_string().split(" ")[1]  # Get base OnnxOptions part
args = [base_string]
if self.qnn_htp_performance_mode is not None:
args.append(f"qnn_htp_performance_mode={self.qnn_htp_performance_mode}")
if self.qnn_htp_graph_optimization_mode is not None:
args.append(f"qnn_htp_graph_optimization_mode={self.qnn_htp_graph_optimization_mode}")
if self.qnn_enable_htp_fp16_precision is not None:
args.append(f"qnn_enable_htp_fp16_precision={self.qnn_enable_htp_fp16_precision}")
return f"--onnx_options {';'.join(args)}"
@dataclass
class TfliteOptions:
enable_fallback: Optional[bool] = True
invoke_interpreter_on_cold_load: Optional[bool] = False
allow_fp32_as_fp16: Optional[bool] = True
force_opengl: Optional[bool] = False
number_of_threads: Optional[int] = -1
release_dynamic_tensors: Optional[bool] = False
    def to_cli_string(self) -> str:
args = []
if self.enable_fallback is not None:
args.append(f"enable_fallback={'true' if self.enable_fallback else 'false'}")
if self.invoke_interpreter_on_cold_load is not None:
args.append(
f"invoke_interpreter_on_cold_load={'true' if self.invoke_interpreter_on_cold_load else 'false'}"
)
if self.allow_fp32_as_fp16 is not None:
args.append(f"allow_fp32_as_fp16={'true' if self.allow_fp32_as_fp16 else 'false'}")
if self.force_opengl is not None:
args.append(f"force_opengl={'true' if self.force_opengl else 'false'}")
if self.number_of_threads is not None:
args.append(f"number_of_threads={self.number_of_threads}")
if self.release_dynamic_tensors is not None:
args.append(f"release_dynamic_tensors={'true' if self.release_dynamic_tensors else 'false'}")
return f"--tflite_options {';'.join(args)}"
@dataclass
class TfliteQnnOptions(TfliteOptions):
qnn_log_level: Optional[QnnLogLevel] = QnnLogLevel.K_LOG_LEVEL_WARN
qnn_graph_priority: Optional[QnnGraphPriority] = QnnGraphPriority.K_QNN_PRIORITY_DEFAULT
qnn_gpu_precision: Optional[QnnGpuPrecision] = QnnGpuPrecision.K_GPU_FP16
qnn_gpu_performance_mode: Optional[QnnGpuPerformanceMode] = QnnGpuPerformanceMode.K_GPU_HIGH
qnn_dsp_performance_mode: Optional[QnnDspPerformanceMode] = QnnDspPerformanceMode.K_DSP_BURST
qnn_dsp_encoding: Optional[QnnDspEncoding] = QnnDspEncoding.K_DSP_STATIC
qnn_htp_performance_mode: Optional[TfliteQnnHtpPerformanceMode] = TfliteQnnHtpPerformanceMode.K_HTP_BURST
qnn_htp_precision: Optional[QnnHtpPrecision] = QnnHtpPrecision.K_HTP_FP16
qnn_htp_optimization_strategy: Optional[QnnHtpOptimizationStrategy] = (
QnnHtpOptimizationStrategy.K_HTP_OPTIMIZE_FOR_INFERENCE
)
qnn_htp_use_conv_hmx: Optional[bool] = True
qnn_htp_use_fold_relu: Optional[bool] = False
qnn_htp_vtcm_size: Optional[int] = None
qnn_htp_num_hvx_threads: Optional[int] = None
    def to_cli_string(self) -> str:
base_string = super().to_cli_string().split(" ")[1] # Get base TfliteOptions part
args = [base_string]
if self.qnn_log_level is not None:
args.append(f"qnn_log_level={self.qnn_log_level.value}")
if self.qnn_graph_priority is not None:
args.append(f"qnn_graph_priority={self.qnn_graph_priority.value}")
if self.qnn_gpu_precision is not None:
args.append(f"qnn_gpu_precision={self.qnn_gpu_precision.value}")
if self.qnn_gpu_performance_mode is not None:
args.append(f"qnn_gpu_performance_mode={self.qnn_gpu_performance_mode.value}")
if self.qnn_dsp_performance_mode is not None:
args.append(f"qnn_dsp_performance_mode={self.qnn_dsp_performance_mode.value}")
if self.qnn_dsp_encoding is not None:
args.append(f"qnn_dsp_encoding={self.qnn_dsp_encoding.value}")
if self.qnn_htp_performance_mode is not None:
args.append(f"qnn_htp_performance_mode={self.qnn_htp_performance_mode.value}")
if self.qnn_htp_precision is not None:
args.append(f"qnn_htp_precision={self.qnn_htp_precision.value}")
if self.qnn_htp_optimization_strategy is not None:
args.append(f"qnn_htp_optimization_strategy={self.qnn_htp_optimization_strategy.value}")
if self.qnn_htp_use_conv_hmx is not None:
args.append(f"qnn_htp_use_conv_hmx={'true' if self.qnn_htp_use_conv_hmx else 'false'}")
if self.qnn_htp_use_fold_relu is not None:
args.append(f"qnn_htp_use_fold_relu={'true' if self.qnn_htp_use_fold_relu else 'false'}")
if self.qnn_htp_vtcm_size is not None:
args.append(f"qnn_htp_vtcm_size={self.qnn_htp_vtcm_size}")
if self.qnn_htp_num_hvx_threads is not None:
args.append(f"qnn_htp_num_hvx_threads={self.qnn_htp_num_hvx_threads}")
return f"--tflite_options {';'.join(args)}"
@dataclass
class TfliteGpuv2Options(TfliteOptions):
gpu_inference_preference: Optional[GpuInferencePreference] = (
GpuInferencePreference.TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED
)
gpu_inference_priority1: Optional[GpuInferencePriority] = (
GpuInferencePriority.TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY
)
gpu_inference_priority2: Optional[GpuInferencePriority] = (
GpuInferencePriority.TFLITE_GPU_INFERENCE_PRIORITY_MIN_MEMORY_USAGE
)
gpu_inference_priority3: Optional[GpuInferencePriority] = (
GpuInferencePriority.TFLITE_GPU_INFERENCE_PRIORITY_MAX_PRECISION
)
gpu_max_delegated_partitions: Optional[int] = 1
    def to_cli_string(self) -> str:
base_string = super().to_cli_string().split(" ")[1] # Get base TfliteOptions part
args = [base_string]
if self.gpu_inference_preference is not None:
args.append(f"gpu_inference_preference={self.gpu_inference_preference.value}")
if self.gpu_inference_priority1 is not None:
args.append(f"gpu_inference_priority1={self.gpu_inference_priority1.value}")
if self.gpu_inference_priority2 is not None:
args.append(f"gpu_inference_priority2={self.gpu_inference_priority2.value}")
if self.gpu_inference_priority3 is not None:
args.append(f"gpu_inference_priority3={self.gpu_inference_priority3.value}")
if self.gpu_max_delegated_partitions is not None:
args.append(f"gpu_max_delegated_partitions={self.gpu_max_delegated_partitions}")
return f"--tflite_options {';'.join(args)}"
@dataclass
class TfliteNnapiOptions(TfliteOptions):
nnapi_execution_preference: Optional[NnapiExecutionPreference] = NnapiExecutionPreference.K_SUSTAINED_SPEED
nnapi_max_number_delegated_partitions: Optional[int] = 3
nnapi_allow_fp16: Optional[bool] = True
    def to_cli_string(self) -> str:
base_string = super().to_cli_string().split(" ")[1] # Get base TfliteOptions part
args = [base_string]
if self.nnapi_execution_preference is not None:
args.append(f"nnapi_execution_preference={self.nnapi_execution_preference.value}")
if self.nnapi_max_number_delegated_partitions is not None:
args.append(f"nnapi_max_number_delegated_partitions={self.nnapi_max_number_delegated_partitions}")
if self.nnapi_allow_fp16 is not None:
args.append(f"nnapi_allow_fp16={'true' if self.nnapi_allow_fp16 else 'false'}")
return f"--tflite_options {';'.join(args)}"
@dataclass
class QnnOptions:
context_async_execution_queue_depth_numeric: Optional[int] = None
context_enable_graphs: Optional[List[str]] = None
context_error_reporting_options_level: Optional[ContextErrorReportingOptionsLevel] = None
context_error_reporting_options_storage_limit: Optional[int] = None
context_memory_limit_hint: Optional[int] = None
context_priority: Optional[Priority] = None
context_gpu_performance_hint: Optional[ContextGpuPerformanceHint] = ContextGpuPerformanceHint.HIGH
context_gpu_use_gl_buffers: Optional[bool] = None
context_htp_performance_mode: Optional[ContextHtpPerformanceMode] = ContextHtpPerformanceMode.BURST
    default_graph_priority: Optional[Priority] = None
default_graph_gpu_precision: Optional[DefaultGraphGpuPrecision] = DefaultGraphGpuPrecision.USER_PROVIDED
default_graph_gpu_disable_memory_optimizations: Optional[bool] = None
default_graph_gpu_disable_node_optimizations: Optional[bool] = None
default_graph_gpu_disable_queue_recording: Optional[bool] = None
default_graph_htp_disable_fold_relu_activation_into_conv: Optional[bool] = False
default_graph_htp_num_hvx_threads: Optional[int] = 4
default_graph_htp_optimization_type: Optional[DefaultGraphHtpOptimizationType] = (
DefaultGraphHtpOptimizationType.FINALIZE_OPTIMIZATION_FLAG
)
default_graph_htp_optimization_value: Optional[int] = field(default=None, metadata={"valid_values": [1, 2, 3]})
default_graph_htp_precision: Optional[DefaultGraphHtpPrecision] = DefaultGraphHtpPrecision.FLOAT16
default_graph_htp_disable_short_depth_conv_on_hmx: Optional[bool] = False
default_graph_htp_vtcm_size: Optional[int] = 4
def __post_init__(self):
valid_values = self.__dataclass_fields__["default_graph_htp_optimization_value"].metadata["valid_values"]
if (
self.default_graph_htp_optimization_value is not None
and self.default_graph_htp_optimization_value not in valid_values
):
raise ValueError(
f"default_graph_htp_optimization_value must be one of {valid_values}, "
f"got {self.default_graph_htp_optimization_value}"
)
    def to_cli_string(self) -> str:
args = []
if self.default_graph_htp_optimization_value is not None:
args.append(f"default_graph_htp_optimization_value={self.default_graph_htp_optimization_value}")
if self.context_async_execution_queue_depth_numeric is not None:
args.append(
f"context_async_execution_queue_depth_numeric={self.context_async_execution_queue_depth_numeric}"
)
if self.context_enable_graphs is not None:
args.append(f"context_enable_graphs={','.join(self.context_enable_graphs)}")
if self.context_error_reporting_options_level is not None:
args.append(f"context_error_reporting_options_level={self.context_error_reporting_options_level}")
if self.context_error_reporting_options_storage_limit is not None:
args.append(
f"context_error_reporting_options_storage_limit={self.context_error_reporting_options_storage_limit}"
)
if self.context_memory_limit_hint is not None:
args.append(f"context_memory_limit_hint={self.context_memory_limit_hint}")
if self.context_priority is not None:
args.append(f"context_priority={self.context_priority}")
if self.context_gpu_performance_hint is not None:
args.append(f"context_gpu_performance_hint={self.context_gpu_performance_hint}")
if self.context_gpu_use_gl_buffers is not None:
args.append(f"context_gpu_use_gl_buffers={'true' if self.context_gpu_use_gl_buffers else 'false'}")
if self.context_htp_performance_mode is not None:
args.append(f"context_htp_performance_mode={self.context_htp_performance_mode}")
if self.default_graph_priority is not None:
args.append(f"default_graph_priority={self.default_graph_priority}")
if self.default_graph_gpu_precision is not None:
args.append(f"default_graph_gpu_precision={self.default_graph_gpu_precision}")
if self.default_graph_gpu_disable_memory_optimizations is not None:
args.append(
f"default_graph_gpu_disable_memory_optimizations={'true' if self.default_graph_gpu_disable_memory_optimizations else 'false'}"
)
if self.default_graph_gpu_disable_node_optimizations is not None:
args.append(
f"default_graph_gpu_disable_node_optimizations={'true' if self.default_graph_gpu_disable_node_optimizations else 'false'}"
)
if self.default_graph_gpu_disable_queue_recording is not None:
args.append(
f"default_graph_gpu_disable_queue_recording={'true' if self.default_graph_gpu_disable_queue_recording else 'false'}"
)
if self.default_graph_htp_disable_fold_relu_activation_into_conv is not None:
args.append(
f"default_graph_htp_disable_fold_relu_activation_into_conv={'true' if self.default_graph_htp_disable_fold_relu_activation_into_conv else 'false'}"
)
if self.default_graph_htp_num_hvx_threads is not None:
args.append(f"default_graph_htp_num_hvx_threads={self.default_graph_htp_num_hvx_threads}")
if self.default_graph_htp_optimization_type is not None:
args.append(f"default_graph_htp_optimization_type={self.default_graph_htp_optimization_type}")
if self.default_graph_htp_precision is not None:
args.append(f"default_graph_htp_precision={self.default_graph_htp_precision}")
if self.default_graph_htp_disable_short_depth_conv_on_hmx is not None:
args.append(
f"default_graph_htp_disable_short_depth_conv_on_hmx={'true' if self.default_graph_htp_disable_short_depth_conv_on_hmx else 'false'}"
)
if self.default_graph_htp_vtcm_size is not None:
args.append(f"default_graph_htp_vtcm_size={self.default_graph_htp_vtcm_size}")
return f"--qnn_options {';'.join(args)}"
@dataclass
class ProfileCommonOptions(CommonOptions):
dequantize_outputs: Optional[bool] = True
tflite_delegates: Optional[List[TfliteDelegates]] = None
tflite_options: Optional[Union[TfliteOptions, TfliteQnnOptions, TfliteGpuv2Options, TfliteNnapiOptions]] = None
qnn_options: Optional[QnnOptions] = None
onnx_options: Optional[Union[OnnxOptions, OnnxQnnOptions]] = None
onnx_execution_providers: Optional[List[OnnxExecutionProviders]] = None
max_profiler_iterations: Optional[int] = 100
max_profiler_time: Optional[int] = 600
    def handle_tflite_options(self) -> str:
if isinstance(self.tflite_options, (TfliteOptions, TfliteQnnOptions, TfliteGpuv2Options, TfliteNnapiOptions)):
return self.tflite_options.to_cli_string()
else:
return str(self.tflite_options)
    def handle_onnx_options(self) -> str:
if isinstance(self.onnx_options, (OnnxOptions, OnnxQnnOptions)):
return self.onnx_options.to_cli_string()
else:
return str(self.onnx_options)
    def handle_qnn_options(self) -> str:
if isinstance(self.qnn_options, QnnOptions):
return self.qnn_options.to_cli_string()
else:
return str(self.qnn_options)
    def handle_common_options(self) -> List[str]:
args = []
if self.compute_unit is not None:
compute_units = ",".join(list(self.compute_unit))
args.append(f"--compute_unit {compute_units}")
if self.dequantize_outputs:
args.append("--dequantize_outputs")
if self.tflite_delegates is not None:
tflite_delegates = ",".join(list(self.tflite_delegates))
args.append(f"--tflite_delegates {tflite_delegates}")
if self.tflite_options is not None:
args.append(self.handle_tflite_options())
if self.onnx_options is not None:
args.append(self.handle_onnx_options())
if self.qnn_options is not None:
args.append(self.handle_qnn_options())
if self.onnx_execution_providers is not None:
onnx_execution_providers = ",".join((self.onnx_execution_providers))
args.append(f"--onnx_execution_providers {onnx_execution_providers}")
if self.max_profiler_iterations is not None:
args.append(f"--max_profiler_iterations {self.max_profiler_iterations}")
if self.max_profiler_time is not None:
args.append(f"--max_profiler_time {self.max_profiler_time}")
return args
    def to_cli_string(self) -> str:
args = self.handle_common_options()
return " ".join(args)
@dataclass
class ProfileOptions(ProfileCommonOptions):
"""
Profile options for the model.
Note:
For details, see `ProfileOptions in QAI Hub API <https://app.aihub.qualcomm.com/docs/hub/api.html#profile-inference-options>`_.
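
    Example:
        A minimal sketch; the delegate and option choices are illustrative only::

            options = ProfileOptions(
                tflite_delegates=[TfliteDelegates.QNN],
                tflite_options=TfliteQnnOptions(qnn_htp_precision=QnnHtpPrecision.K_HTP_FP16),
            )
            cli_args = options.to_cli_string()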
"""
pass
@dataclass
class InferenceOptions(ProfileCommonOptions):
"""
Inference options for the model.
Note:
For details, see `InferenceOptions in QAI Hub API <https://app.aihub.qualcomm.com/docs/hub/api.html#profile-inference-options>`_.
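
    Example:
        A minimal sketch for the ONNX Runtime path; the provider and option
        choices are illustrative only::

            options = InferenceOptions(
                onnx_execution_providers=[OnnxExecutionProviders.QNN],
                onnx_options=OnnxQnnOptions(),
            )
            cli_args = options.to_cli_string()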
"""
pass