Quantize Model

quantize_model(self, input_model_path: str | pathlib.Path, output_dir: str, weights_dtype: qai_hub.client.QuantizeDtype, activations_dtype: qai_hub.client.QuantizeDtype, options: QuantizeOptions | str = QuantizeOptions(range_scheme=RangeScheme.AUTO), job_name: str | None = None, calibration_data: qai_hub.client.Dataset | Mapping[str, List[numpy.ndarray]] | str | None = None) → NPQAIQuantizerMetadata | List[NPQAIQuantizerMetadata]

Quantize a model in the QAI hub.

Parameters:
  • input_model_path – The path to the input model.

  • output_dir – The directory to save the quantized model.

  • weights_dtype – The data type to use for the weights.

  • activations_dtype – The data type to use for the activations.

  • options – The options to use for the quantization.

  • job_name – The name of the job.

  • calibration_data – The calibration data to use for the quantization.

Returns:

Returns a quantizer metadata object if successful.

Return type:

Union[NPQAIQuantizerMetadata, List[NPQAIQuantizerMetadata]]

Note

For details, see submit_quantize_job in QAI Hub API.

Example

from netspresso import NPQAI
from netspresso.np_qai import Device
from netspresso.np_qai.options import QuantizePrecision

QAI_HUB_API_TOKEN = "YOUR_QAI_HUB_API_TOKEN"
np_qai = NPQAI(api_token=QAI_HUB_API_TOKEN)

quantizer = np_qai.quantizer()

INPUT_MODEL_PATH = "YOUR_INPUT_MODEL_PATH"
OUTPUT_DIR = "YOUR_OUTPUT_DIR"
JOB_NAME = "YOUR_JOB_NAME"
DEVICE_NAME = "YOUR_DEVICE_NAME"

CALIBRATION_DATA = {"images": inputs_array}

quantized_result = quantizer.quantize_model(
    input_model_path=INPUT_MODEL_PATH,
    output_dir=OUTPUT_DIR,
    calibration_data=CALIBRATION_DATA,
    weights_dtype=QuantizePrecision.INT8,
    activations_dtype=QuantizePrecision.INT8,
    job_name=JOB_NAME,
)
print("Quantization task started")

# Monitor task status (poll with a delay to avoid hammering the API)
import time

while True:
    status = quantizer.get_quantize_task_status(quantized_result.quantize_info.quantize_task_uuid)
    if status.finished:
        quantized_result = quantizer.update_quantize_task(quantized_result)
        print("Quantization task completed")
        break
    else:
        print("Quantization task is still running")
        time.sleep(10)

Create Calibration Datasets

Using good, representative input samples for calibration helps improve performance on target hardware and retains model accuracy.

import cv2
from glob import glob

import numpy as np
from netspresso.inferencer.preprocessors.base import Preprocessor

IMG_SIZE = 640
preprocess_list = [
    {
        "name": "resize",
        "size": IMG_SIZE,
        "interpolation": "bilinear",
        "max_size": None,
        "resize_criteria": "long",
    },
    {
        "name": "pad",
        "size": IMG_SIZE,
        "fill": 114,
    }
]

preprocessor = Preprocessor(preprocess_list)

DATASET_PATH = "YOUR_DATASET_PATH"
NUM_DATASET = 100
image_paths = glob(f"{DATASET_PATH}/*.jpg")[:NUM_DATASET]

inputs_array = []

for image_path in image_paths:
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = preprocessor(img)
    img = np.transpose(img, (0, 3, 1, 2))
    inputs_array.append(img)

>>> np.array(inputs_array).shape
(100, 1, 3, 640, 640)