Skip to content

rasterize

estimate_png_byte_size(image, assummed_compression_ratio=4.0, overhead_bytes=1024)

Provides an estimate of the size of a PNG image given the uncompressed size and an assumed compression ratio.

The default compression ratio of 4.0 is based on the assumption that the image is a document, and represents a pessimistic estimate.

Source code in docprompt/rasterize.py
def estimate_png_byte_size(
    image: Image.Image,
    assummed_compression_ratio: float = 4.0,
    overhead_bytes: int = 1024,
) -> int:
    """
    Estimate how many bytes an image would occupy once saved as a PNG.

    The raw pixel payload (width * height * bytes per pixel for the image's mode) is divided by
    ``assummed_compression_ratio`` and ``overhead_bytes`` is added on top. The default ratio of
    4.0 assumes a document-like image and is intentionally pessimistic.

    :param image: Image whose ``size`` and ``mode`` are used for the estimate.
    :param assummed_compression_ratio: Factor by which the raw pixel data is expected to shrink.
    :param overhead_bytes: Fixed number of bytes added to the compressed payload estimate.
    :return: Estimated size in bytes, truncated to an integer.
    :raises ValueError: If the image mode is not one of "1", "L", "LA", "RGB" or "RGBA".
    """
    pixel_columns, pixel_rows = image.size
    mode = image.mode

    # Storage cost of a single pixel for each supported mode
    bytes_per_pixel_by_mode = {
        "1": 1 / 8,  # bilevel: one bit per pixel
        "L": 1,  # 8-bit greyscale
        "LA": 2,  # greyscale + alpha
        "RGB": 3,  # three 8-bit channels
        "RGBA": 4,  # three 8-bit channels + alpha
    }
    if mode not in bytes_per_pixel_by_mode:
        raise ValueError(f"Unsupported image mode: {mode}")

    raw_payload = pixel_columns * pixel_rows * bytes_per_pixel_by_mode[mode]

    return int(raw_payload / assummed_compression_ratio + overhead_bytes)

mask_image_from_bboxes(image, bboxes, *, mask_color='black')

Given a set of normalized bounding boxes, masks the image.

- image: PIL Image object or bytes object representing an image.
- bboxes: Iterable of NormBBox objects.
- mask_color: Color used for the mask; can be a string (e.g., "black") or a tuple (e.g., (0, 0, 0)).

Source code in docprompt/rasterize.py
def mask_image_from_bboxes(
    image: PILOrBytes,
    bboxes: Iterable[NormBBox],
    *,
    mask_color: Union[str, int] = "black",
):
    """
    Cover every given normalized bounding box of an image with a filled rectangle.

    Raw bytes are first decoded with ``load_image_from_bytes``. An image that is not bytes is
    drawn on directly, and that same object is returned.

    :param image: PIL Image object or bytes object representing an image.
    :param bboxes: Iterable of NormBBox objects.
    :param mask_color: Color used for the mask, can be a string (e.g., "black") or a tuple (e.g., (0, 0, 0)).
    :return: The image with the rectangles drawn on it.
    """
    canvas = load_image_from_bytes(image) if isinstance(image, bytes) else image

    img_w, img_h = canvas.size
    painter = ImageDraw.Draw(canvas)

    for box in bboxes:
        # Scale the normalized corners up to pixel coordinates
        left, upper = box.x0 * img_w, box.top * img_h
        right, lower = box.x1 * img_w, box.bottom * img_h
        painter.rectangle((left, upper, right, lower), fill=mask_color)

    return canvas

resize_image_to_fize_size_limit(image, max_file_size_bytes, *, resize_mode='thumbnail', resize_step_size=0.1, allow_channel_reduction=True, image_convert_mode='L')

Incrementally resizes an image until its estimated PNG size is under the given file-size limit.

Source code in docprompt/rasterize.py
def resize_image_to_fize_size_limit(
    image: PILOrBytes,
    max_file_size_bytes: int,
    *,
    resize_mode: ResizeModes = "thumbnail",
    resize_step_size: float = 0.1,
    allow_channel_reduction: bool = True,
    image_convert_mode: str = "L",
) -> Image.Image:
    """
    Incrementally shrinks an image until its estimated PNG size is under a byte limit.

    The size is never measured by actually encoding the image; every check uses
    ``estimate_png_byte_size``. The steps are, in order:

    1. If the estimate is already under the limit, the image is returned untouched.
    2. If ``allow_channel_reduction`` is set and the mode is "LA" or "RGBA", the image is
       converted to ``image_convert_mode`` and returned if that alone is enough.
    3. Otherwise a copy is scaled down to ``1 - resize_step_size * k`` of the original
       dimensions for k = 1, 2, ... until the estimate is under the limit.

    Shrinking stops, with a logged warning, once either target dimension would be 200 pixels
    or fewer; the best effort so far is returned even though it is still over the limit.

    :param image: PIL Image object or bytes object representing an image.
    :param max_file_size_bytes: Limit the estimated size must fall strictly below.
    :param resize_mode: "thumbnail" shrinks via ``Image.thumbnail``; "resize" via
        ``Image.resize``. Any other value leaves the working copy unresized.
    :param resize_step_size: Fraction of the original dimensions removed per step; must be
        greater than 0 and less than 0.5.
    :param allow_channel_reduction: Whether "LA"/"RGBA" images may be converted first.
    :param image_convert_mode: Target mode for the channel reduction.
    :return: The original image, the converted image, or a shrunken copy.
    :raises ValueError: If ``resize_step_size`` is outside the open interval (0, 0.5).
    """
    if resize_step_size <= 0 or resize_step_size >= 0.5:
        raise ValueError("resize_step_size must be between 0 and 0.5")

    if isinstance(image, bytes):
        image = load_image_from_bytes(image)

    estimated_bytes = estimate_png_byte_size(image)

    if estimated_bytes < max_file_size_bytes:
        return image

    # Convert image to the desired mode if it carries an alpha channel
    if allow_channel_reduction and image.mode in ("LA", "RGBA"):
        image = image.convert(image_convert_mode)

        # Refresh the estimate so the loop condition and the warning below describe the
        # converted image rather than the one that existed before the conversion.
        estimated_bytes = estimate_png_byte_size(image)

        if estimated_bytes < max_file_size_bytes:
            return image

    # Work on a copy so the caller's image is never shrunk in place
    working_image = image.copy()

    # Step 0 would scale by 1.0, i.e. a no-op resize whose estimate is already known to be
    # too large, so start directly at the first real reduction.
    step_count = 1

    # ">=" keeps the loop consistent with the strict "<" used by every success check: an
    # estimate exactly equal to the limit is not "under" it.
    while estimated_bytes >= max_file_size_bytes:
        # Target dimensions are always derived from the original size, not the working copy
        scale = 1 - resize_step_size * step_count
        new_width = int(image.width * scale)
        new_height = int(image.height * scale)

        # Floor on the shrinking; also catches a zero/negative scale after many steps
        if new_width <= 200 or new_height <= 200:
            logger.warning(
                f"Image could not be resized to under {max_file_size_bytes} bytes. Reached {estimated_bytes} bytes."
            )
            break

        if resize_mode == "thumbnail":
            working_image.thumbnail((new_width, new_height))
        elif resize_mode == "resize":
            working_image = working_image.resize((new_width, new_height))

        estimated_bytes = estimate_png_byte_size(working_image)

        if estimated_bytes < max_file_size_bytes:
            return working_image

        step_count += 1

    return working_image