Skip to content

rasterizer

DocumentRasterizer

Source code in docprompt/schema/pipeline/rasterizer.py
class DocumentRasterizer:
    """Rasterize the owning node's document and cache the per-page results.

    Each raster is stored in the ``_raster_cache`` mapping of the matching
    page node, keyed by a caller-supplied cache name.
    """

    def __init__(self, owner: "DocumentNode"):
        """Bind the rasterizer to ``owner``.

        The methods below read ``owner.document`` and ``owner.page_nodes``.
        """
        self.owner = owner

    def rasterize(
        self,
        name: str,
        *,
        return_mode: Literal["bytes", "pil"] = "bytes",
        dpi: int = 100,
        downscale_size: Optional[Tuple[int, int]] = None,
        resize_mode: ResizeModes = "thumbnail",
        resize_aspect_ratios: Optional[Iterable[AspectRatioRule]] = None,
        do_convert: bool = False,
        image_convert_mode: str = "L",
        do_quantize: bool = False,
        quantize_color_count: int = 8,
        max_file_size_bytes: Optional[int] = None,
        render_grayscale: bool = False,
    ) -> List[Union[bytes, Image.Image]]:
        """Rasterize the owner's document and cache every page under ``name``.

        All keyword arguments other than ``name`` are forwarded unchanged to
        ``self.owner.document.rasterize_pdf``; see that method for their
        exact meaning.

        Args:
            name: Cache key under which each page's raster is stored in the
                page node's ``_raster_cache``.

        Returns:
            The rasters returned by ``rasterize_pdf``, as a list in the
            iteration order of the returned mapping.

        Raises:
            IndexError: If ``rasterize_pdf`` reports a page number that is
                below 1 or beyond the owner's page nodes.
        """
        images = self.owner.document.rasterize_pdf(
            dpi=dpi,
            downscale_size=downscale_size,
            resize_mode=resize_mode,
            resize_aspect_ratios=resize_aspect_ratios,
            do_convert=do_convert,
            image_convert_mode=image_convert_mode,
            do_quantize=do_quantize,
            quantize_color_count=quantize_color_count,
            max_file_size_bytes=max_file_size_bytes,
            render_grayscale=render_grayscale,
            return_mode=return_mode,
        )

        # Reuse the shared caching path rather than duplicating its loop.
        self.propagate_cache(name, images)

        return list(images.values())

    def propagate_cache(
        self, name: str, rasters: Dict[int, Union[bytes, Image.Image]]
    ) -> None:
        """Store already-computed rasters in the page nodes' caches.

        Args:
            name: Cache key under which each raster is stored.
            rasters: Mapping of one-indexed page number to that page's raster.

        Raises:
            IndexError: If a page number is below 1 or beyond the owner's
                page nodes.
        """
        page_nodes = self.owner.page_nodes

        for page_number, raster in rasters.items():
            # Without this guard, page 0 would become index -1 and silently
            # overwrite the cache of the *last* page.
            if page_number < 1:
                raise IndexError(
                    f"Page numbers are one-indexed; got {page_number}"
                )

            page_nodes[page_number - 1]._raster_cache[name] = raster

propagate_cache(name, rasters)

The page-number keys of `rasters` must be one-indexed (the first page is 1).

Source code in docprompt/schema/pipeline/rasterizer.py
def propagate_cache(
    self, name: str, rasters: Dict[int, Union[bytes, Image.Image]]
) -> None:
    """Store already-computed rasters in the page nodes' caches.

    Args:
        name: Cache key under which each raster is stored in the page
            node's ``_raster_cache``.
        rasters: Mapping of one-indexed page number to that page's raster.

    Raises:
        IndexError: If a page number is below 1 or beyond the owner's
            page nodes.
    """
    page_nodes = self.owner.page_nodes

    for page_number, raster in rasters.items():
        # Without this guard, page 0 would become index -1 and silently
        # overwrite the cache of the *last* page.
        if page_number < 1:
            raise IndexError(
                f"Page numbers are one-indexed; got {page_number}"
            )

        page_nodes[page_number - 1]._raster_cache[name] = raster