mirror of
https://github.com/invoke-ai/InvokeAI
synced 2026-03-02 04:59:06 +01:00
feat(mm): siglip model loading supports partial loading
In the previous commit, the LLaVA model was updated to support partial loading. In this commit, the SigLIP model is updated in the same way. This model is used for FLUX Redux. It's <4GB and only ever run in isolation, so it won't benefit from partial loading for the vast majority of users. Regardless, I think it is best if we make _all_ models work with partial loading. PS: I also fixed the initial load dtype issue, described in the prev commit. It's probably a non-issue for this model, but we may as well fix it.
This commit is contained in:
parent
c054501103
commit
814406d98a
@ -3,6 +3,7 @@ from typing import Literal, Optional
|
||||
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import SiglipImageProcessor, SiglipVisionModel
|
||||
|
||||
from invokeai.app.invocations.baseinvocation import (
|
||||
BaseInvocation,
|
||||
@ -115,8 +116,14 @@ class FluxReduxInvocation(BaseInvocation):
|
||||
@torch.no_grad()
|
||||
def _siglip_encode(self, context: InvocationContext, image: Image.Image) -> torch.Tensor:
|
||||
siglip_model_config = self._get_siglip_model(context)
|
||||
with context.models.load(siglip_model_config.key).model_on_device() as (_, siglip_pipeline):
|
||||
assert isinstance(siglip_pipeline, SigLipPipeline)
|
||||
with context.models.load(siglip_model_config.key).model_on_device() as (_, model):
|
||||
assert isinstance(model, SiglipVisionModel)
|
||||
|
||||
model_abs_path = context.models.get_absolute_path(siglip_model_config)
|
||||
processor = SiglipImageProcessor.from_pretrained(model_abs_path, local_files_only=True)
|
||||
assert isinstance(processor, SiglipImageProcessor)
|
||||
|
||||
siglip_pipeline = SigLipPipeline(processor, model)
|
||||
return siglip_pipeline.encode_image(
|
||||
x=image, device=TorchDevice.choose_torch_device(), dtype=TorchDevice.choose_torch_dtype()
|
||||
)
|
||||
|
||||
@ -1,13 +1,14 @@
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from transformers import SiglipVisionModel
|
||||
|
||||
from invokeai.backend.model_manager.config import (
|
||||
AnyModelConfig,
|
||||
)
|
||||
from invokeai.backend.model_manager.load.load_default import ModelLoader
|
||||
from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry
|
||||
from invokeai.backend.model_manager.taxonomy import AnyModel, BaseModelType, ModelFormat, ModelType, SubModelType
|
||||
from invokeai.backend.sig_lip.sig_lip_pipeline import SigLipPipeline
|
||||
|
||||
|
||||
@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.SigLIP, format=ModelFormat.Diffusers)
|
||||
@ -23,6 +24,5 @@ class SigLIPModelLoader(ModelLoader):
|
||||
raise ValueError("Unexpected submodel requested for LLaVA OneVision model.")
|
||||
|
||||
model_path = Path(config.path)
|
||||
model = SigLipPipeline.load_from_path(model_path)
|
||||
model.to(dtype=self._torch_dtype)
|
||||
model = SiglipVisionModel.from_pretrained(model_path, local_files_only=True, torch_dtype=self._torch_dtype)
|
||||
return model
|
||||
@ -16,11 +16,9 @@ from invokeai.backend.image_util.depth_anything.depth_anything_pipeline import D
|
||||
from invokeai.backend.image_util.grounding_dino.grounding_dino_pipeline import GroundingDinoPipeline
|
||||
from invokeai.backend.image_util.segment_anything.segment_anything_pipeline import SegmentAnythingPipeline
|
||||
from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
|
||||
from invokeai.backend.llava_onevision_model import LlavaOnevisionModel
|
||||
from invokeai.backend.model_manager.taxonomy import AnyModel
|
||||
from invokeai.backend.onnx.onnx_runtime import IAIOnnxRuntimeModel
|
||||
from invokeai.backend.patches.model_patch_raw import ModelPatchRaw
|
||||
from invokeai.backend.sig_lip.sig_lip_pipeline import SigLipPipeline
|
||||
from invokeai.backend.spandrel_image_to_image_model import SpandrelImageToImageModel
|
||||
from invokeai.backend.textual_inversion import TextualInversionModelRaw
|
||||
from invokeai.backend.util.calc_tensor_size import calc_tensor_size
|
||||
@ -51,8 +49,6 @@ def calc_model_size_by_data(logger: logging.Logger, model: AnyModel) -> int:
|
||||
GroundingDinoPipeline,
|
||||
SegmentAnythingPipeline,
|
||||
DepthAnythingPipeline,
|
||||
SigLipPipeline,
|
||||
LlavaOnevisionModel,
|
||||
),
|
||||
):
|
||||
return model.calc_size()
|
||||
|
||||
@ -1,14 +1,9 @@
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
from PIL import Image
|
||||
from transformers import SiglipImageProcessor, SiglipVisionModel
|
||||
|
||||
from invokeai.backend.raw_model import RawModel
|
||||
|
||||
|
||||
class SigLipPipeline(RawModel):
|
||||
class SigLipPipeline:
|
||||
"""A wrapper for a SigLIP model + processor."""
|
||||
|
||||
def __init__(
|
||||
@ -19,25 +14,7 @@ class SigLipPipeline(RawModel):
|
||||
self._siglip_processor = siglip_processor
|
||||
self._siglip_model = siglip_model
|
||||
|
||||
@classmethod
|
||||
def load_from_path(cls, path: str | Path):
|
||||
siglip_model = SiglipVisionModel.from_pretrained(path, local_files_only=True)
|
||||
assert isinstance(siglip_model, SiglipVisionModel)
|
||||
siglip_processor = SiglipImageProcessor.from_pretrained(path, local_files_only=True)
|
||||
assert isinstance(siglip_processor, SiglipImageProcessor)
|
||||
return cls(siglip_processor, siglip_model)
|
||||
|
||||
def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
|
||||
self._siglip_model.to(device=device, dtype=dtype)
|
||||
|
||||
def encode_image(self, x: Image.Image, device: torch.device, dtype: torch.dtype) -> torch.Tensor:
|
||||
imgs = self._siglip_processor.preprocess(images=[x], do_resize=True, return_tensors="pt", do_convert_rgb=True)
|
||||
encoded_x = self._siglip_model(**imgs.to(device=device, dtype=dtype)).last_hidden_state
|
||||
return encoded_x
|
||||
|
||||
def calc_size(self) -> int:
|
||||
"""Get size of the model in memory in bytes."""
|
||||
# HACK(ryand): Fix this issue with circular imports.
|
||||
from invokeai.backend.model_manager.load.model_util import calc_module_size
|
||||
|
||||
return calc_module_size(self._siglip_model)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user