Fix memory issues when installing models on Windows (#8652)

* Wrap GGUF loader for context-managed close()

  Wrap gguf.GGUFReader and use a context manager to load memory-mapped GGUF files, so that they are closed automatically when no longer needed. This should prevent the 'file in use in another process' errors on Windows.

* Additional check for cached state_dict

  Add a check for a cached state_dict, since the path is now optional. This should stop the model manager from missing the cached entry and avoid the resulting memory errors.

* Appease ruff

* Further ruff appeasement

* ruff

* loaders.py fix for linux

  No longer attempting to delete the internal object.

* loaders.py - one more _mmap ref removed

---------

Co-authored-by: Lincoln Stein <lincoln.stein@gmail.com>
2026-03-02 21:19:11 +01:00 · 2025-11-16 09:25:52 -05:00 · 2025-11-16 09:25:52 -05:00 · 382d85ee23
commit 382d85ee23
parent abcc987f6f
2 changed files with 46 additions and 12 deletions
--- a/invokeai/backend/model_manager/model_on_disk.py
+++ b/invokeai/backend/model_manager/model_on_disk.py
@ -84,6 +84,9 @@ class ModelOnDisk:

        path = self.resolve_weight_file(path)

+        if path in self._state_dict_cache:
+            return self._state_dict_cache[path]
+
        with SilenceWarnings():
            if path.suffix.endswith((".ckpt", ".pt", ".pth", ".bin")):
                scan_result = scan_file_path(path)
--- a/invokeai/backend/quantization/gguf/loaders.py
+++ b/invokeai/backend/quantization/gguf/loaders.py
@ -1,3 +1,4 @@
+import gc
 from pathlib import Path

 import gguf
@ -5,18 +6,48 @@ import torch

 from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor
 from invokeai.backend.quantization.gguf.utils import TORCH_COMPATIBLE_QTYPES
+from invokeai.backend.util.logging import InvokeAILogger
+
+logger = InvokeAILogger.get_logger()
+
+
+class WrappedGGUFReader:
+    """Wrapper around GGUFReader that adds a close() method."""
+
+    def __init__(self, path: Path):
+        self.reader = gguf.GGUFReader(path)
+
+    def __enter__(self):
+        return self.reader
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+        return False
+
+    def close(self):
+        """Explicitly close the memory-mapped file."""
+        if hasattr(self.reader, "data"):
+            try:
+                self.reader.data.flush()
+                del self.reader.data
+            except (AttributeError, OSError, ValueError) as e:
+                logger.warning(f"Wasn't able to close GGUF memory map: {e}")
+        del self.reader
+        gc.collect()


 def gguf_sd_loader(path: Path, compute_dtype: torch.dtype) -> dict[str, GGMLTensor]:
-    reader = gguf.GGUFReader(path)
-
-    sd: dict[str, GGMLTensor] = {}
-    for tensor in reader.tensors:
-        torch_tensor = torch.from_numpy(tensor.data)
-        shape = torch.Size(tuple(int(v) for v in reversed(tensor.shape)))
-        if tensor.tensor_type in TORCH_COMPATIBLE_QTYPES:
-            torch_tensor = torch_tensor.view(*shape)
-        sd[tensor.name] = GGMLTensor(
-            torch_tensor, ggml_quantization_type=tensor.tensor_type, tensor_shape=shape, compute_dtype=compute_dtype
-        )
-    return sd
+    with WrappedGGUFReader(path) as reader:
+        sd: dict[str, GGMLTensor] = {}
+        for tensor in reader.tensors:
+            torch_tensor = torch.from_numpy(tensor.data)
+            shape = torch.Size(tuple(int(v) for v in reversed(tensor.shape)))
+            if tensor.tensor_type in TORCH_COMPATIBLE_QTYPES:
+                torch_tensor = torch_tensor.view(*shape)
+            sd[tensor.name] = GGMLTensor(
+                torch_tensor,
+                ggml_quantization_type=tensor.tensor_type,
+                tensor_shape=shape,
+                compute_dtype=compute_dtype,
+            )
+        return sd