mirror of
https://github.com/invoke-ai/InvokeAI
synced 2026-03-02 21:19:11 +01:00
Fix memory issues when installing models on Windows (#8652)
* Wrap GGUF loader for context-managed close(): wrap gguf.GGUFReader and load memory-mapped GGUF files through a context manager so they are closed automatically when no longer needed. Prevents "file in use in another process" errors on Windows.
* Add an additional check for a cached state_dict, since the path is now optional — fixes the model manager missing the cache and the resulting memory errors.
* Appease ruff (several follow-up lint passes).
* loaders.py fix for Linux: no longer attempt to delete the reader's internal object (one more _mmap reference removed).

Co-authored-by: Lincoln Stein <lincoln.stein@gmail.com>
This commit is contained in:
parent
abcc987f6f
commit
382d85ee23
@ -84,6 +84,9 @@ class ModelOnDisk:
|
||||
|
||||
path = self.resolve_weight_file(path)
|
||||
|
||||
if path in self._state_dict_cache:
|
||||
return self._state_dict_cache[path]
|
||||
|
||||
with SilenceWarnings():
|
||||
if path.suffix.endswith((".ckpt", ".pt", ".pth", ".bin")):
|
||||
scan_result = scan_file_path(path)
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
import gc
|
||||
from pathlib import Path
|
||||
|
||||
import gguf
|
||||
@ -5,18 +6,48 @@ import torch
|
||||
|
||||
from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor
|
||||
from invokeai.backend.quantization.gguf.utils import TORCH_COMPATIBLE_QTYPES
|
||||
from invokeai.backend.util.logging import InvokeAILogger
|
||||
|
||||
logger = InvokeAILogger.get_logger()
|
||||
|
||||
|
||||
class WrappedGGUFReader:
    """Context-managed wrapper around ``gguf.GGUFReader`` that adds ``close()``.

    ``gguf.GGUFReader`` memory-maps the file and keeps the mapping open for the
    reader's lifetime; on Windows an open mapping blocks deleting or moving the
    file. ``close()`` flushes and drops the mapping so the OS handle is released
    promptly instead of waiting for garbage collection.
    """

    def __init__(self, path: Path):
        # Underlying reader; set to None once close() has run.
        self.reader = gguf.GGUFReader(path)

    def __enter__(self):
        # Hand callers the raw reader so existing reader-based code works unchanged.
        return self.reader

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        # Never suppress exceptions raised inside the with-block.
        return False

    def close(self):
        """Explicitly close the memory-mapped file.

        Idempotent: safe to call more than once (e.g. an explicit close()
        followed by the context manager's __exit__). The original version did
        ``del self.reader`` here, so a second call raised AttributeError.
        """
        reader = getattr(self, "reader", None)
        if reader is None:
            return  # already closed
        if hasattr(reader, "data"):
            try:
                # Flush and drop the numpy memmap backing the reader.
                reader.data.flush()
                del reader.data
            except (AttributeError, OSError, ValueError) as e:
                # Best-effort: a mapping we cannot close is logged, not fatal.
                logger.warning(f"Wasn't able to close GGUF memory map: {e}")
        # Drop our reference and nudge the GC so the mmap is freed promptly.
        self.reader = None
        gc.collect()
|
||||
|
||||
|
||||
def gguf_sd_loader(path: Path, compute_dtype: torch.dtype) -> dict[str, GGMLTensor]:
    """Load a GGUF file into a state dict of ``GGMLTensor``s.

    Args:
        path: Path to the ``.gguf`` file on disk.
        compute_dtype: dtype the GGML tensors should dequantize/compute in.

    Returns:
        Mapping of tensor name -> GGMLTensor for every tensor in the file.

    The reader is used as a context manager so the underlying memory map is
    released as soon as loading finishes (avoids Windows file-lock errors).
    NOTE(review): tensors created via torch.from_numpy keep the mmap buffer
    alive through their numpy base, so they remain valid after close().
    """
    with WrappedGGUFReader(path) as reader:
        sd: dict[str, GGMLTensor] = {}
        for tensor in reader.tensors:
            torch_tensor = torch.from_numpy(tensor.data)
            # GGUF stores dimensions in reverse order relative to torch.
            shape = torch.Size(tuple(int(v) for v in reversed(tensor.shape)))
            if tensor.tensor_type in TORCH_COMPATIBLE_QTYPES:
                # Unquantized types map 1:1 onto torch memory; restore the shape.
                torch_tensor = torch_tensor.view(*shape)
            sd[tensor.name] = GGMLTensor(
                torch_tensor,
                ggml_quantization_type=tensor.tensor_type,
                tensor_shape=shape,
                compute_dtype=compute_dtype,
            )
        return sd
|
||||
|
||||
Loading…
Reference in New Issue
Block a user