ggml/examples/python/example_test_all_quants.py
ochafik 53a0003018 Add python example w/ cffi-generated bindings
Add python example w/ cffi-generated bindings

Features:

- Seamless copies between tensors (ggml & numpy alike) with automatic (de/re)quantization
- Access to full C API (incl. CUDA, MPI, OpenCL, Metal, alloc... and any local API changes)
- Trivial regeneration with `python regenerate.py` (uses llama.cpp headers by default, README.md for options)
2023-08-13 20:05:13 +01:00

68 lines
1.9 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from ggml import ffi, lib
from ggml.utils import init, numpy, copy
import numpy as np
from math import pi, cos, sin, ceil
import matplotlib.pyplot as plt
ctx = init(mem_size=100*1024*1024) # Will be auto-GC'd
n = 256
orig = np.array([
[
cos(j * 2 * pi / n) * (sin(i * 2 * pi / n))
for j in range(n)
]
for i in range(n)
], np.float32)
orig_tensor = lib.ggml_new_tensor_2d(ctx, lib.GGML_TYPE_F32, n, n)
copy(orig, orig_tensor)
quants = [
type for type in range(lib.GGML_TYPE_COUNT)
if lib.ggml_is_quantized(type) and
type not in [lib.GGML_TYPE_Q8_1, lib.GGML_TYPE_Q8_K] # Apparently not supported
]
# quants = [lib.GGML_TYPE_Q2_K] # Test a single one
def get_name(type):
name = lib.ggml_type_name(type)
return ffi.string(name).decode('utf-8') if name else '?'
quants.sort(key=get_name)
quants.insert(0, None)
print(quants)
ncols=4
nrows = ceil(len(quants) / ncols)
plt.figure(figsize=(ncols * 5, nrows * 5), layout='tight')
for i, type in enumerate(quants):
plt.subplot(nrows, ncols, i + 1)
try:
if type == None:
plt.title('Original')
plt.imshow(orig)
else:
quantized_tensor = lib.ggml_new_tensor_2d(ctx, type, n, n)
copy(orig_tensor, quantized_tensor)
quantized = numpy(quantized_tensor, allow_copy=True)
d = quantized - orig
results = {
"l2": np.linalg.norm(d, 2),
"linf": np.linalg.norm(d, np.inf),
"compression":
round(lib.ggml_nbytes(orig_tensor) /
lib.ggml_nbytes(quantized_tensor), 1)
}
name = get_name(type)
print(f'{name}: {results}')
plt.title(f'{name} ({results["compression"]}x smaller)')
plt.imshow(quantized, interpolation='nearest')
except Exception as e:
print(f'Error: {e}')
plt.show()