mirror of
https://github.com/ggerganov/llama.cpp
synced 2026-03-05 06:39:29 +01:00
This commit replaces/merges the inspect-org-model.py script with the contents of the tensor-info.py script. The merged script has also been updated to print tensor sizes, which was the only thing tensor-info.py did not previously do. The motivation is that tensor-info.py does not load the tensor weights, which can be time-consuming for larger models. Also, now that both scripts do almost the same thing, it makes sense to maintain one script rather than two.
236 lines
8.7 KiB
Makefile
236 lines
8.7 KiB
Makefile
# Append --no-print-directory to make's flags so that make does not print
# its "Entering directory"/"Leaving directory" messages.
MAKEFLAGS += --no-print-directory
|
|
|
|
# Canned recipe: abort with usage help when MODEL_PATH is empty.
#   $(1) - name of the target, shown in the example command line.
define validate_model_path
	@[ -n "$(MODEL_PATH)" ] || { \
		echo "Error: MODEL_PATH must be provided either as:"; \
		echo " 1. Environment variable: export MODEL_PATH=/path/to/model"; \
		echo " 2. Command line argument: make $(1) MODEL_PATH=/path/to/model"; \
		exit 1; \
	}
endef
|
|
|
|
# Canned recipe: abort with usage help when EMBEDDING_MODEL_PATH is empty.
#   $(1) - name of the target, shown in the example command line.
define validate_embedding_model_path
	@[ -n "$(EMBEDDING_MODEL_PATH)" ] || { \
		echo "Error: EMBEDDING_MODEL_PATH must be provided either as:"; \
		echo " 1. Environment variable: export EMBEDDING_MODEL_PATH=/path/to/model"; \
		echo " 2. Command line argument: make $(1) EMBEDDING_MODEL_PATH=/path/to/model"; \
		exit 1; \
	}
endef
|
|
|
|
# Canned recipe: run scripts/utils/quantize.sh on a converted model.
#   $(1) - path of the converted model to quantize.
#   $(2) - name of the environment variable the user is told to export.
# QUANTIZED_TYPE, TOKEN_EMBD_TYPE and OUTPUT_TYPE are passed both through the
# environment and as positional arguments.
define quantize_model
	@CONVERTED_MODEL="$(1)" \
	QUANTIZED_TYPE="$(QUANTIZED_TYPE)" \
	TOKEN_EMBD_TYPE="$(TOKEN_EMBD_TYPE)" \
	OUTPUT_TYPE="$(OUTPUT_TYPE)" \
	./scripts/utils/quantize.sh "$(1)" "$(QUANTIZED_TYPE)" "$(TOKEN_EMBD_TYPE)" "$(OUTPUT_TYPE)"
	@echo "Export the quantized model path to $(2) variable in your environment"
endef
|
|
|
|
# Value passed as --device to run-org-model.py by causal-run-original-model.
# Defaults to "auto" unless DEVICE is already set (environment or command line).
DEVICE ?= auto
|
|
|
|
###
|
|
### Causal Model targets/recipes
|
|
###
|
|
# Variant of causal-convert-model with OUTTYPE set to bf16.
causal-convert-model-bf16: OUTTYPE := bf16
causal-convert-model-bf16: causal-convert-model

# Variant of causal-convert-model that passes --debug to the convert script.
causal-convert-model-debug: DEBUG := --debug
causal-convert-model-debug: causal-convert-model
|
|
|
|
# Convert the causal model at MODEL_PATH via scripts/causal/convert-model.sh.
# Settings are handed to the script through its environment; $(DEBUG), when
# set, is appended as a command-line flag.
causal-convert-model:
	$(call validate_model_path,$@)
	@MODEL_NAME="$(MODEL_NAME)" \
	OUTTYPE="$(OUTTYPE)" \
	MODEL_PATH="$(MODEL_PATH)" \
	METADATA_OVERRIDE="$(METADATA_OVERRIDE)" \
	./scripts/causal/convert-model.sh $(DEBUG)
|
|
|
|
# Variant of causal-convert-mm-model: OUTTYPE is bf16 for the first conversion
# and MM_OUTTYPE is f16 for the --mmproj conversion.
causal-convert-mm-model-bf16: OUTTYPE := bf16
causal-convert-mm-model-bf16: MM_OUTTYPE := f16
causal-convert-mm-model-bf16: causal-convert-mm-model
|
|
|
|
# Run scripts/causal/convert-model.sh twice for the model at MODEL_PATH:
# first with OUTTYPE, then again with --mmproj using MM_OUTTYPE as OUTTYPE.
causal-convert-mm-model:
	$(call validate_model_path,$@)
	@MODEL_NAME="$(MODEL_NAME)" \
	OUTTYPE="$(OUTTYPE)" \
	MODEL_PATH="$(MODEL_PATH)" \
	METADATA_OVERRIDE="$(METADATA_OVERRIDE)" \
	./scripts/causal/convert-model.sh

	@MODEL_NAME="$(MODEL_NAME)" \
	OUTTYPE="$(MM_OUTTYPE)" \
	MODEL_PATH="$(MODEL_PATH)" \
	METADATA_OVERRIDE="$(METADATA_OVERRIDE)" \
	./scripts/causal/convert-model.sh --mmproj
|
|
|
|
# Run scripts/causal/run-org-model.py with MODEL_PATH in its environment and
# DEVICE passed as --device.
causal-run-original-model:
	$(call validate_model_path,$@)
	@MODEL_PATH="$(MODEL_PATH)" \
	./scripts/causal/run-org-model.py --device "$(DEVICE)"
|
|
|
|
# Run scripts/causal/run-converted-model.sh with CONVERTED_MODEL in its
# environment.
causal-run-converted-model:
	@CONVERTED_MODEL="$(CONVERTED_MODEL)" \
	./scripts/causal/run-converted-model.sh
|
|
|
|
# Run the original and converted models (prerequisites), then run
# compare-logits.py and check-nmse.py with MODEL_PATH in their environment.
# MODEL_PATH is quoted on the check-nmse.py command line so that a path
# containing spaces is passed as a single -m argument; it is guaranteed to be
# non-empty here because causal-run-original-model validates it.
causal-verify-logits: causal-run-original-model causal-run-converted-model
	@MODEL_PATH="$(MODEL_PATH)" ./scripts/causal/compare-logits.py
	@MODEL_PATH="$(MODEL_PATH)" ./scripts/utils/check-nmse.py -m "$(MODEL_PATH)"
|
|
|
|
# Run scripts/causal/run-casual-gen-embeddings-org.py.
causal-run-original-embeddings:
	@./scripts/causal/run-casual-gen-embeddings-org.py

# Run scripts/causal/run-converted-model-embeddings-logits.sh.
causal-run-converted-embeddings:
	@./scripts/causal/run-converted-model-embeddings-logits.sh

# Run both embedding targets above (prerequisites), then
# scripts/causal/compare-embeddings-logits.sh.
causal-verify-embeddings: causal-run-original-embeddings causal-run-converted-embeddings
	@./scripts/causal/compare-embeddings-logits.sh
|
|
|
|
# Run scripts/utils/inspect-org-model.py with --list-all -s.
causal-inspect-original-model:
	@./scripts/utils/inspect-org-model.py --list-all -s

# Run scripts/utils/inspect-org-model.py with --list-all-short -s.
causal-list-original-model-tensors:
	@./scripts/utils/inspect-org-model.py --list-all-short -s

# Run scripts/utils/inspect-converted-model.sh.
causal-inspect-converted-model:
	@./scripts/utils/inspect-converted-model.sh
|
|
|
|
# Run scripts/utils/run-embedding-server.sh, passing CONVERTED_MODEL as its
# argument. The path is quoted so that spaces do not split it into several
# arguments; when CONVERTED_MODEL is empty no argument is passed at all,
# which matches the previous behaviour.
causal-start-embedding-server:
	@./scripts/utils/run-embedding-server.sh $(if $(CONVERTED_MODEL),"$(CONVERTED_MODEL)")
|
|
|
|
# After causal-run-original-embeddings, pipe the output of
# curl-embedding-server.sh into the causal compare-embeddings-logits.sh script.
causal-curl-embedding-endpoint: causal-run-original-embeddings
	@./scripts/utils/curl-embedding-server.sh \
		| ./scripts/causal/compare-embeddings-logits.sh
|
|
|
|
# Quantize the converted causal model with QUANTIZED_TYPE set to Q8_0.
causal-quantize-Q8_0: QUANTIZED_TYPE := Q8_0
causal-quantize-Q8_0: causal-quantize-model

# Quantize the converted causal model with QUANTIZED_TYPE set to Q4_0.
causal-quantize-Q4_0: QUANTIZED_TYPE := Q4_0
causal-quantize-Q4_0: causal-quantize-model
|
|
|
|
# For Quantization Aware Trained (QAT) models in Q4_0, the token embedding
# and output types are explicitly set to Q8_0 instead of the default Q6_K.
causal-quantize-qat-Q4_0: QUANTIZED_TYPE  := Q4_0
causal-quantize-qat-Q4_0: TOKEN_EMBD_TYPE := Q8_0
causal-quantize-qat-Q4_0: OUTPUT_TYPE     := Q8_0
causal-quantize-qat-Q4_0: causal-quantize-model
|
|
|
|
# Quantize CONVERTED_MODEL using the quantize_model canned recipe; the user is
# then told to export the result as QUANTIZED_MODEL.
causal-quantize-model:
	$(call quantize_model,$(CONVERTED_MODEL),QUANTIZED_MODEL)
|
|
|
|
# Run scripts/causal/run-converted-model.sh on the quantized model, passing
# QUANTIZED_MODEL both in the environment and as the script's argument.
# The argument is quoted so that spaces do not split the path; when
# QUANTIZED_MODEL is empty no argument is passed, as before.
causal-run-quantized-model:
	@QUANTIZED_MODEL="$(QUANTIZED_MODEL)" \
	./scripts/causal/run-converted-model.sh $(if $(QUANTIZED_MODEL),"$(QUANTIZED_MODEL)")
|
|
|
|
|
|
###
|
|
### Embedding Model targets/recipes
|
|
###
|
|
|
|
# Variant of embedding-convert-model with OUTTYPE set to bf16.
embedding-convert-model-bf16: OUTTYPE := bf16
embedding-convert-model-bf16: embedding-convert-model
|
|
|
|
# Convert the embedding model via scripts/embedding/convert-model.sh.
# EMBEDDING_MODEL_PATH is handed to the script as MODEL_PATH.
embedding-convert-model:
	$(call validate_embedding_model_path,$@)
	@MODEL_NAME="$(MODEL_NAME)" \
	OUTTYPE="$(OUTTYPE)" \
	MODEL_PATH="$(EMBEDDING_MODEL_PATH)" \
	METADATA_OVERRIDE="$(METADATA_OVERRIDE)" \
	./scripts/embedding/convert-model.sh
|
|
|
|
# Same as embedding-convert-model, but passes -st to the convert script.
embedding-convert-model-st:
	$(call validate_embedding_model_path,$@)
	@MODEL_NAME="$(MODEL_NAME)" \
	OUTTYPE="$(OUTTYPE)" \
	MODEL_PATH="$(EMBEDDING_MODEL_PATH)" \
	METADATA_OVERRIDE="$(METADATA_OVERRIDE)" \
	./scripts/embedding/convert-model.sh -st
|
|
|
|
# Run scripts/embedding/run-original-model.py for the model at
# EMBEDDING_MODEL_PATH. --prompts-file is added only when PROMPTS_FILE is set,
# and --use-sentence-transformers only when USE_SENTENCE_TRANSFORMERS is set.
embedding-run-original-model:
	$(call validate_embedding_model_path,$@)
	@EMBEDDING_MODEL_PATH="$(EMBEDDING_MODEL_PATH)" \
	USE_SENTENCE_TRANSFORMERS="$(USE_SENTENCE_TRANSFORMERS)" \
	./scripts/embedding/run-original-model.py \
		$(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)") \
		$(if $(USE_SENTENCE_TRANSFORMERS),--use-sentence-transformers)
|
|
|
|
# Variant of embedding-run-original-model with USE_SENTENCE_TRANSFORMERS=1.
embedding-run-original-model-st: USE_SENTENCE_TRANSFORMERS := 1
embedding-run-original-model-st: embedding-run-original-model
|
|
|
|
# Run scripts/embedding/run-converted-model.sh on CONVERTED_EMBEDDING_MODEL.
# The model path is quoted so that spaces do not split it; when the variable
# is empty no positional argument is passed, as before. --prompts-file and
# --embd-normalize are added only when their variables are set.
embedding-run-converted-model:
	@./scripts/embedding/run-converted-model.sh \
		$(if $(CONVERTED_EMBEDDING_MODEL),"$(CONVERTED_EMBEDDING_MODEL)") \
		$(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)") \
		$(if $(EMBD_NORMALIZE),--embd-normalize "$(EMBD_NORMALIZE)")
|
|
|
|
# Run the original and converted embedding models (prerequisites), then
# scripts/embedding/compare-embeddings-logits.sh; --prompts-file is forwarded
# when PROMPTS_FILE is set.
embedding-verify-logits: embedding-run-original-model embedding-run-converted-model
	@./scripts/embedding/compare-embeddings-logits.sh \
		$(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)")
|
|
|
|
# Like embedding-verify-logits, but the original model is run through
# embedding-run-original-model-st.
embedding-verify-logits-st: embedding-run-original-model-st embedding-run-converted-model
	@./scripts/embedding/compare-embeddings-logits.sh \
		$(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)")
|
|
|
|
# Run scripts/utils/inspect-org-model.py on EMBEDDING_MODEL_PATH with
# --list-all -s. The -m argument is quoted so that a path containing spaces is
# passed as one argument; the validation step above guarantees it is non-empty.
embedding-inspect-original-model:
	$(call validate_embedding_model_path,embedding-inspect-original-model)
	@EMBEDDING_MODEL_PATH="$(EMBEDDING_MODEL_PATH)" \
	./scripts/utils/inspect-org-model.py -m "$(EMBEDDING_MODEL_PATH)" --list-all -s
|
|
|
|
# Run scripts/utils/inspect-converted-model.sh, passing
# CONVERTED_EMBEDDING_MODEL both in the environment and as the argument.
# The argument is quoted so that spaces do not split the path; when the
# variable is empty no argument is passed, as before.
embedding-inspect-converted-model:
	@CONVERTED_EMBEDDING_MODEL="$(CONVERTED_EMBEDDING_MODEL)" \
	./scripts/utils/inspect-converted-model.sh $(if $(CONVERTED_EMBEDDING_MODEL),"$(CONVERTED_EMBEDDING_MODEL)")
|
|
|
|
# Run scripts/utils/run-embedding-server.sh, passing CONVERTED_EMBEDDING_MODEL
# as its argument. The path is quoted so that spaces do not split it; when the
# variable is empty no argument is passed, as before.
embedding-start-embedding-server:
	@./scripts/utils/run-embedding-server.sh $(if $(CONVERTED_EMBEDDING_MODEL),"$(CONVERTED_EMBEDDING_MODEL)")
|
|
|
|
# Pipe the output of curl-embedding-server.sh into the embedding
# compare-embeddings-logits.sh script.
embedding-curl-embedding-endpoint:
	@./scripts/utils/curl-embedding-server.sh \
		| ./scripts/embedding/compare-embeddings-logits.sh
|
|
|
|
# Quantize the converted embedding model with QUANTIZED_TYPE set to Q8_0.
embedding-quantize-Q8_0: QUANTIZED_TYPE := Q8_0
embedding-quantize-Q8_0: embedding-quantize-model

# Quantize the converted embedding model with QUANTIZED_TYPE set to Q4_0.
embedding-quantize-Q4_0: QUANTIZED_TYPE := Q4_0
embedding-quantize-Q4_0: embedding-quantize-model
|
|
|
|
# For Quantization Aware Trained (QAT) models in Q4_0, the token embedding
# and output types are explicitly set to Q8_0 instead of the default Q6_K.
embedding-quantize-qat-Q4_0: QUANTIZED_TYPE  := Q4_0
embedding-quantize-qat-Q4_0: TOKEN_EMBD_TYPE := Q8_0
embedding-quantize-qat-Q4_0: OUTPUT_TYPE     := Q8_0
embedding-quantize-qat-Q4_0: embedding-quantize-model
|
|
|
|
# Quantize CONVERTED_EMBEDDING_MODEL using the quantize_model canned recipe;
# the user is then told to export the result as QUANTIZED_EMBEDDING_MODEL.
embedding-quantize-model:
	$(call quantize_model,$(CONVERTED_EMBEDDING_MODEL),QUANTIZED_EMBEDDING_MODEL)
|
|
|
|
# Run scripts/embedding/run-converted-model.sh on QUANTIZED_EMBEDDING_MODEL.
# The model path is quoted so that spaces do not split it; when the variable
# is empty no positional argument is passed, as before. --prompts-file is
# added only when PROMPTS_FILE is set.
embedding-run-quantized-model:
	@./scripts/embedding/run-converted-model.sh \
		$(if $(QUANTIZED_EMBEDDING_MODEL),"$(QUANTIZED_EMBEDDING_MODEL)") \
		$(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)")
|
|
|
|
###
|
|
### Perplexity targets/recipes
|
|
###
|
|
# Run scripts/utils/perplexity-gen.sh with CONVERTED_MODEL in its environment.
# The command is echoed (no @ prefix).
perplexity-data-gen:
	CONVERTED_MODEL="$(CONVERTED_MODEL)" \
	./scripts/utils/perplexity-gen.sh
|
|
|
|
# Run scripts/utils/perplexity-run.sh with QUANTIZED_MODEL and LOGITS_FILE in
# its environment. The command is echoed (no @ prefix).
# The environment variable was previously misspelled LOOGITS_FILE, so the
# value of LOGITS_FILE was never exported under its real name.
perplexity-run-full:
	QUANTIZED_MODEL="$(QUANTIZED_MODEL)" LOGITS_FILE="$(LOGITS_FILE)" \
	./scripts/utils/perplexity-run.sh
|
|
|
|
# Run scripts/utils/perplexity-run-simple.sh with QUANTIZED_MODEL in its
# environment. The command is echoed (no @ prefix).
perplexity-run:
	QUANTIZED_MODEL="$(QUANTIZED_MODEL)" \
	./scripts/utils/perplexity-run-simple.sh
|
|
|
|
###
|
|
### HuggingFace targets/recipes
|
|
###
|
|
|
|
# All targets below run scripts/utils/hf-create-model.py with MODEL_NAME (-m),
# NAMESPACE (-ns) and ORIGINAL_BASE_MODEL (-b); they differ only in the extra
# flags appended.
hf-create-model:
	@./scripts/utils/hf-create-model.py -m "$(MODEL_NAME)" -ns "$(NAMESPACE)" -b "$(ORIGINAL_BASE_MODEL)"

# Adds -d.
hf-create-model-dry-run:
	@./scripts/utils/hf-create-model.py -m "$(MODEL_NAME)" -ns "$(NAMESPACE)" -b "$(ORIGINAL_BASE_MODEL)" -d

# Adds -e.
hf-create-model-embedding:
	@./scripts/utils/hf-create-model.py -m "$(MODEL_NAME)" -ns "$(NAMESPACE)" -b "$(ORIGINAL_BASE_MODEL)" -e

# Adds -e -d.
hf-create-model-embedding-dry-run:
	@./scripts/utils/hf-create-model.py -m "$(MODEL_NAME)" -ns "$(NAMESPACE)" -b "$(ORIGINAL_BASE_MODEL)" -e -d

# Adds -p.
hf-create-model-private:
	@./scripts/utils/hf-create-model.py -m "$(MODEL_NAME)" -ns "$(NAMESPACE)" -b "$(ORIGINAL_BASE_MODEL)" -p
|
|
|
|
# Run scripts/utils/hf-upload-gguf-model.py with MODEL_PATH (-m), REPO_ID (-r)
# and NAME_IN_REPO (-o).
hf-upload-gguf-to-model:
	@./scripts/utils/hf-upload-gguf-model.py -m "$(MODEL_PATH)" -r "$(REPO_ID)" -o "$(NAME_IN_REPO)"
|
|
|
|
# Run scripts/utils/hf-create-collection.py with NAME (-n), DESCRIPTION (-d)
# and NAMESPACE (-ns).
hf-create-collection:
	@./scripts/utils/hf-create-collection.py -n "$(NAME)" -d "$(DESCRIPTION)" -ns "$(NAMESPACE)"
|
|
|
|
# Run scripts/utils/hf-add-model-to-collection.py with COLLECTION (-c) and
# MODEL (-m).
hf-add-model-to-collection:
	@./scripts/utils/hf-add-model-to-collection.py -c "$(COLLECTION)" -m "$(MODEL)"
|
|
|
|
|
|
# Remove the data directory and the dot-files listed below.
# clean is declared phony so that it runs even if a file named "clean" exists.
.PHONY: clean
clean:
	@$(RM) -rf data \
		.converted_embedding_model.txt \
		.converted_model.txt \
		.embedding_model_name.txt \
		.model_name.txt
|
|
|