mirror of
https://github.com/ggerganov/llama.cpp
synced 2026-03-12 10:10:43 +01:00
* Adding --direct-io flag for model loading
* Fixing read_raw() calls
* Fixing Windows read_raw_at
* Changing type off_t to size_t for Windows and renaming functions
* disable direct io when mmap is explicitly enabled
* Use read_raw_unsafe when upload_backend is available, not functional on some devices with Vulkan and SYCL
* Fallback to std::fread in case O_DIRECT fails due to bad address
* Windows: remove const keywords and unused functions
* Update src/llama-mmap.cpp

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

---------

Co-authored-by: jtischbein <jtischbein@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
74 lines
1.6 KiB
C++
74 lines
1.6 KiB
C++
#pragma once
|
|
|
|
#include <cstdint>
|
|
#include <memory>
|
|
#include <vector>
|
|
#include <cstdio>
|
|
|
|
// Forward declarations: the container aliases below only need the names,
// not the full definitions, because they hold the objects through
// std::unique_ptr.
struct llama_file;
struct llama_mmap;
struct llama_mlock;

// Owning collections of files, memory mappings and memory locks.
// Each element is a std::unique_ptr, so the vectors are move-only.
using llama_files  = std::vector<std::unique_ptr<llama_file>>;
using llama_mmaps  = std::vector<std::unique_ptr<llama_mmap>>;
using llama_mlocks = std::vector<std::unique_ptr<llama_mlock>>;
|
|
|
|
// File handle whose state is hidden behind a private `impl` (pimpl), so this
// header carries no platform-specific details. The only data member is a
// std::unique_ptr, which makes the type non-copyable.
//
// NOTE(review): only the signatures are visible here; the behavioral notes
// below are assumptions to confirm against the implementation file.
struct llama_file {
    // Constructs the handle from a file name and a mode string
    // (presumably fopen-style, e.g. "rb" — confirm).
    // `use_direct_io` defaults to false.
    llama_file(const char * fname, const char * mode, bool use_direct_io = false);
    ~llama_file();

    // Presumably the current offset and the total size, both in bytes — confirm.
    size_t tell() const;
    size_t size() const;

    int file_id() const; // fileno overload

    // `whence` presumably follows the fseek convention (SEEK_SET/SEEK_CUR/SEEK_END) — confirm.
    void seek(size_t offset, int whence) const;

    // Read `len` bytes into `ptr`.
    // NOTE(review): how read_raw_unsafe differs from read_raw is not visible
    // in this header — check the implementation before choosing one.
    void read_raw(void * ptr, size_t len);
    void read_raw_unsafe(void * ptr, size_t len);
    // NOTE(review): presumably expects `dest`/`size` to respect read_alignment() — confirm.
    void read_aligned_chunk(void * dest, size_t size);
    // Returns a single 32-bit unsigned value.
    uint32_t read_u32();

    // Write counterparts; declared const, unlike the read functions.
    void write_raw(const void * ptr, size_t len) const;
    void write_u32(uint32_t val) const;

    // NOTE(review): presumably the alignment required for direct-I/O reads,
    // and whether direct I/O is actually in use for this file — confirm.
    size_t read_alignment() const;
    bool has_direct_io() const;

private:
    struct impl;
    std::unique_ptr<impl> pimpl;
};
|
|
|
|
// Memory mapping built from a llama_file. State is hidden behind a private
// `impl` (pimpl). Copy construction is explicitly deleted.
//
// NOTE(review): only the signatures are visible here; the behavioral notes
// below are assumptions to confirm against the implementation file.
struct llama_mmap {
    llama_mmap(const llama_mmap &) = delete;

    // `prefetch` defaults to (size_t) -1 and `numa` defaults to false.
    // NOTE(review): (size_t) -1 presumably means "prefetch everything" — confirm.
    llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1, bool numa = false);
    ~llama_mmap();

    // Presumably the length in bytes and the base address of the mapping — confirm.
    size_t size() const;
    void * addr() const;

    // NOTE(review): `first`/`last` presumably delimit a byte range within the
    // mapping to release — confirm units and inclusivity.
    void unmap_fragment(size_t first, size_t last);

    // Defined out of line.
    // NOTE(review): presumably true on platforms where mapping is available — confirm.
    static const bool SUPPORTED;

private:
    struct impl;
    std::unique_ptr<impl> pimpl;
};
|
|
|
|
// Memory-lock helper. State is hidden behind a private `impl` (pimpl); the
// only data member is a std::unique_ptr, which makes the type non-copyable.
//
// NOTE(review): only the signatures are visible here; the behavioral notes
// below are assumptions to confirm against the implementation file.
struct llama_mlock {
    llama_mlock();
    ~llama_mlock();

    // NOTE(review): presumably records `ptr` as the base address of the
    // region to lock, and is expected to be called before grow_to() — confirm.
    void init(void * ptr);
    // NOTE(review): presumably extends the locked region to `target_size`
    // bytes measured from the base address — confirm.
    void grow_to(size_t target_size);

    // Defined out of line.
    // NOTE(review): presumably true on platforms where locking is available — confirm.
    static const bool SUPPORTED;

private:
    struct impl;
    std::unique_ptr<impl> pimpl;
};
|
|
|
|
// NOTE(review): presumably returns the platform's maximum path length
// (e.g. PATH_MAX / MAX_PATH) — the definition is not visible in this header; confirm.
size_t llama_path_max();
|