mirror of
https://github.com/ggerganov/llama.cpp
synced 2026-03-03 13:50:01 +01:00
* Faster tensors (#8) Add fast matrix and matrix/vector multiplication. * Use map for shader replacements instead of pair of strings * Wasm (#9) * webgpu : fix build on emscripten * more debugging stuff * test-backend-ops: force single thread on wasm * fix single-thread case for init_tensor_uniform * use jspi * add pthread * test: remember to set n_thread for cpu backend * Add buffer label and enable dawn-specific toggles to turn off some checks * Intermediate state * Fast working f16/f32 vec4 * Working float fast mul mat * Clean up naming of mul_mat to match logical model, start work on q mul_mat * Setup for subgroup matrix mat mul * Basic working subgroup matrix * Working subgroup matrix tiling * Handle weirder sg matrix sizes (but still % sg matrix size) * Working start to gemv * working f16 accumulation with shared memory staging * Print out available subgroup matrix configurations * Vectorize dst stores for sg matrix shader * Gemv working scalar * Minor set_rows optimization (#4) * updated optimization, fixed errors * non vectorized version now dispatches one thread per element * Simplify * Change logic for set_rows pipelines --------- Co-authored-by: Neha Abbas <nehaabbas@macbookpro.lan> Co-authored-by: Neha Abbas <nehaabbas@ReeseLevines-MacBook-Pro.local> Co-authored-by: Reese Levine <reeselevine1@gmail.com> * Comment on dawn toggles * Working subgroup matrix code for (semi)generic sizes * Remove some comments * Cleanup code * Update dawn version and move to portable subgroup size * Try to fix new dawn release * Update subgroup size comment * Only check for subgroup matrix configs if they are supported * Add toggles for subgroup matrix/f16 support on nvidia+vulkan * Make row/col naming consistent * Refactor shared memory loading * Move sg matrix stores to correct file * Working q4_0 * Formatting * Work with emscripten builds * Fix test-backend-ops emscripten for f16/quantized types * Use emscripten memory64 to support get_memory * Add build flags and try ci 
--------- Co-authored-by: Xuan Son Nguyen <son@huggingface.co> * Remove extra whitespace * Move wasm single-thread logic out of test-backend-ops for cpu backend * Disable multiple threads for emscripten single-thread builds in ggml_graph_plan * Fix .gitignore * Add memory64 option and remove unneeded macros for setting threads to 1 --------- Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
111 lines
3.3 KiB
JavaScript
111 lines
3.3 KiB
JavaScript
const http = require('http');
|
|
const fs = require('fs').promises;
|
|
const path = require('path');
|
|
|
|
// This file is a static file server used for testing the wasm build produced by emscripten.
// Example build commands:
//   emcmake cmake -B build-wasm -DGGML_WEBGPU=ON -DLLAMA_CURL=OFF
//   cmake --build build-wasm --target test-backend-ops -j
|
|
|
|
// TCP port the HTTP server listens on.
const PORT = 8080;

// Directory whose contents are served, resolved relative to this script's location.
const STATIC_DIR = path.join(__dirname, '../build-wasm/bin');

console.log(`Serving static files from: ${STATIC_DIR}`);
|
|
|
|
// Maps a lower-cased file extension (including the leading dot) to the
// Content-Type sent with the response. Extensions that are not listed are
// served as 'application/octet-stream' by the request handler.
const mimeTypes = {
    '.html': 'text/html',
    '.js': 'text/javascript',
    '.mjs': 'text/javascript',
    '.css': 'text/css',
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.gif': 'image/gif',
    '.svg': 'image/svg+xml',
    '.json': 'application/json',
    // Browsers only stream-compile WebAssembly when it is served with this type.
    '.wasm': 'application/wasm',
    '.woff': 'font/woff',
    '.woff2': 'font/woff2',
};
|
|
|
|
/**
 * Escape the characters that are significant in HTML text and attribute values.
 *
 * @param {string} text - Untrusted text to embed in HTML.
 * @returns {string} The text with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(text) {
    return String(text)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}

/**
 * Build an HTML page that lists the entries of a directory as relative links.
 *
 * @param {string} dirPath - Filesystem path of the directory to list.
 * @param {string} reqUrl - URL path the listing is served under; shown in the
 *     heading, and a parent-directory link is added unless it is exactly '/'.
 * @returns {Promise<string>} The complete HTML document.
 * @throws Rejects with the error from `fs.readdir` if the directory cannot be read.
 */
async function generateDirListing(dirPath, reqUrl) {
    const files = await fs.readdir(dirPath);

    // Stat every entry concurrently; Promise.all keeps the readdir order.
    const items = await Promise.all(files.map(async (file) => {
        let isDirectory = false;
        try {
            isDirectory = (await fs.stat(path.join(dirPath, file))).isDirectory();
        } catch {
            // The entry cannot be stat'ed (e.g. a dangling symlink, or it was
            // removed after readdir). List it as a plain file instead of
            // failing the whole listing.
        }
        const suffix = isDirectory ? '/' : '';
        // encodeURIComponent makes the name safe inside the quoted href;
        // the visible label is user-controlled text and must be HTML-escaped.
        return `<li><a href="${encodeURIComponent(file)}${suffix}">${escapeHtml(file)}${suffix}</a></li>`;
    }));

    const parentLink = reqUrl !== '/'
        ? '<li><a href="../">../ (Parent Directory)</a></li>'
        : '';

    return `<!DOCTYPE html>
<html>
<head>
<title>Directory Listing</title>
<style>
body { font-family: Arial, sans-serif; padding: 20px; }
ul { list-style: none; padding: 0; }
li { margin: 5px 0; }
a { text-decoration: none; color: #0066cc; }
a:hover { text-decoration: underline; }
</style>
</head>
<body>
<h1>Directory: ${escapeHtml(reqUrl)}</h1>
<ul>
${parentLink}${items.join('')}
</ul>
</body>
</html>
`;
}
|
|
|
|
/**
 * Map a raw request URL onto a filesystem path inside rootDir.
 *
 * @param {string} rootDir - Directory that served files must stay within.
 * @param {string} rawUrl - The request target as received (may carry a query string).
 * @returns {{status: number, filePath?: string, pathname?: string}}
 *     status 200 with the resolved `filePath` and the undecoded `pathname`;
 *     status 400 if the path is not valid percent-encoding or contains a NUL;
 *     status 403 if the decoded path resolves outside rootDir.
 */
function resolveRequestPath(rootDir, rawUrl) {
    // Only the path component names a file; drop any query string or fragment.
    const pathname = rawUrl.split(/[?#]/, 1)[0];

    let decoded;
    try {
        decoded = decodeURIComponent(pathname);
    } catch {
        return { status: 400 };
    }
    if (decoded.includes('\0')) {
        return { status: 400 };
    }

    const root = path.resolve(rootDir);
    // path.join normalizes '..' segments, so a plain prefix check is enough
    // to detect a path that climbs out of the root.
    const filePath = path.join(root, decoded);
    const rootPrefix = root.endsWith(path.sep) ? root : root + path.sep;
    if (filePath !== root && !filePath.startsWith(rootPrefix)) {
        return { status: 403 };
    }
    return { status: 200, filePath, pathname };
}

// HTTP server that serves files from STATIC_DIR. Directories are answered with
// their index.html when it can be read, otherwise with a generated listing.
const server = http.createServer(async (req, res) => {
    try {
        // Set COOP and COEP headers
        res.setHeader('Cross-Origin-Opener-Policy', 'same-origin');
        res.setHeader('Cross-Origin-Embedder-Policy', 'require-corp');
        // Disable caching so a rebuilt binary is always fetched again.
        res.setHeader('Cache-Control', 'no-store, no-cache, must-revalidate, proxy-revalidate');
        res.setHeader('Pragma', 'no-cache');
        res.setHeader('Expires', '0');

        const target = resolveRequestPath(STATIC_DIR, req.url);
        if (target.status === 400) {
            res.writeHead(400, { 'Content-Type': 'text/plain' });
            res.end('400 Bad Request');
            return;
        }
        if (target.status === 403) {
            res.writeHead(403, { 'Content-Type': 'text/plain' });
            res.end('403 Forbidden');
            return;
        }

        const { filePath, pathname } = target;
        const stats = await fs.stat(filePath);

        if (stats.isDirectory()) {
            let body;
            try {
                body = await fs.readFile(path.join(filePath, 'index.html'));
            } catch {
                // No readable index.html, generate directory listing.
                // The query-less pathname is passed so that '/?x=1' is still
                // recognized as the root directory.
                body = await generateDirListing(filePath, pathname);
            }
            res.writeHead(200, { 'Content-Type': 'text/html' });
            res.end(body);
            return;
        }

        const ext = path.extname(filePath).toLowerCase();
        const contentType = mimeTypes[ext] || 'application/octet-stream';
        const data = await fs.readFile(filePath);
        res.writeHead(200, { 'Content-Type': contentType });
        res.end(data);
    } catch (err) {
        if (res.headersSent) {
            // A status line is already on the wire; just terminate the response.
            console.error(err);
            res.end();
            return;
        }
        // ENOTDIR: a path component below an existing file, e.g. /a.txt/b.
        if (err.code === 'ENOENT' || err.code === 'ENOTDIR') {
            res.writeHead(404, { 'Content-Type': 'text/plain' });
            res.end('404 Not Found');
        } else {
            console.error(err);
            res.writeHead(500, { 'Content-Type': 'text/plain' });
            res.end('500 Internal Server Error');
        }
    }
});
|
|
|
|
// Start accepting connections on PORT and log the URL once the server is listening.
server.listen(PORT, () => {
    console.log(`Server running at http://localhost:${PORT}/`);
});
|