llama.cpp/scripts/serve-static.js
Reese Levine 7ca5991d2b
ggml webgpu: add support for emscripten builds (#17184)
* Faster tensors (#8)

Add fast matrix and matrix/vector multiplication.

* Use map for shader replacements instead of pair of strings

* Wasm (#9)

* webgpu : fix build on emscripten

* more debugging stuff

* test-backend-ops: force single thread on wasm

* fix single-thread case for init_tensor_uniform

* use jspi

* add pthread

* test: remember to set n_thread for cpu backend

* Add buffer label and enable dawn-specific toggles to turn off some checks

* Intermediate state

* Fast working f16/f32 vec4

* Working float fast mul mat

* Clean up naming of mul_mat to match logical model, start work on q mul_mat

* Setup for subgroup matrix mat mul

* Basic working subgroup matrix

* Working subgroup matrix tiling

* Handle weirder sg matrix sizes (but still % sg matrix size)

* Working start to gemv

* working f16 accumulation with shared memory staging

* Print out available subgroup matrix configurations

* Vectorize dst stores for sg matrix shader

* Gemv working scalar

* Minor set_rows optimization (#4)

* updated optimization, fixed errors

* non vectorized version now dispatches one thread per element

* Simplify

* Change logic for set_rows pipelines

---------

Co-authored-by: Neha Abbas <nehaabbas@macbookpro.lan>
Co-authored-by: Neha Abbas <nehaabbas@ReeseLevines-MacBook-Pro.local>
Co-authored-by: Reese Levine <reeselevine1@gmail.com>

* Comment on dawn toggles

* Working subgroup matrix code for (semi)generic sizes

* Remove some comments

* Cleanup code

* Update dawn version and move to portable subgroup size

* Try to fix new dawn release

* Update subgroup size comment

* Only check for subgroup matrix configs if they are supported

* Add toggles for subgroup matrix/f16 support on nvidia+vulkan

* Make row/col naming consistent

* Refactor shared memory loading

* Move sg matrix stores to correct file

* Working q4_0

* Formatting

* Work with emscripten builds

* Fix test-backend-ops emscripten for f16/quantized types

* Use emscripten memory64 to support get_memory

* Add build flags and try ci

---------

Co-authored-by: Xuan Son Nguyen <son@huggingface.co>

* Remove extra whitespace

* Move wasm single-thread logic out of test-backend-ops for cpu backend

* Disable multiple threads for emscripten single-thread builds in ggml_graph_plan

* Fix .gitignore

* Add memory64 option and remove unneeded macros for setting threads to 1

---------

Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
2025-12-03 10:25:34 +01:00

111 lines
3.3 KiB
JavaScript

const http = require('http');
const fs = require('fs').promises;
const path = require('path');
// This file is used for testing wasm build from emscripten
// Example build command:
// emcmake cmake -B build-wasm -DGGML_WEBGPU=ON -DLLAMA_CURL=OFF
// cmake --build build-wasm --target test-backend-ops -j
const PORT = 8080;
const STATIC_DIR = path.join(__dirname, '../build-wasm/bin');
console.log(`Serving static files from: ${STATIC_DIR}`);
const mimeTypes = {
'.html': 'text/html',
'.js': 'text/javascript',
'.css': 'text/css',
'.png': 'image/png',
'.jpg': 'image/jpeg',
'.gif': 'image/gif',
'.svg': 'image/svg+xml',
'.json': 'application/json',
'.woff': 'font/woff',
'.woff2': 'font/woff2',
};
async function generateDirListing(dirPath, reqUrl) {
const files = await fs.readdir(dirPath);
let html = `
<!DOCTYPE html>
<html>
<head>
<title>Directory Listing</title>
<style>
body { font-family: Arial, sans-serif; padding: 20px; }
ul { list-style: none; padding: 0; }
li { margin: 5px 0; }
a { text-decoration: none; color: #0066cc; }
a:hover { text-decoration: underline; }
</style>
</head>
<body>
<h1>Directory: ${reqUrl}</h1>
<ul>
`;
if (reqUrl !== '/') {
html += `<li><a href="../">../ (Parent Directory)</a></li>`;
}
for (const file of files) {
const filePath = path.join(dirPath, file);
const stats = await fs.stat(filePath);
const link = encodeURIComponent(file) + (stats.isDirectory() ? '/' : '');
html += `<li><a href="${link}">${file}${stats.isDirectory() ? '/' : ''}</a></li>`;
}
html += `
</ul>
</body>
</html>
`;
return html;
}
const server = http.createServer(async (req, res) => {
try {
// Set COOP and COEP headers
res.setHeader('Cross-Origin-Opener-Policy', 'same-origin');
res.setHeader('Cross-Origin-Embedder-Policy', 'require-corp');
res.setHeader('Cache-Control', 'no-store, no-cache, must-revalidate, proxy-revalidate');
res.setHeader('Pragma', 'no-cache');
res.setHeader('Expires', '0');
const filePath = path.join(STATIC_DIR, decodeURIComponent(req.url));
const stats = await fs.stat(filePath);
if (stats.isDirectory()) {
const indexPath = path.join(filePath, 'index.html');
try {
const indexData = await fs.readFile(indexPath);
res.writeHeader(200, { 'Content-Type': 'text/html' });
res.end(indexData);
} catch {
// No index.html, generate directory listing
const dirListing = await generateDirListing(filePath, req.url);
res.writeHeader(200, { 'Content-Type': 'text/html' });
res.end(dirListing);
}
} else {
const ext = path.extname(filePath).toLowerCase();
const contentType = mimeTypes[ext] || 'application/octet-stream';
const data = await fs.readFile(filePath);
res.writeHeader(200, { 'Content-Type': contentType });
res.end(data);
}
} catch (err) {
if (err.code === 'ENOENT') {
res.writeHeader(404, { 'Content-Type': 'text/plain' });
res.end('404 Not Found');
} else {
res.writeHeader(500, { 'Content-Type': 'text/plain' });
res.end('500 Internal Server Error');
}
}
});
server.listen(PORT, () => {
console.log(`Server running at http://localhost:${PORT}/`);
});