Express

Middleware and route handlers for Express.js applications.

Installation

Terminal
npm install @tryhamster/gerbil express

Quick Start

Mount all Gerbil routes with a single line:

server.ts
import express from "express";
import { gerbil } from "@tryhamster/gerbil/express";

const app = express();
app.use(express.json());

// Mount all AI routes under /ai
app.use("/ai", gerbil({ model: "qwen3-0.6b" })());

app.listen(3000, () => {
  console.log("Server running on http://localhost:3000");
});
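
Once the server is running, any HTTP client can hit the generate endpoint (documented under Available Endpoints below). A quick smoke test in TypeScript; the prompt text is just an example:

test.ts
// Smoke test against the running server
const res = await fetch("http://localhost:3000/ai/generate", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ prompt: "Say hello in five words" }),
});

const { text } = await res.json();
console.log(text);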

Available Endpoints

The middleware creates these endpoints:

Method  Endpoint   Description
POST    /generate  Generate text
POST    /stream    Stream text (SSE)
POST    /json      Structured JSON output
POST    /embed     Generate embeddings
POST    /chat      Multi-turn chat
GET     /info      Model info
GET     /stats     Session statistics
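
The two GET endpoints take no body and are handy for health checks. A minimal sketch; the exact response shapes are not shown here, so the code just logs whatever comes back:

info.ts
// Query model info and session statistics
const info = await fetch("http://localhost:3000/ai/info").then((r) => r.json());
console.log(info); // model metadata (name, device, dtype, ...)

const stats = await fetch("http://localhost:3000/ai/stats").then((r) => r.json());
console.log(stats); // session statistics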

Configuration

config.ts
import { gerbil } from "@tryhamster/gerbil/express";

const aiMiddleware = gerbil({
  // Model configuration
  model: "qwen3-0.6b",
  device: "auto", // "auto" | "gpu" | "cpu"
  dtype: "q4", // "q4" | "q8" | "fp16" | "fp32"
  cacheDir: "./models", // Model cache directory

  // Default generation options
  maxTokens: 500,
  temperature: 0.7,
  topP: 0.9,

  // System prompt
  system: "You are a helpful assistant.",

  // CORS (if needed)
  cors: true,
  corsOptions: {
    origin: "https://myapp.com",
    credentials: true,
  },
});

app.use("/ai", aiMiddleware());

POST /generate

Generate text from a prompt:

generate.json
// Request
POST /ai/generate
Content-Type: application/json

{
  "prompt": "Write a haiku about coding",
  "options": {
    "maxTokens": 100,
    "temperature": 0.8,
    "thinking": false
  }
}

// Response
{
  "text": "Silent keystrokes fall\nBugs emerge from tangled code\nCoffee saves the day",
  "tokensGenerated": 23,
  "tokensPerSecond": 45.2,
  "totalTime": 0.51
}

POST /stream

Stream text using Server-Sent Events:

stream.txt
// Request
POST /ai/stream
Content-Type: application/json

{
  "prompt": "Tell me a story",
  "options": {
    "maxTokens": 500
  }
}

// Response (SSE stream)
data: {"token": "Once"}
data: {"token": " upon"}
data: {"token": " a"}
data: {"token": " time"}
...
data: {"done": true, "tokensGenerated": 150}

Client-side consumption:

client.ts
const response = await fetch("/ai/stream", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ prompt: "Tell me a story" }),
});

const reader = response.body!.getReader();
const decoder = new TextDecoder();

while (true) {
  const { done, value } = await reader.read();
  if (done) break;

  const chunk = decoder.decode(value);
  const lines = chunk.split("\n").filter(Boolean);

  for (const line of lines) {
    if (line.startsWith("data: ")) {
      const data = JSON.parse(line.slice(6));
      if (data.token) {
        process.stdout.write(data.token); // in a browser, append to the DOM instead
      }
    }
  }
}

POST /json

Generate structured JSON with schema validation:

json.json
// Request
POST /ai/json
Content-Type: application/json

{
  "prompt": "Extract: John is 32 and lives in NYC",
  "schema": {
    "type": "object",
    "properties": {
      "name": { "type": "string" },
      "age": { "type": "number" },
      "city": { "type": "string" }
    },
    "required": ["name", "age", "city"]
  },
  "options": {
    "retries": 3
  }
}

// Response
{
  "data": {
    "name": "John",
    "age": 32,
    "city": "NYC"
  }
}
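
From TypeScript, the same request looks like this. The Person type is a local assumption that mirrors the schema above:

extract.ts
// Call /json and use the schema-validated result
type Person = { name: string; age: number; city: string };

const res = await fetch("http://localhost:3000/ai/json", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    prompt: "Extract: John is 32 and lives in NYC",
    schema: {
      type: "object",
      properties: {
        name: { type: "string" },
        age: { type: "number" },
        city: { type: "string" },
      },
      required: ["name", "age", "city"],
    },
  }),
});

const { data } = (await res.json()) as { data: Person };
console.log(`${data.name}, ${data.age}, ${data.city}`);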

POST /embed

Generate embeddings for text:

embed.json
// Single text
POST /ai/embed

{
  "text": "Hello world"
}

// Response
{
  "vector": [0.123, -0.456, ...], // 384 dimensions
  "dimensions": 384
}

// Batch
POST /ai/embed

{
  "texts": ["Hello", "World", "Foo"]
}

// Response
{
  "vectors": [
    [0.123, ...],
    [0.456, ...],
    [0.789, ...]
  ],
  "dimensions": 384
}
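
Embeddings are typically compared with cosine similarity. A small helper, assuming the batch response shape shown above:

similarity.ts
// Compare two texts via the /embed batch endpoint
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0, normA = 0, normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

const res = await fetch("http://localhost:3000/ai/embed", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ texts: ["I love coding", "Programming is fun"] }),
});

const { vectors } = (await res.json()) as { vectors: number[][] };
console.log(cosineSimilarity(vectors[0], vectors[1])); // closer to 1 = more similar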

POST /chat

Multi-turn conversation:

chat.json
// Request
POST /ai/chat

{
  "messages": [
    { "role": "user", "content": "My name is Alice" },
    { "role": "assistant", "content": "Hello Alice!" },
    { "role": "user", "content": "What's my name?" }
  ],
  "options": {
    "stream": false,
    "thinking": false
  }
}

// Response
{
  "message": {
    "role": "assistant",
    "content": "Your name is Alice!"
  },
  "tokensGenerated": 8
}
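
Because each request carries the full messages array, the client owns the conversation history: append every assistant reply before the next turn. A minimal sketch:

chat-client.ts
// Stateless multi-turn chat; the client keeps the history
type Message = { role: "user" | "assistant"; content: string };

const messages: Message[] = [];

async function ask(content: string): Promise<string> {
  messages.push({ role: "user", content });

  const res = await fetch("http://localhost:3000/ai/chat", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ messages }),
  });

  const { message } = (await res.json()) as { message: Message };
  messages.push(message); // keep the reply so the model sees it next turn
  return message.content;
}

console.log(await ask("My name is Alice"));
console.log(await ask("What's my name?")); // "Your name is Alice!"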

Custom Routes

Create custom endpoints with the Gerbil instance:

custom-routes.ts
import express from "express";
import gerbil from "@tryhamster/gerbil";

const app = express();
app.use(express.json());

// Load model on startup
await gerbil.loadModel("qwen3-0.6b");

// Custom summarize endpoint
app.post("/summarize", async (req, res) => {
  const { content, length = "short" } = req.body;

  const result = await gerbil.generate(
    `Summarize the following in a ${length} paragraph:\n\n${content}`,
    { maxTokens: length === "short" ? 100 : 300 }
  );

  res.json({ summary: result.text });
});

// Custom code review endpoint
app.post("/review", async (req, res) => {
  const { code, language } = req.body;

  const result = await gerbil.generate(
    `Review this ${language} code for bugs and improvements:\n\n${code}`,
    { maxTokens: 500, temperature: 0.3 }
  );

  res.json({ review: result.text });
});

app.listen(3000);
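
Calling a custom endpoint then works like any other JSON route. A quick sketch against the server above (the article text is a placeholder):

summarize-client.ts
// Exercise the custom /summarize route
const res = await fetch("http://localhost:3000/summarize", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    content: "<long article text here>",
    length: "short",
  }),
});

const { summary } = await res.json();
console.log(summary);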

Middleware Chain

Add authentication, rate limiting, and logging:

middleware-chain.ts
import express from "express";
import type { Request, Response, NextFunction } from "express";
import { gerbil } from "@tryhamster/gerbil/express";
import rateLimit from "express-rate-limit";

const app = express();
app.use(express.json());

// Rate limiting
const limiter = rateLimit({
  windowMs: 60 * 1000, // 1 minute
  max: 20, // 20 requests per minute
  message: { error: "Too many requests" },
});

// Authentication middleware (isValidToken is your app's own token check)
const auth = (req: Request, res: Response, next: NextFunction) => {
  const token = req.headers.authorization?.split(" ")[1];
  if (!token || !isValidToken(token)) {
    return res.status(401).json({ error: "Unauthorized" });
  }
  next();
};

// Logging middleware
const logger = (req: Request, res: Response, next: NextFunction) => {
  console.log(`[${new Date().toISOString()}] ${req.method} ${req.path}`);
  next();
};

// Apply middleware chain: rate limit, then auth, then logging, then Gerbil
app.use("/ai",
  limiter,
  auth,
  logger,
  gerbil({ model: "qwen3-0.6b" })()
);

app.listen(3000);
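
isValidToken is left to your application. One possible stand-in, assuming a single static API key in an environment variable; swap in JWT verification or a session lookup for production:

auth-helper.ts
// Hypothetical token check used by the auth middleware above
function isValidToken(token: string): boolean {
  // Compare against one API key from the environment.
  // Replace with jwt.verify(...) or a database session lookup as needed.
  return token === process.env.API_KEY;
}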

Error Handling

error-handling.ts
import express from "express";
import { gerbil, GerbilError } from "@tryhamster/gerbil/express";

const app = express();
app.use(express.json());

// Custom error handler
app.use("/ai", gerbil({
  model: "qwen3-0.6b",
  onError: (error, req, res) => {
    console.error("AI Error:", error);

    if (error instanceof GerbilError) {
      return res.status(400).json({
        error: error.message,
        code: error.code,
      });
    }

    res.status(500).json({
      error: "Internal server error",
    });
  },
})());

// Global error handler (register after all routes)
app.use((err: Error, req: express.Request, res: express.Response, next: express.NextFunction) => {
  console.error(err);
  res.status(500).json({ error: "Something went wrong" });
});

TypeScript Types

types.ts
import {
  gerbil,
  GerbilOptions,
  GenerateRequest,
  GenerateResponse,
} from "@tryhamster/gerbil/express";
import { Request, Response } from "express";

const options: GerbilOptions = {
  model: "qwen3-0.6b",
  maxTokens: 500,
};

// Typed request handler
app.post("/custom", async (req: Request<{}, {}, GenerateRequest>, res: Response<GenerateResponse>) => {
  const { prompt, options } = req.body;
  // ...
});