API Reference
Complete API documentation for Gerbil.
Gerbil Class
The main class for local LLM inference.
Constructor
constructor.ts
import { Gerbil } from "@tryhamster/gerbil";
const g = new Gerbil(config?: GerbilConfig);

loadModel()
loadModel.ts
await g.loadModel( modelId: string, options?: LoadOptions): Promise<void>
// Example
await g.loadModel("qwen3-0.6b", {
  device: "gpu",
  dtype: "q4",
  onProgress: (info) => console.log(info.status),
});

generate()
generate.ts
await g.generate( prompt: string, options?: GenerateOptions): Promise<GenerateResult>
// Example
const result = await g.generate("Hello", {
  maxTokens: 256,
  temperature: 0.7,
  topP: 0.9,
  topK: 50,
  system: "You are helpful.",
  thinking: true,
  stopSequences: ["\n\n"],
});

stream()
stream.ts
g.stream( prompt: string, options?: GenerateOptions): AsyncGenerator<string, GenerateResult, unknown>
// Example
for await (const chunk of g.stream("Tell me a story")) {
  process.stdout.write(chunk);
}

json()
json.ts
await g.json<T>( prompt: string, options: JsonOptions<T>): Promise<T>
// Example
const person = await g.json("Extract: John, 32, NYC", {
  schema: z.object({
    name: z.string(),
    age: z.number(),
    city: z.string(),
  }),
  retries: 3,
  temperature: 0.3,
});

embed()
embed.ts
await g.embed( text: string, options?: EmbedOptions): Promise<EmbedResult>
// Example
const result = await g.embed("Hello world");
console.log(result.vector); // number[]

Response Cache Methods
cache-methods.ts
// Get response cache statistics
g.getResponseCacheStats(): ResponseCacheStats
// Returns: { hits, misses, size, hitRate }

// Clear all cached responses
g.clearResponseCache(): void

Other Methods
other-methods.ts
// Check if model is loaded
g.isLoaded(): boolean

// Get current model info
g.getModelInfo(): ModelConfig | null

// Check if loaded model supports vision
g.supportsVision(): boolean

// Get current device mode
g.getDeviceMode(): "webgpu" | "cpu"

// Get session stats
g.getStats(): SessionStats

// Get system info
g.getInfo(): SystemInfo

// Clear KV cache (attention states)
g.clearCache(): void

// Clean up
await g.dispose(): Promise<void>

// Static: List built-in models
Gerbil.listModels(): ModelConfig[]

// Static: Get model by ID
Gerbil.getModel(id: string): ModelConfig | undefined

Types
GerbilConfig
GerbilConfig.ts
interface GerbilConfig {
  model?: string;
  device?: "auto" | "gpu" | "cpu";
  dtype?: "q4" | "q8" | "fp16" | "fp32";
  cache?: CacheConfig;
  fallback?: FallbackConfig;
}

GenerateOptions
GenerateOptions.ts
interface GenerateOptions {
  maxTokens?: number;    // default: 256
  temperature?: number;  // default: 0.7
  topP?: number;         // default: 0.9
  topK?: number;         // default: 50
  stopSequences?: string[];
  system?: string;
  thinking?: boolean;
  onToken?: (token: string) => void;
  images?: ImageInput[]; // Vision: images to include
  cache?: boolean;       // Enable response caching (default: false)
  cacheTtl?: number;     // Cache TTL in ms (default: 5 min)
}

GenerateResult
GenerateResult.ts
interface GenerateResult {
  text: string;
  thinking?: string;
  tokensGenerated: number;
  tokensPerSecond: number;
  totalTime: number;
  finishReason: "stop" | "length" | "error";
  provider?: "local" | "openai" | "anthropic";
  cached?: boolean;
}

LoadOptions
LoadOptions.ts
interface LoadOptions {
  onProgress?: (info: ProgressInfo) => void;
  device?: "auto" | "gpu" | "cpu" | "webgpu";
  dtype?: "q4" | "q8" | "fp16" | "fp32";
  contextLength?: number;
}

ModelConfig
ModelConfig.ts
interface ModelConfig {
  id: string;
  repo: string;
  description: string;
  size: string;
  contextLength: number;
  supportsThinking: boolean;
  supportsJson: boolean;
  supportsVision?: boolean;   // Vision: model understands images
  visionEncoderSize?: string; // Vision: encoder params (e.g. "0.4B")
  family: "qwen" | "smollm" | "phi" | "mistral" | "llama" | "other";
}

ImageInput
ImageInput.ts
interface ImageInput {
  /** Image source: URL, base64 data URI, or local file path */
  source: string;
  /** Optional alt text for context */
  alt?: string;
}

ResponseCacheStats
ResponseCacheStats.ts
interface ResponseCacheStats {
  hits: number;    // Number of cache hits
  misses: number;  // Number of cache misses
  size: number;    // Number of cached entries
  hitRate: number; // Hit rate percentage (0-100)
}

Functional API
One-liner functions that auto-load the model:
functional.ts
import gerbil, { generate, stream, json, embed } from "@tryhamster/gerbil";
// Default export - simplest usage
const text = await gerbil("Hello");

// Named exports
const result = await generate("Hello");
const data = await json("Extract...", { schema });
const vec = await embed("Hello");

for await (const chunk of stream("Story")) {
  console.log(chunk);
}

Skills API
skills-api.ts
import {
  // Skill system
  defineSkill, useSkill, listSkills, loadSkills,
  // Built-in skills
  commit, summarize, explain, review, test, translate, extract, title,
  // Vision skills (require ministral-3b or vision model)
  describeImage, analyzeScreenshot, extractFromImage, compareImages, captionImage,
} from "@tryhamster/gerbil/skills";
// All skills accept an input object and return Promise<string | T>
const msg = await commit({ type: "conventional" });
const summary = await summarize({ content, length: "short" });
const explanation = await explain({ content, level: "beginner" });
const feedback = await review({ code, focus: ["security"] });
const tests = await test({ code, framework: "vitest" });
const translated = await translate({ text, to: "es" });
const headline = await title({ content, style: "professional" });
// Vision skills
const description = await describeImage({ image: url, focus: "details" });
const analysis = await analyzeScreenshot({ image: dataUri, type: "qa" });
const text = await extractFromImage({ image, extract: "text" });
const diff = await compareImages({ image1, image2, focus: "differences" });
const alt = await captionImage({ image, style: "descriptive" });
// extract() returns Promise<T>
const data = await extract({ content, schema: myZodSchema });
// Custom skills
const mySkill = defineSkill({
  name: "my-skill",
  input: z.object({ text: z.string() }),
  run: async ({ input, gerbil }) => gerbil.generate(input.text),
});