Lesson 6.5: Deployment Basics
Duration: 60 minutes
Learning Objectives
By the end of this lesson, you will be able to:
- Prepare your application for production deployment
- Configure environment variables securely
- Deploy to cloud platforms (Railway, Render, Fly.io)
- Set up monitoring and logging
- Implement basic security measures
Introduction
Your AI assistant works locally. Now it is time to deploy it so others can use it. This lesson covers the essentials of taking your application from development to production.
┌─────────────────────────────────────────────────────────────────────┐
│ DEPLOYMENT PIPELINE │
├─────────────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
│ │ Local │───▶│ Build │───▶│ Test │───▶│ Deploy │ │
│ │ Dev │ │ & Lint │ │ & Verify │ │ to Prod │ │
│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │
│ │
│ │ │ │
│ ▼ ▼ │
│ ┌──────────────┐ ┌──────────────┐ │
│ │ GitHub │ │ Cloud │ │
│ │ Repository │ │ Platform │ │
│ └──────────────┘ └──────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────┘
Production Preparation
1. Build Configuration
Update package.json for production:
{
"name": "ai-assistant",
"version": "1.0.0",
"type": "module",
"engines": {
"node": ">=18.0.0"
},
"scripts": {
"start": "node dist/index.js",
"dev": "tsx watch src/index.ts",
"build": "tsc",
"lint": "tsc --noEmit",
"test": "tsx --test tests/**/*.test.ts",
"clean": "rm -rf dist",
"prebuild": "npm run clean"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.24.0",
"@langchain/community": "^0.2.0",
"@langchain/openai": "^0.2.0",
"chromadb": "^1.8.0",
"dotenv": "^16.4.0",
"langchain": "^0.2.0",
"openai": "^4.52.0",
"zod": "^3.23.0"
},
"devDependencies": {
"@types/node": "^20.14.0",
"tsx": "^4.16.0",
"typescript": "^5.5.0"
}
}
2. TypeScript Configuration for Production
Update tsconfig.json:
{
"compilerOptions": {
"target": "ES2022",
"module": "NodeNext",
"moduleResolution": "NodeNext",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"outDir": "dist",
"rootDir": "src",
"declaration": true,
"sourceMap": true,
"removeComments": true,
"noUnusedLocals": true,
"noUnusedParameters": true,
"noImplicitReturns": true,
"noFallthroughCasesInSwitch": true
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist", "tests"]
}
3. Environment Configuration
Create src/core/config.ts with production defaults:
import dotenv from 'dotenv';
import { z } from 'zod';
// Load variables from a local .env file in every environment except
// production; in production process.env is used exactly as provided.
if (process.env.NODE_ENV !== 'production') {
dotenv.config();
}
/**
 * Runtime schema for every setting the application reads.
 *
 * Fields with `.default(...)` fall back to that value when the raw input is
 * `undefined`. Counts, sizes and durations must be integers with a sensible
 * sign so that a typo such as `PORT=-1` or `RATE_LIMIT_WINDOW=0` is rejected
 * at start-up instead of misbehaving at runtime.
 */
const ConfigSchema = z.object({
  // Environment
  nodeEnv: z.enum(['development', 'production', 'test']).default('development'),

  // AI providers
  openaiApiKey: z.string().min(1, 'OpenAI API key is required'),
  anthropicApiKey: z.string().optional(),

  // Model settings
  defaultProvider: z.enum(['openai', 'anthropic']).default('openai'),
  defaultModel: z.string().default('gpt-4o'),
  maxTokens: z.number().int().positive().default(4096),
  temperature: z.number().min(0).max(2).default(0.7),

  // RAG settings
  embeddingModel: z.string().default('text-embedding-3-small'),
  chunkSize: z.number().int().positive().default(1000),
  chunkOverlap: z.number().int().nonnegative().default(200),
  retrievalTopK: z.number().int().positive().default(3),

  // Application settings
  documentsPath: z.string().default('./documents'),
  logLevel: z.enum(['debug', 'info', 'warn', 'error']).default('info'),

  // Server settings (for API mode)
  port: z.number().int().min(0).max(65535).default(3000),
  host: z.string().default('0.0.0.0'),

  // Security
  rateLimitRequests: z.number().int().positive().default(100),
  rateLimitWindow: z.number().int().positive().default(60000), // milliseconds (1 minute)

  // Optional API keys
  weatherApiKey: z.string().optional(),
});
export type Config = z.infer<typeof ConfigSchema>;

/**
 * Reads a numeric environment variable strictly.
 *
 * @param name - Environment variable to read.
 * @param integer - When true, only whole numbers are accepted.
 * @returns `undefined` when the variable is unset or blank (so the schema
 *   default applies), the parsed number when it is valid, or `NaN` when it is
 *   malformed so that schema validation reports it instead of silently
 *   accepting a prefix (`parseInt('8080abc')` would yield 8080).
 */
function readNumber(name: string, integer: boolean): number | undefined {
  const raw = process.env[name]?.trim();
  if (!raw) {
    return undefined;
  }
  const value = Number(raw);
  if (integer && !Number.isInteger(value)) {
    return Number.NaN;
  }
  return value;
}

/**
 * Builds the application configuration from `process.env` and validates it
 * against `ConfigSchema`.
 *
 * On validation failure every issue is printed to stderr and the process
 * exits with code 1, so a misconfigured deployment fails fast.
 *
 * @returns The validated configuration with schema defaults applied.
 */
function loadConfig(): Config {
  const rawConfig = {
    nodeEnv: process.env.NODE_ENV,
    openaiApiKey: process.env.OPENAI_API_KEY,
    anthropicApiKey: process.env.ANTHROPIC_API_KEY,
    defaultProvider: process.env.DEFAULT_PROVIDER,
    defaultModel: process.env.DEFAULT_MODEL,
    maxTokens: readNumber('MAX_TOKENS', true),
    temperature: readNumber('TEMPERATURE', false),
    embeddingModel: process.env.EMBEDDING_MODEL,
    chunkSize: readNumber('CHUNK_SIZE', true),
    chunkOverlap: readNumber('CHUNK_OVERLAP', true),
    retrievalTopK: readNumber('RETRIEVAL_TOP_K', true),
    documentsPath: process.env.DOCUMENTS_PATH,
    logLevel: process.env.LOG_LEVEL,
    port: readNumber('PORT', true),
    host: process.env.HOST,
    rateLimitRequests: readNumber('RATE_LIMIT_REQUESTS', true),
    rateLimitWindow: readNumber('RATE_LIMIT_WINDOW', true),
    weatherApiKey: process.env.WEATHER_API_KEY,
  };

  const result = ConfigSchema.safeParse(rawConfig);
  if (!result.success) {
    const errors = result.error.issues
      .map((issue) => ` - ${issue.path.join('.')}: ${issue.message}`)
      .join('\n');
    console.error(`Configuration validation failed:\n${errors}`);
    process.exit(1);
  }
  return result.data;
}
/** Validated configuration, loaded once when this module is first evaluated. */
export const config = loadConfig();

/**
 * Reports whether the loaded configuration targets production.
 *
 * @returns `true` only when `config.nodeEnv` is `'production'`.
 */
export function isProduction(): boolean {
  const { nodeEnv } = config;
  return nodeEnv === 'production';
}
Creating an HTTP API
For production, expose the assistant as an HTTP API instead of a CLI.
Create src/server.ts:
import { IncomingMessage, ServerResponse, createServer } from 'http';
import { Assistant } from './core/assistant.js';
import { config, isProduction } from './core/config.js';
import { createRetriever } from './rag/retriever.js';
import { calculatorTool, notesTool, weatherTool, webSearchTool } from './tools/index.js';
import { createLogger } from './utils/logger.js';
const logger = createLogger('Server');
// Simple in-memory, fixed-window rate limiting keyed by client IP
const requestCounts = new Map<string, { count: number; resetTime: number }>();

// Timestamp (ms since epoch) of the last sweep that evicted expired records
let lastRateLimitSweep = 0;

/** Deletes every record whose window has already ended. */
function pruneExpiredRateLimits(now: number): void {
  for (const [ip, record] of requestCounts) {
    if (now > record.resetTime) {
      requestCounts.delete(ip);
    }
  }
}

/**
 * Records one request for `ip` and reports whether it is within the limit.
 *
 * Each IP gets `config.rateLimitRequests` requests per window of
 * `config.rateLimitWindow` milliseconds; the window starts at the first
 * request seen after the previous window expired.
 *
 * @param ip - Client identifier used as the bucket key.
 * @returns `true` when the request is allowed, `false` when the limit is hit.
 */
function checkRateLimit(ip: string): boolean {
  const now = Date.now();

  // Without eviction the map keeps one entry per client forever. Sweeping at
  // most once per window bounds memory to the clients seen recently.
  if (now - lastRateLimitSweep >= config.rateLimitWindow) {
    pruneExpiredRateLimits(now);
    lastRateLimitSweep = now;
  }

  const record = requestCounts.get(ip);
  if (!record || now > record.resetTime) {
    requestCounts.set(ip, { count: 1, resetTime: now + config.rateLimitWindow });
    return true;
  }
  if (record.count >= config.rateLimitRequests) {
    return false;
  }
  record.count++;
  return true;
}
/**
 * Reads a request body to completion and parses it as JSON.
 *
 * Chunks are collected as Buffers and decoded once at the end, so a
 * multi-byte UTF-8 character split across two chunks is decoded correctly.
 * The body is capped at `maxBytes` because it comes from untrusted clients;
 * once the cap is exceeded further data is discarded rather than buffered,
 * and the stream is left open so the caller can still send a response.
 *
 * @param req - Incoming HTTP request whose body should be read.
 * @param maxBytes - Maximum accepted body size in bytes (default 1 MiB).
 * @returns The parsed JSON value, or `{}` when the body is empty.
 * @throws Rejects with `Error('Invalid JSON')` for unparseable input,
 *   `Error('Request body too large')` when the cap is exceeded, or the
 *   stream's own error.
 */
async function parseBody(req: IncomingMessage, maxBytes = 1024 * 1024): Promise<unknown> {
  return new Promise((resolve, reject) => {
    const chunks: Buffer[] = [];
    let received = 0;
    let overLimit = false;

    req.on('data', (chunk: Buffer | string) => {
      if (overLimit) {
        return; // keep draining, but never buffer past the cap
      }
      const buffer = typeof chunk === 'string' ? Buffer.from(chunk) : chunk;
      received += buffer.length;
      if (received > maxBytes) {
        overLimit = true;
        chunks.length = 0;
        reject(new Error('Request body too large'));
        return;
      }
      chunks.push(buffer);
    });

    req.on('end', () => {
      if (overLimit) {
        return;
      }
      const body = Buffer.concat(chunks).toString('utf8');
      try {
        resolve(body ? JSON.parse(body) : {});
      } catch {
        reject(new Error('Invalid JSON'));
      }
    });

    req.on('error', reject);
  });
}
/**
 * Adds the CORS headers sent with every response: any origin is allowed,
 * with the GET, POST and OPTIONS methods and the Content-Type and
 * Authorization request headers.
 *
 * @param res - Response that receives the headers.
 */
function setCorsHeaders(res: ServerResponse): void {
  const corsHeaders: ReadonlyArray<readonly [string, string]> = [
    ['Access-Control-Allow-Origin', '*'],
    ['Access-Control-Allow-Methods', 'GET, POST, OPTIONS'],
    ['Access-Control-Allow-Headers', 'Content-Type, Authorization'],
  ];
  for (const [headerName, headerValue] of corsHeaders) {
    res.setHeader(headerName, headerValue);
  }
}
/**
 * Sends `data` as a JSON response with the given status code.
 *
 * The payload is serialized before anything is written: if `JSON.stringify`
 * throws (circular reference, BigInt), the response is still untouched and
 * the caller can send an error response instead of hitting "headers already
 * sent".
 *
 * @param res - Response to write to.
 * @param status - HTTP status code.
 * @param data - Value to serialize as the response body.
 * @throws TypeError when `data` cannot be serialized to JSON.
 */
function sendJson(res: ServerResponse, status: number, data: unknown): void {
  const payload = JSON.stringify(data);
  res.writeHead(status, { 'Content-Type': 'application/json' });
  res.end(payload);
}
/** Signals a malformed client request; the request handler answers it with HTTP 400. */
class BadRequestError extends Error {}

/**
 * Reads the request body with `parseBody` and requires a JSON object.
 * Any failure while reading or parsing is reported as a client error.
 */
async function readJsonObject(req: IncomingMessage): Promise<Record<string, unknown>> {
  let parsed: unknown;
  try {
    parsed = await parseBody(req);
  } catch (error) {
    throw new BadRequestError(error instanceof Error ? error.message : 'Invalid request body');
  }
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new BadRequestError('Request body must be a JSON object');
  }
  return parsed as Record<string, unknown>;
}

/** Resolves the optional `sessionId` field; absent values map to the shared 'default' session. */
function resolveSessionId(raw: unknown): string {
  if (raw === undefined || raw === null) {
    return 'default';
  }
  if (typeof raw !== 'string') {
    throw new BadRequestError('sessionId must be a string');
  }
  return raw;
}

/** Extracts and validates the fields shared by the chat endpoints. */
async function readChatRequest(
  req: IncomingMessage,
): Promise<{ message: string; sessionId: string }> {
  const body = await readJsonObject(req);
  const { message } = body;
  if (typeof message !== 'string' || !message) {
    throw new BadRequestError('Message is required');
  }
  return { message, sessionId: resolveSessionId(body.sessionId) };
}

/**
 * Parses the request target. Only the path is used for routing, so a fixed
 * base is supplied: a missing or malformed Host header must not be able to
 * make URL construction throw.
 */
function parseRequestUrl(req: IncomingMessage): URL {
  try {
    return new URL(req.url ?? '/', 'http://localhost');
  } catch {
    throw new BadRequestError('Invalid request URL');
  }
}

/**
 * Starts the HTTP API.
 *
 * Routes: `GET /health`, `POST /chat`, `POST /chat/stream` (Server-Sent
 * Events) and `POST /clear`. Conversation state is kept in memory, one
 * Assistant per session id; requests without a session id share 'default'.
 *
 * The returned promise resolves once the server is listening and rejects if
 * binding fails (for example when the port is already in use), so the caller
 * can report the failure instead of crashing on an unhandled 'error' event.
 */
export async function startServer(): Promise<void> {
  const buildAssistant = (): Assistant =>
    new Assistant({
      enableRag: true,
      tools: [calculatorTool, weatherTool, notesTool, webSearchTool],
    });

  // Initialize assistant
  const assistant = buildAssistant();
  const retriever = createRetriever();
  assistant.setRagRetriever(retriever);

  // Initialize RAG; a failure is only logged, so the server still starts
  try {
    await retriever('initialize');
    logger.info('Knowledge base initialized');
  } catch (error) {
    logger.warn('Could not initialize knowledge base', error);
  }

  // Conversation sessions (in production, use Redis or a database)
  const sessions = new Map<string, Assistant>();

  const getOrCreateSession = (sessionId: string): Assistant => {
    let session = sessions.get(sessionId);
    if (!session) {
      session = buildAssistant();
      session.setRagRetriever(retriever);
      sessions.set(sessionId, session);
    }
    return session;
  };

  const server = createServer(async (req, res) => {
    // Everything runs inside the try block: an exception escaping this async
    // handler would become an unhandled promise rejection.
    try {
      setCorsHeaders(res);

      // Handle CORS preflight
      if (req.method === 'OPTIONS') {
        res.writeHead(204);
        res.end();
        return;
      }

      // Rate limiting
      const ip = req.socket.remoteAddress ?? 'unknown';
      if (!checkRateLimit(ip)) {
        sendJson(res, 429, { error: 'Too many requests' });
        return;
      }

      const url = parseRequestUrl(req);

      // Health check
      if (req.method === 'GET' && url.pathname === '/health') {
        sendJson(res, 200, { status: 'ok', timestamp: new Date().toISOString() });
        return;
      }

      // Chat endpoint
      if (req.method === 'POST' && url.pathname === '/chat') {
        const { message, sessionId } = await readChatRequest(req);
        const response = await getOrCreateSession(sessionId).chat(message);
        sendJson(res, 200, {
          message: response.content,
          toolsUsed: response.toolsUsed,
          sessionId,
        });
        return;
      }

      // Streaming chat endpoint
      if (req.method === 'POST' && url.pathname === '/chat/stream') {
        const { message, sessionId } = await readChatRequest(req);
        const sessionAssistant = getOrCreateSession(sessionId);

        // Server-Sent Events
        res.writeHead(200, {
          'Content-Type': 'text/event-stream',
          'Cache-Control': 'no-cache',
          Connection: 'keep-alive',
        });
        const sendEvent = (payload: unknown): void => {
          res.write(`data: ${JSON.stringify(payload)}\n\n`);
        };

        for await (const chunk of sessionAssistant.chatStream(message)) {
          if (chunk.type === 'text' && chunk.content) {
            sendEvent({ type: 'text', content: chunk.content });
          } else if (chunk.type === 'tool_call' && chunk.toolCall) {
            sendEvent({ type: 'tool', name: chunk.toolCall.name });
          } else if (chunk.type === 'done') {
            sendEvent({ type: 'done' });
          }
        }
        res.end();
        return;
      }

      // Clear session
      if (req.method === 'POST' && url.pathname === '/clear') {
        const body = await readJsonObject(req);
        const sessionId = resolveSessionId(body.sessionId);
        sessions.get(sessionId)?.clearHistory();
        sendJson(res, 200, { message: 'Session cleared', sessionId });
        return;
      }

      // 404 for unknown routes
      sendJson(res, 404, { error: 'Not found' });
    } catch (error) {
      // Client mistakes are raised before any response bytes are written.
      if (error instanceof BadRequestError && !res.headersSent) {
        sendJson(res, 400, { error: error.message });
        return;
      }

      logger.error('Request error', error);
      const message = isProduction() ? 'Internal server error' : String(error);

      if (res.headersSent) {
        // A stream is already in progress, so the status line cannot change;
        // report the failure as a final event and close the response.
        if (!res.writableEnded) {
          res.write(`data: ${JSON.stringify({ type: 'error', error: message })}\n\n`);
          res.end();
        }
        return;
      }
      sendJson(res, 500, { error: message });
    }
  });

  await new Promise<void>((resolve, reject) => {
    // Surface bind failures to the caller instead of an uncaught 'error' event.
    server.once('error', reject);
    server.listen(config.port, config.host, () => {
      server.off('error', reject);
      logger.info(`Server running at http://${config.host}:${config.port}`);
      logger.info('Endpoints:');
      logger.info(' GET /health - Health check');
      logger.info(' POST /chat - Send message');
      logger.info(' POST /chat/stream - Send message (streaming)');
      logger.info(' POST /clear - Clear session');
      resolve();
    });
  });
}
Update src/index.ts to support both CLI and server modes:
import { config } from './core/config.js';
import { createLogger } from './utils/logger.js';
const logger = createLogger('Main');

/**
 * Application entry point.
 *
 * Runs the HTTP server when the first command-line argument is `server` or
 * when the configured environment is production; otherwise runs the
 * interactive CLI. Each mode's module is imported only when it is selected.
 */
async function main(): Promise<void> {
  const requestedMode = process.argv[2] ?? 'cli';
  const runAsServer = requestedMode === 'server' || config.nodeEnv === 'production';

  if (!runAsServer) {
    const cliModule = await import('./cli.js');
    await cliModule.startCli();
    return;
  }

  const serverModule = await import('./server.js');
  await serverModule.startServer();
}

// Any start-up failure is logged and turns into a non-zero exit code.
main().catch((error) => {
  logger.error('Fatal error', error);
  process.exit(1);
});
Move CLI code to src/cli.ts:
import * as readline from 'readline';
import { Assistant } from './core/assistant.js';
import { createRetriever } from './rag/retriever.js';
import { calculatorTool, notesTool, weatherTool, webSearchTool } from './tools/index.js';
import { createLogger } from './utils/logger.js';
const logger = createLogger('CLI');

/**
 * Runs the interactive command-line assistant.
 *
 * Prints a banner, initializes the knowledge base (continuing with a warning
 * if that fails), then reads lines from stdin in a loop. Lines starting with
 * a known slash command are handled locally; everything else is sent to the
 * assistant and the reply is streamed to stdout.
 */
export async function startCli(): Promise<void> {
  console.log('AI Knowledge Assistant');
  console.log('======================');
  console.log('Type your message and press Enter.');
  console.log('Commands: /clear, /status, /tools, /exit');
  console.log('');

  const assistant = new Assistant({
    enableRag: true,
    tools: [calculatorTool, weatherTool, notesTool, webSearchTool],
  });
  const retriever = createRetriever();
  assistant.setRagRetriever(retriever);

  console.log('Initializing knowledge base...');
  try {
    await retriever('initialize');
    console.log('Knowledge base ready!');
  } catch (error) {
    console.log('Warning: Could not load documents.');
    // Keep the cause available for troubleshooting without cluttering the
    // default CLI output (visible when the log level is set to debug).
    logger.debug('Knowledge base initialization failed', error);
  }
  console.log('Tools available: calculator, weather, notes, web_search\n');

  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  // Slash commands handled locally. A Map is used so that input such as
  // "constructor" can never resolve to an inherited object property.
  const commands = new Map<string, () => void>([
    [
      '/exit',
      () => {
        console.log('Goodbye!');
        rl.close();
        process.exit(0);
      },
    ],
    [
      '/clear',
      () => {
        assistant.clearHistory();
        console.log('Conversation history cleared.\n');
      },
    ],
    [
      '/status',
      () => {
        const history = assistant.getHistory();
        console.log(`Messages in history: ${history.length}\n`);
      },
    ],
    [
      '/tools',
      () => {
        console.log('Available tools:');
        console.log(' - calculator: Perform math calculations');
        console.log(' - weather: Get current weather');
        console.log(' - notes: Save and retrieve notes');
        console.log(' - web_search: Search the web\n');
      },
    ],
  ]);

  // Streams one assistant reply to stdout; errors are printed, not thrown.
  const streamReply = async (message: string): Promise<void> => {
    process.stdout.write('Assistant: ');
    try {
      for await (const chunk of assistant.chatStream(message)) {
        if (chunk.type === 'text' && chunk.content) {
          process.stdout.write(chunk.content);
        } else if (chunk.type === 'tool_call' && chunk.toolCall) {
          process.stdout.write(`\n[Using ${chunk.toolCall.name}...]\n`);
        }
      }
      console.log('\n');
    } catch (error) {
      console.error(`\nError: ${error instanceof Error ? error.message : error}\n`);
    }
  };

  const prompt = (): void => {
    rl.question('You: ', async (input) => {
      const trimmed = input.trim();
      const command = commands.get(trimmed);

      if (command) {
        command();
      } else if (trimmed) {
        await streamReply(trimmed);
      }
      prompt();
    });
  };

  prompt();
}
Deploying to Railway
Railway provides simple deployment from GitHub.
1. Prepare Your Repository
Create a Procfile in the project root:
web: npm start
Create railway.json:
{
"$schema": "https://railway.app/railway.schema.json",
"build": {
"builder": "NIXPACKS",
"buildCommand": "npm run build"
},
"deploy": {
"startCommand": "npm start",
"healthcheckPath": "/health",
"healthcheckTimeout": 30,
"restartPolicyType": "ON_FAILURE",
"restartPolicyMaxRetries": 3
}
}
2. Deploy Steps
- Push your code to GitHub
- Go to railway.app and sign in
- Click "New Project" > "Deploy from GitHub repo"
- Select your repository
- Add environment variables in the Railway dashboard:
  - `OPENAI_API_KEY`
  - `NODE_ENV=production`
  - Any other required variables
- Railway will automatically build and deploy
Deploying to Render
Render is another excellent option with a free tier.
1. Create render.yaml
# Render blueprint: one Node web service built from this repository.
services:
  - type: web
    name: ai-assistant
    env: node
    plan: free
    # NODE_ENV=production is set for this service, and npm skips
    # devDependencies in that mode. The build needs TypeScript (a
    # devDependency), so dev packages are requested explicitly.
    buildCommand: npm install --include=dev && npm run build
    startCommand: npm start
    healthCheckPath: /health
    envVars:
      - key: NODE_ENV
        value: production
      # sync: false keeps the value out of the repository; set it in the dashboard.
      - key: OPENAI_API_KEY
        sync: false
2. Deploy Steps
- Push code to GitHub
- Go to render.com and sign in
- Click "New" > "Web Service"
- Connect your GitHub repository
- Render will detect `render.yaml` and configure automatically
- Add secret environment variables in the dashboard
Deploying to Fly.io
Fly.io offers global edge deployment.
1. Create fly.toml
# Fly.io app configuration
app = "ai-assistant"
primary_region = "iad"

# No [build] section on purpose: without a builder or image configured,
# `fly deploy` builds the image from the Dockerfile in the project root.

[env]
  NODE_ENV = "production"
  PORT = "8080"

# Single HTTP service definition. It covers ports 80 and 443, so a separate
# [[services]] block for the same internal port is not needed.
[http_service]
  internal_port = 8080
  force_https = true
  auto_stop_machines = true
  auto_start_machines = true
  # With 0 machines kept running, the app can be stopped when idle; anything
  # held only in process memory is lost when that happens.
  min_machines_running = 0

  [[http_service.checks]]
    interval = "30s"
    timeout = "5s"
    grace_period = "10s"
    method = "GET"
    path = "/health"
2. Create Dockerfile
FROM node:20-slim
WORKDIR /app

# Copy the manifests first so the dependency layer is cached between builds.
COPY package*.json ./

# Install ALL dependencies: the build step runs tsc, which is a devDependency.
RUN npm ci

COPY . .

# Compile TypeScript, then drop devDependencies from the final image.
RUN npm run build && npm prune --omit=dev

ENV NODE_ENV=production
# The application listens on PORT (default 3000); keep it in sync with EXPOSE.
ENV PORT=8080
EXPOSE 8080
CMD ["npm", "start"]
3. Deploy Steps
# Install the Fly CLI
curl -L https://fly.io/install.sh | sh
# Log in to your Fly.io account
fly auth login
# Launch the app
fly launch
# Store the OpenAI key as a secret (replace the placeholder with your real key)
fly secrets set OPENAI_API_KEY=sk-proj-your-key
# Deploy
fly deploy
Monitoring and Logging
Structured Logging
Update src/utils/logger.ts for production:
import { config, isProduction } from '../core/config.js';
/** Severity names understood by the logger. */
type LogLevel =
  | 'debug'
  | 'info'
  | 'warn'
  | 'error';

/** Numeric rank of each level, used to compare a message against the configured threshold. */
const LOG_LEVELS: Record<LogLevel, number> = { debug: 0, info: 1, warn: 2, error: 3 };

/** One log record; `data` is present only when a payload was supplied. */
type LogEntry = {
  timestamp: string;
  level: LogLevel;
  logger: string;
  message: string;
  data?: unknown;
};
/**
 * Named logger with level filtering.
 *
 * In production each record is printed as one JSON line for log aggregation;
 * otherwise a human-readable line is printed. The threshold is read from
 * `config.logLevel` when the logger is created.
 */
class Logger {
  private readonly level: LogLevel;
  private readonly name: string;

  constructor(name: string) {
    this.name = name;
    this.level = config.logLevel;
  }

  /** Converts an Error into a plain object; JSON.stringify would otherwise emit `{}` for it. */
  private static toSerializable(data: unknown): unknown {
    if (data instanceof Error) {
      return { name: data.name, message: data.message, stack: data.stack };
    }
    return data;
  }

  /** Serializes a record to JSON without ever throwing, so logging cannot crash the caller. */
  private static serialize(entry: LogEntry): string {
    try {
      return JSON.stringify({ ...entry, data: Logger.toSerializable(entry.data) });
    } catch {
      // Circular references or BigInt values cannot be represented in JSON.
      return JSON.stringify({ ...entry, data: '[unserializable data]' });
    }
  }

  /** A message is emitted when its level ranks at or above the configured threshold. */
  private shouldLog(level: LogLevel): boolean {
    return LOG_LEVELS[level] >= LOG_LEVELS[this.level];
  }

  private log(level: LogLevel, message: string, data?: unknown): void {
    if (!this.shouldLog(level)) return;

    const entry: LogEntry = {
      timestamp: new Date().toISOString(),
      level,
      logger: this.name,
      message,
    };
    const hasData = data !== undefined;
    if (hasData) {
      entry.data = data;
    }

    if (isProduction()) {
      // JSON format for production log aggregation
      console.log(Logger.serialize(entry));
      return;
    }

    // Human-readable for development. The same "was data supplied" test is
    // used as above, so falsy payloads such as 0 or '' are still shown.
    const prefix = `[${entry.timestamp}] [${level.toUpperCase()}] [${this.name}]`;
    if (hasData) {
      console.log(prefix, message, data);
    } else {
      console.log(prefix, message);
    }
  }

  /** Logs at debug level. */
  debug(message: string, data?: unknown): void {
    this.log('debug', message, data);
  }

  /** Logs at info level. */
  info(message: string, data?: unknown): void {
    this.log('info', message, data);
  }

  /** Logs at warn level. */
  warn(message: string, data?: unknown): void {
    this.log('warn', message, data);
  }

  /** Logs at error level. */
  error(message: string, data?: unknown): void {
    this.log('error', message, data);
  }
}

/**
 * Creates a logger whose records are tagged with `name`.
 *
 * @param name - Label identifying the component that writes the records.
 */
export function createLogger(name: string): Logger {
  return new Logger(name);
}
Health Checks
The /health endpoint should return detailed status:
// In server.ts, inside the request handler: health check with process details
if (req.method === 'GET' && url.pathname === '/health') {
  const healthReport = {
    status: 'ok',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(), // seconds since the process started
    memory: process.memoryUsage(),
    sessions: sessions.size, // number of in-memory conversation sessions
  };
  sendJson(res, 200, healthReport);
  return;
}
Security Checklist
Before deploying to production, verify:
- API keys are in environment variables, not code
- `.env` is in `.gitignore`
- Rate limiting is enabled
- Input validation is in place
- Error messages do not leak sensitive info
- CORS is configured appropriately
- HTTPS is enforced
- Dependencies are up to date
Key Takeaways
- Prepare for production with proper build configuration
- Use environment variables for all secrets and configuration
- Multiple platforms offer easy deployment (Railway, Render, Fly.io)
- Structured logging enables log aggregation and analysis
- Security measures like rate limiting protect your API
Practice Exercise
- Deploy your assistant to one of the platforms
- Set up a custom domain
- Add request logging with timestamps and durations
- Implement API key authentication for the endpoints
- Create a simple web frontend that connects to your API
Next Steps
Your assistant is deployed! In the final lesson, you will review the code, add tests, and discuss improvements.