Lesson 6.5: Deployment Basics

Duration: 60 minutes

Learning Objectives

By the end of this lesson, you will be able to:

Prepare your application for production deployment
Configure environment variables securely
Deploy to cloud platforms (Railway, Render, Fly.io)
Set up monitoring and logging
Implement basic security measures

Introduction

Your AI assistant works locally. Now it is time to deploy it so others can use it. This lesson covers the essentials of taking your application from development to production.

Production Preparation

1. Build Configuration

Update package.json for production:

{
  "name": "ai-assistant",
  "version": "1.0.0",
  "type": "module",
  "engines": {
    "node": ">=18.0.0"
  },
  "scripts": {
    "start": "node dist/index.js",
    "dev": "tsx watch src/index.ts",
    "build": "tsc",
    "lint": "tsc --noEmit",
    "test": "tsx --test tests/**/*.test.ts",
    "clean": "rm -rf dist",
    "prebuild": "npm run clean"
  },
  "dependencies": {
    "@anthropic-ai/sdk": "^0.24.0",
    "@langchain/community": "^0.2.0",
    "@langchain/openai": "^0.2.0",
    "chromadb": "^1.8.0",
    "dotenv": "^16.4.0",
    "langchain": "^0.2.0",
    "openai": "^4.52.0",
    "zod": "^3.23.0"
  },
  "devDependencies": {
    "@types/node": "^20.14.0",
    "tsx": "^4.16.0",
    "typescript": "^5.5.0"
  }
}

2. TypeScript Configuration for Production

Update tsconfig.json:

{
  "compilerOptions": {
    "target": "ES2022",
    "module": "NodeNext",
    "moduleResolution": "NodeNext",
    "strict": true,
    "esModuleInterop": true,
    "skipLibCheck": true,
    "outDir": "dist",
    "rootDir": "src",
    "declaration": true,
    "sourceMap": true,
    "removeComments": true,
    "noUnusedLocals": true,
    "noUnusedParameters": true,
    "noImplicitReturns": true,
    "noFallthroughCasesInSwitch": true
  },
  "include": ["src/**/*"],
  "exclude": ["node_modules", "dist", "tests"]
}

3. Environment Configuration

Create src/core/config.ts with production defaults:

import dotenv from 'dotenv';
import { z } from 'zod';

// The local .env file is read for every NODE_ENV except 'production'; in
// production the variables are expected to already be in process.env.
const shouldLoadDotenv = process.env.NODE_ENV !== 'production';
if (shouldLoadDotenv) {
  dotenv.config();
}

/**
 * Validation schema for every runtime setting the application reads.
 *
 * Counts and sizes must be whole numbers in a sensible range, so a typo such
 * as PORT=-1 or RATE_LIMIT_WINDOW=0 fails at startup instead of producing a
 * server that cannot listen or a rate limiter that never blocks.
 */
const ConfigSchema = z.object({
  // Environment
  nodeEnv: z.enum(['development', 'production', 'test']).default('development'),

  // AI Providers
  openaiApiKey: z.string().min(1, 'OpenAI API key is required'),
  anthropicApiKey: z.string().optional(),

  // Model settings
  defaultProvider: z.enum(['openai', 'anthropic']).default('openai'),
  defaultModel: z.string().default('gpt-4o'),
  maxTokens: z.number().int().positive().default(4096),
  temperature: z.number().min(0).max(2).default(0.7),

  // RAG settings
  embeddingModel: z.string().default('text-embedding-3-small'),
  chunkSize: z.number().int().positive().default(1000),
  chunkOverlap: z.number().int().nonnegative().default(200),
  retrievalTopK: z.number().int().positive().default(3),

  // Application settings
  documentsPath: z.string().default('./documents'),
  logLevel: z.enum(['debug', 'info', 'warn', 'error']).default('info'),

  // Server settings (for API mode)
  port: z.number().int().min(0).max(65535).default(3000),
  host: z.string().default('0.0.0.0'),

  // Security
  rateLimitRequests: z.number().int().positive().default(100),
  rateLimitWindow: z.number().int().positive().default(60000), // milliseconds (1 minute)

  // Optional API keys
  weatherApiKey: z.string().optional(),
});

/** Fully validated configuration, with every default applied. */
export type Config = z.infer<typeof ConfigSchema>;

/** Matches an optionally signed base-10 integer and nothing else. */
const INTEGER_ENV_PATTERN = /^[+-]?\d+$/;

/** Matches an optionally signed decimal number with an optional exponent. */
const DECIMAL_ENV_PATTERN = /^[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?$/;

/**
 * Converts an environment value to a number.
 *
 * @param raw - The raw environment value.
 * @param pattern - The format the trimmed value must match in full.
 * @returns `undefined` when the variable is unset or empty (so the schema
 *   default applies), the parsed number when it matches `pattern`, and `NaN`
 *   otherwise so that schema validation reports the bad value instead of a
 *   silently truncated one (`parseInt('12abc')` would yield 12).
 */
function parseNumericEnv(raw: string | undefined, pattern: RegExp): number | undefined {
  if (!raw) return undefined;
  const text = raw.trim();
  return pattern.test(text) ? Number(text) : Number.NaN;
}

/**
 * Reads configuration from `process.env` and validates it against
 * `ConfigSchema`.
 *
 * On validation failure every issue is printed to stderr and the process
 * exits with code 1, so this function only ever returns a valid `Config`.
 */
function loadConfig(): Config {
  const env = process.env;
  const intFrom = (raw: string | undefined) => parseNumericEnv(raw, INTEGER_ENV_PATTERN);
  const decimalFrom = (raw: string | undefined) => parseNumericEnv(raw, DECIMAL_ENV_PATTERN);

  const rawConfig = {
    nodeEnv: env.NODE_ENV,
    openaiApiKey: env.OPENAI_API_KEY,
    anthropicApiKey: env.ANTHROPIC_API_KEY,
    defaultProvider: env.DEFAULT_PROVIDER,
    defaultModel: env.DEFAULT_MODEL,
    maxTokens: intFrom(env.MAX_TOKENS),
    temperature: decimalFrom(env.TEMPERATURE),
    embeddingModel: env.EMBEDDING_MODEL,
    chunkSize: intFrom(env.CHUNK_SIZE),
    chunkOverlap: intFrom(env.CHUNK_OVERLAP),
    retrievalTopK: intFrom(env.RETRIEVAL_TOP_K),
    documentsPath: env.DOCUMENTS_PATH,
    logLevel: env.LOG_LEVEL,
    port: intFrom(env.PORT),
    host: env.HOST,
    rateLimitRequests: intFrom(env.RATE_LIMIT_REQUESTS),
    rateLimitWindow: intFrom(env.RATE_LIMIT_WINDOW),
    weatherApiKey: env.WEATHER_API_KEY,
  };

  const result = ConfigSchema.safeParse(rawConfig);

  if (!result.success) {
    const errors = result.error.issues
      .map((issue) => `  - ${issue.path.join('.')}: ${issue.message}`)
      .join('\n');
    console.error(`Configuration validation failed:\n${errors}`);
    process.exit(1);
  }

  return result.data;
}

/** Validated configuration, loaded once when this module is first evaluated. */
export const config = loadConfig();

/** Returns true when the validated `nodeEnv` setting is 'production'. */
export function isProduction(): boolean {
  return config.nodeEnv === 'production';
}

Creating an HTTP API

For production, expose the assistant as an HTTP API instead of a CLI.

Create src/server.ts:

import { IncomingMessage, ServerResponse, createServer } from 'http';

import { Assistant } from './core/assistant.js';
import { config, isProduction } from './core/config.js';
import { createRetriever } from './rag/retriever.js';
import { calculatorTool, notesTool, weatherTool, webSearchTool } from './tools/index.js';
import { createLogger } from './utils/logger.js';

const logger = createLogger('Server');

// Simple in-memory, fixed-window rate limiting keyed by client IP.
const requestCounts = new Map<string, { count: number; resetTime: number }>();

// Earliest time (ms since epoch) at which expired entries are swept again.
let nextRateLimitSweep = 0;

/**
 * Records one request from `ip` and reports whether it is within the limit.
 *
 * Each IP gets `config.rateLimitRequests` requests per window of
 * `config.rateLimitWindow` milliseconds; the window starts at that IP's first
 * request and a fresh window starts once it has elapsed.
 *
 * @param ip - Key identifying the client.
 * @returns `true` if the request may proceed, `false` if the limit is reached.
 */
function checkRateLimit(ip: string): boolean {
  const now = Date.now();

  // Drop expired windows at most once per window length; without this the map
  // keeps one entry for every IP that has ever connected.
  if (now >= nextRateLimitSweep) {
    for (const [key, entry] of requestCounts) {
      if (now > entry.resetTime) {
        requestCounts.delete(key);
      }
    }
    nextRateLimitSweep = now + config.rateLimitWindow;
  }

  const record = requestCounts.get(ip);

  if (!record || now > record.resetTime) {
    requestCounts.set(ip, { count: 1, resetTime: now + config.rateLimitWindow });
    return true;
  }

  if (record.count >= config.rateLimitRequests) {
    return false;
  }

  record.count++;
  return true;
}

/**
 * Reads the whole request body and parses it as JSON.
 *
 * Chunks are collected as bytes and decoded once at the end, so a multi-byte
 * UTF-8 character that straddles two chunks is decoded correctly.
 *
 * @param req - The incoming request to read.
 * @param maxBytes - Largest body accepted, in bytes (default 1 MiB). Once it is
 *   exceeded, buffering stops and the promise rejects.
 * @returns The parsed JSON value, or `{}` when the body is empty.
 * @throws Error('Invalid JSON') when the body is not valid JSON.
 * @throws Error('Request body too large') when the body exceeds `maxBytes`.
 */
async function parseBody(req: IncomingMessage, maxBytes = 1024 * 1024): Promise<unknown> {
  return new Promise((resolve, reject) => {
    const chunks: Buffer[] = [];
    let received = 0;
    let overflowed = false;

    req.on('data', (chunk: Buffer | string) => {
      if (overflowed) return; // keep draining, but hold on to nothing

      const piece = typeof chunk === 'string' ? Buffer.from(chunk) : chunk;
      received += piece.length;

      if (received > maxBytes) {
        overflowed = true;
        chunks.length = 0;
        reject(new Error('Request body too large'));
        return;
      }

      chunks.push(piece);
    });

    req.on('end', () => {
      if (overflowed) return;

      const text = Buffer.concat(chunks).toString('utf8');
      try {
        resolve(text ? JSON.parse(text) : {});
      } catch {
        reject(new Error('Invalid JSON'));
      }
    });

    req.on('error', reject);
  });
}

// CORS headers: any origin may call the API with GET, POST or OPTIONS and may
// send the Content-Type and Authorization request headers.
function setCorsHeaders(res: ServerResponse): void {
  const corsHeaders: ReadonlyArray<readonly [string, string]> = [
    ['Access-Control-Allow-Origin', '*'],
    ['Access-Control-Allow-Methods', 'GET, POST, OPTIONS'],
    ['Access-Control-Allow-Headers', 'Content-Type, Authorization'],
  ];

  for (const [headerName, headerValue] of corsHeaders) {
    res.setHeader(headerName, headerValue);
  }
}

// JSON response helper: writes the status line with a JSON content type, then
// finishes the response with `data` serialized as the body.
function sendJson(res: ServerResponse, status: number, data: unknown): void {
  const jsonHeaders = { 'Content-Type': 'application/json' };
  res.writeHead(status, jsonHeaders);

  const payload = JSON.stringify(data);
  res.end(payload);
}

/**
 * Builds the assistant, initializes the knowledge base and starts the HTTP API.
 *
 * Routes: GET /health, POST /chat, POST /chat/stream (Server-Sent Events) and
 * POST /clear. Every response carries the CORS headers, and every request
 * other than a CORS preflight is counted against the per-IP rate limit.
 *
 * The returned promise resolves once `listen` has been called; it does not
 * wait for the server to close.
 */
export async function startServer(): Promise<void> {
  // Single factory so the bootstrap assistant and every session are configured
  // identically.
  const createAssistant = (): Assistant =>
    new Assistant({
      enableRag: true,
      tools: [calculatorTool, weatherTool, notesTool, webSearchTool],
    });

  // Initialize assistant
  const assistant = createAssistant();

  const retriever = createRetriever();
  assistant.setRagRetriever(retriever);

  // Initialize RAG. A failure is logged and the server still starts.
  try {
    await retriever('initialize');
    logger.info('Knowledge base initialized');
  } catch (error) {
    logger.warn('Could not initialize knowledge base', error);
  }

  // Conversation sessions (in production, use Redis or a database).
  // NOTE(review): entries are never evicted, so this map grows with every
  // distinct sessionId a client sends.
  const sessions = new Map<string, Assistant>();

  // Returns the assistant bound to `sessionId`, creating it on first use.
  function getOrCreateSession(sessionId: string): Assistant {
    let session = sessions.get(sessionId);
    if (!session) {
      session = createAssistant();
      session.setRagRetriever(retriever);
      sessions.set(sessionId, session);
    }
    return session;
  }

  // Parses the body and guarantees an object, so a JSON `null`, number or
  // string body is treated like an empty one instead of causing a TypeError.
  async function readJsonObject(req: IncomingMessage): Promise<Record<string, unknown>> {
    const parsed = await parseBody(req);
    return typeof parsed === 'object' && parsed !== null
      ? (parsed as Record<string, unknown>)
      : {};
  }

  // Session key from the body; anything that is not a string selects 'default'.
  function readSessionId(body: Record<string, unknown>): string {
    return typeof body.sessionId === 'string' ? body.sessionId : 'default';
  }

  const server = createServer(async (req, res) => {
    setCorsHeaders(res);

    // Handle CORS preflight
    if (req.method === 'OPTIONS') {
      res.writeHead(204);
      res.end();
      return;
    }

    // NOTE(review): behind a platform proxy this is presumably the proxy's
    // address, so all clients would share one bucket — confirm per platform.
    const ip = req.socket.remoteAddress ?? 'unknown';

    // Rate limiting
    if (!checkRateLimit(ip)) {
      sendJson(res, 429, { error: 'Too many requests' });
      return;
    }

    // Only the path is needed, so a fixed base is used instead of the
    // client-controlled Host header, and a malformed URL becomes a 400 rather
    // than an unhandled rejection.
    let pathname: string;
    try {
      pathname = new URL(req.url ?? '/', 'http://localhost').pathname;
    } catch {
      sendJson(res, 400, { error: 'Invalid request URL' });
      return;
    }

    try {
      // Health check
      if (req.method === 'GET' && pathname === '/health') {
        sendJson(res, 200, { status: 'ok', timestamp: new Date().toISOString() });
        return;
      }

      // Chat endpoint
      if (req.method === 'POST' && pathname === '/chat') {
        const body = await readJsonObject(req);
        const message = body.message;

        if (typeof message !== 'string' || message === '') {
          sendJson(res, 400, { error: 'Message is required' });
          return;
        }

        const sessionId = readSessionId(body);
        const sessionAssistant = getOrCreateSession(sessionId);

        const response = await sessionAssistant.chat(message);

        sendJson(res, 200, {
          message: response.content,
          toolsUsed: response.toolsUsed,
          sessionId,
        });
        return;
      }

      // Streaming chat endpoint
      if (req.method === 'POST' && pathname === '/chat/stream') {
        const body = await readJsonObject(req);
        const message = body.message;

        if (typeof message !== 'string' || message === '') {
          sendJson(res, 400, { error: 'Message is required' });
          return;
        }

        const sessionAssistant = getOrCreateSession(readSessionId(body));

        // Server-Sent Events
        res.writeHead(200, {
          'Content-Type': 'text/event-stream',
          'Cache-Control': 'no-cache',
          Connection: 'keep-alive',
        });

        // Stop pulling chunks from the model once the client has gone away.
        let clientGone = false;
        res.on('close', () => {
          clientGone = true;
        });

        for await (const chunk of sessionAssistant.chatStream(message)) {
          if (clientGone) break;

          if (chunk.type === 'text' && chunk.content) {
            res.write(`data: ${JSON.stringify({ type: 'text', content: chunk.content })}\n\n`);
          } else if (chunk.type === 'tool_call' && chunk.toolCall) {
            res.write(`data: ${JSON.stringify({ type: 'tool', name: chunk.toolCall.name })}\n\n`);
          } else if (chunk.type === 'done') {
            res.write(`data: ${JSON.stringify({ type: 'done' })}\n\n`);
          }
        }

        res.end();
        return;
      }

      // Clear session
      if (req.method === 'POST' && pathname === '/clear') {
        const body = await readJsonObject(req);
        const sessionId = readSessionId(body);

        sessions.get(sessionId)?.clearHistory();

        sendJson(res, 200, { message: 'Session cleared', sessionId });
        return;
      }

      // 404 for unknown routes
      sendJson(res, 404, { error: 'Not found' });
    } catch (error) {
      // A malformed body is the client's mistake, not a server failure.
      if (!res.headersSent && error instanceof Error && error.message === 'Invalid JSON') {
        logger.warn('Rejected request with an invalid JSON body');
        sendJson(res, 400, { error: 'Invalid JSON' });
        return;
      }

      logger.error('Request error', error);

      // Once the SSE headers are out, a second writeHead would throw; report
      // the failure in-band and close the stream instead.
      if (res.headersSent) {
        if (!res.writableEnded && !res.destroyed) {
          res.write(`data: ${JSON.stringify({ type: 'error' })}\n\n`);
          res.end();
        }
        return;
      }

      sendJson(res, 500, {
        error: isProduction() ? 'Internal server error' : String(error),
      });
    }
  });

  server.listen(config.port, config.host, () => {
    logger.info(`Server running at http://${config.host}:${config.port}`);
    logger.info('Endpoints:');
    logger.info('  GET  /health       - Health check');
    logger.info('  POST /chat         - Send message');
    logger.info('  POST /chat/stream  - Send message (streaming)');
    logger.info('  POST /clear        - Clear session');
  });
}

Update src/index.ts to support both CLI and server modes:

import { config } from './core/config.js';
import { createLogger } from './utils/logger.js';

const logger = createLogger('Main');

/**
 * Picks the run mode and starts it.
 *
 * Server mode is used when the first CLI argument is 'server' or when the
 * configured environment is 'production'; otherwise the interactive CLI runs.
 * Each mode's module is imported only when that mode is selected.
 */
async function main() {
  const [, , requestedMode = 'cli'] = process.argv;
  const runAsServer = requestedMode === 'server' || config.nodeEnv === 'production';

  if (!runAsServer) {
    const cliModule = await import('./cli.js');
    await cliModule.startCli();
    return;
  }

  const serverModule = await import('./server.js');
  await serverModule.startServer();
}

// Any rejection from startup is logged and ends the process with exit code 1.
main().catch((fatal) => {
  logger.error('Fatal error', fatal);
  process.exit(1);
});

Move CLI code to src/cli.ts:

import * as readline from 'readline';

import { Assistant } from './core/assistant.js';
import { createRetriever } from './rag/retriever.js';
import { calculatorTool, notesTool, weatherTool, webSearchTool } from './tools/index.js';
import { createLogger } from './utils/logger.js';

const logger = createLogger('CLI');

/**
 * Runs the interactive terminal chat loop.
 *
 * Prints a banner, builds the assistant with its tools and retriever, tries to
 * initialize the knowledge base, then reads lines from stdin until `/exit`.
 * Lines starting with a known slash command are handled locally; everything
 * else is sent to the assistant and the reply is streamed to stdout.
 */
export async function startCli(): Promise<void> {
  console.log('AI Knowledge Assistant');
  console.log('======================');
  console.log('Type your message and press Enter.');
  console.log('Commands: /clear, /status, /tools, /exit');
  console.log('');

  const assistant = new Assistant({
    enableRag: true,
    tools: [calculatorTool, weatherTool, notesTool, webSearchTool],
  });

  const retriever = createRetriever();
  assistant.setRagRetriever(retriever);

  console.log('Initializing knowledge base...');

  try {
    await retriever('initialize');
    console.log('Knowledge base ready!');
  } catch (error) {
    console.log('Warning: Could not load documents.');
    // Record the cause instead of discarding it. This also gives the module's
    // logger a use, which the build requires under `noUnusedLocals`.
    logger.warn('Could not initialize knowledge base', error);
  }

  console.log('Tools available: calculator, weather, notes, web_search\n');

  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  // Asks for one line, handles it, then re-arms itself for the next line.
  const prompt = () => {
    rl.question('You: ', async (input) => {
      const trimmed = input.trim();

      // Blank line: just ask again.
      if (!trimmed) {
        prompt();
        return;
      }

      if (trimmed === '/exit') {
        console.log('Goodbye!');
        rl.close();
        process.exit(0);
      }

      if (trimmed === '/clear') {
        assistant.clearHistory();
        console.log('Conversation history cleared.\n');
        prompt();
        return;
      }

      if (trimmed === '/status') {
        const history = assistant.getHistory();
        console.log(`Messages in history: ${history.length}\n`);
        prompt();
        return;
      }

      if (trimmed === '/tools') {
        console.log('Available tools:');
        console.log('  - calculator: Perform math calculations');
        console.log('  - weather: Get current weather');
        console.log('  - notes: Save and retrieve notes');
        console.log('  - web_search: Search the web\n');
        prompt();
        return;
      }

      process.stdout.write('Assistant: ');

      // Stream the reply; a failure is reported and the loop keeps running.
      try {
        for await (const chunk of assistant.chatStream(trimmed)) {
          if (chunk.type === 'text' && chunk.content) {
            process.stdout.write(chunk.content);
          } else if (chunk.type === 'tool_call' && chunk.toolCall) {
            process.stdout.write(`\n[Using ${chunk.toolCall.name}...]\n`);
          }
        }
        console.log('\n');
      } catch (error) {
        console.error(`\nError: ${error instanceof Error ? error.message : error}\n`);
      }

      prompt();
    });
  };

  prompt();
}

Deploying to Railway

Railway provides simple deployment from GitHub.

1. Prepare Your Repository

Create a Procfile in the project root:

web: npm start

Create railway.json:

{
  "$schema": "https://railway.app/railway.schema.json",
  "build": {
    "builder": "NIXPACKS",
    "buildCommand": "npm run build"
  },
  "deploy": {
    "startCommand": "npm start",
    "healthcheckPath": "/health",
    "healthcheckTimeout": 30,
    "restartPolicyType": "ON_FAILURE",
    "restartPolicyMaxRetries": 3
  }
}

2. Deploy Steps

Push your code to GitHub
Go to railway.app and sign in
Click "New Project" > "Deploy from GitHub repo"
Select your repository
Add environment variables in the Railway dashboard:
- OPENAI_API_KEY
- NODE_ENV=production
- Any other required variables
Railway will automatically build and deploy

Deploying to Render

Render is another excellent option with a free tier.

1. Create render.yaml

# Render blueprint: one Node web service on the free plan, built and started
# with the npm scripts defined in package.json.
services:
  - type: web
    name: ai-assistant
    env: node
    plan: free
    # Installs dependencies, then compiles TypeScript into dist/.
    buildCommand: npm install && npm run build
    startCommand: npm start
    # Path probed for health; served by the GET /health route in src/server.ts.
    healthCheckPath: /health
    envVars:
      # With NODE_ENV=production, src/index.ts starts the HTTP server and the
      # logger emits JSON lines.
      - key: NODE_ENV
        value: production
      # No value is committed here; presumably `sync: false` means the key is
      # supplied in the Render dashboard (see the deploy steps) — confirm.
      - key: OPENAI_API_KEY
        sync: false

2. Deploy Steps

Push code to GitHub
Go to render.com and sign in
Click "New" > "Web Service"
Connect your GitHub repository
Render will detect render.yaml and configure the service automatically
Add secret environment variables in the dashboard

Deploying to Fly.io

Fly.io offers global edge deployment.

1. Create fly.toml

# Fly.io app configuration.
app = "ai-assistant"
primary_region = "iad"

# There is deliberately no [build] section. A `builder` entry selects a
# buildpack build, which would leave the project's Dockerfile unused; without
# one, `fly deploy` builds the Dockerfile in the project root.

[env]
  NODE_ENV = "production"
  # Must match internal_port below; src/core/config.ts reads PORT.
  PORT = "8080"

# One HTTP service definition. It already covers ports 80 and 443, so a
# separate [[services]] block for the same internal port is not needed.
[http_service]
  internal_port = 8080
  force_https = true
  # Machines stop when idle and start again on the next request.
  # NOTE: conversation sessions are held in memory, so they are lost whenever
  # a machine stops.
  auto_stop_machines = true
  auto_start_machines = true
  min_machines_running = 0

  # Probes the GET /health route served by src/server.ts.
  [[http_service.checks]]
    grace_period = "10s"
    interval = "30s"
    timeout = "5s"
    method = "GET"
    path = "/health"

2. Create Dockerfile

FROM node:20-slim

WORKDIR /app

# Install ALL dependencies first: the build step runs `tsc`, and typescript is
# a devDependency, so a production-only install would make the build fail.
COPY package*.json ./
RUN npm ci

# Tip: add a .dockerignore listing node_modules, dist and .env so that local
# artifacts and secrets are not copied into the image.
COPY . .

# Compile to dist/, then drop devDependencies from the final image.
RUN npm run build && npm prune --omit=dev

# Set only after the install above, so devDependencies are not skipped.
ENV NODE_ENV=production
# The app defaults to port 3000; make it listen on the exposed port.
ENV PORT=8080
EXPOSE 8080

CMD ["npm", "start"]

3. Deploy Steps

# Install Fly CLI
curl -L https://fly.io/install.sh | sh

# Login
fly auth login

# Launch app
fly launch

# Set secrets
fly secrets set OPENAI_API_KEY=sk-proj-your-key

# Deploy
fly deploy

Monitoring and Logging

Structured Logging

Update src/utils/logger.ts for production:

import { config, isProduction } from '../core/config.js';

type LogLevel = 'debug' | 'info' | 'warn' | 'error';

/** Severity rank of each level; a message is emitted when its rank is at least the configured one. */
const LOG_LEVELS: Record<LogLevel, number> = {
  debug: 0,
  info: 1,
  warn: 2,
  error: 3,
};

/** Shape of one log record as emitted in production (one JSON object per line). */
interface LogEntry {
  timestamp: string;
  level: LogLevel;
  logger: string;
  message: string;
  data?: unknown;
}

/**
 * JSON.stringify replacer for log records.
 *
 * Error instances have no enumerable own properties and would serialize as
 * `{}`, so they are expanded to name/message/stack. BigInt values, which
 * JSON.stringify refuses, are written as strings.
 */
function toSerializable(_key: string, value: unknown): unknown {
  if (value instanceof Error) {
    return { name: value.name, message: value.message, stack: value.stack };
  }
  if (typeof value === 'bigint') {
    return value.toString();
  }
  return value;
}

/** Serializes a record to one JSON line without ever throwing. */
function toJsonLine(entry: LogEntry): string {
  try {
    return JSON.stringify(entry, toSerializable);
  } catch {
    // For example circular data: keep the record, replace only the payload.
    return JSON.stringify({ ...entry, data: '[unserializable log data]' });
  }
}

/**
 * Named logger that filters by the configured log level.
 *
 * In production each record is written as a single JSON line; otherwise a
 * human-readable line is written. Both go through `console.log`.
 */
class Logger {
  private level: LogLevel;
  private name: string;

  /** @param name - Label included in every record from this logger. */
  constructor(name: string) {
    this.name = name;
    this.level = config.logLevel;
  }

  private shouldLog(level: LogLevel): boolean {
    return LOG_LEVELS[level] >= LOG_LEVELS[this.level];
  }

  private log(level: LogLevel, message: string, data?: unknown): void {
    if (!this.shouldLog(level)) return;

    const entry: LogEntry = {
      timestamp: new Date().toISOString(),
      level,
      logger: this.name,
      message,
    };

    if (data !== undefined) {
      entry.data = data;
    }

    if (isProduction()) {
      // JSON format for production log aggregation
      console.log(toJsonLine(entry));
    } else {
      // Human-readable for development
      const prefix = `[${entry.timestamp}] [${level.toUpperCase()}] [${this.name}]`;
      // Compare against undefined so falsy payloads (0, '', false, null) are
      // still printed.
      if (data !== undefined) {
        console.log(prefix, message, data);
      } else {
        console.log(prefix, message);
      }
    }
  }

  /** Logs at 'debug' level. */
  debug(message: string, data?: unknown): void {
    this.log('debug', message, data);
  }

  /** Logs at 'info' level. */
  info(message: string, data?: unknown): void {
    this.log('info', message, data);
  }

  /** Logs at 'warn' level. */
  warn(message: string, data?: unknown): void {
    this.log('warn', message, data);
  }

  /** Logs at 'error' level. */
  error(message: string, data?: unknown): void {
    this.log('error', message, data);
  }
}

/** Creates a logger whose records are labelled with `name`. */
export function createLogger(name: string): Logger {
  return new Logger(name);
}

Health Checks

The /health endpoint should return detailed status:

// In server.ts
// Drop-in replacement for the health route inside the request handler. Besides
// the status and timestamp it reports process.uptime(), the
// process.memoryUsage() breakdown and the number of in-memory sessions.
// NOTE(review): the route has no authentication, so these internals are
// returned to any caller — confirm that is acceptable before deploying.
if (req.method === 'GET' && url.pathname === '/health') {
  sendJson(res, 200, {
    status: 'ok',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
    memory: process.memoryUsage(),
    sessions: sessions.size,
  });
  return;
}

Security Checklist

Before deploying to production, verify:

API keys are in environment variables, not code
.env is in .gitignore
Rate limiting is enabled
Input validation is in place
Error messages do not leak sensitive info
CORS is configured appropriately (the example server allows every origin with `*`; restrict it to the origins you trust)
HTTPS is enforced
Dependencies are up to date

Key Takeaways

Prepare for production with proper build configuration
Use environment variables for all secrets and configuration
Multiple platforms offer easy deployment (Railway, Render, Fly.io)
Structured logging enables log aggregation and analysis
Security measures like rate limiting protect your API

Practice Exercise

Deploy your assistant to one of the platforms
Set up a custom domain
Add request logging with timestamps and durations
Implement API key authentication for the endpoints
Create a simple web frontend that connects to your API

Next Steps

Your assistant is deployed! In the final lesson, you will review the code, add tests, and discuss improvements.

Continue to Lesson 6.6: Code Review and Improvements