Database Design
Learn the fundamentals of database design, from conceptual modeling to physical implementation, with best practices for modern applications.
Overview
Database design is the process of creating a detailed data model of a database. This tutorial covers relational database design principles, normalization, indexing strategies, and modern database patterns.
Database Design Process
1. Requirements Analysis
- Identify Entities: What objects does your system need to track?
- Define Relationships: How do entities relate to each other?
- Determine Attributes: What properties does each entity have?
- Specify Constraints: What rules must the data follow?
2. Conceptual Design
Create an Entity-Relationship (ER) diagram:
[User] ----< has >---- [Order] ----< contains >---- [Product]
| | |
| | |
v v v
[Profile] [Payment] [Category]
3. Logical Design
Transform the ER diagram into a relational schema:
sql
-- Users table
CREATE TABLE users (
id SERIAL PRIMARY KEY,
email VARCHAR(255) UNIQUE NOT NULL,
password_hash VARCHAR(255) NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- User profiles table
CREATE TABLE user_profiles (
id SERIAL PRIMARY KEY,
user_id INTEGER REFERENCES users(id) ON DELETE CASCADE,
first_name VARCHAR(100),
last_name VARCHAR(100),
phone VARCHAR(20),
address TEXT,
UNIQUE(user_id)
);
-- Categories table
CREATE TABLE categories (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
description TEXT,
parent_id INTEGER REFERENCES categories(id),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Products table
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(255) NOT NULL,
description TEXT,
price DECIMAL(10,2) NOT NULL,
category_id INTEGER REFERENCES categories(id),
stock_quantity INTEGER DEFAULT 0,
is_active BOOLEAN DEFAULT true,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Orders table
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
user_id INTEGER REFERENCES users(id),
total_amount DECIMAL(10,2) NOT NULL,
status VARCHAR(50) DEFAULT 'pending',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Order items table
CREATE TABLE order_items (
id SERIAL PRIMARY KEY,
order_id INTEGER REFERENCES orders(id) ON DELETE CASCADE,
product_id INTEGER REFERENCES products(id),
quantity INTEGER NOT NULL,
unit_price DECIMAL(10,2) NOT NULL,
total_price DECIMAL(10,2) NOT NULL
);
Normalization
First Normal Form (1NF)
Eliminate repeating groups and ensure atomic values:
sql
-- Bad: Repeating groups
CREATE TABLE customers_bad (
id INTEGER,
name VARCHAR(100),
phone1 VARCHAR(20),
phone2 VARCHAR(20),
phone3 VARCHAR(20)
);
-- Good: Separate table for phone numbers
CREATE TABLE customers (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL
);
CREATE TABLE customer_phones (
id SERIAL PRIMARY KEY,
customer_id INTEGER REFERENCES customers(id),
phone VARCHAR(20) NOT NULL,
type VARCHAR(20) -- 'home', 'work', 'mobile'
);
Second Normal Form (2NF)
Eliminate partial dependencies:
sql
-- Bad: Partial dependency
CREATE TABLE order_details_bad (
order_id INTEGER,
product_id INTEGER,
product_name VARCHAR(255), -- Depends only on product_id
quantity INTEGER,
unit_price DECIMAL(10,2),
PRIMARY KEY (order_id, product_id)
);
-- Good: Remove partial dependency
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(255) NOT NULL,
price DECIMAL(10,2) NOT NULL
);
CREATE TABLE order_items (
order_id INTEGER,
product_id INTEGER REFERENCES products(id),
quantity INTEGER NOT NULL,
unit_price DECIMAL(10,2) NOT NULL,
PRIMARY KEY (order_id, product_id)
);
Third Normal Form (3NF)
Eliminate transitive dependencies:
sql
-- Bad: Transitive dependency
CREATE TABLE employees_bad (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
department_id INTEGER,
department_name VARCHAR(100), -- Depends on department_id
department_budget DECIMAL(10,2) -- Depends on department_id
);
-- Good: Separate department information
CREATE TABLE departments (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
budget DECIMAL(10,2)
);
CREATE TABLE employees (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
department_id INTEGER REFERENCES departments(id)
);
Indexing Strategies
Primary Indexes
sql
-- Automatically created with PRIMARY KEY
CREATE TABLE users (
id SERIAL PRIMARY KEY, -- Creates unique index on id
email VARCHAR(255) UNIQUE NOT NULL -- Creates unique index on email
);
Secondary Indexes
sql
-- Single column index
CREATE INDEX idx_users_email ON users(email);
-- Composite index
CREATE INDEX idx_orders_user_date ON orders(user_id, created_at);
-- Partial index
CREATE INDEX idx_active_products ON products(name) WHERE is_active = true;
-- Functional index
CREATE INDEX idx_users_lower_email ON users(LOWER(email));
Index Performance Analysis
sql
-- Analyze query performance
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'user@example.com';
-- Check index usage
SELECT
schemaname,
tablename,
indexname,
idx_scan,
idx_tup_read,
idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;
Relationships and Constraints
One-to-One Relationship
sql
CREATE TABLE users (
id SERIAL PRIMARY KEY,
email VARCHAR(255) UNIQUE NOT NULL
);
CREATE TABLE user_profiles (
id SERIAL PRIMARY KEY,
user_id INTEGER UNIQUE REFERENCES users(id) ON DELETE CASCADE,
first_name VARCHAR(100),
last_name VARCHAR(100)
);
One-to-Many Relationship
sql
CREATE TABLE categories (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL
);
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(255) NOT NULL,
category_id INTEGER REFERENCES categories(id)
);
Many-to-Many Relationship
sql
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(255) NOT NULL
);
CREATE TABLE tags (
id SERIAL PRIMARY KEY,
name VARCHAR(100) UNIQUE NOT NULL
);
-- Junction table
CREATE TABLE product_tags (
product_id INTEGER REFERENCES products(id) ON DELETE CASCADE,
tag_id INTEGER REFERENCES tags(id) ON DELETE CASCADE,
PRIMARY KEY (product_id, tag_id)
);
Advanced Constraints
sql
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
user_id INTEGER REFERENCES users(id),
total_amount DECIMAL(10,2) NOT NULL,
status VARCHAR(50) DEFAULT 'pending',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
-- Check constraints
CONSTRAINT chk_positive_amount CHECK (total_amount > 0),
CONSTRAINT chk_valid_status CHECK (status IN ('pending', 'processing', 'shipped', 'delivered', 'cancelled'))
);
-- Unique constraint on multiple columns
ALTER TABLE order_items
ADD CONSTRAINT uk_order_product UNIQUE (order_id, product_id);
Advanced Database Patterns
Soft Delete Pattern
sql
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(255) NOT NULL,
price DECIMAL(10,2) NOT NULL,
is_deleted BOOLEAN DEFAULT false,
deleted_at TIMESTAMP NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Create view for active products
CREATE VIEW active_products AS
SELECT * FROM products WHERE is_deleted = false;
-- Soft delete function
CREATE OR REPLACE FUNCTION soft_delete_product(product_id INTEGER)
RETURNS VOID AS $$
BEGIN
UPDATE products
SET is_deleted = true, deleted_at = CURRENT_TIMESTAMP
WHERE id = product_id;
END;
$$ LANGUAGE plpgsql;
Audit Trail Pattern
sql
CREATE TABLE audit_log (
id SERIAL PRIMARY KEY,
table_name VARCHAR(100) NOT NULL,
record_id INTEGER NOT NULL,
action VARCHAR(20) NOT NULL, -- INSERT, UPDATE, DELETE
old_values JSONB,
new_values JSONB,
user_id INTEGER,
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Trigger function for audit logging
CREATE OR REPLACE FUNCTION audit_trigger_function()
RETURNS TRIGGER AS $$
BEGIN
IF TG_OP = 'INSERT' THEN
INSERT INTO audit_log (table_name, record_id, action, new_values, user_id)
VALUES (TG_TABLE_NAME, NEW.id, 'INSERT', row_to_json(NEW), current_setting('app.user_id', true)::INTEGER);
RETURN NEW;
ELSIF TG_OP = 'UPDATE' THEN
INSERT INTO audit_log (table_name, record_id, action, old_values, new_values, user_id)
VALUES (TG_TABLE_NAME, NEW.id, 'UPDATE', row_to_json(OLD), row_to_json(NEW), current_setting('app.user_id', true)::INTEGER);
RETURN NEW;
ELSIF TG_OP = 'DELETE' THEN
INSERT INTO audit_log (table_name, record_id, action, old_values, user_id)
VALUES (TG_TABLE_NAME, OLD.id, 'DELETE', row_to_json(OLD), current_setting('app.user_id', true)::INTEGER);
RETURN OLD;
END IF;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
-- Apply trigger to tables
CREATE TRIGGER audit_trigger_users
AFTER INSERT OR UPDATE OR DELETE ON users
FOR EACH ROW EXECUTE FUNCTION audit_trigger_function();
Hierarchical Data Pattern
sql
-- Adjacency List Model
CREATE TABLE categories (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
parent_id INTEGER REFERENCES categories(id),
level INTEGER DEFAULT 0,
path VARCHAR(500) -- Store full path like '/electronics/computers/laptops'
);
-- Nested Set Model
CREATE TABLE categories_nested (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
lft INTEGER NOT NULL,
rgt INTEGER NOT NULL,
level INTEGER DEFAULT 0
);
-- Query all descendants
SELECT * FROM categories_nested
WHERE lft > (SELECT lft FROM categories_nested WHERE id = 1)
AND rgt < (SELECT rgt FROM categories_nested WHERE id = 1);
Database Performance Optimization
Query Optimization
sql
-- Use EXPLAIN to analyze queries
EXPLAIN (ANALYZE, BUFFERS)
SELECT u.name, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.created_at >= '2023-01-01'
GROUP BY u.id, u.name
HAVING COUNT(o.id) > 5;
-- Optimize with proper indexing
CREATE INDEX idx_users_created_at ON users(created_at);
CREATE INDEX idx_orders_user_id ON orders(user_id);
Partitioning
sql
-- Range partitioning by date
CREATE TABLE orders (
id SERIAL,
user_id INTEGER,
total_amount DECIMAL(10,2),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
) PARTITION BY RANGE (created_at);
-- Create partitions
CREATE TABLE orders_2023_q1 PARTITION OF orders
FOR VALUES FROM ('2023-01-01') TO ('2023-04-01');
CREATE TABLE orders_2023_q2 PARTITION OF orders
FOR VALUES FROM ('2023-04-01') TO ('2023-07-01');
Connection Pooling
javascript
// Node.js with pg-pool
const { Pool } = require('pg')
const pool = new Pool({
user: 'username',
host: 'localhost',
database: 'myapp',
password: 'password',
port: 5432,
max: 20, // Maximum number of connections
idleTimeoutMillis: 30000,
connectionTimeoutMillis: 2000,
})
// Use pool for queries
async function getUser(id) {
const client = await pool.connect()
try {
const result = await client.query('SELECT * FROM users WHERE id = $1', [id])
return result.rows[0]
} finally {
client.release()
}
}
Data Modeling Best Practices
Naming Conventions
sql
-- Table names: plural, lowercase, snake_case
CREATE TABLE user_profiles (
-- Column names: lowercase, snake_case
id SERIAL PRIMARY KEY,
user_id INTEGER,
first_name VARCHAR(100),
last_name VARCHAR(100),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Index names: descriptive with prefix
CREATE INDEX idx_user_profiles_user_id ON user_profiles(user_id);
-- Constraint names: descriptive with prefix
ALTER TABLE user_profiles
ADD CONSTRAINT fk_user_profiles_user_id
FOREIGN KEY (user_id) REFERENCES users(id);
Data Types Selection
sql
CREATE TABLE products (
id SERIAL PRIMARY KEY,
-- Use appropriate string lengths
name VARCHAR(255) NOT NULL, -- Variable length for names
sku CHAR(10) NOT NULL, -- Fixed length for SKUs
description TEXT, -- Unlimited text
-- Numeric types
price DECIMAL(10,2) NOT NULL, -- Exact decimal for money
weight REAL, -- Floating point for measurements
stock_quantity INTEGER DEFAULT 0,
-- Date and time
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
expires_at DATE, -- Date only
-- Boolean
is_active BOOLEAN DEFAULT true,
-- JSON for flexible data
metadata JSONB,
-- Arrays
tags TEXT[],
-- UUID for distributed systems
uuid UUID DEFAULT gen_random_uuid()
);
Denormalization Strategies
sql
-- Calculated fields for performance
CREATE TABLE order_summary (
order_id INTEGER PRIMARY KEY REFERENCES orders(id),
item_count INTEGER,
total_amount DECIMAL(10,2),
last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Trigger to maintain denormalized data
CREATE OR REPLACE FUNCTION update_order_summary()
RETURNS TRIGGER AS $$
BEGIN
INSERT INTO order_summary (order_id, item_count, total_amount)
SELECT
NEW.order_id,
COUNT(*),
SUM(total_price)
FROM order_items
WHERE order_id = NEW.order_id
ON CONFLICT (order_id) DO UPDATE SET
item_count = EXCLUDED.item_count,
total_amount = EXCLUDED.total_amount,
last_updated = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER trigger_update_order_summary
AFTER INSERT OR UPDATE OR DELETE ON order_items
FOR EACH ROW EXECUTE FUNCTION update_order_summary();
Modern Database Patterns
Event Sourcing
sql
CREATE TABLE events (
id SERIAL PRIMARY KEY,
aggregate_id UUID NOT NULL,
aggregate_type VARCHAR(100) NOT NULL,
event_type VARCHAR(100) NOT NULL,
event_data JSONB NOT NULL,
event_version INTEGER NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE(aggregate_id, event_version)
);
-- Create index for event retrieval
CREATE INDEX idx_events_aggregate ON events(aggregate_id, event_version);
-- Example: User aggregate events
INSERT INTO events (aggregate_id, aggregate_type, event_type, event_data, event_version)
VALUES
('550e8400-e29b-41d4-a716-446655440000', 'User', 'UserCreated',
'{"email": "user@example.com", "name": "John Doe"}', 1),
('550e8400-e29b-41d4-a716-446655440000', 'User', 'EmailChanged',
'{"old_email": "user@example.com", "new_email": "john@example.com"}', 2);
CQRS (Command Query Responsibility Segregation)
sql
-- Write model (normalized)
CREATE TABLE users_write (
id UUID PRIMARY KEY,
email VARCHAR(255) UNIQUE NOT NULL,
password_hash VARCHAR(255) NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Read model (denormalized for queries)
CREATE TABLE users_read (
id UUID PRIMARY KEY,
email VARCHAR(255),
full_name VARCHAR(200),
order_count INTEGER DEFAULT 0,
total_spent DECIMAL(10,2) DEFAULT 0,
last_order_date TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
Database Security
Access Control
sql
-- Create roles
CREATE ROLE app_read;
CREATE ROLE app_write;
CREATE ROLE app_admin;
-- Grant permissions
GRANT SELECT ON ALL TABLES IN SCHEMA public TO app_read;
GRANT SELECT, INSERT, UPDATE ON ALL TABLES IN SCHEMA public TO app_write;
GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO app_admin;
-- Create application user
CREATE USER app_user WITH PASSWORD 'secure_password';
GRANT app_write TO app_user;
Row Level Security
sql
-- Enable RLS
ALTER TABLE orders ENABLE ROW LEVEL SECURITY;
-- Create policy
CREATE POLICY user_orders_policy ON orders
FOR ALL TO app_user
USING (user_id = current_setting('app.user_id')::INTEGER);
-- Set user context in application
SET app.user_id = 123;
Data Encryption
sql
-- Install pgcrypto extension
CREATE EXTENSION IF NOT EXISTS pgcrypto;
-- Encrypt sensitive data
CREATE TABLE user_secrets (
id SERIAL PRIMARY KEY,
user_id INTEGER REFERENCES users(id),
encrypted_ssn BYTEA, -- Store encrypted data
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Insert encrypted data
INSERT INTO user_secrets (user_id, encrypted_ssn)
VALUES (1, pgp_sym_encrypt('123-45-6789', 'encryption_key'));
-- Query encrypted data
SELECT user_id, pgp_sym_decrypt(encrypted_ssn, 'encryption_key') as ssn
FROM user_secrets WHERE user_id = 1;
Testing Database Design
Unit Tests
javascript
// Jest test example
const { Pool } = require('pg')
const pool = new Pool({ connectionString: process.env.TEST_DATABASE_URL })
describe('User Database Operations', () => {
beforeEach(async () => {
await pool.query('TRUNCATE users, user_profiles CASCADE')
})
afterAll(async () => {
await pool.end()
})
test('should create user with profile', async () => {
const userResult = await pool.query(
'INSERT INTO users (email, password_hash) VALUES ($1, $2) RETURNING id',
['test@example.com', 'hashed_password']
)
const userId = userResult.rows[0].id
await pool.query(
'INSERT INTO user_profiles (user_id, first_name, last_name) VALUES ($1, $2, $3)',
[userId, 'John', 'Doe']
)
const profileResult = await pool.query(
'SELECT * FROM user_profiles WHERE user_id = $1',
[userId]
)
expect(profileResult.rows).toHaveLength(1)
expect(profileResult.rows[0].first_name).toBe('John')
})
})
Performance Testing
sql
-- Generate test data
INSERT INTO users (email, password_hash)
SELECT
'user' || i || '@example.com',
'hashed_password_' || i
FROM generate_series(1, 100000) i;
-- Test query performance
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM users WHERE email LIKE 'user1%';
-- Monitor query performance
SELECT
query,
calls,
total_time,
mean_time,
rows
FROM pg_stat_statements
ORDER BY total_time DESC
LIMIT 10;
Migration Strategies
Schema Versioning
sql
-- Migration tracking table
CREATE TABLE schema_migrations (
version VARCHAR(20) PRIMARY KEY,
applied_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Example migration script
-- Migration: 001_create_users_table.sql
BEGIN;
CREATE TABLE users (
id SERIAL PRIMARY KEY,
email VARCHAR(255) UNIQUE NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
INSERT INTO schema_migrations (version) VALUES ('001');
COMMIT;
Zero-Downtime Migrations
sql
-- Step 1: Add new column (nullable)
ALTER TABLE users ADD COLUMN full_name VARCHAR(200);
-- Step 2: Populate new column
UPDATE users SET full_name = first_name || ' ' || last_name;
-- Step 3: Make column NOT NULL (after application deployment)
ALTER TABLE users ALTER COLUMN full_name SET NOT NULL;
-- Step 4: Drop old columns (after confirming everything works)
ALTER TABLE users DROP COLUMN first_name, DROP COLUMN last_name;
This comprehensive database design tutorial covers fundamental concepts through advanced patterns. Apply these principles based on your specific application requirements and scale.