Log Aggregation Overview
Build comprehensive log aggregation systems to collect, parse, and analyze logs from multiple sources, enabling centralized monitoring, debugging, and compliance auditing.
## When to Use
- Centralized log collection
- Distributed system debugging
- Compliance and audit logging
- Security event monitoring
- Application performance analysis
- Error tracking and alerting
- Historical log retention
- Real-time log searching

## Implementation Examples

### 1. ELK Stack Configuration
docker-compose.yml - ELK Stack setup
version: '3.8'

services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.5.0
    environment:
      - discovery.type=single-node
      - xpack.security.enabled=false
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
    ports:
      - "9200:9200"
    volumes:
      - elasticsearch_data:/usr/share/elasticsearch/data
    healthcheck:
      test: curl -s http://localhost:9200 >/dev/null || exit 1
      interval: 10s
      timeout: 5s
      retries: 5

  logstash:
    image: docker.elastic.co/logstash/logstash:8.5.0
    volumes:
      - ./logstash.conf:/usr/share/logstash/pipeline/logstash.conf
    ports:
      - "5000:5000"
      - "9600:9600"
    depends_on:
      - elasticsearch
    environment:
      - "LS_JAVA_OPTS=-Xmx256m -Xms256m"

  kibana:
    image: docker.elastic.co/kibana/kibana:8.5.0
    ports:
      - "5601:5601"
    environment:
      # ELASTICSEARCH_URL is the legacy 6.x setting; 7.x/8.x images read
      # ELASTICSEARCH_HOSTS instead.
      - ELASTICSEARCH_HOSTS=http://elasticsearch:9200
    depends_on:
      - elasticsearch

  filebeat:
    image: docker.elastic.co/beats/filebeat:8.5.0
    volumes:
      - ./filebeat.yml:/usr/share/filebeat/filebeat.yml:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
    # -strict.perms=false: the bind-mounted config is not owned by the
    # filebeat user, so the ownership check must be relaxed.
    command: filebeat -e -strict.perms=false
    depends_on:
      - elasticsearch

volumes:
  elasticsearch_data:
### 2. Logstash Pipeline Configuration
logstash.conf
input {
  # Receive structured logs via TCP (one JSON document per event)
  tcp {
    port => 5000
    codec => json
  }

  # Tail application log files; continuation lines (those NOT starting with a
  # timestamp, e.g. stack traces) are folded into the previous event.
  file {
    path => "/var/log/app/*.log"
    start_position => "beginning"
    codec => multiline {
      pattern => "^%{TIMESTAMP_ISO8601}"
      negate => true
      what => "previous"
    }
  }

  # NOTE(review): Logstash has no "kubernetes" input plugin. Kubernetes logs
  # should be shipped by a node agent (Filebeat/Fluent Bit) and received here.
  beats {
    port => 5044
  }
}

filter {
  # Parse JSON payloads into their own subtree so parsed keys cannot
  # clobber top-level event fields.
  json {
    source => "message"
    target => "parsed"
  }

  # Extract timestamp, level, and the remaining message.
  # Literal brackets must be escaped in grok patterns, and re-capturing into
  # "message" requires an explicit overwrite.
  grok {
    match => { "message" => "%{TIMESTAMP_ISO8601:timestamp} \[%{LOGLEVEL:level}\] %{GREEDYDATA:message}" }
    overwrite => ["message"]
  }

  # Use the log's own timestamp as the event time
  date {
    match => ["timestamp", "ISO8601"]
    target => "@timestamp"
  }

  # Enrich with deployment metadata
  mutate {
    add_field => {
      "environment" => "production"
      "datacenter" => "us-east-1"
    }
    remove_field => ["host"]
  }

  # Drop debug logs in production
  if [level] == "DEBUG" {
    drop { }
  }

  # Tag errors for routing and alerting
  if [level] =~ /ERROR|FATAL/ {
    mutate { add_tag => ["error"] }
  }
}

output {
  # Primary sink: daily Elasticsearch indices.
  # NOTE(review): document_type was removed in Elasticsearch 8.x and must not
  # be set on the elasticsearch output.
  elasticsearch {
    hosts => ["elasticsearch:9200"]
    index => "logs-%{+YYYY.MM.dd}"
  }

  # Mirror tagged errors to stdout for quick debugging
  if "error" in [tags] {
    stdout { codec => rubydebug }
  }
}
### 3. Filebeat Configuration
filebeat.yml
filebeat.inputs:
  # Application log files
  - type: log
    enabled: true
    paths:
      - /var/log/app/*.log
    fields:
      app: myapp
      environment: production
    # Fold lines that do NOT start with '[' into the previous event.
    # The '[' must be escaped — unescaped it opens a regex character class
    # and '^[' is an invalid (unterminated) pattern.
    multiline.pattern: '^\['
    multiline.negate: true
    multiline.match: after

  # Docker container logs with hints-based autodiscovery defaults
  - type: docker
    enabled: true
    hints.enabled: true
    hints.default_config:
      enabled: true
      type: container
      paths:
        - /var/lib/docker/containers/${data.docker.container.id}/*.log

  # Host system logs
  - type: log
    enabled: true
    paths:
      - /var/log/syslog
      - /var/log/auth.log
    fields:
      service: system
      environment: production

processors:
  - add_docker_metadata:
      host: "unix:///var/run/docker.sock"
  - add_kubernetes_metadata:
      in_cluster: true
  # Explicit empty map — a bare "add_host_metadata:" key is YAML null
  - add_host_metadata: {}
  - add_fields:
      target: ''
      fields:
        environment: production

output.elasticsearch:
  hosts: ["elasticsearch:9200"]
  index: "filebeat-%{+yyyy.MM.dd}"

# A custom output index requires a matching index template name/pattern,
# otherwise Filebeat refuses to start.
setup.template.name: "filebeat"
setup.template.pattern: "filebeat-*"

logging.level: info
logging.to_files: true
logging.files:
  path: /var/log/filebeat
  name: filebeat
  keepfiles: 7
  permissions: 0640
### 4. Kibana Dashboard and Alerts

{
  "dashboard": {
    "title": "Application Logs Overview",
    "panels": [
      {
        "title": "Error Rate by Service",
        "query": "level: ERROR",
        "visualization": "bar_chart",
        "groupBy": ["service"],
        "timeRange": "1h"
      },
      {
        "title": "Top 10 Error Messages",
        "query": "level: ERROR",
        "visualization": "table",
        "fields": ["message", "count"],
        "sort": [{"count": "desc"}],
        "size": 10
      },
      {
        "title": "Request Latency Distribution",
        "query": "duration: *",
        "visualization": "histogram"
      },
      {
        "title": "Errors Over Time",
        "query": "level: ERROR",
        "visualization": "line_chart",
        "dateHistogram": "1m"
      }
    ]
  },
  "alerts": [
    {
      "name": "High Error Rate",
      "query": "level: ERROR",
      "threshold": 100,
      "window": "5m",
      "action": "slack"
    },
    {
      "name": "Critical Exceptions",
      "query": "level: FATAL",
      "threshold": 1,
      "window": "1m",
      "action": "email"
    }
  ]
}
### 5. Loki Configuration (Kubernetes)
loki-config.yaml
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: loki-config
  namespace: logging
data:
  # Full Loki configuration, mounted into the pod as loki-config.yaml
  loki-config.yaml: |
    auth_enabled: false

    server:
      http_listen_port: 3100

    ingester:
      chunk_idle_period: 3m
      chunk_retain_period: 1m
      max_chunk_age: 1h
      chunk_encoding: snappy
      chunk_target_size: 1048576

    limits_config:
      enforce_metric_name: false
      reject_old_samples: true
      reject_old_samples_max_age: 168h

    schema_config:
      configs:
        # Quoted so YAML does not coerce the value to a date object
        - from: "2020-05-15"
          store: boltdb-shipper
          object_store: filesystem
          schema: v11
          index:
            prefix: index_
            period: 24h

    storage_config:
      boltdb_shipper:
        active_index_directory: /loki/index
        cache_location: /loki/cache
        shared_store: filesystem
      filesystem:
        directory: /loki/chunks
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: loki
  namespace: logging
spec:
  replicas: 1
  selector:
    matchLabels:
      app: loki
  template:
    metadata:
      labels:
        app: loki
    spec:
      containers:
        - name: loki
          image: grafana/loki:2.8.0
          args:
            - -config.file=/etc/loki/loki-config.yaml
          ports:
            - containerPort: 3100
          volumeMounts:
            - name: loki-config
              mountPath: /etc/loki
            - name: loki-storage
              mountPath: /loki
      volumes:
        - name: loki-config
          configMap:
            name: loki-config
        - name: loki-storage
          # NOTE(review): emptyDir loses all chunks on pod restart;
          # use a PersistentVolumeClaim for durable storage.
          emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
  name: loki
  namespace: logging
spec:
  selector:
    app: loki
  ports:
    - port: 3100
      targetPort: 3100
### 6. Log Aggregation Deployment Script
#!/bin/bash
# deploy-logging.sh - Deploy the logging infrastructure (ELK + Filebeat) to Kubernetes.
set -euo pipefail

NAMESPACE="logging"
ENV="${1:-production}"

echo "Deploying logging stack to $ENV..."

# Create the namespace idempotently (apply instead of create)
kubectl create namespace "$NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -

# Deploy Elasticsearch and wait for the rollout to complete
echo "Deploying Elasticsearch..."
kubectl apply -f elasticsearch-deployment.yaml -n "$NAMESPACE"
kubectl rollout status deployment/elasticsearch -n "$NAMESPACE" --timeout=5m

# Deploy Logstash
echo "Deploying Logstash..."
kubectl apply -f logstash-deployment.yaml -n "$NAMESPACE"
kubectl rollout status deployment/logstash -n "$NAMESPACE" --timeout=5m

# Deploy Kibana
echo "Deploying Kibana..."
kubectl apply -f kibana-deployment.yaml -n "$NAMESPACE"
kubectl rollout status deployment/kibana -n "$NAMESPACE" --timeout=5m

# Deploy Filebeat as a DaemonSet (one log collector per node)
echo "Deploying Filebeat..."
kubectl apply -f filebeat-daemonset.yaml -n "$NAMESPACE"

# Wait for Elasticsearch pods to be Ready before configuring Kibana
echo "Waiting for all logging services..."
kubectl wait --for=condition=ready pod -l app=elasticsearch -n "$NAMESPACE" --timeout=300s

# Create the default index pattern.
# Use -i (not -it): this script has no TTY, and requesting one makes
# `kubectl exec` fail when run non-interactively (CI, cron).
echo "Setting up Kibana index pattern..."
kubectl exec -i -n "$NAMESPACE" svc/kibana -- curl -X POST \
  http://localhost:5601/api/saved_objects/index-pattern/logs \
  -H 'kbn-xsrf: true' \
  -H 'Content-Type: application/json' \
  -d '{"attributes":{"title":"logs-*","timeFieldName":"@timestamp"}}'

echo "Logging stack deployed successfully!"
echo "Kibana: http://localhost:5601"
## Best Practices

### ✅ DO
- Parse and structure log data
- Use appropriate log levels
- Add contextual information
- Implement log retention policies
- Set up log-based alerting
- Index important fields
- Use consistent timestamp formats
- Implement access controls

### ❌ DON'T
- Store sensitive data in logs
- Log at DEBUG level in production
- Send raw unstructured logs
- Ignore storage costs
- Skip log parsing
- Lack monitoring of log systems
- Store logs forever
- Log PII without encryption

## Resources
- Elasticsearch Documentation
- Logstash Documentation
- Kibana Documentation
- Loki Documentation