From cd2bbf09d84c7bc36046804f4b45b868b632040e Mon Sep 17 00:00:00 2001 From: Jihyeok Jeong Date: Wed, 16 Oct 2024 23:40:07 +0900 Subject: [PATCH 01/10] chore: Add Grafana, Prometheus, Node_exporter, Loki, Promtail images --- docker-compose.dev.yml | 64 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index e373e4c..75a9d64 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -37,9 +37,73 @@ services: depends_on: - backend + + grafana: + image: grafana/grafana-enterprise:11.2.1-ubuntu + container_name: grafana + volumes: + - ./backend/app/monitoring/grafana/provisioning/datasources:/etc/grafana/provisioning/datasources + - grafana_storage:/var/lib/grafana + env_file: + - ./backend/.env.monitoring + ports: + - "3001:3000" + networks: + - app-network + depends_on: + - backend + - prometheus + - loki + restart: unless-stopped + + prometheus: + image: prom/prometheus:v2.46.0 + container_name: prometheus + ports: + - "9090:9090" + networks: + - app-network + volumes: + - ./backend/app/monitoring/prometheus:/workdir/backend/app/monitoring/prometheus + command: + - "--config.file=/workdir/backend/app/monitoring/prometheus/prometheus.yml" + restart: unless-stopped + + node_exporter: + image: prom/node-exporter:v1.8.2 + container_name: node_exporter + ports: + - "9100:9100" + networks: + - app-network + + loki: + image: grafana/loki:2.8.0 + container_name: loki + ports: + - "3100:3100" + networks: + - app-network + volumes: + - ./backend/app/monitoring/loki:/workdir/backend/app/monitoring/loki + command: -config.file=/workdir/backend/app/monitoring/loki/local-config.yaml + restart: always + + promtail: + image: grafana/promtail:2.8.0 + container_name: promtail + networks: + - app-network + volumes: + - ./backend/app/monitoring/promtail:/workdir/backend/app/monitoring/promtail + - 
./backend/app/monitoring/logs:/workdir/backend/app/monitoring/logs + command: -config.file=/workdir/backend/app/monitoring/promtail/config.yml + restart: always + networks: app-network: driver: bridge volumes: pgdata: + grafana_storage: {} From 74216e142c1f43a70fa498b3aed3ddf45b5f1a31 Mon Sep 17 00:00:00 2001 From: Jihyeok Jeong Date: Wed, 16 Oct 2024 23:43:43 +0900 Subject: [PATCH 02/10] feat: Add logging code that records on a daily basis --- backend/.gitignore | 1 + backend/app/llm/langchain.py | 2 ++ backend/app/utils/logging.py | 27 ++++++++++++++++++++++++--- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/backend/.gitignore b/backend/.gitignore index 7dc4a48..387d3b7 100644 --- a/backend/.gitignore +++ b/backend/.gitignore @@ -28,6 +28,7 @@ alembic.ini # logging *.log +logs # security *.pem \ No newline at end of file diff --git a/backend/app/llm/langchain.py b/backend/app/llm/langchain.py index 6f2b992..4bf567a 100644 --- a/backend/app/llm/langchain.py +++ b/backend/app/llm/langchain.py @@ -16,6 +16,7 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter from app.llm.message_task import store +from app.utils.logging import logger def get_rag_chain(document_text: str): @@ -115,6 +116,7 @@ def get_langchain_response(user_message: str, chat_room_id: int): ) except Exception as e: + logger.error("Error generating answer: ", exc_info=e) raise RuntimeError(f"Error generating answer: {e}") diff --git a/backend/app/utils/logging.py b/backend/app/utils/logging.py index d1e4aef..c32c4a8 100644 --- a/backend/app/utils/logging.py +++ b/backend/app/utils/logging.py @@ -1,6 +1,10 @@ +import os import logging -from logging.handlers import RotatingFileHandler +from logging.handlers import TimedRotatingFileHandler from datetime import datetime, timedelta, timezone +from app.utils.file_system import PROJECT_ROOT +from app.utils.file_system import ensure_dir +from app.utils.file_system import combine_relative_path KST = 
timezone(timedelta(hours=9)) @@ -24,13 +28,30 @@ def format(self, record): return f"\n{divide}\n\n{log_message}" +def get_folder_path(target_path, dt=None, type='error'): + if dt is None: + dt = datetime.now(KST) + log_dir = combine_relative_path(PROJECT_ROOT, + target_path, + dt.strftime('%Y-%m-%d')) + ensure_dir(log_dir) + return log_dir + os.sep + type + '.log' + + LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" LOG_LEVEL = logging.DEBUG -LOG_FILE = "app.log" +LOG_FILE = get_folder_path('app/monitoring/logs/backend') formatter = KSTFormatter(LOG_FORMAT) -file_handler = RotatingFileHandler(LOG_FILE) +# 핸들러 설정 (날짜별 회전 설정) +file_handler = TimedRotatingFileHandler( + LOG_FILE, + when="midnight", + interval=1, + backupCount=30, + utc=False +) file_handler.setLevel(LOG_LEVEL) file_handler.setFormatter(formatter) From 93826f00d914b50935bffa90c514c65d8ffd6449 Mon Sep 17 00:00:00 2001 From: Jihyeok Jeong Date: Wed, 16 Oct 2024 23:47:11 +0900 Subject: [PATCH 03/10] feat: Add file system utilities module for project path management --- backend/app/utils/file_system.py | 54 ++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 backend/app/utils/file_system.py diff --git a/backend/app/utils/file_system.py b/backend/app/utils/file_system.py new file mode 100644 index 0000000..302b0f0 --- /dev/null +++ b/backend/app/utils/file_system.py @@ -0,0 +1,54 @@ +""" +File System Utilities Module + +This module provides file system-related utility functions +such as file and directory path management, creation and deletion +""" + +import os + + +def find_project_root(): + """프로젝트 루트 디렉토리를 찾기 위한 함수.""" + current_path = os.path.abspath(os.path.dirname(__file__)) + while current_path: + # 여기서 'pyproject.toml', 'setup.py', 또는 '.git' 등 프로젝트의 root임을 식별할 수 있는 파일이나 디렉터리를 확인합니다. 
+ if 'pyproject.toml' in os.listdir(current_path) or '.git' in os.listdir(current_path): + return current_path + parent_path = os.path.dirname(current_path) + if parent_path == current_path: # 루트 디렉토리에 도달했거나 더 이상 올라갈 곳이 없는 경우 + break + current_path = parent_path + return None # 만약 프로젝트 루트를 찾지 못하면 None을 반환 + + +PROJECT_ROOT = find_project_root() + + +def get_relative_path(relative_path): + """프로젝트 루트를 기준으로 상대 경로를 절대 경로로 변환.""" + if not PROJECT_ROOT: + raise EnvironmentError("프로젝트 루트를 찾을 수 없습니다.") + return os.path.join(PROJECT_ROOT, relative_path) + + +def get_current_module_path(): + """현재 이 모듈이 위치한 경로를 반환.""" + return os.path.dirname(os.path.abspath(__file__)) + + +def combine_relative_path(*args): + """여러 경로를 결합하고, 상대 경로를 절대 경로로 변환.""" + return os.path.abspath(os.path.join(*args)) + + +def ensure_dir(path): + """디렉토리가 존재하지 않으면 생성.""" + os.makedirs(path, exist_ok=True) + return path + + +def is_path_within_project(path): + """주어진 경로가 프로젝트 경로 내에 있는지 확인.""" + return os.path.commonpath([PROJECT_ROOT, os.path.abspath(path)]) == PROJECT_ROOT + From 72063a037427dee0c264d65c081205572bead6d5 Mon Sep 17 00:00:00 2001 From: Jihyeok Jeong Date: Wed, 16 Oct 2024 23:51:20 +0900 Subject: [PATCH 04/10] chore: Exclude Loki's chunks, compactor, rules, WAL, and TSDB-related directories from version control --- backend/.gitignore | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/backend/.gitignore b/backend/.gitignore index 387d3b7..0dc0396 100644 --- a/backend/.gitignore +++ b/backend/.gitignore @@ -31,4 +31,12 @@ alembic.ini logs # security -*.pem \ No newline at end of file +*.pem + +# loki +chunks +compactor +rules +tsdb-shipper-active +tsdb-shipper-cache +wal \ No newline at end of file From b628990474a2bd5381700d9d91623cde1d2f5b8c Mon Sep 17 00:00:00 2001 From: Jihyeok Jeong Date: Wed, 16 Oct 2024 23:52:15 +0900 Subject: [PATCH 05/10] feat: Add configuration code for Grafana dashboards --- backend/app/main.py | 1 + 
.../provisioning/datasources/datasources.yaml | 27 +++++++++++++++ backend/app/monitoring/loki/local-config.yaml | 33 +++++++++++++++++++ .../app/monitoring/prometheus/prometheus.yml | 22 +++++++++++++ backend/app/monitoring/promtail/config.yml | 32 ++++++++++++++++++ .../app/monitoring/promtail/positions.yaml | 4 +++ 6 files changed, 119 insertions(+) create mode 100644 backend/app/monitoring/grafana/provisioning/datasources/datasources.yaml create mode 100644 backend/app/monitoring/loki/local-config.yaml create mode 100644 backend/app/monitoring/prometheus/prometheus.yml create mode 100644 backend/app/monitoring/promtail/config.yml create mode 100644 backend/app/monitoring/promtail/positions.yaml diff --git a/backend/app/main.py b/backend/app/main.py index 5322007..3bb225b 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -19,6 +19,7 @@ async def lifespan(app: FastAPI): origins = [ "http://localhost:3000", + "http://localhost:3001", ] diff --git a/backend/app/monitoring/grafana/provisioning/datasources/datasources.yaml b/backend/app/monitoring/grafana/provisioning/datasources/datasources.yaml new file mode 100644 index 0000000..89f53b8 --- /dev/null +++ b/backend/app/monitoring/grafana/provisioning/datasources/datasources.yaml @@ -0,0 +1,27 @@ +apiVersion: 1 + +datasources: +- name: Loki + access: proxy + type: loki + url: http://loki:3100 + isDefault: false + database: '' + user: '' + password: '' + basicAuth: false + id: 1 + orgId: 1 + readOnly: false + jsonData: + keepCookies: [] + typeLogoUrl: public/app/plugins/datasource/loki/img/loki_icon.svg +- name: Prometheus + access: proxy + type: prometheus + url: http://prometheus:9090 + isDefault: true + orgId: 1 + editable: false + jsonData: + timeInterval: 10s \ No newline at end of file diff --git a/backend/app/monitoring/loki/local-config.yaml b/backend/app/monitoring/loki/local-config.yaml new file mode 100644 index 0000000..2df8a70 --- /dev/null +++ b/backend/app/monitoring/loki/local-config.yaml 
@@ -0,0 +1,33 @@ +# 사용자 인증 활성화 여부 +auth_enabled: false + +# Loki 서버의 포트번호 +server: + http_listen_port: 3100 + +# Loki 인스턴스의 주소 +common: + instance_addr: 127.0.0.1 + path_prefix: /workdir/backend/app/monitoring/loki + storage: + filesystem: # 파일시스템 기반 저장소를 사용 + chunks_directory: /workdir/backend/app/monitoring/loki/chunks + rules_directory: /workdir/backend/app/monitoring/loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +# Loki의 스키마 설정 +schema_config: + configs: + - from: 2020-10-24 + store: tsdb + object_store: filesystem + schema: v12 + index: + prefix: index_ + period: 24h + +ruler: + alertmanager_url: http://localhost:9093 diff --git a/backend/app/monitoring/prometheus/prometheus.yml b/backend/app/monitoring/prometheus/prometheus.yml new file mode 100644 index 0000000..c23e2f3 --- /dev/null +++ b/backend/app/monitoring/prometheus/prometheus.yml @@ -0,0 +1,22 @@ +global: + scrape_interval: 15s # 15초마다 데이터 가지고옴 / default = 1m + scrape_timeout: 15s # 데이터를 가지고 오는 작업을 15초간 기다릴 수 있음 / default = 10s + evaluation_interval: 2m # 알람 규칙이 평가되는 빈도 (2분에 한번) / default = 1m + + external_labels: + monitor: 'backend-monitor' + +scrape_configs: + - job_name: 'backend' + scrape_interval: 15s + scrape_timeout: 10s + honor_labels: false + honor_timestamps: false + scheme: 'https' + static_configs: + - targets: ['backend:8000'] + + - job_name: 'node_exporter' + scrape_interval: 5s + static_configs: + - targets: ['node_exporter:9100'] diff --git a/backend/app/monitoring/promtail/config.yml b/backend/app/monitoring/promtail/config.yml new file mode 100644 index 0000000..cbbea32 --- /dev/null +++ b/backend/app/monitoring/promtail/config.yml @@ -0,0 +1,32 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /workdir/backend/app/monitoring/promtail/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: backend + static_configs: + - targets: + - backend:8000 + labels: + job: backend_logs 
+ __path__: /workdir/backend/app/monitoring/logs/backend/**/**/*.log + - job_name: frontend + static_configs: + - targets: + - frontend:3000 + labels: + job: frontend_logs + __path__: /workdir/backend/app/monitoring/logs/frontend/**/*.log + - job_name: db + static_configs: + - targets: + - db:5432 + labels: + job: db_logs + __path__: /workdir/backend/app/monitoring/logs/db/**/*.log \ No newline at end of file diff --git a/backend/app/monitoring/promtail/positions.yaml b/backend/app/monitoring/promtail/positions.yaml new file mode 100644 index 0000000..326926e --- /dev/null +++ b/backend/app/monitoring/promtail/positions.yaml @@ -0,0 +1,4 @@ +positions: + /workdir/backend/app/monitoring/logs/backend/2024-10-14/error.log: "1962" + /workdir/backend/app/monitoring/logs/backend/2024-10-15/error.log: "5798" + /workdir/backend/app/monitoring/logs/backend/2024-10-16/error.log: "7285" From fa588b8c6ce6773520e32302f165c211e72d070c Mon Sep 17 00:00:00 2001 From: Jihyeok Jeong Date: Tue, 5 Nov 2024 03:38:37 +0900 Subject: [PATCH 06/10] feat: Add Backend Observability --- backend/app/main.py | 36 +- backend/app/metrics/prometheus_metrics.py | 6 + .../dashboards/back_observability.json | 645 ++++++++++++++++++ backend/app/monitoring/promtail/config.yml | 18 +- backend/app/routers/metrics.py | 12 + backend/pyproject.toml | 2 + 6 files changed, 716 insertions(+), 3 deletions(-) create mode 100644 backend/app/metrics/prometheus_metrics.py create mode 100644 backend/app/monitoring/grafana/provisioning/dashboards/back_observability.json create mode 100644 backend/app/routers/metrics.py diff --git a/backend/app/main.py b/backend/app/main.py index 3bb225b..572eb3a 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1,11 +1,13 @@ +import time from fastapi import FastAPI, HTTPException, Request from fastapi.responses import JSONResponse from fastapi.middleware.cors import CORSMiddleware from contextlib import asynccontextmanager from app.db.database import init_db from 
app.utils.logging import log_exception -from app.routers import auth, me, folders, chatrooms, messages, links, rating - +from app.routers import auth, me, folders, chatrooms, messages, links, rating, metrics +from app.metrics.prometheus_metrics import REQUEST_COUNT, REQUEST_DURATION, RESPONSE_STATUS +from prometheus_fastapi_instrumentator import Instrumentator @asynccontextmanager async def lifespan(app: FastAPI): @@ -31,6 +33,35 @@ async def lifespan(app: FastAPI): allow_headers=["*"], ) +instrumentator = Instrumentator().instrument(app) +instrumentator.expose(app) + +@app.middleware("http") +async def metrics_middleware(request: Request, call_next): + start_time = time.time() + response = await call_next(request) + process_time = time.time() - start_time + + # 요청 처리 시간 기록 + response.headers["X-Process-Time"] = str(process_time) + + # 요청 메트릭을 수집 (HTTP 메서드, 요청 URL) + method = request.method + path = request.url.path + + # 메트릭 수집 + method = request.method + path = request.url.path + status_code = response.status_code + + REQUEST_COUNT.labels(method=method, path=path).inc() + REQUEST_DURATION.labels(method=method, path=path).observe(process_time) + RESPONSE_STATUS.labels(method=method, path=path, status_code=status_code).inc() + + response.headers["X-Process-Time"] = str(process_time) + + return response + @app.exception_handler(HTTPException) async def custom_http_exception_handler(req: Request, exc: HTTPException): @@ -52,3 +83,4 @@ async def custom_http_exception_handler(req: Request, exc: HTTPException): app.include_router(messages.router, prefix=prefix) app.include_router(links.router, prefix=prefix) app.include_router(rating.router, prefix=prefix) +app.include_router(metrics.router) diff --git a/backend/app/metrics/prometheus_metrics.py b/backend/app/metrics/prometheus_metrics.py new file mode 100644 index 0000000..feb2642 --- /dev/null +++ b/backend/app/metrics/prometheus_metrics.py @@ -0,0 +1,6 @@ +from prometheus_client import Counter, Histogram, Gauge + +# 
Prometheus 메트릭 정의 (backend) +REQUEST_COUNT = Counter("request_count", "Total request count", ["method", "path"]) +REQUEST_DURATION = Histogram("request_duration_seconds", "Request duration", ["method", "path"]) +RESPONSE_STATUS = Counter('http_response_status', 'HTTP Response Status Codes', ['method', 'path', 'status_code']) diff --git a/backend/app/monitoring/grafana/provisioning/dashboards/back_observability.json b/backend/app/monitoring/grafana/provisioning/dashboards/back_observability.json new file mode 100644 index 0000000..57640ed --- /dev/null +++ b/backend/app/monitoring/grafana/provisioning/dashboards/back_observability.json @@ -0,0 +1,645 @@ +{ + "id": null, + "uid": "Backend-Observability", + "title": "Backend Observability", + "tags": ["Prometheus", "Loki"], + "timezone": "browser", + "weekStart": "monday", + "fiscalYearStartMonth": 10, + "timepicker": { + "refresh_intervals": ["30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d", "7d"] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "editable": true, + "graphTooltip": 2, + "panels": [ + { + "id": 8, + "title": "HTTP Request", + "type": "row", + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + } + }, + { + "id": 14, + "title": "HTTP Status code", + "type": "piechart", + "gridPos": { + "x": 0, + "y": 1, + "w": 6, + "h": 7 + }, + "datasource": { + "default": true, + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.2.1", + "targets": [ + { + "datasource": { + "type": 
"prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "http_response_status_total{job=\"backend\", path!~\"/(metrics/backend|metrics/postgres)\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{status_code}} {{path}}", + "range": true, + "refId": "A", + "useBackend": false + } + ] + }, + { + "id": 10, + "title": "HTTP Request Duration", + "type": "timeseries", + "gridPos": { + "x": 6, + "y": 1, + "w": 9, + "h": 7 + }, + "datasource": { + "default": true, + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "http_request_duration_seconds_sum{job=\"backend\", handler!=\"/metrics\"}", + "fullMetaSearch": false, + 
"includeNullMetadata": true, + "instant": false, + "legendFormat": "{{method}} {{handler}}", + "range": true, + "refId": "A", + "useBackend": false + } + ] + }, + { + "id": 7, + "title": "Total Request Count", + "type": "bargauge", + "gridPos": { + "x": 15, + "y": 1, + "w": 9, + "h": 7 + }, + "datasource": { + "default": true, + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "options": { + "displayMode": "lcd", + "minVizHeight": 10, + "minVizWidth": 0, + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "valueMode": "color" + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "request_count_total{job=\"backend\", path!~\"/(metrics/backend|metrics/postgres)\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{method}} {{path}} {{status}}", + "range": true, + "refId": "A", + "useBackend": false + } + ] + }, + { + "id": 
4, + "title": "Metrics", + "type": "row", + "collapsed": false, + "gridPos": { + "x": 0, + "y": 9, + "w": 24, + "h": 1 + }, + "panels": [] + }, + { + "id": 6, + "title": "CPU usage", + "type": "timeseries", + "gridPos": { + "x": 0, + "y": 10, + "w": 10, + "h": 8 + }, + "datasource": { + "default": true, + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", + "instant": false, + "legendFormat": "CPU", + "range": true, + "refId": "A" + } + ] + }, + { + "id": 5, + "title": "Memory usage", + "type": "timeseries", + "gridPos": { + "x": 10, + "y": 10, + "w": 9, + "h": 8 + }, + "datasource": { + "default": true, + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": 
{ + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "(node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Buffers_bytes - node_memory_Cached_bytes) / node_memory_MemTotal_bytes * 100", + "legendFormat": "RAM", + "range": true, + "refId": "A" + } + ] + }, + { + "id": 1, + "title": "All logs", + "type": "row", + "collapsed": false, + "gridPos": { + "x": 0, + "y": 18, + "w": 24, + "h": 1 + } + }, + { + "id": 3, + "title": "FastAPI Logs", + "type": "logs", + "datasource": { + "default": false, + "type": "loki", + "uid": "P8E80F9AEF21F6940" + }, + "gridPos": { + "x": 0, + "y": 19, + "w": 24, + "h": 9 + }, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { 
+ "datasource": { + "type": "loki", + "uid": "P8E80F9AEF21F6940" + }, + "editorMode": "builder", + "expr": "{job=\"backend_logs\"} |= ``", + "queryType": "range", + "refId": "A" + } + ] + } + ], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "uid": "Loki" + }, + "definition": "", + "hide": 0, + "includeAll": true, + "label": "Log Time", + "multi": true, + "name": "LogTime", + "options": [], + "query": "label_values(time)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "uid": "Prometheus" + }, + "definition": "", + "hide": 0, + "includeAll": true, + "label": "File Path", + "multi": true, + "name": "FilePath", + "options": [], + "query": "label_values(file_path)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "uid": "Loki" + }, + "definition": "", + "hide": 0, + "includeAll": true, + "label": "Log Level", + "multi": true, + "name": "LogLevel", + "options": [], + "query": "label_values(level)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "refresh": "5s", + "schemaVersion": 38, + "version": 1, + "style": "light", + "links": [] +} \ No newline at end of file diff --git a/backend/app/monitoring/promtail/config.yml b/backend/app/monitoring/promtail/config.yml index cbbea32..d427af1 100644 --- a/backend/app/monitoring/promtail/config.yml +++ b/backend/app/monitoring/promtail/config.yml @@ -16,13 +16,29 @@ scrape_configs: labels: job: backend_logs __path__: /workdir/backend/app/monitoring/logs/backend/**/**/*.log + pipeline_stages: + - multiline: + firstline: 
'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}' + max_wait_time: 3s + - regex: + expression: '^(?P