From 0fb2c721f0484ad9137878dac3e28b8c136a169c Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Thu, 13 Nov 2025 20:12:45 +0300 Subject: [PATCH 01/41] fix: try to fix client timeout --- .gitignore | 2 +- Dockerfile | 2 +- README.md | 15 +- .../dashboards/ServicesStatistic.json | 691 ++++++++++++------ pom.xml | 22 +- prometheus/prometheus.yml | 8 +- .../ru/quipy/apigateway/APIController.kt | 24 +- .../apigateway/GlobalExceptionHandler.kt | 53 ++ .../common/utils/SlidingWindowRateLimiter.kt | 14 +- .../exceptions/TooLongRequestEcexption.kt | 4 + .../exceptions/TooManyRequestsException.kt | 3 + .../TooManyRequestsRetriableException.kt | 4 + .../orders/subscribers/PaymentSubscriber.kt | 3 + .../payments/config/PaymentAccountsConfig.kt | 16 +- .../ru/quipy/payments/logic/OrderPayer.kt | 8 +- .../logic/PaymentExternalServiceImpl.kt | 106 ++- src/main/resources/application.properties | 17 +- test-local-run.http | 2 +- test-on-prem-run.http | 8 +- 19 files changed, 721 insertions(+), 281 deletions(-) create mode 100644 src/main/kotlin/ru/quipy/apigateway/GlobalExceptionHandler.kt create mode 100644 src/main/kotlin/ru/quipy/exceptions/TooLongRequestEcexption.kt create mode 100644 src/main/kotlin/ru/quipy/exceptions/TooManyRequestsException.kt create mode 100644 src/main/kotlin/ru/quipy/exceptions/TooManyRequestsRetriableException.kt diff --git a/.gitignore b/.gitignore index 259113f73..ea59400a3 100644 --- a/.gitignore +++ b/.gitignore @@ -32,4 +32,4 @@ build/ !**/src/test/**/build/ ### VS Code ### -.vscode/ +.vscode/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index cc6f2e042..3d9587cd0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ RUN mvn dependency:go-offline COPY src src RUN mvn package -FROM openjdk:17-jdk-slim +FROM eclipse-temurin:17-alpine-3.22 COPY --from=build /app/target/*.jar /high-load-course.jar diff --git a/README.md b/README.md index 56d36970f..bc19c4280 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,9 @@ # Template for the HighLoad course + This project is based on [Tiny Event Sourcing library](https://github.com/andrsuh/tiny-event-sourcing) ### Run PostgreSql + This example uses Postgres as an implementation of the Event store. You can see it in `pom.xml`: ``` @@ -12,7 +14,8 @@ This example uses Postgres as an implementation of the Event store. You can see ``` -Thus, you have to run Postgres in order to test this example. Postgres service is included in `docker-compose` file that we have in the root of the project. +Thus, you have to run Postgres in order to test this example. Postgres service is included in `docker-compose` file +that we have in the root of the project. # More comprehensive information about the course, project, how to run tests is here: @@ -21,19 +24,25 @@ https://andrsuh.notion.site/2595d535059281d8a815c2cb3875c376?source=copy_link https://andrsuh.notion.site/2625d5350592801aaf88c7c95302d10c?source=copy_link ### Run the infrastructure + Set of the services you need to start developing and testing process is following: -- Bombardier - service that is in charge of emulation the store's clients activity (creates the incoming load). Also serves as a third-party payment system. + +- Bombardier - service that is in charge of emulation the store's clients activity (creates the incoming load). Also + serves as a third-party payment system. - Postgres DBMS - Prometheus + Grafana - metrics collection and visualization services You can run all beforementioned services by the following command: + ``` docker compose -f docker-compose.yml up ``` ### Run the application -To make the application run you can start the main class `OnlineShopApplication`. It is not being launched as a docker contained to simplify and speed up the devevopment process as it is easier for you to refactor the application and re-run it immediately in the IDE. +To make the application run you can start the main class `OnlineShopApplication`. It is not being launched as a docker +contained to simplify and speed up the devevopment process as it is easier for you to refactor the application and +re-run it immediately in the IDE. ### If you want to pull changes from the main repository into your fork diff --git a/grafana/provisioning/dashboards/ServicesStatistic.json b/grafana/provisioning/dashboards/ServicesStatistic.json index 684b97269..eb6f028d2 100644 --- a/grafana/provisioning/dashboards/ServicesStatistic.json +++ b/grafana/provisioning/dashboards/ServicesStatistic.json @@ -1345,7 +1345,6 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "exemplar": true, "expr": "rate(stage_duration_ok_sum{service=~\"$service\", result=\"success\"}[1m]) / rate(stage_duration_ok_count{service=~\"$service\", result=\"success\"}[1m])", "interval": "", "legendFormat": "{{stage}}", @@ -1785,7 +1784,7 @@ "exemplar": true, "expr": "rate(external_sys_duration_count{service=~\"$service\"}[30s])", "hide": false, - "interval": "", + "instant": false, "legendFormat": "{{accountName}} - {{outcome}}", "range": true, "refId": "C" @@ -1923,7 +1922,7 @@ "exemplar": true, "expr": "rate(external_sys_duration_sum{service=~\"$service\"}[1m]) / rate(external_sys_duration_count{service=~\"$service\"}[1m])", "hide": false, - "interval": "", + "instant": false, "legendFormat": "{{accountName}} - {{outcome}}", "range": true, "refId": "C" @@ -2024,37 +2023,13 @@ } } ] - }, - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "payOrder - " - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] } ] }, "gridPos": { "h": 8, "w": 12, - "x": 12, + "x": 0, "y": 108 }, "id": 43, @@ -2065,9 +2040,7 @@ ], "displayMode": "table", "placement": "right", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true + "showLegend": true }, "tooltip": { "mode": "single", @@ -2082,7 +2055,7 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "rate(http_request_latent_sum{service=~\"$service\"}[15s]) / rate(http_request_latent_count{service=~\"$service\"}[15s])", + "expr": "sum(rate(http_request_latent_sum{service=~\"$service\"}[15s]) / rate(http_request_latent_count{service=~\"$service\"}[15s]))", "hide": false, "instant": false, "legendFormat": "{{method}} - {{result}}", @@ -2390,7 +2363,6 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "exemplar": true, "expr": "sum by (name) (executor_active_threads{})", "interval": "", "legendFormat": "{{name}}", @@ -2669,20 +2641,18 @@ "h": 8, "w": 12, "x": 0, - "y": 38 + "y": 46 }, - "id": 95, + "id": 41, "options": { "legend": { - "calcs": [ - "max" - ], + "calcs": [], "displayMode": "table", "placement": "right", "showLegend": true }, "tooltip": { - "mode": "multi", + "mode": "single", "sort": "none" } }, @@ -2694,34 +2664,34 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum by (job) (system_cpu_usage{service=~\"$service\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{job}} - System CPU Usage", + "expr": "sum by (job) (process_files_open_files{})", + "instant": false, + "legendFormat": "{{job}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum by (job) (process_cpu_usage{service=~\"$service\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{job}} -Process CPU Usage", - "range": true, - "refId": "B" } ], - "title": "CPU Usage", + "title": "Open files", "type": "timeseries" - }, + } + ], + "title": "JVM metrics", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 97, + "panels": [ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "P1C574E4B1E20B3B3" }, "fieldConfig": { "defaults": { @@ -2737,8 +2707,8 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 17, - "gradientMode": "opacity", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, @@ -2768,23 +2738,29 @@ { "color": "green", "value": null + }, + { + "color": "red", + "value": 80 } ] - }, - "unit": "s" + } }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 38 + "x": 0, + "y": 7 }, - "id": 42, + "id": 98, + "maxDataPoints": 100, "options": { "legend": { - "calcs": [], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "right", "showLegend": true @@ -2799,23 +2775,25 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "P1C574E4B1E20B3B3" }, "editorMode": "code", - "expr": "sum(rate(jvm_gc_pause_seconds_sum{}[5m])) by (job)", - "instant": false, + "expr": "sum by (job) (jetty_threads_config_max{})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "GC delay", + "title": "Max Threads", "type": "timeseries" }, { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "P1C574E4B1E20B3B3" }, "fieldConfig": { "defaults": { @@ -2831,8 +2809,8 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 17, - "gradientMode": "opacity", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, @@ -2862,23 +2840,29 @@ { "color": "green", "value": null + }, + { + "color": "red", + "value": 80 } ] - }, - "unit": "short" + } }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 46 + "x": 12, + "y": 7 }, - "id": 41, + "id": 99, + "maxDataPoints": 100, "options": { "legend": { - "calcs": [], + "calcs": [ + "lastNotNull" + ], "displayMode": "table", "placement": "right", "showLegend": true @@ -2893,37 +2877,25 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "P1C574E4B1E20B3B3" }, "editorMode": "code", - "expr": "sum by (job) (process_files_open_files{})", - "instant": false, + "expr": "sum by (job) (jetty_threads_config_min{service=~\"$service\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, "legendFormat": "{{job}}", "range": true, "refId": "A" } ], - "title": "Open files", + "title": "Thread Config Min", "type": "timeseries" - } - ], - "title": "JVM metrics", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 6 - }, - "id": 97, - "panels": [ + }, { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "P1C574E4B1E20B3B3" }, "fieldConfig": { "defaults": { @@ -2939,7 +2911,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2953,7 +2925,7 @@ "scaleDistribution": { "type": "linear" }, - "showPoints": "auto", + "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", @@ -2963,6 +2935,7 @@ "mode": "off" } }, + "links": [], "mappings": [], "thresholds": { "mode": "absolute", @@ -2976,18 +2949,18 @@ "value": 80 } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 12, + "w": 24, "x": 0, - "y": 7 + "y": 15 }, - "id": 98, - "maxDataPoints": 100, + "id": 24, "options": { "legend": { "calcs": [ @@ -2998,7 +2971,7 @@ "showLegend": true }, "tooltip": { - "mode": "single", + "mode": "multi", "sort": "none" } }, @@ -3010,16 +2983,52 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum by (job) (jetty_threads_config_max{})", + "expr": "sum by (job) (jetty_threads_busy{})", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "{{job}}", + "legendFormat": "{{job}} - busy ", "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum by (job) (jetty_threads_idle{})", + "interval": "", + "legendFormat": "{{job}} - idle", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum by (job) (jetty_threads_current{})", + "interval": "", + "legendFormat": "{{job}} - current", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum by (job) (jetty_threads_jobs{})", + "interval": "", + "legendFormat": "{{job}} - jobs", + "range": true, + "refId": "D" } ], - "title": "Max Threads", + "title": "Jetty Threads", "type": "timeseries" }, { @@ -3041,7 +3050,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3055,16 +3064,17 @@ "scaleDistribution": { "type": "linear" }, - "showPoints": "auto", + "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, + "links": [], "mappings": [], "thresholds": { "mode": "absolute", @@ -3078,29 +3088,27 @@ "value": 80 } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 7 + "x": 0, + "y": 23 }, - "id": 99, - "maxDataPoints": 100, + "id": 4, "options": { "legend": { - "calcs": [ - "lastNotNull" - ], + "calcs": [], "displayMode": "table", "placement": "right", "showLegend": true }, "tooltip": { - "mode": "single", + "mode": "multi", "sort": "none" } }, @@ -3112,16 +3120,15 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum by (job) (jetty_threads_config_min{service=~\"$service\"})", + "expr": "irate(http_server_requests_seconds_count{service=~\"$service\", uri!~\".*actuator.*\"}[5m])", "format": "time_series", - "interval": "", "intervalFactor": 1, - "legendFormat": "{{job}}", + "legendFormat": "{{method}} [{{status}}] - {{uri}}", "range": true, "refId": "A" } ], - "title": "Thread Config Min", + "title": "Request Count", "type": "timeseries" }, { @@ -3161,13 +3168,12 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", @@ -3188,15 +3194,15 @@ }, "gridPos": { "h": 8, - "w": 24, - "x": 0, - "y": 15 + "w": 12, + "x": 12, + "y": 23 }, - "id": 24, + "id": 96, "options": { "legend": { "calcs": [ - "lastNotNull" + "max" ], "displayMode": "table", "placement": "right", @@ -3215,52 +3221,125 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum by (job) (jetty_threads_busy{})", + "expr": "sum by (job, method, status, uri) (irate(http_server_requests_seconds_sum{service=~\"$service\", exception=\"None\", uri!~\".*actuator.*\"}[5m]) / irate(http_server_requests_seconds_count{service=~\"$service\", exception=\"None\", uri!~\".*actuator.*\"}[5m]))", "format": "time_series", - "interval": "", "intervalFactor": 1, - "legendFormat": "{{job}} - busy ", + "legendFormat": "{{method}} [{{status}}] - {{uri}} - {{job}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + } + ], + "title": "Response Time", + "type": "timeseries" + } + ], + "title": "Jetty Statistics", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 101, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "История изменения количества задач в очереди", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "editorMode": "code", - "expr": "sum by (job) (jetty_threads_idle{})", - "interval": "", - "legendFormat": "{{job}} - idle", - "range": true, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Задачи в очереди", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, - "editorMode": "code", - "expr": "sum by (job) (jetty_threads_current{})", - "interval": "", - "legendFormat": "{{job}} - current", - "range": true, - "refId": "C" + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 16, + "x": 8, + "y": 8 + }, + "id": 103, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.0.0", + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "code", - "expr": "sum by (job) (jetty_threads_jobs{})", - "interval": "", - "legendFormat": "{{job}} - jobs", - "range": true, - "refId": "D" + "expr": "payment_processing_planned_total - payment_processing_started_total", + "legendFormat": "{{accountName}}", + "refId": "A" } ], - "title": "Jetty Threads", + "title": "История задач в очереди", "type": "timeseries" }, { @@ -3268,6 +3347,7 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "description": "Скорость входа в ожидание и выхода из ожидания (requests/sec)", "fieldConfig": { "defaults": { "color": { @@ -3277,10 +3357,9 @@ "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", - "axisLabel": "", + "axisLabel": "Запросов/сек", "axisPlacement": "auto", "barAlignment": 0, - "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", @@ -3290,8 +3369,8 @@ "viz": false }, "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, + "lineInterpolation": "smooth", + "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" @@ -3306,7 +3385,6 @@ "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", @@ -3314,29 +3392,60 @@ { "color": "green", "value": null - }, - { - "color": "red", - "value": 80 } ] }, - "unit": "none" + "unit": "reqps" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Скорость увеличения очереди" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Скорость числа задач в обработке" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 23 + "y": 16 }, - "id": 4, + "id": 104, "options": { "legend": { - "calcs": [], + "calcs": [ + "mean", + "lastNotNull", + "max" + ], "displayMode": "table", - "placement": "right", + "placement": "bottom", "showLegend": true }, "tooltip": { @@ -3344,23 +3453,37 @@ "sort": "none" } }, - "pluginVersion": "11.4.0", + "pluginVersion": "9.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "code", - "expr": "irate(http_server_requests_seconds_count{service=~\"$service\", uri!~\".*actuator.*\"}[5m])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{method}} [{{status}}] - {{uri}}", - "range": true, + "expr": "rate(payment_processing_planned_total[1m])", + "legendFormat": "Попало в очередь: {{accountName}}", "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "rate(payment_processing_started_total[1m])", + "legendFormat": "Приняты в обработку (вышли из очереди): {{accountName}}", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "rate(payment_processing_completed_total[1m])", + "legendFormat": "Обработаны: {{accountName}}", + "refId": "C" } ], - "title": "Request Count", + "title": "Throughput (в очереди vs в обработке vs обработаны)", "type": "timeseries" }, { @@ -3368,6 +3491,7 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "description": "Если значение > 0, задачи накапливаются быстрее, чем обрабатываются", "fieldConfig": { "defaults": { "color": { @@ -3375,23 +3499,22 @@ }, "custom": { "axisBorderShow": false, - "axisCenteredZero": false, + "axisCenteredZero": true, "axisColorMode": "text", - "axisLabel": "", + "axisLabel": "Δ запросов/сек", "axisPlacement": "auto", "barAlignment": 0, - "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", + "fillOpacity": 30, + "gradientMode": "scheme", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, + "lineInterpolation": "smooth", + "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" @@ -3403,10 +3526,9 @@ "mode": "none" }, "thresholdsStyle": { - "mode": "off" + "mode": "area" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", @@ -3417,11 +3539,11 @@ }, { "color": "red", - "value": 80 + "value": 0.1 } ] }, - "unit": "s" + "unit": "short" }, "overrides": [] }, @@ -3429,16 +3551,17 @@ "h": 8, "w": 12, "x": 12, - "y": 23 + "y": 16 }, - "id": 96, + "id": 105, "options": { "legend": { "calcs": [ - "max" + "mean", + "lastNotNull" ], "displayMode": "table", - "placement": "right", + "placement": "bottom", "showLegend": true }, "tooltip": { @@ -3446,30 +3569,165 @@ "sort": "none" } }, - "pluginVersion": "11.4.0", + "pluginVersion": "9.0.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "code", - "expr": "sum by (job, method, status, uri) (irate(http_server_requests_seconds_sum{service=~\"$service\", exception=\"None\", uri!~\".*actuator.*\"}[5m]) / irate(http_server_requests_seconds_count{service=~\"$service\", exception=\"None\", uri!~\".*actuator.*\"}[5m]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{method}} [{{status}}] - {{uri}} - {{job}}", - "range": true, + "expr": "rate(payment_processing_planned_total[1m]) - rate(payment_processing_started_total[1m])", + "legendFormat": "{{accountName}}", "refId": "A" } ], - "title": "Response Time", + "title": "Скорость накопления задач (увеличения очереди)", "type": "timeseries" } ], - "title": "Jetty Statistics", + "title": "Payment Processing - Active Tasks", "type": "row" - } - ], + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 108, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.75, sum(rate(payment_external_system_request_latency_seconds_bucket[1m])) by (le, accountName))", + "legendFormat": "p75 {{accountName}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.80, sum(rate(payment_external_system_request_latency_seconds_bucket[1m])) by (le, accountName))", + "legendFormat": "p80 {{accountName}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.85, sum(rate(payment_external_system_request_latency_seconds_bucket[1m])) by (le, accountName))", + "legendFormat": "p85 {{accountName}}", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.90, sum(rate(payment_external_system_request_latency_seconds_bucket[1m])) by (le, accountName))", + "hide": false, + "legendFormat": "p90 {{accountName}}", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(payment_external_system_request_latency_seconds_bucket[1m])) by (le, accountName))", + "hide": false, + "legendFormat": "p95 {{accountName}}", + "range": true, + "refId": "E" + } + ], + "title": "Payment Processing Latency", + "type": "timeseries" + }], "preload": false, "refresh": "5s", "schemaVersion": 40, @@ -3516,4 +3774,5 @@ "uid": "KVr-Vmpnz", "version": 4, "weekStart": "" -} \ No newline at end of file +} + diff --git a/pom.xml b/pom.xml index 5724b2568..e244cb6aa 100644 --- a/pom.xml +++ b/pom.xml @@ -1,5 +1,5 @@ - 4.0.0 @@ -15,7 +15,7 @@ OnlineShop Application for resilience and highly-loaded applications course - + 2.2.0 1.9.0 4.12.0 @@ -25,7 +25,7 @@ 3.1.8 - + io.github.resilience4j resilience4j-ratelimiter @@ -67,15 +67,15 @@ ${jetty.version} - - - - + + + + - - - - + + + + com.fasterxml.jackson.module diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml index 936c7edd9..ce045546f 100644 --- a/prometheus/prometheus.yml +++ b/prometheus/prometheus.yml @@ -6,16 +6,16 @@ scrape_configs: - job_name: 'bombardier-docker-network-job' metrics_path: '/actuator/prometheus' static_configs: - - targets: ['bombardier:1234'] + - targets: [ 'bombardier:1234' ] - job_name: 'bombardier-host-job' metrics_path: '/actuator/prometheus' static_configs: - - targets: ['host.docker.internal:1234'] + - targets: [ 'host.docker.internal:1234' ] - job_name: 'online-store-job' metrics_path: '/actuator/prometheus' static_configs: - - targets: ['host.docker.internal:8081'] + - targets: [ 'host.docker.internal:8081' ] - job_name: 'online-shop-job' metrics_path: '/actuator/prometheus' static_configs: - - targets: ['host.docker.internal:18081'] \ No newline at end of file + - targets: [ 'host.docker.internal:18081' ] \ No newline at end of file diff --git a/src/main/kotlin/ru/quipy/apigateway/APIController.kt b/src/main/kotlin/ru/quipy/apigateway/APIController.kt index 6f23fa18d..fff5ac386 100644 --- a/src/main/kotlin/ru/quipy/apigateway/APIController.kt +++ b/src/main/kotlin/ru/quipy/apigateway/APIController.kt @@ -2,23 +2,27 @@ package ru.quipy.apigateway import org.slf4j.Logger import org.slf4j.LoggerFactory -import org.springframework.beans.factory.annotation.Autowired +import org.springframework.beans.factory.annotation.Qualifier import org.springframework.web.bind.annotation.* +import ru.quipy.common.utils.LeakingBucketRateLimiter +import ru.quipy.common.utils.RateLimiter +import ru.quipy.exceptions.TooManyRequestsException +import ru.quipy.exceptions.TooManyRequestsRetriableException import ru.quipy.orders.repository.OrderRepository import ru.quipy.payments.logic.OrderPayer +import java.time.Duration import java.util.* @RestController -class APIController { +class APIController( + private val orderRepository: OrderRepository, + private val orderPayer: OrderPayer, + @field:Qualifier("parallelLimiter") + private val rateLimiter: RateLimiter = LeakingBucketRateLimiter(8, Duration.ofSeconds(1), 38) +) { val logger: Logger = LoggerFactory.getLogger(APIController::class.java) - @Autowired - private lateinit var orderRepository: OrderRepository - - @Autowired - private lateinit var orderPayer: OrderPayer - @PostMapping("/users") fun createUser(@RequestBody req: CreateUserRequest): User { return User(UUID.randomUUID(), req.name) @@ -56,13 +60,15 @@ class APIController { @PostMapping("/orders/{orderId}/payment") fun payOrder(@PathVariable orderId: UUID, @RequestParam deadline: Long): PaymentSubmissionDto { + if (!rateLimiter.tick()) { + throw TooManyRequestsException(deadline) + } val paymentId = UUID.randomUUID() val order = orderRepository.findById(orderId)?.let { orderRepository.save(it.copy(status = OrderStatus.PAYMENT_IN_PROGRESS)) it } ?: throw IllegalArgumentException("No such order $orderId") - val createdAt = orderPayer.processPayment(orderId, order.price, paymentId, deadline) return PaymentSubmissionDto(createdAt, paymentId) } diff --git a/src/main/kotlin/ru/quipy/apigateway/GlobalExceptionHandler.kt b/src/main/kotlin/ru/quipy/apigateway/GlobalExceptionHandler.kt new file mode 100644 index 000000000..d6c824805 --- /dev/null +++ b/src/main/kotlin/ru/quipy/apigateway/GlobalExceptionHandler.kt @@ -0,0 +1,53 @@ +package ru.quipy.apigateway + +import org.slf4j.LoggerFactory +import org.springframework.http.HttpStatus +import org.springframework.http.ResponseEntity +import org.springframework.web.bind.annotation.ExceptionHandler +import org.springframework.web.bind.annotation.ResponseStatus +import org.springframework.web.bind.annotation.RestControllerAdvice +import ru.quipy.exceptions.TooManyRequestsRetriableException +import ru.quipy.exceptions.TooLongRequestException +import ru.quipy.exceptions.TooManyRequestsException +import java.util.concurrent.atomic.AtomicInteger +import java.util.concurrent.atomic.AtomicLong + +@RestControllerAdvice +class GlobalExceptionHandler( +) { + companion object { + val logger = LoggerFactory.getLogger(GlobalExceptionHandler::class.java) + private var currentRetryAfterSeconds = 1 + private var exp_base = 2; + private const val MAX_RETRY_AFTER_MS = 256 + private const val RESET_WINDOW_MS = 30000L + } + + private val rejectedRequestsCount = AtomicInteger(0) + private val lastRejectionTime = AtomicLong(0) + @ExceptionHandler(TooLongRequestException::class) + fun handleTooManyRequests(exception: TooLongRequestException): ResponseEntity { + return ResponseEntity.status(200).body("your request very long, i am so sorry") + } + @ExceptionHandler(TooManyRequestsException::class) + fun handleTooManyRequestsRetriable(exception: TooManyRequestsException): ResponseEntity { + logger.warn("to many request") + val currentTime = System.currentTimeMillis() + val lastRejection = lastRejectionTime.get() + + if (currentTime - lastRejection > RESET_WINDOW_MS) { + rejectedRequestsCount.set(0) + currentRetryAfterSeconds = 1 + } + lastRejectionTime.set(currentTime) + + if (rejectedRequestsCount.get() < 4 && exception.deadline < System.currentTimeMillis() + 1200) { + return ResponseEntity + .status(HttpStatus.TOO_MANY_REQUESTS) + .header("Retry-After", MAX_RETRY_AFTER_MS.coerceAtLeast(currentRetryAfterSeconds).toString()) + .build() + } + + return ResponseEntity.status(200).build() + } +} \ No newline at end of file diff --git a/src/main/kotlin/ru/quipy/common/utils/SlidingWindowRateLimiter.kt b/src/main/kotlin/ru/quipy/common/utils/SlidingWindowRateLimiter.kt index 6ff3092ab..c5bd36a0a 100644 --- a/src/main/kotlin/ru/quipy/common/utils/SlidingWindowRateLimiter.kt +++ b/src/main/kotlin/ru/quipy/common/utils/SlidingWindowRateLimiter.kt @@ -1,5 +1,7 @@ package ru.quipy.common.utils +import com.github.dockerjava.zerodep.shaded.org.apache.hc.core5.util.Deadline +import com.github.dockerjava.zerodep.shaded.org.apache.hc.core5.util.Timeout import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.asCoroutineDispatcher import kotlinx.coroutines.delay @@ -10,8 +12,6 @@ import java.time.Duration import java.util.concurrent.Executors import java.util.concurrent.PriorityBlockingQueue import java.util.concurrent.atomic.AtomicLong -import java.util.concurrent.locks.ReentrantLock -import kotlin.concurrent.withLock class SlidingWindowRateLimiter( private val rate: Long, @@ -39,6 +39,15 @@ class SlidingWindowRateLimiter( } } + fun tickBlocking(timeout: Long): Boolean { + val timeStarted = System.currentTimeMillis() + while (System.currentTimeMillis()-timeStarted < timeout && !tick()) { + Thread.sleep(2) + } + return System.currentTimeMillis()-timeStarted < timeout + } + + data class Measure( val value: Long, val timestamp: Long @@ -64,6 +73,7 @@ class SlidingWindowRateLimiter( queue.take() } }.invokeOnCompletion { th -> if (th != null) logger.error("Rate limiter release job completed", th) } + companion object { private val logger: Logger = LoggerFactory.getLogger(SlidingWindowRateLimiter::class.java) } diff --git a/src/main/kotlin/ru/quipy/exceptions/TooLongRequestEcexption.kt b/src/main/kotlin/ru/quipy/exceptions/TooLongRequestEcexption.kt new file mode 100644 index 000000000..c50d9bc48 --- /dev/null +++ b/src/main/kotlin/ru/quipy/exceptions/TooLongRequestEcexption.kt @@ -0,0 +1,4 @@ +package ru.quipy.exceptions + +class TooLongRequestException() : RuntimeException() { +} \ No newline at end of file diff --git a/src/main/kotlin/ru/quipy/exceptions/TooManyRequestsException.kt b/src/main/kotlin/ru/quipy/exceptions/TooManyRequestsException.kt new file mode 100644 index 000000000..cacd67810 --- /dev/null +++ b/src/main/kotlin/ru/quipy/exceptions/TooManyRequestsException.kt @@ -0,0 +1,3 @@ +package ru.quipy.exceptions + +class TooManyRequestsException(val deadline: Long) : RuntimeException() \ No newline at end of file diff --git a/src/main/kotlin/ru/quipy/exceptions/TooManyRequestsRetriableException.kt b/src/main/kotlin/ru/quipy/exceptions/TooManyRequestsRetriableException.kt new file mode 100644 index 000000000..4c95b4b30 --- /dev/null +++ b/src/main/kotlin/ru/quipy/exceptions/TooManyRequestsRetriableException.kt @@ -0,0 +1,4 @@ +package ru.quipy.exceptions + +class TooManyRequestsRetriableException(val deadline: Long) : RuntimeException(){ +} \ No newline at end of file diff --git a/src/main/kotlin/ru/quipy/orders/subscribers/PaymentSubscriber.kt b/src/main/kotlin/ru/quipy/orders/subscribers/PaymentSubscriber.kt index 767f23b3e..a4c312eaf 100644 --- a/src/main/kotlin/ru/quipy/orders/subscribers/PaymentSubscriber.kt +++ b/src/main/kotlin/ru/quipy/orders/subscribers/PaymentSubscriber.kt @@ -1,5 +1,6 @@ package ru.quipy.orders.subscribers +import io.micrometer.core.instrument.Metrics import jakarta.annotation.PostConstruct import org.slf4j.Logger import org.slf4j.LoggerFactory @@ -19,6 +20,7 @@ class PaymentSubscriber { val logger: Logger = LoggerFactory.getLogger(PaymentSubscriber::class.java) + val paymentSucceededCounter = Metrics.counter("succeeded.payments", "account", "acc-7") @Autowired lateinit var subscriptionsManager: AggregateSubscriptionsManager @@ -42,6 +44,7 @@ class PaymentSubscriber { ).toSeconds() }, spent in queue: ${event.spentInQueueDuration.toSeconds()}" ) + paymentSucceededCounter.increment() } } } diff --git a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt index eceb90cff..e10bd805a 100644 --- a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt +++ b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt @@ -3,17 +3,22 @@ package ru.quipy.payments.config import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule import com.fasterxml.jackson.module.kotlin.registerKotlinModule +import io.micrometer.core.instrument.MeterRegistry import org.springframework.beans.factory.annotation.Value import org.springframework.context.annotation.Bean import org.springframework.context.annotation.Configuration import ru.quipy.core.EventSourcingService import ru.quipy.payments.api.PaymentAggregate -import ru.quipy.payments.logic.* +import ru.quipy.payments.logic.PaymentAccountProperties +import ru.quipy.payments.logic.PaymentAggregateState +import ru.quipy.payments.logic.PaymentExternalSystemAdapter +import ru.quipy.payments.logic.PaymentExternalSystemAdapterImpl import java.net.URI import java.net.http.HttpClient import java.net.http.HttpRequest import java.net.http.HttpResponse import java.util.* +import java.util.concurrent.Semaphore @Configuration @@ -36,7 +41,10 @@ class PaymentAccountsConfig { lateinit var allowedAccounts: List @Bean - fun accountAdapters(paymentService: EventSourcingService): List { + fun accountAdapters( + paymentService: EventSourcingService, + meterRegistry: MeterRegistry, + ): List { val request = HttpRequest.newBuilder() .uri(URI("http://${paymentProviderHostPort}/external/accounts?serviceName=$serviceName&token=$token")) .GET() @@ -57,7 +65,9 @@ class PaymentAccountsConfig { it, paymentService, paymentProviderHostPort, - token + token, + meterRegistry, + Semaphore(it.parallelRequests) ) } } diff --git a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt index a5909b85b..d31fcfff5 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt @@ -27,11 +27,11 @@ class OrderPayer { private lateinit var paymentService: PaymentService private val paymentExecutor = ThreadPoolExecutor( - 16, - 16, - 0L, + 30, + 50, + 100L, TimeUnit.MILLISECONDS, - LinkedBlockingQueue(8_000), + LinkedBlockingQueue(40), NamedThreadFactory("payment-submission-executor"), CallerBlockingRejectedExecutionHandler() ) diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index 5cb12106a..b8f38757c 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -2,15 +2,23 @@ package ru.quipy.payments.logic import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.kotlin.registerKotlinModule +import io.micrometer.core.instrument.MeterRegistry +import java.util.concurrent.Semaphore import okhttp3.OkHttpClient import okhttp3.Request import okhttp3.RequestBody +import okio.IOException import org.slf4j.LoggerFactory +import ru.quipy.common.utils.SlidingWindowRateLimiter import ru.quipy.core.EventSourcingService +import ru.quipy.exceptions.TooManyRequestsException import ru.quipy.payments.api.PaymentAggregate +import java.io.InterruptedIOException import java.net.SocketTimeoutException import java.time.Duration import java.util.* +import java.util.concurrent.TimeUnit +import kotlin.math.pow // Advice: always treat time as a Duration @@ -19,6 +27,8 @@ class PaymentExternalSystemAdapterImpl( private val paymentESService: EventSourcingService, private val paymentProviderHostPort: String, private val token: String, + private val meterRegistry: MeterRegistry, + private val parallelLimiter: Semaphore ) : PaymentExternalSystemAdapter { companion object { @@ -28,13 +38,21 @@ class PaymentExternalSystemAdapterImpl( val mapper = ObjectMapper().registerKotlinModule() } + private val timer = meterRegistry.timer("payment.external.system.request.latency", "accountName", properties.accountName) private val serviceName = properties.serviceName private val accountName = properties.accountName private val requestAverageProcessingTime = properties.averageProcessingTime private val rateLimitPerSec = properties.rateLimitPerSec private val parallelRequests = properties.parallelRequests + private val rateLimit: SlidingWindowRateLimiter by lazy { + SlidingWindowRateLimiter( + rate = rateLimitPerSec.toLong(), + window = Duration.ofMillis(1000), + ) + } - private val client = OkHttpClient.Builder().build() + private val client = OkHttpClient.Builder() + .callTimeout(1000, TimeUnit.MILLISECONDS).build() override fun performPaymentAsync(paymentId: UUID, amount: Int, paymentStartedAt: Long, deadline: Long) { logger.warn("[$accountName] Submitting payment request for payment $paymentId") @@ -49,26 +67,71 @@ class PaymentExternalSystemAdapterImpl( logger.info("[$accountName] Submit: $paymentId , txId: $transactionId") + if (!rateLimit.tickBlocking(timeToDead(deadline))) { + throw TooManyRequestsException(deadline) + } + parallelLimiter.acquire() + val timeBeforeCall = now() try { val request = Request.Builder().run { url("http://$paymentProviderHostPort/external/process?serviceName=$serviceName&token=$token&accountName=$accountName&transactionId=$transactionId&paymentId=$paymentId&amount=$amount") post(emptyBody) }.build() - client.newCall(request).execute().use { response -> - val body = try { - mapper.readValue(response.body?.string(), ExternalSysResponse::class.java) - } catch (e: Exception) { - logger.error("[$accountName] [ERROR] Payment processed for txId: $transactionId, payment: $paymentId, result code: ${response.code}, reason: ${response.body?.string()}") - ExternalSysResponse(transactionId.toString(), paymentId.toString(),false, e.message) + var isCompletedRequest = false + var retryCount = 0 + + if (timeToDead(deadline) < 0) { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "deadline was expired") } + return + } + while (!isCompletedRequest && now() < deadline) { + if (timeToDead(deadline) < 0) { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "deadline was expired") + } + return + } + client.newCall(request).execute().use { response -> + val body = try { + mapper.readValue(response.body?.string(), ExternalSysResponse::class.java) + } catch (e: Exception) { + logger.error("[$accountName] [ERROR] Payment processed for txId: $transactionId, payment: $paymentId, result code: ${response.code}, reason: ${response.body?.string()}") + ExternalSysResponse( + transactionId.toString(), + paymentId.toString(), + false, + e.message + ) + } + isCompletedRequest = if (!body.result && !(response.code >= 500 || response.code == 429)) { + if (retryCount < 3) { + retryCount++ + val backoffTime = (2.0.pow(retryCount.toDouble()) * 10 + Random().nextLong(0, 10)).toLong() + Thread.sleep(backoffTime) + continue + } else { + true + } + } else { + true + } - logger.warn("[$accountName] Payment processed for txId: $transactionId, payment: $paymentId, succeeded: ${body.result}, message: ${body.message}") + logger.warn("[$accountName] Payment processed for txId: $transactionId, payment: $paymentId, succeeded: ${body.result}, message: ${body.message}") - // Здесь мы обновляем состояние оплаты в зависимости от результата в базе данных оплат. - // Это требуется сделать ВО ВСЕХ ИСХОДАХ (успешная оплата / неуспешная / ошибочная ситуация) + // Здесь мы обновляем состояние оплаты в зависимости от результата в базе данных оплат. + // Это требуется сделать ВО ВСЕХ ИСХОДАХ (успешная оплата / неуспешная / ошибочная ситуация) + paymentESService.update(paymentId) { + it.logProcessing(body.result, now(), transactionId, reason = body.message) + } + } + } + + if (timeToDead(deadline) < 0) { paymentESService.update(paymentId) { - it.logProcessing(body.result, now(), transactionId, reason = body.message) + it.logProcessing(false, now(), transactionId, "deadline was expired") } } } catch (e: Exception) { @@ -79,7 +142,18 @@ class PaymentExternalSystemAdapterImpl( it.logProcessing(false, now(), transactionId, reason = "Request timeout.") } } - + is InterruptedIOException -> { + logger.error("[$accountName] Server timeout for txId: $transactionId, payment: $paymentId", e) + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, reason = "Server timeout") + } + } + is IOException -> { + logger.error("[$accountName] Server timeout for txId: $transactionId, payment: $paymentId", e) + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, reason = "Server timeout") + } + } else -> { logger.error("[$accountName] Payment failed for txId: $transactionId, payment: $paymentId", e) @@ -88,6 +162,9 @@ class PaymentExternalSystemAdapterImpl( } } } + } finally { + timer.record(now()-timeBeforeCall, TimeUnit.MILLISECONDS) + parallelLimiter.release() } } @@ -96,7 +173,8 @@ class PaymentExternalSystemAdapterImpl( override fun isEnabled() = properties.enabled override fun name() = properties.accountName - + fun timeToDead(deadline: Long): Long { + return deadline - now() - requestAverageProcessingTime.toMillis() - 200 + } } - public fun now() = System.currentTimeMillis() \ No newline at end of file diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 33d51a58b..3170b6dad 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -2,29 +2,30 @@ server.address=0.0.0.0 server.port=8081 server.http2.enabled=true spring.main.allow-bean-definition-overriding=true - # MongoDB properties spring.data.mongodb.host=localhost spring.data.mongodb.port=27017 spring.data.mongodb.database=online-shop - # Tiny event sourcing library properties event.sourcing.auto-scan-enabled=true event.sourcing.scan-package=ru.quipy event.sourcing.snapshots-enabled=false event.sourcing.sagas-enabled=false - # Postgres event store properties spring.datasource.hikari.jdbc-url=jdbc:postgresql://${POSTGRES_ADDRESS:localhost}:${POSTGRES_PORT:65432}/postgres spring.datasource.hikari.username=tiny_es spring.datasource.hikari.password=tiny_es -spring.datasource.hikari.leak-detection-threshold=2000 +spring.datasource.hikari.leak-detection-threshold=2000 management.metrics.web.server.request.autotime.percentiles=0.95 management.metrics.export.prometheus.enabled=true -management.endpoints.web.exposure.include=info,health,prometheus,metrics - +management.endpoints.web.exposure.include=prometheus,health,info +management.metrics.distribution.percentiles-histogram.http.server.requests=true +management.metrics.distribution.percentiles-histogram.payment.external.system.request.latency=true payment.service-name=${PAYMENT_SERVICE_NAME} payment.token=${PAYMENT_TOKEN} -payment.accounts=${PAYMENT_ACCOUNTS:acc-12,acc-20} -payment.hostPort=${PAYMENT_HOST:localhost}:${PAYMENT_PORT:1234} \ No newline at end of file +# payment.accounts=${PAYMENT_ACCOUNTS:acc-12,acc-20} +# payment.accounts=${PAYMENT_ACCOUNTS:acc-3} +payment.accounts=${PAYMENT_ACCOUNTS:acc-7} +# payment.accounts=${PAYMENT_ACCOUNTS:acc-18} +payment.hostPort=${PAYMENT_HOST:localhost}:${PAYMENT_PORT:1234} diff --git a/test-local-run.http b/test-local-run.http index 7be0e4f73..dc8dbeedf 100644 --- a/test-local-run.http +++ b/test-local-run.http @@ -12,4 +12,4 @@ Content-Type: application/json ### Stop running test to save time and resources # @timeout 120 -POST http://localhost:1234/test/stop/{{serviceName}} \ No newline at end of file +POST http://localhost:4321/test/stop/"{{serviceName}}" \ No newline at end of file diff --git a/test-on-prem-run.http b/test-on-prem-run.http index 584edc0b5..dd5765987 100644 --- a/test-on-prem-run.http +++ b/test-on-prem-run.http @@ -7,13 +7,13 @@ Content-Type: application/json "serviceName": "{{serviceName}}", "token": "{{token}}", "branch": "main", - "accounts": "acc-3", - "ratePerSecond": 11, - "testCount": 1200, + "accounts": "acc-12,acc-20", + "ratePerSecond": 2, + "testCount": 10, "processingTimeMillis": 80000, "onPremises": true } ### Stop running test to save credits # @timeout 120 -POST http://77.234.215.138:31234/test/stop/{{serviceName}} \ No newline at end of file +POST http://77.234.215.138:31234/test/stop/"{{serviceName}}" \ No newline at end of file From cb3b5c72808a30500c0c21fdabc1edf0dcaf7d9d Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Thu, 13 Nov 2025 21:13:11 +0300 Subject: [PATCH 02/41] fix: try to fix client timeout --- .../ru/quipy/apigateway/APIController.kt | 3 +-- .../apigateway/GlobalExceptionHandler.kt | 22 +----------------- .../ru/quipy/payments/logic/OrderPayer.kt | 21 +++++++++++++---- .../logic/PaymentExternalServiceImpl.kt | 23 ++++++++----------- 4 files changed, 28 insertions(+), 41 deletions(-) diff --git a/src/main/kotlin/ru/quipy/apigateway/APIController.kt b/src/main/kotlin/ru/quipy/apigateway/APIController.kt index fff5ac386..c07d96b7a 100644 --- a/src/main/kotlin/ru/quipy/apigateway/APIController.kt +++ b/src/main/kotlin/ru/quipy/apigateway/APIController.kt @@ -7,7 +7,6 @@ import org.springframework.web.bind.annotation.* import ru.quipy.common.utils.LeakingBucketRateLimiter import ru.quipy.common.utils.RateLimiter import ru.quipy.exceptions.TooManyRequestsException -import ru.quipy.exceptions.TooManyRequestsRetriableException import ru.quipy.orders.repository.OrderRepository import ru.quipy.payments.logic.OrderPayer import java.time.Duration @@ -18,7 +17,7 @@ class APIController( private val orderRepository: OrderRepository, private val orderPayer: OrderPayer, @field:Qualifier("parallelLimiter") - private val rateLimiter: RateLimiter = LeakingBucketRateLimiter(8, Duration.ofSeconds(1), 38) + private val rateLimiter: RateLimiter = LeakingBucketRateLimiter(8, Duration.ofSeconds(1), 30) ) { val logger: Logger = LoggerFactory.getLogger(APIController::class.java) diff --git a/src/main/kotlin/ru/quipy/apigateway/GlobalExceptionHandler.kt b/src/main/kotlin/ru/quipy/apigateway/GlobalExceptionHandler.kt index d6c824805..5679ea372 100644 --- a/src/main/kotlin/ru/quipy/apigateway/GlobalExceptionHandler.kt +++ b/src/main/kotlin/ru/quipy/apigateway/GlobalExceptionHandler.kt @@ -4,9 +4,7 @@ import org.slf4j.LoggerFactory import org.springframework.http.HttpStatus import org.springframework.http.ResponseEntity import org.springframework.web.bind.annotation.ExceptionHandler -import org.springframework.web.bind.annotation.ResponseStatus import org.springframework.web.bind.annotation.RestControllerAdvice -import ru.quipy.exceptions.TooManyRequestsRetriableException import ru.quipy.exceptions.TooLongRequestException import ru.quipy.exceptions.TooManyRequestsException import java.util.concurrent.atomic.AtomicInteger @@ -19,7 +17,6 @@ class GlobalExceptionHandler( val logger = LoggerFactory.getLogger(GlobalExceptionHandler::class.java) private var currentRetryAfterSeconds = 1 private var exp_base = 2; - private const val MAX_RETRY_AFTER_MS = 256 private const val RESET_WINDOW_MS = 30000L } @@ -31,23 +28,6 @@ class GlobalExceptionHandler( } @ExceptionHandler(TooManyRequestsException::class) fun handleTooManyRequestsRetriable(exception: TooManyRequestsException): ResponseEntity { - logger.warn("to many request") - val currentTime = System.currentTimeMillis() - val lastRejection = lastRejectionTime.get() - - if (currentTime - lastRejection > RESET_WINDOW_MS) { - rejectedRequestsCount.set(0) - currentRetryAfterSeconds = 1 - } - lastRejectionTime.set(currentTime) - - if (rejectedRequestsCount.get() < 4 && exception.deadline < System.currentTimeMillis() + 1200) { - return ResponseEntity - .status(HttpStatus.TOO_MANY_REQUESTS) - .header("Retry-After", MAX_RETRY_AFTER_MS.coerceAtLeast(currentRetryAfterSeconds).toString()) - .build() - } - - return ResponseEntity.status(200).build() + return ResponseEntity.status(HttpStatus.TOO_MANY_REQUESTS).header("Retry-After", "1").body("to many requests") } } \ No newline at end of file diff --git a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt index d31fcfff5..359aa0a59 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt @@ -6,8 +6,12 @@ import org.springframework.beans.factory.annotation.Autowired import org.springframework.stereotype.Service import ru.quipy.common.utils.CallerBlockingRejectedExecutionHandler import ru.quipy.common.utils.NamedThreadFactory +import ru.quipy.common.utils.SlidingWindowRateLimiter import ru.quipy.core.EventSourcingService +import ru.quipy.exceptions.RateLimitWasBreached +import ru.quipy.exceptions.TooManyRequestsException import ru.quipy.payments.api.PaymentAggregate +import java.time.Duration import java.util.* import java.util.concurrent.LinkedBlockingQueue import java.util.concurrent.ThreadPoolExecutor @@ -19,6 +23,12 @@ class OrderPayer { companion object { val logger: Logger = LoggerFactory.getLogger(OrderPayer::class.java) } + private val rateLimit: SlidingWindowRateLimiter by lazy { + SlidingWindowRateLimiter( + rate = 8, + window = Duration.ofMillis(1000), + ) + } @Autowired private lateinit var paymentESService: EventSourcingService @@ -27,17 +37,20 @@ class OrderPayer { private lateinit var paymentService: PaymentService private val paymentExecutor = ThreadPoolExecutor( - 30, - 50, - 100L, + 16, + 16, + 0, TimeUnit.MILLISECONDS, - LinkedBlockingQueue(40), + LinkedBlockingQueue(8_000), NamedThreadFactory("payment-submission-executor"), CallerBlockingRejectedExecutionHandler() ) fun processPayment(orderId: UUID, amount: Int, paymentId: UUID, deadline: Long): Long { val createdAt = System.currentTimeMillis() + if (!rateLimit.tickBlocking(deadline- System.currentTimeMillis())) { + throw TooManyRequestsException(deadline) + } paymentExecutor.submit { val createdEvent = paymentESService.create { it.create( diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index b8f38757c..fd368f079 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -3,7 +3,6 @@ package ru.quipy.payments.logic import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.kotlin.registerKotlinModule import io.micrometer.core.instrument.MeterRegistry -import java.util.concurrent.Semaphore import okhttp3.OkHttpClient import okhttp3.Request import okhttp3.RequestBody @@ -11,12 +10,12 @@ import okio.IOException import org.slf4j.LoggerFactory import ru.quipy.common.utils.SlidingWindowRateLimiter import ru.quipy.core.EventSourcingService -import ru.quipy.exceptions.TooManyRequestsException import ru.quipy.payments.api.PaymentAggregate import java.io.InterruptedIOException import java.net.SocketTimeoutException import java.time.Duration import java.util.* +import java.util.concurrent.Semaphore import java.util.concurrent.TimeUnit import kotlin.math.pow @@ -27,7 +26,7 @@ class PaymentExternalSystemAdapterImpl( private val paymentESService: EventSourcingService, private val paymentProviderHostPort: String, private val token: String, - private val meterRegistry: MeterRegistry, + meterRegistry: MeterRegistry, private val parallelLimiter: Semaphore ) : PaymentExternalSystemAdapter { @@ -44,15 +43,9 @@ class PaymentExternalSystemAdapterImpl( private val requestAverageProcessingTime = properties.averageProcessingTime private val rateLimitPerSec = properties.rateLimitPerSec private val parallelRequests = properties.parallelRequests - private val rateLimit: SlidingWindowRateLimiter by lazy { - SlidingWindowRateLimiter( - rate = rateLimitPerSec.toLong(), - window = Duration.ofMillis(1000), - ) - } private val client = OkHttpClient.Builder() - .callTimeout(1000, TimeUnit.MILLISECONDS).build() + .callTimeout(1100, TimeUnit.MILLISECONDS).build() override fun performPaymentAsync(paymentId: UUID, amount: Int, paymentStartedAt: Long, deadline: Long) { logger.warn("[$accountName] Submitting payment request for payment $paymentId") @@ -67,10 +60,12 @@ class PaymentExternalSystemAdapterImpl( logger.info("[$accountName] Submit: $paymentId , txId: $transactionId") - if (!rateLimit.tickBlocking(timeToDead(deadline))) { - throw TooManyRequestsException(deadline) + if (!parallelLimiter.tryAcquire(timeToDead(deadline), TimeUnit.MILLISECONDS)) { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "deadline was expired") + } + return } - parallelLimiter.acquire() val timeBeforeCall = now() try { val request = Request.Builder().run { @@ -174,7 +169,7 @@ class PaymentExternalSystemAdapterImpl( override fun name() = properties.accountName fun timeToDead(deadline: Long): Long { - return deadline - now() - requestAverageProcessingTime.toMillis() - 200 + return deadline - now() } } public fun now() = System.currentTimeMillis() \ No newline at end of file From 76511a37560356fb4f32539cbaae2916bd84f8bc Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Thu, 13 Nov 2025 21:17:22 +0300 Subject: [PATCH 03/41] fix: try to fix client timeout --- Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3d9587cd0..c8b23fe7f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM maven:3.9.9-eclipse-temurin-17 AS build +FROM eclipse-temurin:17-alpine-3.22 AS build WORKDIR /app COPY pom.xml . @@ -6,8 +6,6 @@ RUN mvn dependency:go-offline COPY src src RUN mvn package -FROM eclipse-temurin:17-alpine-3.22 - COPY --from=build /app/target/*.jar /high-load-course.jar CMD ["java", "-jar", "/high-load-course.jar"] From f25a3a5a312df765418d0ba430b3551bd645ef41 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Thu, 13 Nov 2025 21:19:44 +0300 Subject: [PATCH 04/41] fix: try to fix client timeout --- Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c8b23fe7f..3d9587cd0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM eclipse-temurin:17-alpine-3.22 AS build +FROM maven:3.9.9-eclipse-temurin-17 AS build WORKDIR /app COPY pom.xml . @@ -6,6 +6,8 @@ RUN mvn dependency:go-offline COPY src src RUN mvn package +FROM eclipse-temurin:17-alpine-3.22 + COPY --from=build /app/target/*.jar /high-load-course.jar CMD ["java", "-jar", "/high-load-course.jar"] From 6a90fe47630dc018f433565b90fb77342a66a446 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Thu, 13 Nov 2025 21:22:45 +0300 Subject: [PATCH 05/41] fix: try to fix client timeout --- Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 3d9587cd0..de31b9fe7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,4 +11,3 @@ FROM eclipse-temurin:17-alpine-3.22 COPY --from=build /app/target/*.jar /high-load-course.jar CMD ["java", "-jar", "/high-load-course.jar"] - From 524e42babcba02b3f6a6733a7c74b7a9a3023fca Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Thu, 13 Nov 2025 21:23:39 +0300 Subject: [PATCH 06/41] fix: try to fix client timeout --- src/main/kotlin/ru/quipy/exceptions/RateLimitWasBreached.kt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 src/main/kotlin/ru/quipy/exceptions/RateLimitWasBreached.kt diff --git a/src/main/kotlin/ru/quipy/exceptions/RateLimitWasBreached.kt b/src/main/kotlin/ru/quipy/exceptions/RateLimitWasBreached.kt new file mode 100644 index 000000000..8469ea9a3 --- /dev/null +++ b/src/main/kotlin/ru/quipy/exceptions/RateLimitWasBreached.kt @@ -0,0 +1,4 @@ +package ru.quipy.exceptions + +class RateLimitWasBreached : RuntimeException() { +} \ No newline at end of file From b184e9c0c3c1d6a47d93a9c80fbf3f63c58d2fdd Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Fri, 14 Nov 2025 15:30:13 +0300 Subject: [PATCH 07/41] fix: try to fix client timeout --- .../logic/PaymentExternalServiceImpl.kt | 163 +++++++++--------- 1 file changed, 83 insertions(+), 80 deletions(-) diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index fd368f079..f0e84098c 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -8,7 +8,6 @@ import okhttp3.Request import okhttp3.RequestBody import okio.IOException import org.slf4j.LoggerFactory -import ru.quipy.common.utils.SlidingWindowRateLimiter import ru.quipy.core.EventSourcingService import ru.quipy.payments.api.PaymentAggregate import java.io.InterruptedIOException @@ -60,106 +59,110 @@ class PaymentExternalSystemAdapterImpl( logger.info("[$accountName] Submit: $paymentId , txId: $transactionId") - if (!parallelLimiter.tryAcquire(timeToDead(deadline), TimeUnit.MILLISECONDS)) { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "deadline was expired") + var retryCount = 0 + while (true) { + val remaining = deadline - now() + if (remaining <= 0) { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "deadline expired") + } + return } - return - } - val timeBeforeCall = now() - try { - val request = Request.Builder().run { - url("http://$paymentProviderHostPort/external/process?serviceName=$serviceName&token=$token&accountName=$accountName&transactionId=$transactionId&paymentId=$paymentId&amount=$amount") - post(emptyBody) - }.build() - - var isCompletedRequest = false - var retryCount = 0 - if (timeToDead(deadline) < 0) { + if (!parallelLimiter.tryAcquire(remaining, TimeUnit.MILLISECONDS)) { paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "deadline was expired") + it.logProcessing(false, now(), transactionId, "parallel limiter timeout") } return } - while (!isCompletedRequest && now() < deadline) { - if (timeToDead(deadline) < 0) { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "deadline was expired") - } - return - } - client.newCall(request).execute().use { response -> - val body = try { - mapper.readValue(response.body?.string(), ExternalSysResponse::class.java) - } catch (e: Exception) { - logger.error("[$accountName] [ERROR] Payment processed for txId: $transactionId, payment: $paymentId, result code: ${response.code}, reason: ${response.body?.string()}") - ExternalSysResponse( - transactionId.toString(), - paymentId.toString(), - false, - e.message - ) - } - isCompletedRequest = if (!body.result && !(response.code >= 500 || response.code == 429)) { - if (retryCount < 3) { - retryCount++ - val backoffTime = (2.0.pow(retryCount.toDouble()) * 10 + Random().nextLong(0, 10)).toLong() - Thread.sleep(backoffTime) - continue - } else { - true - } - } else { - true + + val timeBeforeCall = now() + var shouldRetry = false + try { + val perCallTimeoutMs = remaining.coerceAtMost(1100) + val request = Request.Builder() + .url( + "http://$paymentProviderHostPort/external/process" + + "?serviceName=$serviceName&token=$token&accountName=$accountName" + + "&transactionId=$transactionId&paymentId=$paymentId&amount=$amount" + ) + .post(emptyBody) + .build() + + val call = client.newCall(request) + call.timeout().timeout(perCallTimeoutMs, TimeUnit.MILLISECONDS) + + call.execute().use { response -> + val rawBody = response.body?.string() + val parsed = try { + mapper.readValue(rawBody, ExternalSysResponse::class.java) + } catch (ex: Exception) { + ExternalSysResponse(transactionId.toString(), paymentId.toString(), false, ex.message) } - logger.warn("[$accountName] Payment processed for txId: $transactionId, payment: $paymentId, succeeded: ${body.result}, message: ${body.message}") + shouldRetry = !parsed.result && (response.code == 429 || response.code >= 500) - // Здесь мы обновляем состояние оплаты в зависимости от результата в базе данных оплат. - // Это требуется сделать ВО ВСЕХ ИСХОДАХ (успешная оплата / неуспешная / ошибочная ситуация) paymentESService.update(paymentId) { - it.logProcessing(body.result, now(), transactionId, reason = body.message) + it.logProcessing(parsed.result, now(), transactionId, parsed.message) + } + + if (parsed.result) { + return } } - } + } catch (e: SocketTimeoutException) { + logger.warn("[$accountName] attempt ${retryCount + 1} timeout: $paymentId", e) + shouldRetry = true + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "socket timeout") + } + } catch (e: InterruptedIOException) { + logger.warn("[$accountName] interrupted: $paymentId", e) + shouldRetry = true - if (timeToDead(deadline) < 0) { + // Здесь мы обновляем состояние оплаты в зависимости от результата в базе данных оплат. + // Это требуется сделать ВО ВСЕХ ИСХОДАХ (успешная оплата / неуспешная / ошибочная ситуация) paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "deadline was expired") + it.logProcessing(false, now(), transactionId, "interrupted IO") } - } - } catch (e: Exception) { - when (e) { - is SocketTimeoutException -> { - logger.error("[$accountName] Payment timeout for txId: $transactionId, payment: $paymentId", e) - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, reason = "Request timeout.") - } + } catch (e: IOException) { + logger.warn("[$accountName] io error: $paymentId", e) + shouldRetry = true + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "io exception") } - is InterruptedIOException -> { - logger.error("[$accountName] Server timeout for txId: $transactionId, payment: $paymentId", e) - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, reason = "Server timeout") - } + } catch (e: Exception) { + logger.error("[$accountName] non-retriable error: $paymentId", e) + shouldRetry = false + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, e.message) } - is IOException -> { - logger.error("[$accountName] Server timeout for txId: $transactionId, payment: $paymentId", e) - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, reason = "Server timeout") - } + } finally { + timer.record(now() - timeBeforeCall, TimeUnit.MILLISECONDS) + parallelLimiter.release() + } + + if (!shouldRetry) { + return + } + + retryCount++ + if (retryCount >= 3) { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Max attempts reached") } - else -> { - logger.error("[$accountName] Payment failed for txId: $transactionId, payment: $paymentId", e) + return + } - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, reason = e.message) - } + val backoff = ((2.0.pow(retryCount.toDouble()) * 25).toLong() + kotlin.random.Random.nextLong(10)) + val capped = backoff.coerceAtMost(deadline - now() - 5) + if (capped <= 0) { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Deadline expired") } + return } - } finally { - timer.record(now()-timeBeforeCall, TimeUnit.MILLISECONDS) - parallelLimiter.release() + Thread.sleep(capped) } } From d314b72190c911f54a1794a2ba7e04ae4fe83078 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Fri, 19 Dec 2025 19:58:51 +0300 Subject: [PATCH 08/41] feature: added retry counter and dashboard for this metric --- .../dashboards/ServicesStatistic.json | 98 +++++++++++++++++++ .../logic/PaymentExternalServiceImpl.kt | 6 +- 2 files changed, 102 insertions(+), 2 deletions(-) diff --git a/grafana/provisioning/dashboards/ServicesStatistic.json b/grafana/provisioning/dashboards/ServicesStatistic.json index eb6f028d2..1e46c11bc 100644 --- a/grafana/provisioning/dashboards/ServicesStatistic.json +++ b/grafana/provisioning/dashboards/ServicesStatistic.json @@ -38,6 +38,104 @@ }, "id": 27, "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "bars", + "fillOpacity": 23, + "gradientMode": "hue", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 109, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(increase(payment_external_retry_count_total[30s])) by (accountName)", + "legendFormat": "{{accountName}}", + "range": true, + "refId": "A" + } + ], + "title": "Payment Retry Count (30s)", + "type": "timeseries" + }, { "datasource": { "type": "prometheus", diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index f0e84098c..3ef90a416 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -37,6 +37,7 @@ class PaymentExternalSystemAdapterImpl( } private val timer = meterRegistry.timer("payment.external.system.request.latency", "accountName", properties.accountName) + private val retryCounter = meterRegistry.counter("payment.external.retry.count", "accountName", properties.accountName) private val serviceName = properties.serviceName private val accountName = properties.accountName private val requestAverageProcessingTime = properties.averageProcessingTime @@ -153,7 +154,6 @@ class PaymentExternalSystemAdapterImpl( } return } - val backoff = ((2.0.pow(retryCount.toDouble()) * 25).toLong() + kotlin.random.Random.nextLong(10)) val capped = backoff.coerceAtMost(deadline - now() - 5) if (capped <= 0) { @@ -162,6 +162,8 @@ class PaymentExternalSystemAdapterImpl( } return } + + retryCounter.increment() Thread.sleep(capped) } } @@ -175,4 +177,4 @@ class PaymentExternalSystemAdapterImpl( return deadline - now() } } -public fun now() = System.currentTimeMillis() \ No newline at end of file +fun now() = System.currentTimeMillis() \ No newline at end of file From 4f8e2ff56da17c24890210b009a936909167fe1b Mon Sep 17 00:00:00 2001 From: 21092004Goda Date: Sat, 20 Dec 2025 00:10:08 +0300 Subject: [PATCH 09/41] =?UTF-8?q?=D0=91=D0=B0=D0=BD=20=D0=B8=20=D0=B2?= =?UTF-8?q?=D1=81=D0=B5.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Conflicts: # src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt --- .../ru/quipy/apigateway/APIController.kt | 3 +- .../payments/config/PaymentAccountsConfig.kt | 89 ++++++++++++++++--- .../ru/quipy/payments/logic/OrderPayer.kt | 25 ++---- src/main/resources/application.properties | 7 +- test-local-run.http | 36 ++++++-- 5 files changed, 124 insertions(+), 36 deletions(-) diff --git a/src/main/kotlin/ru/quipy/apigateway/APIController.kt b/src/main/kotlin/ru/quipy/apigateway/APIController.kt index c07d96b7a..8f81acc8d 100644 --- a/src/main/kotlin/ru/quipy/apigateway/APIController.kt +++ b/src/main/kotlin/ru/quipy/apigateway/APIController.kt @@ -16,8 +16,7 @@ import java.util.* class APIController( private val orderRepository: OrderRepository, private val orderPayer: OrderPayer, - @field:Qualifier("parallelLimiter") - private val rateLimiter: RateLimiter = LeakingBucketRateLimiter(8, Duration.ofSeconds(1), 30) + private val rateLimiter: LeakingBucketRateLimiter ) { val logger: Logger = LoggerFactory.getLogger(APIController::class.java) diff --git a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt index e10bd805a..2e2014c5c 100644 --- a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt +++ b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt @@ -7,6 +7,10 @@ import io.micrometer.core.instrument.MeterRegistry import org.springframework.beans.factory.annotation.Value import org.springframework.context.annotation.Bean import org.springframework.context.annotation.Configuration +import ru.quipy.common.utils.CallerBlockingRejectedExecutionHandler +import ru.quipy.common.utils.LeakingBucketRateLimiter +import ru.quipy.common.utils.NamedThreadFactory +import ru.quipy.common.utils.SlidingWindowRateLimiter import ru.quipy.core.EventSourcingService import ru.quipy.payments.api.PaymentAggregate import ru.quipy.payments.logic.PaymentAccountProperties @@ -17,8 +21,12 @@ import java.net.URI import java.net.http.HttpClient import java.net.http.HttpRequest import java.net.http.HttpResponse +import java.time.Duration import java.util.* +import java.util.concurrent.LinkedBlockingQueue import java.util.concurrent.Semaphore +import java.util.concurrent.ThreadPoolExecutor +import java.util.concurrent.TimeUnit @Configuration @@ -28,6 +36,8 @@ class PaymentAccountsConfig { private val mapper = ObjectMapper().registerKotlinModule().registerModules(JavaTimeModule()) } + var rateCheckWindow: Duration = Duration.ofMillis(1000) + @Value("\${payment.hostPort}") lateinit var paymentProviderHostPort: String @@ -41,23 +51,82 @@ class PaymentAccountsConfig { lateinit var allowedAccounts: List @Bean - fun accountAdapters( - paymentService: EventSourcingService, - meterRegistry: MeterRegistry, - ): List { + fun warehouseIfUnfinishedWork( + accountProperties: List, + @Value("\${payment.maximumPoolSize}") + maximumPoolSize: Int + ): ThreadPoolExecutor { + val poolSize = 100 + val temp = ThreadPoolExecutor( + poolSize, + poolSize, + 0, + TimeUnit.MILLISECONDS, + LinkedBlockingQueue(maximumPoolSize), + NamedThreadFactory("payment-submission-executor"), + CallerBlockingRejectedExecutionHandler() + ) + return temp + } + + @Bean + fun parallelLimiter( + accountProperties: List + ): Semaphore = + Semaphore(accountProperties.minOf { it.parallelRequests }) + + @Bean + fun burstRateLimiter( + accountProperties: List, + @Value("#{'\${payment.processingTimeMillis}'.split(',')}") + processingTimeMillis: Int + ): LeakingBucketRateLimiter = + LeakingBucketRateLimiter( + rate = accountProperties.minOf { it.rateLimitPerSec }.toLong(), + window = rateCheckWindow, + bucketSize = (((processingTimeMillis - accountProperties.maxOf { it.averageProcessingTime } + .toMillis()) / accountProperties.maxOf { it.averageProcessingTime } + .toMillis()) * accountProperties.minOf { it.rateLimitPerSec }.toLong()).toInt() + ) + + @Bean + fun smoothOutIncoming( + accountProperties: List, + ): SlidingWindowRateLimiter = + SlidingWindowRateLimiter( + rate = accountProperties.minOf { it.rateLimitPerSec }.toLong(), + window = rateCheckWindow, + ) + + + @Bean + fun accountProperties(): List { val request = HttpRequest.newBuilder() .uri(URI("http://${paymentProviderHostPort}/external/accounts?serviceName=$serviceName&token=$token")) + .timeout(Duration.ofSeconds(10)) .GET() .build() val resp = javaClient.send(request, HttpResponse.BodyHandlers.ofString()) - println("\nPayment accounts list:") - return mapper.readValue>( + val accounts: List = mapper.readValue>( resp.body(), - mapper.typeFactory.constructCollectionType(List::class.java, PaymentAccountProperties::class.java) - ) - .filter { it.accountName in allowedAccounts } + mapper.typeFactory.constructCollectionType( + List::class.java, + PaymentAccountProperties::class.java + ) + ).filter { it.accountName in allowedAccounts } + + return accounts + } + + @Bean + fun accountAdapters( + paymentService: EventSourcingService, + meterRegistry: MeterRegistry, + accountProperties: List + ): List { + return accountProperties .map { it.copy(enabled = true) } .onEach(::println) .map { @@ -67,7 +136,7 @@ class PaymentAccountsConfig { paymentProviderHostPort, token, meterRegistry, - Semaphore(it.parallelRequests) + parallelLimiter(accountProperties) ) } } diff --git a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt index 359aa0a59..7c943d0ca 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt @@ -3,6 +3,7 @@ package ru.quipy.payments.logic import org.slf4j.Logger import org.slf4j.LoggerFactory import org.springframework.beans.factory.annotation.Autowired +import org.springframework.beans.factory.annotation.Qualifier import org.springframework.stereotype.Service import ru.quipy.common.utils.CallerBlockingRejectedExecutionHandler import ru.quipy.common.utils.NamedThreadFactory @@ -18,17 +19,15 @@ import java.util.concurrent.ThreadPoolExecutor import java.util.concurrent.TimeUnit @Service -class OrderPayer { +class OrderPayer( + val rateLimiter: SlidingWindowRateLimiter, + @Qualifier("warehouseIfUnfinishedWork") + val paymentExecutor: ThreadPoolExecutor +) { companion object { val logger: Logger = LoggerFactory.getLogger(OrderPayer::class.java) } - private val rateLimit: SlidingWindowRateLimiter by lazy { - SlidingWindowRateLimiter( - rate = 8, - window = Duration.ofMillis(1000), - ) - } @Autowired private lateinit var paymentESService: EventSourcingService @@ -36,19 +35,9 @@ class OrderPayer { @Autowired private lateinit var paymentService: PaymentService - private val paymentExecutor = ThreadPoolExecutor( - 16, - 16, - 0, - TimeUnit.MILLISECONDS, - LinkedBlockingQueue(8_000), - NamedThreadFactory("payment-submission-executor"), - CallerBlockingRejectedExecutionHandler() - ) - fun processPayment(orderId: UUID, amount: Int, paymentId: UUID, deadline: Long): Long { val createdAt = System.currentTimeMillis() - if (!rateLimit.tickBlocking(deadline- System.currentTimeMillis())) { + if (!rateLimiter.tickBlocking(deadline- System.currentTimeMillis())) { throw TooManyRequestsException(deadline) } paymentExecutor.submit { diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 3170b6dad..0b511f2c3 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -26,6 +26,11 @@ payment.service-name=${PAYMENT_SERVICE_NAME} payment.token=${PAYMENT_TOKEN} # payment.accounts=${PAYMENT_ACCOUNTS:acc-12,acc-20} # payment.accounts=${PAYMENT_ACCOUNTS:acc-3} -payment.accounts=${PAYMENT_ACCOUNTS:acc-7} +#payment.accounts=${PAYMENT_ACCOUNTS:acc-7} # payment.accounts=${PAYMENT_ACCOUNTS:acc-18} payment.hostPort=${PAYMENT_HOST:localhost}:${PAYMENT_PORT:1234} + +# For 8 case +payment.accounts=${PAYMENT_ACCOUNTS:acc-9} +payment.processingTimeMillis=20000 +payment.maximumPoolSize=8000 diff --git a/test-local-run.http b/test-local-run.http index dc8dbeedf..9cde39e20 100644 --- a/test-local-run.http +++ b/test-local-run.http @@ -1,15 +1,41 @@ -### Run test locally +### Test 6 run... POST http://localhost:1234/test/run Content-Type: application/json { "serviceName": "{{serviceName}}", "token": "{{token}}", - "ratePerSecond": 1, - "testCount": 100, - "processingTimeMillis": 80000 + "ratePerSecond": 100, + "testCount": 5000, + "processingTimeMillis": 20000 } +### Test 7 run... +POST http://localhost:1234/test/run +Content-Type: application/json + +{ + "serviceName": "{{serviceName}}", + "token": "{{token}}", + "ratePerSecond": 100, + "testCount": 5000, + "processingTimeMillis": 20000 +} + +### Test 8 run... +POST http://localhost:1234/test/run +Content-Type: application/json + +{ + "serviceName": "{{serviceName}}", + "token": "{{token}}", + "ratePerSecond": 100, + "testCount": 5000, + "processingTimeMillis": 20000 +} + + + ### Stop running test to save time and resources # @timeout 120 -POST http://localhost:4321/test/stop/"{{serviceName}}" \ No newline at end of file +POST http://localhost:4321/test/stop/"{{serviceName}}" From c40fdda6d0721934e08907cb1e20031f36f8bbc5 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Fri, 26 Dec 2025 20:39:43 +0300 Subject: [PATCH 10/41] hotfix --- .../payments/config/PaymentAccountsConfig.kt | 6 +- .../logic/PaymentExternalServiceImpl.kt | 290 +++++++++++------- src/main/resources/application.properties | 7 +- 3 files changed, 183 insertions(+), 120 deletions(-) diff --git a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt index 2e2014c5c..70f96132f 100644 --- a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt +++ b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt @@ -124,7 +124,8 @@ class PaymentAccountsConfig { fun accountAdapters( paymentService: EventSourcingService, meterRegistry: MeterRegistry, - accountProperties: List + accountProperties: List, + rateLimiter: SlidingWindowRateLimiter ): List { return accountProperties .map { it.copy(enabled = true) } @@ -136,7 +137,8 @@ class PaymentAccountsConfig { paymentProviderHostPort, token, meterRegistry, - parallelLimiter(accountProperties) + parallelLimiter(accountProperties), + rateLimiter ) } } diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index 3ef90a416..3584b840d 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -3,20 +3,26 @@ package ru.quipy.payments.logic import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.kotlin.registerKotlinModule import io.micrometer.core.instrument.MeterRegistry -import okhttp3.OkHttpClient -import okhttp3.Request -import okhttp3.RequestBody -import okio.IOException +import okio.EOFException import org.slf4j.LoggerFactory +import ru.quipy.common.utils.SlidingWindowRateLimiter import ru.quipy.core.EventSourcingService +import ru.quipy.exceptions.TooManyRequestsException import ru.quipy.payments.api.PaymentAggregate import java.io.InterruptedIOException import java.net.SocketTimeoutException +import java.net.URI +import java.net.http.HttpClient +import java.net.http.HttpRequest +import java.net.http.HttpResponse import java.time.Duration import java.util.* +import java.util.concurrent.Executors +import java.util.concurrent.ScheduledExecutorService import java.util.concurrent.Semaphore import java.util.concurrent.TimeUnit import kotlin.math.pow +import kotlin.random.Random // Advice: always treat time as a Duration @@ -26,155 +32,209 @@ class PaymentExternalSystemAdapterImpl( private val paymentProviderHostPort: String, private val token: String, meterRegistry: MeterRegistry, - private val parallelLimiter: Semaphore + private val parallelLimiter: Semaphore, + private val rateLimiter: SlidingWindowRateLimiter ) : PaymentExternalSystemAdapter { - companion object { val logger = LoggerFactory.getLogger(PaymentExternalSystemAdapter::class.java) - - val emptyBody = RequestBody.create(null, ByteArray(0)) val mapper = ObjectMapper().registerKotlinModule() } - private val timer = meterRegistry.timer("payment.external.system.request.latency", "accountName", properties.accountName) - private val retryCounter = meterRegistry.counter("payment.external.retry.count", "accountName", properties.accountName) + private val time_95_percentile = 20_000L + + private val retryExecutor: ScheduledExecutorService = Executors.newScheduledThreadPool(properties.parallelRequests) + + // 2025-11-20T20:30:35.780+03:00 INFO 56644 --- [alhost:1234/...] ru.quipy.core.EventSourcingService : Optimistic lock exception. Failed to save event records id: [7dca693e-e811-4b7f-8bce-23e13d952c04-4] + private val startedRequests = + meterRegistry.counter("payment.processing.started", "accountName", properties.accountName) + private val requestsRetried = + meterRegistry.counter("payment.request.retried", "accountName", properties.accountName) + private val timer = + meterRegistry.timer("payment.external.system.request.latency", "accountName", properties.accountName) private val serviceName = properties.serviceName private val accountName = properties.accountName private val requestAverageProcessingTime = properties.averageProcessingTime private val rateLimitPerSec = properties.rateLimitPerSec private val parallelRequests = properties.parallelRequests - private val client = OkHttpClient.Builder() - .callTimeout(1100, TimeUnit.MILLISECONDS).build() + + + private val httpClient = HttpClient + .newBuilder() + .executor(Executors.newFixedThreadPool(parallelRequests)) + .version(HttpClient.Version.HTTP_2) + .build() override fun performPaymentAsync(paymentId: UUID, amount: Int, paymentStartedAt: Long, deadline: Long) { logger.warn("[$accountName] Submitting payment request for payment $paymentId") - val transactionId = UUID.randomUUID() // Вне зависимости от исхода оплаты важно отметить что она была отправлена. // Это требуется сделать ВО ВСЕХ СЛУЧАЯХ, поскольку эта информация используется сервисом тестирования. - paymentESService.update(paymentId) { - it.logSubmission(success = true, transactionId, now(), Duration.ofMillis(now() - paymentStartedAt)) - } - - logger.info("[$accountName] Submit: $paymentId , txId: $transactionId") - - var retryCount = 0 - while (true) { - val remaining = deadline - now() - if (remaining <= 0) { + try { paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "deadline expired") + it.logSubmission( + success = true, + transactionId, + now(), + Duration.ofMillis(now() - paymentStartedAt) + ) } - return + logger.info("[$accountName] Log submission recorded for $paymentId") + } catch (e: Exception) { + logger.error("[$accountName] Failed to record log submission for $paymentId", e) } - if (!parallelLimiter.tryAcquire(remaining, TimeUnit.MILLISECONDS)) { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "parallel limiter timeout") - } - return + logger.info("[$accountName] Submit: $paymentId , txId: $transactionId") + + val remaining = deadline - now() + val minRequiredTime = requestAverageProcessingTime.toMillis() + if (remaining < minRequiredTime) { + logger.warn("[$accountName] Not enough time for payment $paymentId: ${remaining}ms remaining, need ${minRequiredTime}ms") + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "not enough time") } + val retryAfterMs = minRequiredTime - remaining + Random.nextLong(100) + throw TooManyRequestsException(retryAfterMs) + } - val timeBeforeCall = now() - var shouldRetry = false - try { - val perCallTimeoutMs = remaining.coerceAtMost(1100) - val request = Request.Builder() - .url( - "http://$paymentProviderHostPort/external/process" + - "?serviceName=$serviceName&token=$token&accountName=$accountName" + - "&transactionId=$transactionId&paymentId=$paymentId&amount=$amount" - ) - .post(emptyBody) - .build() + val requestTimeout = minOf( + time_95_percentile, + requestAverageProcessingTime.toMillis() + ).coerceAtLeast(100) - val call = client.newCall(request) - call.timeout().timeout(perCallTimeoutMs, TimeUnit.MILLISECONDS) + val request = HttpRequest.newBuilder() + .uri(URI("http://$paymentProviderHostPort/external/process?serviceName=$serviceName&token=$token&accountName=$accountName&transactionId=$transactionId&paymentId=$paymentId&amount=$amount")) + .timeout(Duration.ofMillis(requestTimeout)) + .POST(HttpRequest.BodyPublishers.noBody()) + .build() - call.execute().use { response -> - val rawBody = response.body?.string() - val parsed = try { - mapper.readValue(rawBody, ExternalSysResponse::class.java) - } catch (ex: Exception) { - ExternalSysResponse(transactionId.toString(), paymentId.toString(), false, ex.message) - } + completeAction(0, request, paymentId, transactionId, deadline) + } - shouldRetry = !parsed.result && (response.code == 429 || response.code >= 500) + override fun price() = properties.price - paymentESService.update(paymentId) { - it.logProcessing(parsed.result, now(), transactionId, parsed.message) - } + override fun isEnabled() = properties.enabled - if (parsed.result) { - return - } - } - } catch (e: SocketTimeoutException) { - logger.warn("[$accountName] attempt ${retryCount + 1} timeout: $paymentId", e) - shouldRetry = true - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "socket timeout") - } - } catch (e: InterruptedIOException) { - logger.warn("[$accountName] interrupted: $paymentId", e) - shouldRetry = true + override fun name() = properties.accountName - // Здесь мы обновляем состояние оплаты в зависимости от результата в базе данных оплат. - // Это требуется сделать ВО ВСЕХ ИСХОДАХ (успешная оплата / неуспешная / ошибочная ситуация) - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "interrupted IO") - } - } catch (e: IOException) { - logger.warn("[$accountName] io error: $paymentId", e) - shouldRetry = true - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "io exception") - } - } catch (e: Exception) { - logger.error("[$accountName] non-retriable error: $paymentId", e) - shouldRetry = false - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, e.message) - } - } finally { - timer.record(now() - timeBeforeCall, TimeUnit.MILLISECONDS) + fun completeAction( + retryCount: Long, + request: HttpRequest, + paymentId: UUID, + transactionId: UUID, + deadline: Long + ) { + val timeBeforeCall = now() + parallelLimiter.acquire() + startedRequests.increment() + httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString()) + .whenComplete { response, throwable -> parallelLimiter.release() + if (throwable != null) { + val e = throwable.cause + var isRetriable = true + when (throwable.cause) { + is SocketTimeoutException -> { + logger.warn("[$accountName] attempt ${retryCount + 1} timeout: $paymentId", e) + } + + is InterruptedIOException -> { + logger.warn("[$accountName] interrupted: $paymentId", e) + } + + is EOFException -> { + logger.warn("[$accountName] eof exception in: $paymentId", e) + } + + else -> { + logger.warn("[$accountName] io error: $paymentId", e) + isRetriable = false + } + } + + if (retryCount + 1 >= 3) { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Max attempts reached") + } + } else { + val backoff = ((2.0.pow(retryCount.toDouble()) * 25).toLong() + Random.nextLong(10)) + val capped = backoff.coerceAtMost(deadline - now() - 5) + if (capped <= 0) { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Deadline expired") + } + } else { + if (isRetriable) { + scheduleRetry( + retryCount, request, paymentId, transactionId, deadline, capped + ) + } else { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Non-retriable exception") + } + } + } + } + } else { + try { + logger.warn("Free space in semaphore: {}", parallelLimiter.availablePermits()) + logger.info( + "success in callback for payment: {}, retry count: {}, in time: {}", + paymentId, + retryCount, + now() + ) + val rawBody = response.body() + val parsed = try { + mapper.readValue(rawBody, ExternalSysResponse::class.java) + } catch (ex: Exception) { + ExternalSysResponse(transactionId.toString(), paymentId.toString(), false, ex.message) + } + + paymentESService.update(paymentId) { + it.logProcessing(parsed.result, now(), transactionId, parsed.message) + } + timer.record(now() - timeBeforeCall, TimeUnit.MILLISECONDS) + } catch (e: Exception) { + logger.error("[$accountName] Error processing payment $paymentId", e) + } + } } + } - if (!shouldRetry) { - return + private fun scheduleRetry( + retryCount: Long, request: HttpRequest, paymentId: UUID, + transactionId: UUID, deadline: Long, delay: Long + ) { + requestsRetried.increment() + retryExecutor.scheduleWithFixedDelay({ + while (!rateLimiter.tick() && now() < deadline) { } + if (now() >= deadline) + throw TooManyRequestsException(10) - retryCount++ - if (retryCount >= 3) { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "Max attempts reached") - } - return - } - val backoff = ((2.0.pow(retryCount.toDouble()) * 25).toLong() + kotlin.random.Random.nextLong(10)) - val capped = backoff.coerceAtMost(deadline - now() - 5) - if (capped <= 0) { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "Deadline expired") + logger.info("Completing retry. All retry count - {}", requestsRetried.count()) + val remainingTime = deadline - now() + if (remainingTime < requestAverageProcessingTime.toMillis()) { + try { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Not enough time for retry") + } + } catch (e: Exception) { + logger.error("[$accountName] Failed to record retry failure for $paymentId", e) } - return + } else { + val newRequestTimeout = remainingTime.coerceIn(time_95_percentile, requestAverageProcessingTime.toMillis() * 2) + val newRequest = HttpRequest.newBuilder() + .uri(request.uri()) + .timeout(Duration.ofMillis(newRequestTimeout)) + .POST(HttpRequest.BodyPublishers.noBody()) + .build() + completeAction(retryCount + 1, newRequest, paymentId, transactionId, deadline) } - - retryCounter.increment() - Thread.sleep(capped) - } - } - - override fun price() = properties.price - - override fun isEnabled() = properties.enabled - - override fun name() = properties.accountName - fun timeToDead(deadline: Long): Long { - return deadline - now() + }, 0, delay, TimeUnit.MILLISECONDS) } } + fun now() = System.currentTimeMillis() \ No newline at end of file diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 0b511f2c3..475ed1bd1 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -31,6 +31,7 @@ payment.token=${PAYMENT_TOKEN} payment.hostPort=${PAYMENT_HOST:localhost}:${PAYMENT_PORT:1234} # For 8 case -payment.accounts=${PAYMENT_ACCOUNTS:acc-9} -payment.processingTimeMillis=20000 -payment.maximumPoolSize=8000 +#payment.accounts=${PAYMENT_ACCOUNTS:acc-9} +payment.accounts=${PAYMENT_ACCOUNTS:acc-12} +payment.processingTimeMillis=50000 +payment.maximumPoolSize=50_000 From 3bd2333721438f08c5ca05d23275e936a2e7945e Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Fri, 26 Dec 2025 20:56:30 +0300 Subject: [PATCH 11/41] hotfix --- .../kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt index 70f96132f..6d1120032 100644 --- a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt +++ b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt @@ -53,8 +53,6 @@ class PaymentAccountsConfig { @Bean fun warehouseIfUnfinishedWork( accountProperties: List, - @Value("\${payment.maximumPoolSize}") - maximumPoolSize: Int ): ThreadPoolExecutor { val poolSize = 100 val temp = ThreadPoolExecutor( @@ -62,7 +60,7 @@ class PaymentAccountsConfig { poolSize, 0, TimeUnit.MILLISECONDS, - LinkedBlockingQueue(maximumPoolSize), + LinkedBlockingQueue(50_000), NamedThreadFactory("payment-submission-executor"), CallerBlockingRejectedExecutionHandler() ) From 75b0854e02ddf63bc0143ddb9b5392a8bd615800 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Tue, 6 Jan 2026 11:50:16 +0300 Subject: [PATCH 12/41] refactoring: added logging info for payment config --- .../payments/config/PaymentAccountsConfig.kt | 50 ++++++++++++------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt index 6d1120032..a495d8459 100644 --- a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt +++ b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt @@ -4,6 +4,8 @@ import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule import com.fasterxml.jackson.module.kotlin.registerKotlinModule import io.micrometer.core.instrument.MeterRegistry +import org.slf4j.Logger +import org.slf4j.LoggerFactory import org.springframework.beans.factory.annotation.Value import org.springframework.context.annotation.Bean import org.springframework.context.annotation.Configuration @@ -28,13 +30,13 @@ import java.util.concurrent.Semaphore import java.util.concurrent.ThreadPoolExecutor import java.util.concurrent.TimeUnit - @Configuration class PaymentAccountsConfig { companion object { private val javaClient = HttpClient.newBuilder().build() private val mapper = ObjectMapper().registerKotlinModule().registerModules(JavaTimeModule()) } + val logger: Logger = LoggerFactory.getLogger(PaymentAccountsConfig::class.java) var rateCheckWindow: Duration = Duration.ofMillis(1000) @@ -54,48 +56,60 @@ class PaymentAccountsConfig { fun warehouseIfUnfinishedWork( accountProperties: List, ): ThreadPoolExecutor { - val poolSize = 100 - val temp = ThreadPoolExecutor( - poolSize, - poolSize, + val corePoolSize = 100 + val maximumPoolSize = 100 + val queueSize = 50_000 + val keepAliveTime = 0 + logger.info("Thread Pool Properties: core pool size - {}, maximum pool size - {}, queue size - {}, keepAliveTime - {}", corePoolSize, maximumPoolSize, queueSize, keepAliveTime) + return ThreadPoolExecutor( + corePoolSize, + maximumPoolSize, 0, TimeUnit.MILLISECONDS, LinkedBlockingQueue(50_000), NamedThreadFactory("payment-submission-executor"), CallerBlockingRejectedExecutionHandler() ) - return temp } @Bean fun parallelLimiter( accountProperties: List - ): Semaphore = - Semaphore(accountProperties.minOf { it.parallelRequests }) + ): Semaphore { + val parallelRequests = accountProperties.minOf { it.parallelRequests } + logger.info("Semaphore Properties: permits count - {}", parallelRequests) + return Semaphore(parallelRequests) + } @Bean fun burstRateLimiter( accountProperties: List, @Value("#{'\${payment.processingTimeMillis}'.split(',')}") processingTimeMillis: Int - ): LeakingBucketRateLimiter = - LeakingBucketRateLimiter( - rate = accountProperties.minOf { it.rateLimitPerSec }.toLong(), + ): LeakingBucketRateLimiter { + val bucketSize = (((processingTimeMillis - accountProperties.maxOf { it.averageProcessingTime } + .toMillis()) / accountProperties.maxOf { it.averageProcessingTime } + .toMillis()) * accountProperties.minOf { it.rateLimitPerSec }.toLong()).toInt() + val rate = accountProperties.minOf { it.rateLimitPerSec }.toLong() + logger.info("Burst Rate Limiter Properties: bucket size - {}, rate - {}", bucketSize, rate) + return LeakingBucketRateLimiter( + rate = rate, window = rateCheckWindow, - bucketSize = (((processingTimeMillis - accountProperties.maxOf { it.averageProcessingTime } - .toMillis()) / accountProperties.maxOf { it.averageProcessingTime } - .toMillis()) * accountProperties.minOf { it.rateLimitPerSec }.toLong()).toInt() + bucketSize = bucketSize ) + } @Bean fun smoothOutIncoming( accountProperties: List, - ): SlidingWindowRateLimiter = - SlidingWindowRateLimiter( - rate = accountProperties.minOf { it.rateLimitPerSec }.toLong(), + ): SlidingWindowRateLimiter { + val rate = accountProperties.minOf { it.rateLimitPerSec }.toLong() + logger.info("Incoming Rate Limiter Properties: rate - {}", rate) + return SlidingWindowRateLimiter( + rate = rate, window = rateCheckWindow, ) - + } @Bean fun accountProperties(): List { From 128318dc553a6954d603d9423c8e2168ab9e4410 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Tue, 6 Jan 2026 12:02:40 +0300 Subject: [PATCH 13/41] refactoring: refactored 429 throwing --- src/main/kotlin/ru/quipy/apigateway/APIController.kt | 6 ++---- .../ru/quipy/common/utils/LeakingBucketRateLimiter.kt | 10 ++++++++++ .../quipy/payments/logic/PaymentExternalServiceImpl.kt | 2 -- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/main/kotlin/ru/quipy/apigateway/APIController.kt b/src/main/kotlin/ru/quipy/apigateway/APIController.kt index 8f81acc8d..e87e6bc8e 100644 --- a/src/main/kotlin/ru/quipy/apigateway/APIController.kt +++ b/src/main/kotlin/ru/quipy/apigateway/APIController.kt @@ -2,14 +2,12 @@ package ru.quipy.apigateway import org.slf4j.Logger import org.slf4j.LoggerFactory -import org.springframework.beans.factory.annotation.Qualifier import org.springframework.web.bind.annotation.* import ru.quipy.common.utils.LeakingBucketRateLimiter -import ru.quipy.common.utils.RateLimiter import ru.quipy.exceptions.TooManyRequestsException import ru.quipy.orders.repository.OrderRepository import ru.quipy.payments.logic.OrderPayer -import java.time.Duration +import ru.quipy.payments.logic.now import java.util.* @RestController @@ -58,7 +56,7 @@ class APIController( @PostMapping("/orders/{orderId}/payment") fun payOrder(@PathVariable orderId: UUID, @RequestParam deadline: Long): PaymentSubmissionDto { - if (!rateLimiter.tick()) { + if (!rateLimiter.tickBlocking(deadline - now())) { throw TooManyRequestsException(deadline) } val paymentId = UUID.randomUUID() diff --git a/src/main/kotlin/ru/quipy/common/utils/LeakingBucketRateLimiter.kt b/src/main/kotlin/ru/quipy/common/utils/LeakingBucketRateLimiter.kt index 383c047f7..0423d78d9 100644 --- a/src/main/kotlin/ru/quipy/common/utils/LeakingBucketRateLimiter.kt +++ b/src/main/kotlin/ru/quipy/common/utils/LeakingBucketRateLimiter.kt @@ -6,6 +6,7 @@ import kotlinx.coroutines.delay import kotlinx.coroutines.launch import org.slf4j.Logger import org.slf4j.LoggerFactory +import ru.quipy.payments.logic.now import java.time.Duration import java.util.concurrent.Executors import java.util.concurrent.LinkedBlockingQueue @@ -22,6 +23,15 @@ class LeakingBucketRateLimiter( return queue.offer(1) } + fun tickBlocking(timeout: Long): Boolean { + val startedAt = now() + var checkBlocking = tick() + while (now() - startedAt < timeout && checkBlocking) { + checkBlocking = tick() + } + return checkBlocking + } + private val releaseJob = rateLimiterScope.launch { while (true) { delay(window.toMillis()) diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index 3584b840d..3d364d2a0 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -94,8 +94,6 @@ class PaymentExternalSystemAdapterImpl( paymentESService.update(paymentId) { it.logProcessing(false, now(), transactionId, "not enough time") } - val retryAfterMs = minRequiredTime - remaining + Random.nextLong(100) - throw TooManyRequestsException(retryAfterMs) } val requestTimeout = minOf( From 9e7d6519293b5d2b3b73ca663e156623f224cc77 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Tue, 6 Jan 2026 12:09:55 +0300 Subject: [PATCH 14/41] hotfix --- .../kotlin/ru/quipy/common/utils/LeakingBucketRateLimiter.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/kotlin/ru/quipy/common/utils/LeakingBucketRateLimiter.kt b/src/main/kotlin/ru/quipy/common/utils/LeakingBucketRateLimiter.kt index 0423d78d9..3f4c2cf97 100644 --- a/src/main/kotlin/ru/quipy/common/utils/LeakingBucketRateLimiter.kt +++ b/src/main/kotlin/ru/quipy/common/utils/LeakingBucketRateLimiter.kt @@ -26,7 +26,7 @@ class LeakingBucketRateLimiter( fun tickBlocking(timeout: Long): Boolean { val startedAt = now() var checkBlocking = tick() - while (now() - startedAt < timeout && checkBlocking) { + while (now() - startedAt < timeout && !checkBlocking) { checkBlocking = tick() } return checkBlocking From a96ed896a21938e69db588ad76da3fab5d1df2b9 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Tue, 6 Jan 2026 12:29:28 +0300 Subject: [PATCH 15/41] fix: fixed from scheduleWithFixedDelay retry on schedule retry --- .../quipy/payments/logic/PaymentExternalServiceImpl.kt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index 3d364d2a0..6610d32c3 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -206,11 +206,11 @@ class PaymentExternalSystemAdapterImpl( transactionId: UUID, deadline: Long, delay: Long ) { requestsRetried.increment() - retryExecutor.scheduleWithFixedDelay({ - while (!rateLimiter.tick() && now() < deadline) { - } - if (now() >= deadline) + retryExecutor.schedule({ + + if (rateLimiter.tickBlocking(timeout = deadline - now() - time_95_percentile)) { throw TooManyRequestsException(10) + } logger.info("Completing retry. All retry count - {}", requestsRetried.count()) val remainingTime = deadline - now() @@ -231,7 +231,7 @@ class PaymentExternalSystemAdapterImpl( .build() completeAction(retryCount + 1, newRequest, paymentId, transactionId, deadline) } - }, 0, delay, TimeUnit.MILLISECONDS) + }, delay, TimeUnit.MILLISECONDS) } } From 7d745eb669063f3b3cd01fc0fe91057d2edcfba0 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Tue, 6 Jan 2026 12:39:16 +0300 Subject: [PATCH 16/41] fix: removed burst checking --- src/main/kotlin/ru/quipy/apigateway/APIController.kt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/main/kotlin/ru/quipy/apigateway/APIController.kt b/src/main/kotlin/ru/quipy/apigateway/APIController.kt index e87e6bc8e..12d55502d 100644 --- a/src/main/kotlin/ru/quipy/apigateway/APIController.kt +++ b/src/main/kotlin/ru/quipy/apigateway/APIController.kt @@ -4,10 +4,8 @@ import org.slf4j.Logger import org.slf4j.LoggerFactory import org.springframework.web.bind.annotation.* import ru.quipy.common.utils.LeakingBucketRateLimiter -import ru.quipy.exceptions.TooManyRequestsException import ru.quipy.orders.repository.OrderRepository import ru.quipy.payments.logic.OrderPayer -import ru.quipy.payments.logic.now import java.util.* @RestController @@ -56,9 +54,6 @@ class APIController( @PostMapping("/orders/{orderId}/payment") fun payOrder(@PathVariable orderId: UUID, @RequestParam deadline: Long): PaymentSubmissionDto { - if (!rateLimiter.tickBlocking(deadline - now())) { - throw TooManyRequestsException(deadline) - } val paymentId = UUID.randomUUID() val order = orderRepository.findById(orderId)?.let { orderRepository.save(it.copy(status = OrderStatus.PAYMENT_IN_PROGRESS)) From ecb00745141239416c9c9c43762e98c98bba3e61 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Tue, 6 Jan 2026 12:58:14 +0300 Subject: [PATCH 17/41] refactoring: refactored timeout and ticking for complete action --- .../ru/quipy/payments/logic/OrderPayer.kt | 10 --------- .../logic/PaymentExternalServiceImpl.kt | 22 +++++++++---------- 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt index 7c943d0ca..b8a2cf5bc 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt @@ -5,18 +5,11 @@ import org.slf4j.LoggerFactory import org.springframework.beans.factory.annotation.Autowired import org.springframework.beans.factory.annotation.Qualifier import org.springframework.stereotype.Service -import ru.quipy.common.utils.CallerBlockingRejectedExecutionHandler -import ru.quipy.common.utils.NamedThreadFactory import ru.quipy.common.utils.SlidingWindowRateLimiter import ru.quipy.core.EventSourcingService -import ru.quipy.exceptions.RateLimitWasBreached -import ru.quipy.exceptions.TooManyRequestsException import ru.quipy.payments.api.PaymentAggregate -import java.time.Duration import java.util.* -import java.util.concurrent.LinkedBlockingQueue import java.util.concurrent.ThreadPoolExecutor -import java.util.concurrent.TimeUnit @Service class OrderPayer( @@ -37,9 +30,6 @@ class OrderPayer( fun processPayment(orderId: UUID, amount: Int, paymentId: UUID, deadline: Long): Long { val createdAt = System.currentTimeMillis() - if (!rateLimiter.tickBlocking(deadline- System.currentTimeMillis())) { - throw TooManyRequestsException(deadline) - } paymentExecutor.submit { val createdEvent = paymentESService.create { it.create( diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index 6610d32c3..aaf50fe51 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -3,12 +3,13 @@ package ru.quipy.payments.logic import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.kotlin.registerKotlinModule import io.micrometer.core.instrument.MeterRegistry -import okio.EOFException import org.slf4j.LoggerFactory import ru.quipy.common.utils.SlidingWindowRateLimiter import ru.quipy.core.EventSourcingService import ru.quipy.exceptions.TooManyRequestsException import ru.quipy.payments.api.PaymentAggregate +import java.io.EOFException +import java.io.IOException import java.io.InterruptedIOException import java.net.SocketTimeoutException import java.net.URI @@ -96,14 +97,9 @@ class PaymentExternalSystemAdapterImpl( } } - val requestTimeout = minOf( - time_95_percentile, - requestAverageProcessingTime.toMillis() - ).coerceAtLeast(100) - val request = HttpRequest.newBuilder() .uri(URI("http://$paymentProviderHostPort/external/process?serviceName=$serviceName&token=$token&accountName=$accountName&transactionId=$transactionId&paymentId=$paymentId&amount=$amount")) - .timeout(Duration.ofMillis(requestTimeout)) + .timeout(Duration.ofMillis(time_95_percentile)) .POST(HttpRequest.BodyPublishers.noBody()) .build() @@ -125,6 +121,9 @@ class PaymentExternalSystemAdapterImpl( ) { val timeBeforeCall = now() parallelLimiter.acquire() + if (rateLimiter.tickBlocking(timeout = deadline - now() - time_95_percentile)) { + throw TooManyRequestsException(10) + } startedRequests.increment() httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString()) .whenComplete { response, throwable -> @@ -145,6 +144,10 @@ class PaymentExternalSystemAdapterImpl( logger.warn("[$accountName] eof exception in: $paymentId", e) } + is IOException -> { + logger.warn("[$accountName] received stream exception in: $paymentId", e) + } + else -> { logger.warn("[$accountName] io error: $paymentId", e) isRetriable = false @@ -207,11 +210,6 @@ class PaymentExternalSystemAdapterImpl( ) { requestsRetried.increment() retryExecutor.schedule({ - - if (rateLimiter.tickBlocking(timeout = deadline - now() - time_95_percentile)) { - throw TooManyRequestsException(10) - } - logger.info("Completing retry. All retry count - {}", requestsRetried.count()) val remainingTime = deadline - now() if (remainingTime < requestAverageProcessingTime.toMillis()) { From cd5918e4465e9cce25fbac6492d69f3492977d99 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Tue, 6 Jan 2026 13:12:12 +0300 Subject: [PATCH 18/41] fix: fixed tickBlocking --- .../ru/quipy/payments/logic/PaymentExternalServiceImpl.kt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index aaf50fe51..bceb1ed77 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -121,7 +121,8 @@ class PaymentExternalSystemAdapterImpl( ) { val timeBeforeCall = now() parallelLimiter.acquire() - if (rateLimiter.tickBlocking(timeout = deadline - now() - time_95_percentile)) { + if (!rateLimiter.tickBlocking(timeout = deadline - now() - time_95_percentile)) { + parallelLimiter.release() throw TooManyRequestsException(10) } startedRequests.increment() From 100d0042d6dc822d98c338f2699be5721f7a7114 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Sat, 28 Feb 2026 20:36:25 +0300 Subject: [PATCH 19/41] fix: removed deadline check before account call and added metrics with dashboards --- .../dashboards/ServicesStatistic.json | 685 ++++++++++++++++++ pom.xml | 1 + .../ru/quipy/apigateway/APIController.kt | 15 +- .../payments/config/PaymentAccountsConfig.kt | 11 +- .../ru/quipy/payments/logic/OrderPayer.kt | 13 +- .../logic/PaymentExternalServiceImpl.kt | 60 +- src/main/resources/application.properties | 6 +- 7 files changed, 749 insertions(+), 42 deletions(-) diff --git a/grafana/provisioning/dashboards/ServicesStatistic.json b/grafana/provisioning/dashboards/ServicesStatistic.json index 1e46c11bc..62eba720e 100644 --- a/grafana/provisioning/dashboards/ServicesStatistic.json +++ b/grafana/provisioning/dashboards/ServicesStatistic.json @@ -3825,6 +3825,691 @@ ], "title": "Payment Processing Latency", "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 110, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Текущее число активных (выполняемых) задач в ThreadPoolExecutor платежей", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Активные задачи", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 80 + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 111, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "payment_executor_active_tasks", + "legendFormat": "Active tasks", + "range": true, + "refId": "A" + } + ], + "title": "Executor — Параллельные задачи (active threads)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Текущее число задач, ожидающих в очереди ThreadPoolExecutor", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Задачи в очереди", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 10000 + }, + { + "color": "red", + "value": 40000 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 112, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "payment_executor_queue_size", + "legendFormat": "Queue size", + "range": true, + "refId": "A" + } + ], + "title": "Executor — Задачи в очереди (queue size)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Число HTTP-запросов, отправленных на платёжный аккаунт и ожидающих ответа (in-flight)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "In-flight запросы", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 113, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "payment_account_inflight_requests", + "legendFormat": "{{accountName}}", + "range": true, + "refId": "A" + } + ], + "title": "In-flight запросы на платёжные аккаунты", + "type": "timeseries" + } + ], + "title": "Payment Executor & Inflight Metrics", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 120, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Число запросов, обрабатываемых API контроллером в данный момент", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Запросы", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 121, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "api_gateway_controller_requests", + "legendFormat": "{{accountName}}", + "range": true, + "refId": "A" + } + ], + "title": "API Gateway — Запросы контроллера", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Число запросов, находящихся на ретрае в данный момент (по аккаунту)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Retry запросы", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 5 + }, + { + "color": "red", + "value": 20 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 122, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "payment_account_retry_requests", + "legendFormat": "{{accountName}}", + "range": true, + "refId": "A" + } + ], + "title": "Payment Account — Retry запросы", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Латентность выполнения задач в OrderPayer — время от создания до завершения (перцентили)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Латентность (с)", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 123, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum(rate(order_payer_exec_latency_seconds_bucket[1m])) by (le, accountName))", + "legendFormat": "p50 {{accountName}}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.90, sum(rate(order_payer_exec_latency_seconds_bucket[1m])) by (le, accountName))", + "legendFormat": "p90 {{accountName}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(order_payer_exec_latency_seconds_bucket[1m])) by (le, accountName))", + "legendFormat": "p95 {{accountName}}", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(order_payer_exec_latency_seconds_bucket[1m])) by (le, accountName))", + "legendFormat": "p99 {{accountName}}", + "range": true, + "refId": "D" + } + ], + "title": "Order Payer — Латентность выполнения задач (перцентили)", + "type": "timeseries" + } + ], + "title": "API Gateway, Retry & Order Payer Metrics", + "type": "row" }], "preload": false, "refresh": "5s", diff --git a/pom.xml b/pom.xml index e244cb6aa..554b0bdcd 100644 --- a/pom.xml +++ b/pom.xml @@ -171,6 +171,7 @@ -Xjsr305=strict + -Xannotation-default-target=param-property spring diff --git a/src/main/kotlin/ru/quipy/apigateway/APIController.kt b/src/main/kotlin/ru/quipy/apigateway/APIController.kt index 12d55502d..f9230556e 100644 --- a/src/main/kotlin/ru/quipy/apigateway/APIController.kt +++ b/src/main/kotlin/ru/quipy/apigateway/APIController.kt @@ -1,21 +1,32 @@ package ru.quipy.apigateway +import io.micrometer.core.instrument.MeterRegistry +import io.micrometer.core.instrument.Tag import org.slf4j.Logger import org.slf4j.LoggerFactory import org.springframework.web.bind.annotation.* import ru.quipy.common.utils.LeakingBucketRateLimiter import ru.quipy.orders.repository.OrderRepository import ru.quipy.payments.logic.OrderPayer +import ru.quipy.payments.logic.PaymentAccountProperties import java.util.* +import java.util.concurrent.atomic.AtomicInteger @RestController class APIController( private val orderRepository: OrderRepository, private val orderPayer: OrderPayer, - private val rateLimiter: LeakingBucketRateLimiter + private val rateLimiter: LeakingBucketRateLimiter, + meterRegistry: MeterRegistry, + accountProperties: List ) { val logger: Logger = LoggerFactory.getLogger(APIController::class.java) + val controllerRequests = AtomicInteger(0) + + init { + meterRegistry.gauge("api.gateway.controller.requests", listOf(Tag.of("accountName", accountProperties.joinToString { it.accountName + " " })), controllerRequests) { it.toDouble() } + } @PostMapping("/users") fun createUser(@RequestBody req: CreateUserRequest): User { @@ -54,6 +65,7 @@ class APIController( @PostMapping("/orders/{orderId}/payment") fun payOrder(@PathVariable orderId: UUID, @RequestParam deadline: Long): PaymentSubmissionDto { + controllerRequests.incrementAndGet() val paymentId = UUID.randomUUID() val order = orderRepository.findById(orderId)?.let { orderRepository.save(it.copy(status = OrderStatus.PAYMENT_IN_PROGRESS)) @@ -61,6 +73,7 @@ class APIController( } ?: throw IllegalArgumentException("No such order $orderId") val createdAt = orderPayer.processPayment(orderId, order.price, paymentId, deadline) + controllerRequests.decrementAndGet() return PaymentSubmissionDto(createdAt, paymentId) } diff --git a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt index a495d8459..85d433f7e 100644 --- a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt +++ b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt @@ -3,7 +3,6 @@ package ru.quipy.payments.config import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule import com.fasterxml.jackson.module.kotlin.registerKotlinModule -import io.micrometer.core.instrument.MeterRegistry import org.slf4j.Logger import org.slf4j.LoggerFactory import org.springframework.beans.factory.annotation.Value @@ -55,13 +54,14 @@ class PaymentAccountsConfig { @Bean fun warehouseIfUnfinishedWork( accountProperties: List, + meterRegistry: io.micrometer.core.instrument.MeterRegistry, ): ThreadPoolExecutor { val corePoolSize = 100 val maximumPoolSize = 100 val queueSize = 50_000 val keepAliveTime = 0 logger.info("Thread Pool Properties: core pool size - {}, maximum pool size - {}, queue size - {}, keepAliveTime - {}", corePoolSize, maximumPoolSize, queueSize, keepAliveTime) - return ThreadPoolExecutor( + val executor = ThreadPoolExecutor( corePoolSize, maximumPoolSize, 0, @@ -70,6 +70,11 @@ class PaymentAccountsConfig { NamedThreadFactory("payment-submission-executor"), CallerBlockingRejectedExecutionHandler() ) + + meterRegistry.gauge("payment.executor.active.tasks", executor) { it.activeCount.toDouble() } + meterRegistry.gauge("payment.executor.queue.size", executor) { it.queue.size.toDouble() } + + return executor } @Bean @@ -135,7 +140,7 @@ class PaymentAccountsConfig { @Bean fun accountAdapters( paymentService: EventSourcingService, - meterRegistry: MeterRegistry, + meterRegistry: io.micrometer.core.instrument.MeterRegistry, accountProperties: List, rateLimiter: SlidingWindowRateLimiter ): List { diff --git a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt index b8a2cf5bc..12aee0c49 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt @@ -1,5 +1,6 @@ package ru.quipy.payments.logic +import io.micrometer.core.instrument.MeterRegistry import org.slf4j.Logger import org.slf4j.LoggerFactory import org.springframework.beans.factory.annotation.Autowired @@ -10,18 +11,23 @@ import ru.quipy.core.EventSourcingService import ru.quipy.payments.api.PaymentAggregate import java.util.* import java.util.concurrent.ThreadPoolExecutor +import java.util.concurrent.TimeUnit @Service class OrderPayer( val rateLimiter: SlidingWindowRateLimiter, @Qualifier("warehouseIfUnfinishedWork") - val paymentExecutor: ThreadPoolExecutor -) { + val paymentExecutor: ThreadPoolExecutor, + meterRegistry: MeterRegistry, + accountProperties: List + ) { companion object { val logger: Logger = LoggerFactory.getLogger(OrderPayer::class.java) } + val inExecTimer = meterRegistry.timer("order.payer.exec.latency", "accountName", accountProperties.joinToString { it.accountName + " " }) + @Autowired private lateinit var paymentESService: EventSourcingService @@ -38,9 +44,10 @@ class OrderPayer( amount ) } - logger.trace("Payment ${createdEvent.paymentId} for order $orderId created.") + logger.trace("Payment {} for order {} created.", createdEvent.paymentId, orderId) paymentService.submitPaymentRequest(paymentId, amount, createdAt, deadline) + inExecTimer.record(now() - createdAt, TimeUnit.MILLISECONDS) } return createdAt } diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index bceb1ed77..d439f55f8 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -3,6 +3,8 @@ package ru.quipy.payments.logic import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.kotlin.registerKotlinModule import io.micrometer.core.instrument.MeterRegistry +import io.micrometer.core.instrument.Tag +import org.slf4j.Logger import org.slf4j.LoggerFactory import ru.quipy.common.utils.SlidingWindowRateLimiter import ru.quipy.core.EventSourcingService @@ -22,6 +24,7 @@ import java.util.concurrent.Executors import java.util.concurrent.ScheduledExecutorService import java.util.concurrent.Semaphore import java.util.concurrent.TimeUnit +import java.util.concurrent.atomic.AtomicInteger import kotlin.math.pow import kotlin.random.Random @@ -36,30 +39,21 @@ class PaymentExternalSystemAdapterImpl( private val parallelLimiter: Semaphore, private val rateLimiter: SlidingWindowRateLimiter ) : PaymentExternalSystemAdapter { - companion object { - val logger = LoggerFactory.getLogger(PaymentExternalSystemAdapter::class.java) - val mapper = ObjectMapper().registerKotlinModule() - } - - private val time_95_percentile = 20_000L - - private val retryExecutor: ScheduledExecutorService = Executors.newScheduledThreadPool(properties.parallelRequests) - // 2025-11-20T20:30:35.780+03:00 INFO 56644 --- [alhost:1234/...] ru.quipy.core.EventSourcingService : Optimistic lock exception. Failed to save event records id: [7dca693e-e811-4b7f-8bce-23e13d952c04-4] - private val startedRequests = - meterRegistry.counter("payment.processing.started", "accountName", properties.accountName) - private val requestsRetried = - meterRegistry.counter("payment.request.retried", "accountName", properties.accountName) - private val timer = - meterRegistry.timer("payment.external.system.request.latency", "accountName", properties.accountName) + private val time95Percentile = 20_000L private val serviceName = properties.serviceName private val accountName = properties.accountName private val requestAverageProcessingTime = properties.averageProcessingTime private val rateLimitPerSec = properties.rateLimitPerSec private val parallelRequests = properties.parallelRequests - - - + private val inFlightRequests = AtomicInteger(0) + private val retryRequests = AtomicInteger(0) + init { + meterRegistry.gauge("payment.account.inflight.requests", listOf(Tag.of("accountName", properties.accountName)), inFlightRequests) { it.toDouble() } + meterRegistry.gauge("payment.account.retry.requests", listOf(Tag.of("accountName", properties.accountName)), retryRequests) { it.toDouble() } + } + private val timer = meterRegistry.timer("payment.external.system.request.latency", "accountName", properties.accountName) + private val retryExecutor: ScheduledExecutorService = Executors.newScheduledThreadPool(properties.parallelRequests) private val httpClient = HttpClient .newBuilder() .executor(Executors.newFixedThreadPool(parallelRequests)) @@ -88,18 +82,9 @@ class PaymentExternalSystemAdapterImpl( logger.info("[$accountName] Submit: $paymentId , txId: $transactionId") - val remaining = deadline - now() - val minRequiredTime = requestAverageProcessingTime.toMillis() - if (remaining < minRequiredTime) { - logger.warn("[$accountName] Not enough time for payment $paymentId: ${remaining}ms remaining, need ${minRequiredTime}ms") - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "not enough time") - } - } - val request = HttpRequest.newBuilder() .uri(URI("http://$paymentProviderHostPort/external/process?serviceName=$serviceName&token=$token&accountName=$accountName&transactionId=$transactionId&paymentId=$paymentId&amount=$amount")) - .timeout(Duration.ofMillis(time_95_percentile)) + .timeout(Duration.ofMillis(time95Percentile)) .POST(HttpRequest.BodyPublishers.noBody()) .build() @@ -121,14 +106,16 @@ class PaymentExternalSystemAdapterImpl( ) { val timeBeforeCall = now() parallelLimiter.acquire() - if (!rateLimiter.tickBlocking(timeout = deadline - now() - time_95_percentile)) { + if (!rateLimiter.tickBlocking(timeout = deadline - now() - time95Percentile)) { parallelLimiter.release() throw TooManyRequestsException(10) } - startedRequests.increment() + inFlightRequests.incrementAndGet() httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString()) .whenComplete { response, throwable -> + logger.info("On completion callback for payment: {}, retry count: {}, inFlight count: {}, in time: {}", paymentId, retryCount, inFlightRequests, now()) parallelLimiter.release() + inFlightRequests.decrementAndGet() if (throwable != null) { val e = throwable.cause var isRetriable = true @@ -209,9 +196,9 @@ class PaymentExternalSystemAdapterImpl( retryCount: Long, request: HttpRequest, paymentId: UUID, transactionId: UUID, deadline: Long, delay: Long ) { - requestsRetried.increment() + retryRequests.incrementAndGet() retryExecutor.schedule({ - logger.info("Completing retry. All retry count - {}", requestsRetried.count()) + logger.info("Completing retry. All retry count - {}", retryRequests ) val remainingTime = deadline - now() if (remainingTime < requestAverageProcessingTime.toMillis()) { try { @@ -222,7 +209,7 @@ class PaymentExternalSystemAdapterImpl( logger.error("[$accountName] Failed to record retry failure for $paymentId", e) } } else { - val newRequestTimeout = remainingTime.coerceIn(time_95_percentile, requestAverageProcessingTime.toMillis() * 2) + val newRequestTimeout = remainingTime.coerceIn(time95Percentile, requestAverageProcessingTime.toMillis() * 2) val newRequest = HttpRequest.newBuilder() .uri(request.uri()) .timeout(Duration.ofMillis(newRequestTimeout)) @@ -230,8 +217,15 @@ class PaymentExternalSystemAdapterImpl( .build() completeAction(retryCount + 1, newRequest, paymentId, transactionId, deadline) } + retryRequests.decrementAndGet() }, delay, TimeUnit.MILLISECONDS) } + + companion object { + val logger: Logger = LoggerFactory.getLogger(PaymentExternalSystemAdapter::class.java) + val mapper = ObjectMapper().registerKotlinModule() + } } + fun now() = System.currentTimeMillis() \ No newline at end of file diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 475ed1bd1..a1c53a908 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -32,6 +32,8 @@ payment.hostPort=${PAYMENT_HOST:localhost}:${PAYMENT_PORT:1234} # For 8 case #payment.accounts=${PAYMENT_ACCOUNTS:acc-9} -payment.accounts=${PAYMENT_ACCOUNTS:acc-12} -payment.processingTimeMillis=50000 +payment.accounts=${PAYMENT_ACCOUNTS:acc-13} +payment.processingTimeMillis=1000 payment.maximumPoolSize=50_000 + + From c9c0a6ef3bf58dc1212876bea94b6878d8858fd3 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Sat, 28 Feb 2026 22:20:25 +0300 Subject: [PATCH 20/41] fix: removed all checkers --- .../kotlin/ru/quipy/apigateway/APIController.kt | 2 -- .../quipy/payments/config/PaymentAccountsConfig.kt | 8 ++++---- .../kotlin/ru/quipy/payments/logic/OrderPayer.kt | 2 -- .../payments/logic/PaymentExternalServiceImpl.kt | 13 +------------ 4 files changed, 5 insertions(+), 20 deletions(-) diff --git a/src/main/kotlin/ru/quipy/apigateway/APIController.kt b/src/main/kotlin/ru/quipy/apigateway/APIController.kt index f9230556e..6a4885f44 100644 --- a/src/main/kotlin/ru/quipy/apigateway/APIController.kt +++ b/src/main/kotlin/ru/quipy/apigateway/APIController.kt @@ -5,7 +5,6 @@ import io.micrometer.core.instrument.Tag import org.slf4j.Logger import org.slf4j.LoggerFactory import org.springframework.web.bind.annotation.* -import ru.quipy.common.utils.LeakingBucketRateLimiter import ru.quipy.orders.repository.OrderRepository import ru.quipy.payments.logic.OrderPayer import ru.quipy.payments.logic.PaymentAccountProperties @@ -16,7 +15,6 @@ import java.util.concurrent.atomic.AtomicInteger class APIController( private val orderRepository: OrderRepository, private val orderPayer: OrderPayer, - private val rateLimiter: LeakingBucketRateLimiter, meterRegistry: MeterRegistry, accountProperties: List ) { diff --git a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt index 85d433f7e..96dd61821 100644 --- a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt +++ b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt @@ -56,9 +56,9 @@ class PaymentAccountsConfig { accountProperties: List, meterRegistry: io.micrometer.core.instrument.MeterRegistry, ): ThreadPoolExecutor { - val corePoolSize = 100 - val maximumPoolSize = 100 - val queueSize = 50_000 + val corePoolSize = 1000 + val maximumPoolSize = 1000 + val queueSize = 100_000 val keepAliveTime = 0 logger.info("Thread Pool Properties: core pool size - {}, maximum pool size - {}, queue size - {}, keepAliveTime - {}", corePoolSize, maximumPoolSize, queueSize, keepAliveTime) val executor = ThreadPoolExecutor( @@ -66,7 +66,7 @@ class PaymentAccountsConfig { maximumPoolSize, 0, TimeUnit.MILLISECONDS, - LinkedBlockingQueue(50_000), + LinkedBlockingQueue(100_000), NamedThreadFactory("payment-submission-executor"), CallerBlockingRejectedExecutionHandler() ) diff --git a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt index 12aee0c49..426499ec2 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt @@ -11,7 +11,6 @@ import ru.quipy.core.EventSourcingService import ru.quipy.payments.api.PaymentAggregate import java.util.* import java.util.concurrent.ThreadPoolExecutor -import java.util.concurrent.TimeUnit @Service class OrderPayer( @@ -47,7 +46,6 @@ class OrderPayer( logger.trace("Payment {} for order {} created.", createdEvent.paymentId, orderId) paymentService.submitPaymentRequest(paymentId, amount, createdAt, deadline) - inExecTimer.record(now() - createdAt, TimeUnit.MILLISECONDS) } return createdAt } diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index d439f55f8..fe5b20937 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -8,7 +8,6 @@ import org.slf4j.Logger import org.slf4j.LoggerFactory import ru.quipy.common.utils.SlidingWindowRateLimiter import ru.quipy.core.EventSourcingService -import ru.quipy.exceptions.TooManyRequestsException import ru.quipy.payments.api.PaymentAggregate import java.io.EOFException import java.io.IOException @@ -40,7 +39,7 @@ class PaymentExternalSystemAdapterImpl( private val rateLimiter: SlidingWindowRateLimiter ) : PaymentExternalSystemAdapter { - private val time95Percentile = 20_000L + private val time95Percentile = 200L private val serviceName = properties.serviceName private val accountName = properties.accountName private val requestAverageProcessingTime = properties.averageProcessingTime @@ -105,17 +104,9 @@ class PaymentExternalSystemAdapterImpl( deadline: Long ) { val timeBeforeCall = now() - parallelLimiter.acquire() - if (!rateLimiter.tickBlocking(timeout = deadline - now() - time95Percentile)) { - parallelLimiter.release() - throw TooManyRequestsException(10) - } - inFlightRequests.incrementAndGet() httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString()) .whenComplete { response, throwable -> logger.info("On completion callback for payment: {}, retry count: {}, inFlight count: {}, in time: {}", paymentId, retryCount, inFlightRequests, now()) - parallelLimiter.release() - inFlightRequests.decrementAndGet() if (throwable != null) { val e = throwable.cause var isRetriable = true @@ -196,7 +187,6 @@ class PaymentExternalSystemAdapterImpl( retryCount: Long, request: HttpRequest, paymentId: UUID, transactionId: UUID, deadline: Long, delay: Long ) { - retryRequests.incrementAndGet() retryExecutor.schedule({ logger.info("Completing retry. All retry count - {}", retryRequests ) val remainingTime = deadline - now() @@ -217,7 +207,6 @@ class PaymentExternalSystemAdapterImpl( .build() completeAction(retryCount + 1, newRequest, paymentId, transactionId, deadline) } - retryRequests.decrementAndGet() }, delay, TimeUnit.MILLISECONDS) } From 0d5b23cce3f02052f0060cf071c32ff9eca6d863 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Sat, 28 Feb 2026 22:50:10 +0300 Subject: [PATCH 21/41] fix: removed all time-spent optional operations --- .../ru/quipy/apigateway/APIController.kt | 2 -- .../payments/config/PaymentAccountsConfig.kt | 4 --- .../ru/quipy/payments/logic/OrderPayer.kt | 2 -- .../logic/PaymentExternalServiceImpl.kt | 27 +++---------------- 4 files changed, 4 insertions(+), 31 deletions(-) diff --git a/src/main/kotlin/ru/quipy/apigateway/APIController.kt b/src/main/kotlin/ru/quipy/apigateway/APIController.kt index 6a4885f44..e2d0aa290 100644 --- a/src/main/kotlin/ru/quipy/apigateway/APIController.kt +++ b/src/main/kotlin/ru/quipy/apigateway/APIController.kt @@ -63,7 +63,6 @@ class APIController( @PostMapping("/orders/{orderId}/payment") fun payOrder(@PathVariable orderId: UUID, @RequestParam deadline: Long): PaymentSubmissionDto { - controllerRequests.incrementAndGet() val paymentId = UUID.randomUUID() val order = orderRepository.findById(orderId)?.let { orderRepository.save(it.copy(status = OrderStatus.PAYMENT_IN_PROGRESS)) @@ -71,7 +70,6 @@ class APIController( } ?: throw IllegalArgumentException("No such order $orderId") val createdAt = orderPayer.processPayment(orderId, order.price, paymentId, deadline) - controllerRequests.decrementAndGet() return PaymentSubmissionDto(createdAt, paymentId) } diff --git a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt index 96dd61821..d164fd4b7 100644 --- a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt +++ b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt @@ -70,10 +70,6 @@ class PaymentAccountsConfig { NamedThreadFactory("payment-submission-executor"), CallerBlockingRejectedExecutionHandler() ) - - meterRegistry.gauge("payment.executor.active.tasks", executor) { it.activeCount.toDouble() } - meterRegistry.gauge("payment.executor.queue.size", executor) { it.queue.size.toDouble() } - return executor } diff --git a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt index 426499ec2..d944a82b5 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt @@ -43,8 +43,6 @@ class OrderPayer( amount ) } - logger.trace("Payment {} for order {} created.", createdEvent.paymentId, orderId) - paymentService.submitPaymentRequest(paymentId, amount, createdAt, deadline) } return createdAt diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index fe5b20937..e438cceaa 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -39,7 +39,7 @@ class PaymentExternalSystemAdapterImpl( private val rateLimiter: SlidingWindowRateLimiter ) : PaymentExternalSystemAdapter { - private val time95Percentile = 200L + private val time95Percentile = 20L private val serviceName = properties.serviceName private val accountName = properties.accountName private val requestAverageProcessingTime = properties.averageProcessingTime @@ -51,16 +51,18 @@ class PaymentExternalSystemAdapterImpl( meterRegistry.gauge("payment.account.inflight.requests", listOf(Tag.of("accountName", properties.accountName)), inFlightRequests) { it.toDouble() } meterRegistry.gauge("payment.account.retry.requests", listOf(Tag.of("accountName", properties.accountName)), retryRequests) { it.toDouble() } } + + private val processingTimeMillis = 1000L private val timer = meterRegistry.timer("payment.external.system.request.latency", "accountName", properties.accountName) private val retryExecutor: ScheduledExecutorService = Executors.newScheduledThreadPool(properties.parallelRequests) private val httpClient = HttpClient .newBuilder() .executor(Executors.newFixedThreadPool(parallelRequests)) .version(HttpClient.Version.HTTP_2) + .connectTimeout(Duration.ofMillis(processingTimeMillis / 2)) .build() override fun performPaymentAsync(paymentId: UUID, amount: Int, paymentStartedAt: Long, deadline: Long) { - logger.warn("[$accountName] Submitting payment request for payment $paymentId") val transactionId = UUID.randomUUID() // Вне зависимости от исхода оплаты важно отметить что она была отправлена. @@ -74,13 +76,9 @@ class PaymentExternalSystemAdapterImpl( Duration.ofMillis(now() - paymentStartedAt) ) } - logger.info("[$accountName] Log submission recorded for $paymentId") } catch (e: Exception) { logger.error("[$accountName] Failed to record log submission for $paymentId", e) } - - logger.info("[$accountName] Submit: $paymentId , txId: $transactionId") - val request = HttpRequest.newBuilder() .uri(URI("http://$paymentProviderHostPort/external/process?serviceName=$serviceName&token=$token&accountName=$accountName&transactionId=$transactionId&paymentId=$paymentId&amount=$amount")) .timeout(Duration.ofMillis(time95Percentile)) @@ -106,27 +104,18 @@ class PaymentExternalSystemAdapterImpl( val timeBeforeCall = now() httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString()) .whenComplete { response, throwable -> - logger.info("On completion callback for payment: {}, retry count: {}, inFlight count: {}, in time: {}", paymentId, retryCount, inFlightRequests, now()) if (throwable != null) { val e = throwable.cause var isRetriable = true when (throwable.cause) { is SocketTimeoutException -> { - logger.warn("[$accountName] attempt ${retryCount + 1} timeout: $paymentId", e) } - is InterruptedIOException -> { - logger.warn("[$accountName] interrupted: $paymentId", e) } - is EOFException -> { - logger.warn("[$accountName] eof exception in: $paymentId", e) } - is IOException -> { - logger.warn("[$accountName] received stream exception in: $paymentId", e) } - else -> { logger.warn("[$accountName] io error: $paymentId", e) isRetriable = false @@ -158,13 +147,6 @@ class PaymentExternalSystemAdapterImpl( } } else { try { - logger.warn("Free space in semaphore: {}", parallelLimiter.availablePermits()) - logger.info( - "success in callback for payment: {}, retry count: {}, in time: {}", - paymentId, - retryCount, - now() - ) val rawBody = response.body() val parsed = try { mapper.readValue(rawBody, ExternalSysResponse::class.java) @@ -188,7 +170,6 @@ class PaymentExternalSystemAdapterImpl( transactionId: UUID, deadline: Long, delay: Long ) { retryExecutor.schedule({ - logger.info("Completing retry. All retry count - {}", retryRequests ) val remainingTime = deadline - now() if (remainingTime < requestAverageProcessingTime.toMillis()) { try { From afe54dc5967c02350424f9620a7ebc36fe82a07e Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Fri, 6 Mar 2026 18:40:56 +0300 Subject: [PATCH 22/41] fix: returned metrics --- .../payments/config/PaymentAccountsConfig.kt | 4 +++ .../ru/quipy/payments/logic/OrderPayer.kt | 27 ++++++++++++++----- .../logic/PaymentExternalServiceImpl.kt | 8 +++--- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt index d164fd4b7..96dd61821 100644 --- a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt +++ b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt @@ -70,6 +70,10 @@ class PaymentAccountsConfig { NamedThreadFactory("payment-submission-executor"), CallerBlockingRejectedExecutionHandler() ) + + meterRegistry.gauge("payment.executor.active.tasks", executor) { it.activeCount.toDouble() } + meterRegistry.gauge("payment.executor.queue.size", executor) { it.queue.size.toDouble() } + return executor } diff --git a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt index d944a82b5..f1d928379 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt @@ -1,5 +1,6 @@ package ru.quipy.payments.logic +import io.micrometer.core.instrument.Counter import io.micrometer.core.instrument.MeterRegistry import org.slf4j.Logger import org.slf4j.LoggerFactory @@ -11,6 +12,7 @@ import ru.quipy.core.EventSourcingService import ru.quipy.payments.api.PaymentAggregate import java.util.* import java.util.concurrent.ThreadPoolExecutor +import java.util.concurrent.TimeUnit @Service class OrderPayer( @@ -27,6 +29,10 @@ class OrderPayer( val inExecTimer = meterRegistry.timer("order.payer.exec.latency", "accountName", accountProperties.joinToString { it.accountName + " " }) + private val plannedCounter: Counter = meterRegistry.counter("payment.processing.planned") + private val startedCounter: Counter = meterRegistry.counter("payment.processing.started") + private val completedCounter: Counter = meterRegistry.counter("payment.processing.completed") + @Autowired private lateinit var paymentESService: EventSourcingService @@ -35,15 +41,22 @@ class OrderPayer( fun processPayment(orderId: UUID, amount: Int, paymentId: UUID, deadline: Long): Long { val createdAt = System.currentTimeMillis() + plannedCounter.increment() paymentExecutor.submit { - val createdEvent = paymentESService.create { - it.create( - paymentId, - orderId, - amount - ) + startedCounter.increment() + try { + val createdEvent = paymentESService.create { + it.create( + paymentId, + orderId, + amount + ) + } + inExecTimer.record(now()-createdAt, TimeUnit.MILLISECONDS) + paymentService.submitPaymentRequest(paymentId, amount, createdAt, deadline) + } finally { + completedCounter.increment() } - paymentService.submitPaymentRequest(paymentId, amount, createdAt, deadline) } return createdAt } diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index e438cceaa..245a3e74f 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -39,7 +39,7 @@ class PaymentExternalSystemAdapterImpl( private val rateLimiter: SlidingWindowRateLimiter ) : PaymentExternalSystemAdapter { - private val time95Percentile = 20L + private val time95Percentile = 100L private val serviceName = properties.serviceName private val accountName = properties.accountName private val requestAverageProcessingTime = properties.averageProcessingTime @@ -54,6 +54,7 @@ class PaymentExternalSystemAdapterImpl( private val processingTimeMillis = 1000L private val timer = meterRegistry.timer("payment.external.system.request.latency", "accountName", properties.accountName) + private val retryCounter = meterRegistry.counter("payment.external.retry.count", "accountName", properties.accountName) private val retryExecutor: ScheduledExecutorService = Executors.newScheduledThreadPool(properties.parallelRequests) private val httpClient = HttpClient .newBuilder() @@ -104,7 +105,8 @@ class PaymentExternalSystemAdapterImpl( val timeBeforeCall = now() httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString()) .whenComplete { response, throwable -> - if (throwable != null) { + timer.record(now() - timeBeforeCall, TimeUnit.MILLISECONDS) + if (throwable != null) { val e = throwable.cause var isRetriable = true when (throwable.cause) { @@ -135,6 +137,7 @@ class PaymentExternalSystemAdapterImpl( } } else { if (isRetriable) { + retryCounter.increment() scheduleRetry( retryCount, request, paymentId, transactionId, deadline, capped ) @@ -157,7 +160,6 @@ class PaymentExternalSystemAdapterImpl( paymentESService.update(paymentId) { it.logProcessing(parsed.result, now(), transactionId, parsed.message) } - timer.record(now() - timeBeforeCall, TimeUnit.MILLISECONDS) } catch (e: Exception) { logger.error("[$accountName] Error processing payment $paymentId", e) } From 9f5cffb51b90b47db73428cc2aa5aebc281e73db Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Fri, 6 Mar 2026 19:04:12 +0300 Subject: [PATCH 23/41] fix: returned metrics --- src/main/kotlin/ru/quipy/apigateway/APIController.kt | 1 + .../quipy/payments/logic/PaymentExternalServiceImpl.kt | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/main/kotlin/ru/quipy/apigateway/APIController.kt b/src/main/kotlin/ru/quipy/apigateway/APIController.kt index e2d0aa290..875f3dc00 100644 --- a/src/main/kotlin/ru/quipy/apigateway/APIController.kt +++ b/src/main/kotlin/ru/quipy/apigateway/APIController.kt @@ -63,6 +63,7 @@ class APIController( @PostMapping("/orders/{orderId}/payment") fun payOrder(@PathVariable orderId: UUID, @RequestParam deadline: Long): PaymentSubmissionDto { + controllerRequests.incrementAndGet() val paymentId = UUID.randomUUID() val order = orderRepository.findById(orderId)?.let { orderRepository.save(it.copy(status = OrderStatus.PAYMENT_IN_PROGRESS)) diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index 245a3e74f..7895b3580 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -39,7 +39,7 @@ class PaymentExternalSystemAdapterImpl( private val rateLimiter: SlidingWindowRateLimiter ) : PaymentExternalSystemAdapter { - private val time95Percentile = 100L + private val time95Percentile = 1000L private val serviceName = properties.serviceName private val accountName = properties.accountName private val requestAverageProcessingTime = properties.averageProcessingTime @@ -102,9 +102,14 @@ class PaymentExternalSystemAdapterImpl( transactionId: UUID, deadline: Long ) { + inFlightRequests.incrementAndGet() val timeBeforeCall = now() httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString()) .whenComplete { response, throwable -> + inFlightRequests.decrementAndGet() + if (retryCount > 0) { + retryRequests.decrementAndGet() + } timer.record(now() - timeBeforeCall, TimeUnit.MILLISECONDS) if (throwable != null) { val e = throwable.cause @@ -137,6 +142,7 @@ class PaymentExternalSystemAdapterImpl( } } else { if (isRetriable) { + retryRequests.incrementAndGet() retryCounter.increment() scheduleRetry( retryCount, request, paymentId, transactionId, deadline, capped @@ -174,6 +180,7 @@ class PaymentExternalSystemAdapterImpl( retryExecutor.schedule({ val remainingTime = deadline - now() if (remainingTime < requestAverageProcessingTime.toMillis()) { + retryRequests.decrementAndGet() try { paymentESService.update(paymentId) { it.logProcessing(false, now(), transactionId, "Not enough time for retry") From 523a4073e0ffd34148d2184f4cadda4e259d0af2 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Sun, 8 Mar 2026 20:17:43 +0300 Subject: [PATCH 24/41] fix: returned semaphore and added async bombardier processing logic --- .../dashboards/ServicesStatistic.json | 146 ++++++++++++++++++ .../provisioning/datasources/datasource.yml | 2 + .../logic/PaymentAggregateCommands.kt | 3 +- .../logic/PaymentExternalServiceImpl.kt | 138 ++++++++++------- src/main/resources/application.properties | 3 +- 5 files changed, 237 insertions(+), 55 deletions(-) diff --git a/grafana/provisioning/dashboards/ServicesStatistic.json b/grafana/provisioning/dashboards/ServicesStatistic.json index 62eba720e..a77405988 100644 --- a/grafana/provisioning/dashboards/ServicesStatistic.json +++ b/grafana/provisioning/dashboards/ServicesStatistic.json @@ -843,6 +843,152 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "description": "Время ожидания задачи в очереди перед обработкой (перцентили)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Латентность (с)", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 130, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum(rate(bombardier_in_queue_latency_seconds_bucket[30s])) by (le))", + "legendFormat": "p50", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.75, sum(rate(bombardier_in_queue_latency_seconds_bucket[30s])) by (le))", + "legendFormat": "p75", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.90, sum(rate(bombardier_in_queue_latency_seconds_bucket[30s])) by (le))", + "legendFormat": "p90", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(bombardier_in_queue_latency_seconds_bucket[30s])) by (le))", + "legendFormat": "p95", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(bombardier_in_queue_latency_seconds_bucket[30s])) by (le))", + "legendFormat": "p99", + "range": true, + "refId": "E" + } + ], + "title": "Bombardier — In Queue Latency (перцентили)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "fieldConfig": { "defaults": { "color": { diff --git a/grafana/provisioning/datasources/datasource.yml b/grafana/provisioning/datasources/datasource.yml index b37f2a42c..ea5d482ab 100644 --- a/grafana/provisioning/datasources/datasource.yml +++ b/grafana/provisioning/datasources/datasource.yml @@ -13,6 +13,8 @@ datasources: - name: Prometheus # datasource type. Required type: prometheus + # specific unique identifier to match dashboard references + uid: PBFA97CFB590B2093 # access mode. direct or proxy. Required access: proxy # org id. will default to orgId 1 if not specified diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentAggregateCommands.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentAggregateCommands.kt index 2389e3cb3..01f7284a5 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentAggregateCommands.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentAggregateCommands.kt @@ -1,5 +1,6 @@ package ru.quipy.payments.logic +import io.micrometer.core.instrument.Metrics import ru.quipy.payments.api.PaymentCreatedEvent import ru.quipy.payments.api.PaymentProcessedEvent import ru.quipy.payments.api.PaymentSubmittedEvent @@ -29,7 +30,7 @@ fun PaymentAggregateState.logProcessing( ): PaymentProcessedEvent { val submittedAt = this.submissions[transactionId ?: UUID.randomUUID()]?.timeStarted ?: 0 val spentInQueueDuration = this.submissions[transactionId ?: UUID.randomUUID()]?.spentInQueue ?: Duration.ofMillis(0) - + Metrics.timer("bombardier.in.queue.latency").record(spentInQueueDuration) return PaymentProcessedEvent( this.getId(), success, this.orderId, submittedAt, processedAt, this.amount!!, transactionId, reason, spentInQueueDuration ) diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index 7895b3580..ecf786b13 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -4,6 +4,9 @@ import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.kotlin.registerKotlinModule import io.micrometer.core.instrument.MeterRegistry import io.micrometer.core.instrument.Tag +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.launch import org.slf4j.Logger import org.slf4j.LoggerFactory import ru.quipy.common.utils.SlidingWindowRateLimiter @@ -39,7 +42,6 @@ class PaymentExternalSystemAdapterImpl( private val rateLimiter: SlidingWindowRateLimiter ) : PaymentExternalSystemAdapter { - private val time95Percentile = 1000L private val serviceName = properties.serviceName private val accountName = properties.accountName private val requestAverageProcessingTime = properties.averageProcessingTime @@ -47,20 +49,32 @@ class PaymentExternalSystemAdapterImpl( private val parallelRequests = properties.parallelRequests private val inFlightRequests = AtomicInteger(0) private val retryRequests = AtomicInteger(0) + private val scope = CoroutineScope(Dispatchers.Default) + init { - meterRegistry.gauge("payment.account.inflight.requests", listOf(Tag.of("accountName", properties.accountName)), inFlightRequests) { it.toDouble() } - meterRegistry.gauge("payment.account.retry.requests", listOf(Tag.of("accountName", properties.accountName)), retryRequests) { it.toDouble() } + meterRegistry.gauge( + "payment.account.inflight.requests", + listOf(Tag.of("accountName", properties.accountName)), + inFlightRequests + ) { it.toDouble() } + meterRegistry.gauge( + "payment.account.retry.requests", + listOf(Tag.of("accountName", properties.accountName)), + retryRequests + ) { it.toDouble() } } private val processingTimeMillis = 1000L - private val timer = meterRegistry.timer("payment.external.system.request.latency", "accountName", properties.accountName) - private val retryCounter = meterRegistry.counter("payment.external.retry.count", "accountName", properties.accountName) + private val timer = + meterRegistry.timer("payment.external.system.request.latency", "accountName", properties.accountName) + private val retryCounter = + meterRegistry.counter("payment.external.retry.count", "accountName", properties.accountName) private val retryExecutor: ScheduledExecutorService = Executors.newScheduledThreadPool(properties.parallelRequests) private val httpClient = HttpClient .newBuilder() - .executor(Executors.newFixedThreadPool(parallelRequests)) + .executor(Executors.newFixedThreadPool(10_000_000)) .version(HttpClient.Version.HTTP_2) - .connectTimeout(Duration.ofMillis(processingTimeMillis / 2)) + //.connectTimeout(Duration.ofMillis(processingTimeMillis / 2)) .build() override fun performPaymentAsync(paymentId: UUID, amount: Int, paymentStartedAt: Long, deadline: Long) { @@ -68,7 +82,8 @@ class PaymentExternalSystemAdapterImpl( // Вне зависимости от исхода оплаты важно отметить что она была отправлена. // Это требуется сделать ВО ВСЕХ СЛУЧАЯХ, поскольку эта информация используется сервисом тестирования. - try { + try { + scope.launch { paymentESService.update(paymentId) { it.logSubmission( success = true, @@ -77,15 +92,16 @@ class PaymentExternalSystemAdapterImpl( Duration.ofMillis(now() - paymentStartedAt) ) } - } catch (e: Exception) { - logger.error("[$accountName] Failed to record log submission for $paymentId", e) } + } catch (e: Exception) { + logger.error("[$accountName] Failed to record log submission for $paymentId", e) + } val request = HttpRequest.newBuilder() .uri(URI("http://$paymentProviderHostPort/external/process?serviceName=$serviceName&token=$token&accountName=$accountName&transactionId=$transactionId&paymentId=$paymentId&amount=$amount")) - .timeout(Duration.ofMillis(time95Percentile)) + //.timeout(Duration.ofMillis(time95Percentile)) .POST(HttpRequest.BodyPublishers.noBody()) .build() - + parallelLimiter.acquire() completeAction(0, request, paymentId, transactionId, deadline) } @@ -106,70 +122,83 @@ class PaymentExternalSystemAdapterImpl( val timeBeforeCall = now() httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString()) .whenComplete { response, throwable -> + parallelLimiter.release() inFlightRequests.decrementAndGet() if (retryCount > 0) { retryRequests.decrementAndGet() } timer.record(now() - timeBeforeCall, TimeUnit.MILLISECONDS) if (throwable != null) { - val e = throwable.cause - var isRetriable = true - when (throwable.cause) { - is SocketTimeoutException -> { - } - is InterruptedIOException -> { - } - is EOFException -> { - } - is IOException -> { - } - else -> { - logger.warn("[$accountName] io error: $paymentId", e) - isRetriable = false - } + val e = throwable.cause + var isRetriable = true + when (throwable.cause) { + is SocketTimeoutException -> { } - if (retryCount + 1 >= 3) { + is InterruptedIOException -> { + } + + is EOFException -> { + } + + is IOException -> { + } + + else -> { + logger.warn("[$accountName] io error: $paymentId", e) + isRetriable = false + } + } + + if (retryCount + 1 >= 3) { + scope.launch { paymentESService.update(paymentId) { it.logProcessing(false, now(), transactionId, "Max attempts reached") } - } else { - val backoff = ((2.0.pow(retryCount.toDouble()) * 25).toLong() + Random.nextLong(10)) - val capped = backoff.coerceAtMost(deadline - now() - 5) - if (capped <= 0) { + } + } else { + val backoff = ((2.0.pow(retryCount.toDouble()) * 25).toLong() + Random.nextLong(10)) + val capped = backoff.coerceAtMost(deadline - now() - 5) + if (capped <= 0) { + scope.launch { paymentESService.update(paymentId) { it.logProcessing(false, now(), transactionId, "Deadline expired") } + } + } else { + if (isRetriable) { + retryRequests.incrementAndGet() + retryCounter.increment() + scheduleRetry( + retryCount, request, paymentId, transactionId, deadline, capped + ) } else { - if (isRetriable) { - retryRequests.incrementAndGet() - retryCounter.increment() - scheduleRetry( - retryCount, request, paymentId, transactionId, deadline, capped - ) - } else { + scope.launch { paymentESService.update(paymentId) { it.logProcessing(false, now(), transactionId, "Non-retriable exception") } } } } - } else { - try { - val rawBody = response.body() - val parsed = try { - mapper.readValue(rawBody, ExternalSysResponse::class.java) - } catch (ex: Exception) { - ExternalSysResponse(transactionId.toString(), paymentId.toString(), false, ex.message) - } + } + } else { + try { + val rawBody = response.body() + val parsed = try { + mapper.readValue(rawBody, ExternalSysResponse::class.java) + } catch (ex: Exception) { + ExternalSysResponse(transactionId.toString(), paymentId.toString(), false, ex.message) + } + scope.launch { paymentESService.update(paymentId) { it.logProcessing(parsed.result, now(), transactionId, parsed.message) } - } catch (e: Exception) { - logger.error("[$accountName] Error processing payment $paymentId", e) } + } catch (e: Exception) { + logger.error("[$accountName] Error processing payment $paymentId", e) } + } } } @@ -182,19 +211,22 @@ class PaymentExternalSystemAdapterImpl( if (remainingTime < requestAverageProcessingTime.toMillis()) { retryRequests.decrementAndGet() try { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "Not enough time for retry") + scope.launch { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Not enough time for retry") + } } } catch (e: Exception) { logger.error("[$accountName] Failed to record retry failure for $paymentId", e) } } else { - val newRequestTimeout = remainingTime.coerceIn(time95Percentile, requestAverageProcessingTime.toMillis() * 2) + //val newRequestTimeout = remainingTime.coerceIn(time95Percentile, requestAverageProcessingTime.toMillis() * 2) val newRequest = HttpRequest.newBuilder() .uri(request.uri()) - .timeout(Duration.ofMillis(newRequestTimeout)) + //.timeout(Duration.ofMillis(newRequestTimeout)) .POST(HttpRequest.BodyPublishers.noBody()) .build() + parallelLimiter.acquire() completeAction(retryCount + 1, newRequest, paymentId, transactionId, deadline) } }, delay, TimeUnit.MILLISECONDS) diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index a1c53a908..ad9001958 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -15,10 +15,11 @@ event.sourcing.sagas-enabled=false spring.datasource.hikari.jdbc-url=jdbc:postgresql://${POSTGRES_ADDRESS:localhost}:${POSTGRES_PORT:65432}/postgres spring.datasource.hikari.username=tiny_es spring.datasource.hikari.password=tiny_es - +server.jetty.max-connections=10000000 spring.datasource.hikari.leak-detection-threshold=2000 management.metrics.web.server.request.autotime.percentiles=0.95 management.metrics.export.prometheus.enabled=true +management.metrics.distribution.percentiles-histogram.bombardier.in.queue.latency=true management.endpoints.web.exposure.include=prometheus,health,info management.metrics.distribution.percentiles-histogram.http.server.requests=true management.metrics.distribution.percentiles-histogram.payment.external.system.request.latency=true From 38b0d2d4bd48b60c3dc945f2c1f826cf0a7f3c74 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Sun, 8 Mar 2026 20:45:22 +0300 Subject: [PATCH 25/41] fix: fixed optimistic log error and fixed pool size for http2 client --- .../logic/PaymentExternalServiceImpl.kt | 61 ++++++++++++------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index ecf786b13..08c0a3954 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -6,6 +6,7 @@ import io.micrometer.core.instrument.MeterRegistry import io.micrometer.core.instrument.Tag import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.delay import kotlinx.coroutines.launch import org.slf4j.Logger import org.slf4j.LoggerFactory @@ -72,9 +73,9 @@ class PaymentExternalSystemAdapterImpl( private val retryExecutor: ScheduledExecutorService = Executors.newScheduledThreadPool(properties.parallelRequests) private val httpClient = HttpClient .newBuilder() - .executor(Executors.newFixedThreadPool(10_000_000)) + .executor(Executors.newFixedThreadPool(parallelRequests)) .version(HttpClient.Version.HTTP_2) - //.connectTimeout(Duration.ofMillis(processingTimeMillis / 2)) + .connectTimeout(Duration.ofMillis(processingTimeMillis / 2)) .build() override fun performPaymentAsync(paymentId: UUID, amount: Int, paymentStartedAt: Long, deadline: Long) { @@ -84,14 +85,14 @@ class PaymentExternalSystemAdapterImpl( // Это требуется сделать ВО ВСЕХ СЛУЧАЯХ, поскольку эта информация используется сервисом тестирования. try { scope.launch { - paymentESService.update(paymentId) { - it.logSubmission( - success = true, - transactionId, - now(), - Duration.ofMillis(now() - paymentStartedAt) - ) - } + paymentESService.update(paymentId) { + it.logSubmission( + success = true, + transactionId, + now(), + Duration.ofMillis(now() - paymentStartedAt) + ) + } } } catch (e: Exception) { logger.error("[$accountName] Failed to record log submission for $paymentId", e) @@ -152,6 +153,7 @@ class PaymentExternalSystemAdapterImpl( if (retryCount + 1 >= 3) { scope.launch { + updateWithRetry(paymentId, false, now(), "Max attempts reached") paymentESService.update(paymentId) { it.logProcessing(false, now(), transactionId, "Max attempts reached") } @@ -161,9 +163,7 @@ class PaymentExternalSystemAdapterImpl( val capped = backoff.coerceAtMost(deadline - now() - 5) if (capped <= 0) { scope.launch { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "Deadline expired") - } + updateWithRetry(paymentId, false, now(), "Deadline exceeded, no time for retry") } } else { if (isRetriable) { @@ -174,9 +174,7 @@ class PaymentExternalSystemAdapterImpl( ) } else { scope.launch { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "Non-retriable exception") - } + updateWithRetry(paymentId, false, now(), "Non-retriable exception") } } } @@ -189,11 +187,8 @@ class PaymentExternalSystemAdapterImpl( } catch (ex: Exception) { ExternalSysResponse(transactionId.toString(), paymentId.toString(), false, ex.message) } - scope.launch { - paymentESService.update(paymentId) { - it.logProcessing(parsed.result, now(), transactionId, parsed.message) - } + updateWithRetry(paymentId, parsed.result, now(), parsed.message ?: "No message") } } catch (e: Exception) { logger.error("[$accountName] Error processing payment $paymentId", e) @@ -212,9 +207,7 @@ class PaymentExternalSystemAdapterImpl( retryRequests.decrementAndGet() try { scope.launch { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "Not enough time for retry") - } + updateWithRetry(paymentId, false, now(), "Not enough time for retry") } } catch (e: Exception) { logger.error("[$accountName] Failed to record retry failure for $paymentId", e) @@ -232,6 +225,28 @@ class PaymentExternalSystemAdapterImpl( }, delay, TimeUnit.MILLISECONDS) } + private suspend fun updateWithRetry( + paymentId: UUID, + result: Boolean, + processedAt: Long, + reason: String, + maxAttempts: Int = 5, + ) { + repeat(maxAttempts) { attempt -> + try { + paymentESService.update(paymentId) { + it.logProcessing(result, processedAt, paymentId, reason) + } + } catch (_: Exception) { + if (attempt == maxAttempts - 1) { + return + } + val delay = (10L * (attempt + 1)) + Random.nextLong(5) + delay(delay) + } + } + } + companion object { val logger: Logger = LoggerFactory.getLogger(PaymentExternalSystemAdapter::class.java) val mapper = ObjectMapper().registerKotlinModule() From fbd4e1f9587930b6d2d07d946a3156e0e6a96465 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Fri, 13 Mar 2026 18:31:03 +0300 Subject: [PATCH 26/41] hotfix: returned rate limiters --- .../ru/quipy/apigateway/APIController.kt | 8 ++++++- .../logic/PaymentExternalServiceImpl.kt | 24 ++++++++++++------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/main/kotlin/ru/quipy/apigateway/APIController.kt b/src/main/kotlin/ru/quipy/apigateway/APIController.kt index 875f3dc00..e545c1722 100644 --- a/src/main/kotlin/ru/quipy/apigateway/APIController.kt +++ b/src/main/kotlin/ru/quipy/apigateway/APIController.kt @@ -5,6 +5,8 @@ import io.micrometer.core.instrument.Tag import org.slf4j.Logger import org.slf4j.LoggerFactory import org.springframework.web.bind.annotation.* +import ru.quipy.common.utils.LeakingBucketRateLimiter +import ru.quipy.exceptions.TooManyRequestsException import ru.quipy.orders.repository.OrderRepository import ru.quipy.payments.logic.OrderPayer import ru.quipy.payments.logic.PaymentAccountProperties @@ -16,7 +18,8 @@ class APIController( private val orderRepository: OrderRepository, private val orderPayer: OrderPayer, meterRegistry: MeterRegistry, - accountProperties: List + accountProperties: List, + val rateLimiter: LeakingBucketRateLimiter ) { val logger: Logger = LoggerFactory.getLogger(APIController::class.java) @@ -64,6 +67,9 @@ class APIController( @PostMapping("/orders/{orderId}/payment") fun payOrder(@PathVariable orderId: UUID, @RequestParam deadline: Long): PaymentSubmissionDto { controllerRequests.incrementAndGet() + if (rateLimiter.tickBlocking(10)) { + throw TooManyRequestsException(10) + } val paymentId = UUID.randomUUID() val order = orderRepository.findById(orderId)?.let { orderRepository.save(it.copy(status = OrderStatus.PAYMENT_IN_PROGRESS)) diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index 08c0a3954..fe4f2b717 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -12,6 +12,7 @@ import org.slf4j.Logger import org.slf4j.LoggerFactory import ru.quipy.common.utils.SlidingWindowRateLimiter import ru.quipy.core.EventSourcingService +import ru.quipy.exceptions.TooManyRequestsException import ru.quipy.payments.api.PaymentAggregate import java.io.EOFException import java.io.IOException @@ -46,6 +47,7 @@ class PaymentExternalSystemAdapterImpl( private val serviceName = properties.serviceName private val accountName = properties.accountName private val requestAverageProcessingTime = properties.averageProcessingTime + private val time95Percentile = 40; private val rateLimitPerSec = properties.rateLimitPerSec private val parallelRequests = properties.parallelRequests private val inFlightRequests = AtomicInteger(0) @@ -85,14 +87,14 @@ class PaymentExternalSystemAdapterImpl( // Это требуется сделать ВО ВСЕХ СЛУЧАЯХ, поскольку эта информация используется сервисом тестирования. try { scope.launch { - paymentESService.update(paymentId) { - it.logSubmission( - success = true, - transactionId, - now(), - Duration.ofMillis(now() - paymentStartedAt) - ) - } + paymentESService.update(paymentId) { + it.logSubmission( + success = true, + transactionId, + now(), + Duration.ofMillis(now() - paymentStartedAt) + ) + } } } catch (e: Exception) { logger.error("[$accountName] Failed to record log submission for $paymentId", e) @@ -121,6 +123,10 @@ class PaymentExternalSystemAdapterImpl( ) { inFlightRequests.incrementAndGet() val timeBeforeCall = now() + if (!rateLimiter.tickBlocking(timeout = deadline - now() - time95Percentile)) { + parallelLimiter.release() + throw TooManyRequestsException(10) + } httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString()) .whenComplete { response, throwable -> parallelLimiter.release() @@ -163,7 +169,7 @@ class PaymentExternalSystemAdapterImpl( val capped = backoff.coerceAtMost(deadline - now() - 5) if (capped <= 0) { scope.launch { - updateWithRetry(paymentId, false, now(), "Deadline exceeded, no time for retry") + updateWithRetry(paymentId, false, now(), "Deadline exceeded, no time for retry") } } else { if (isRetriable) { From 89775aa1d8480c82202ea39a311ca80688e1452e Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Fri, 13 Mar 2026 18:32:58 +0300 Subject: [PATCH 27/41] hotfix: returned rate limiters --- src/main/kotlin/ru/quipy/apigateway/APIController.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/kotlin/ru/quipy/apigateway/APIController.kt b/src/main/kotlin/ru/quipy/apigateway/APIController.kt index e545c1722..8b4d7ee3f 100644 --- a/src/main/kotlin/ru/quipy/apigateway/APIController.kt +++ b/src/main/kotlin/ru/quipy/apigateway/APIController.kt @@ -67,7 +67,7 @@ class APIController( @PostMapping("/orders/{orderId}/payment") fun payOrder(@PathVariable orderId: UUID, @RequestParam deadline: Long): PaymentSubmissionDto { controllerRequests.incrementAndGet() - if (rateLimiter.tickBlocking(10)) { + if (!rateLimiter.tickBlocking(10)) { throw TooManyRequestsException(10) } val paymentId = UUID.randomUUID() From ad371fcbeff63a9dfcf79a4201532aa88a17e1be Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Fri, 13 Mar 2026 18:55:51 +0300 Subject: [PATCH 28/41] fix: fixed submission call --- .../payments/config/PaymentAccountsConfig.kt | 4 +- .../logic/PaymentAggregateCommands.kt | 5 +- .../logic/PaymentExternalServiceImpl.kt | 46 ++++++------------- 3 files changed, 17 insertions(+), 38 deletions(-) diff --git a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt index 96dd61821..008052d3d 100644 --- a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt +++ b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt @@ -92,9 +92,7 @@ class PaymentAccountsConfig { @Value("#{'\${payment.processingTimeMillis}'.split(',')}") processingTimeMillis: Int ): LeakingBucketRateLimiter { - val bucketSize = (((processingTimeMillis - accountProperties.maxOf { it.averageProcessingTime } - .toMillis()) / accountProperties.maxOf { it.averageProcessingTime } - .toMillis()) * accountProperties.minOf { it.rateLimitPerSec }.toLong()).toInt() + val bucketSize = 5000 val rate = accountProperties.minOf { it.rateLimitPerSec }.toLong() logger.info("Burst Rate Limiter Properties: bucket size - {}, rate - {}", bucketSize, rate) return LeakingBucketRateLimiter( diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentAggregateCommands.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentAggregateCommands.kt index 01f7284a5..46252f86e 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentAggregateCommands.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentAggregateCommands.kt @@ -28,8 +28,9 @@ fun PaymentAggregateState.logProcessing( transactionId: UUID? = null, reason: String? = null ): PaymentProcessedEvent { - val submittedAt = this.submissions[transactionId ?: UUID.randomUUID()]?.timeStarted ?: 0 - val spentInQueueDuration = this.submissions[transactionId ?: UUID.randomUUID()]?.spentInQueue ?: Duration.ofMillis(0) + val submission = this.submissions[transactionId ?: UUID.randomUUID()] + val submittedAt = submission?.timeStarted ?: 0 + val spentInQueueDuration = submission?.spentInQueue ?: Duration.ofMillis(0) Metrics.timer("bombardier.in.queue.latency").record(spentInQueueDuration) return PaymentProcessedEvent( this.getId(), success, this.orderId, submittedAt, processedAt, this.amount!!, transactionId, reason, spentInQueueDuration diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index fe4f2b717..23cf60f48 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -6,7 +6,6 @@ import io.micrometer.core.instrument.MeterRegistry import io.micrometer.core.instrument.Tag import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers -import kotlinx.coroutines.delay import kotlinx.coroutines.launch import org.slf4j.Logger import org.slf4j.LoggerFactory @@ -158,18 +157,15 @@ class PaymentExternalSystemAdapterImpl( } if (retryCount + 1 >= 3) { - scope.launch { - updateWithRetry(paymentId, false, now(), "Max attempts reached") paymentESService.update(paymentId) { it.logProcessing(false, now(), transactionId, "Max attempts reached") } - } } else { val backoff = ((2.0.pow(retryCount.toDouble()) * 25).toLong() + Random.nextLong(10)) val capped = backoff.coerceAtMost(deadline - now() - 5) if (capped <= 0) { - scope.launch { - updateWithRetry(paymentId, false, now(), "Deadline exceeded, no time for retry") + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Deadline expired") } } else { if (isRetriable) { @@ -179,8 +175,8 @@ class PaymentExternalSystemAdapterImpl( retryCount, request, paymentId, transactionId, deadline, capped ) } else { - scope.launch { - updateWithRetry(paymentId, false, now(), "Non-retriable exception") + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Non-retriable exception") } } } @@ -193,8 +189,8 @@ class PaymentExternalSystemAdapterImpl( } catch (ex: Exception) { ExternalSysResponse(transactionId.toString(), paymentId.toString(), false, ex.message) } - scope.launch { - updateWithRetry(paymentId, parsed.result, now(), parsed.message ?: "No message") + paymentESService.update(paymentId) { + it.logProcessing(parsed.result, now(), transactionId, parsed.message) } } catch (e: Exception) { logger.error("[$accountName] Error processing payment $paymentId", e) @@ -212,8 +208,13 @@ class PaymentExternalSystemAdapterImpl( if (remainingTime < requestAverageProcessingTime.toMillis()) { retryRequests.decrementAndGet() try { - scope.launch { - updateWithRetry(paymentId, false, now(), "Not enough time for retry") + paymentESService.update(paymentId) { + it.logProcessing( + false, + now(), + transactionId, + "Not enough time for retry" + ) } } catch (e: Exception) { logger.error("[$accountName] Failed to record retry failure for $paymentId", e) @@ -231,27 +232,6 @@ class PaymentExternalSystemAdapterImpl( }, delay, TimeUnit.MILLISECONDS) } - private suspend fun updateWithRetry( - paymentId: UUID, - result: Boolean, - processedAt: Long, - reason: String, - maxAttempts: Int = 5, - ) { - repeat(maxAttempts) { attempt -> - try { - paymentESService.update(paymentId) { - it.logProcessing(result, processedAt, paymentId, reason) - } - } catch (_: Exception) { - if (attempt == maxAttempts - 1) { - return - } - val delay = (10L * (attempt + 1)) + Random.nextLong(5) - delay(delay) - } - } - } companion object { val logger: Logger = LoggerFactory.getLogger(PaymentExternalSystemAdapter::class.java) From fcc903492dcea97df3ef14002bae9fbaf96cc248 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Fri, 13 Mar 2026 19:14:32 +0300 Subject: [PATCH 29/41] hotfix: fixed optimistic lock exception --- .../ru/quipy/payments/logic/PaymentAggregateCommands.kt | 2 +- .../quipy/payments/logic/PaymentExternalServiceImpl.kt | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentAggregateCommands.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentAggregateCommands.kt index 46252f86e..26132f1c2 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentAggregateCommands.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentAggregateCommands.kt @@ -28,7 +28,7 @@ fun PaymentAggregateState.logProcessing( transactionId: UUID? = null, reason: String? = null ): PaymentProcessedEvent { - val submission = this.submissions[transactionId ?: UUID.randomUUID()] + val submission = transactionId?.let { this.submissions[it] } val submittedAt = submission?.timeStarted ?: 0 val spentInQueueDuration = submission?.spentInQueue ?: Duration.ofMillis(0) Metrics.timer("bombardier.in.queue.latency").record(spentInQueueDuration) diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index 23cf60f48..7f3dc149a 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -6,7 +6,6 @@ import io.micrometer.core.instrument.MeterRegistry import io.micrometer.core.instrument.Tag import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers -import kotlinx.coroutines.launch import org.slf4j.Logger import org.slf4j.LoggerFactory import ru.quipy.common.utils.SlidingWindowRateLimiter @@ -46,7 +45,7 @@ class PaymentExternalSystemAdapterImpl( private val serviceName = properties.serviceName private val accountName = properties.accountName private val requestAverageProcessingTime = properties.averageProcessingTime - private val time95Percentile = 40; + private val time95Percentile = 40 private val rateLimitPerSec = properties.rateLimitPerSec private val parallelRequests = properties.parallelRequests private val inFlightRequests = AtomicInteger(0) @@ -71,7 +70,7 @@ class PaymentExternalSystemAdapterImpl( meterRegistry.timer("payment.external.system.request.latency", "accountName", properties.accountName) private val retryCounter = meterRegistry.counter("payment.external.retry.count", "accountName", properties.accountName) - private val retryExecutor: ScheduledExecutorService = Executors.newScheduledThreadPool(properties.parallelRequests) + private val retryExecutor: ScheduledExecutorService = Executors.newScheduledThreadPool(10) private val httpClient = HttpClient .newBuilder() .executor(Executors.newFixedThreadPool(parallelRequests)) @@ -85,15 +84,13 @@ class PaymentExternalSystemAdapterImpl( // Вне зависимости от исхода оплаты важно отметить что она была отправлена. // Это требуется сделать ВО ВСЕХ СЛУЧАЯХ, поскольку эта информация используется сервисом тестирования. try { - scope.launch { - paymentESService.update(paymentId) { + paymentESService.update(paymentId) { it.logSubmission( success = true, transactionId, now(), Duration.ofMillis(now() - paymentStartedAt) ) - } } } catch (e: Exception) { logger.error("[$accountName] Failed to record log submission for $paymentId", e) From 0aa76f9c78c1dd6e2a05f5bb8ad98e031cfc3552 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Fri, 13 Mar 2026 19:31:11 +0300 Subject: [PATCH 30/41] hotfix: remove call burst limiter --- src/main/kotlin/ru/quipy/apigateway/APIController.kt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/main/kotlin/ru/quipy/apigateway/APIController.kt b/src/main/kotlin/ru/quipy/apigateway/APIController.kt index 8b4d7ee3f..1ead611bc 100644 --- a/src/main/kotlin/ru/quipy/apigateway/APIController.kt +++ b/src/main/kotlin/ru/quipy/apigateway/APIController.kt @@ -6,7 +6,6 @@ import org.slf4j.Logger import org.slf4j.LoggerFactory import org.springframework.web.bind.annotation.* import ru.quipy.common.utils.LeakingBucketRateLimiter -import ru.quipy.exceptions.TooManyRequestsException import ru.quipy.orders.repository.OrderRepository import ru.quipy.payments.logic.OrderPayer import ru.quipy.payments.logic.PaymentAccountProperties @@ -67,9 +66,6 @@ class APIController( @PostMapping("/orders/{orderId}/payment") fun payOrder(@PathVariable orderId: UUID, @RequestParam deadline: Long): PaymentSubmissionDto { controllerRequests.incrementAndGet() - if (!rateLimiter.tickBlocking(10)) { - throw TooManyRequestsException(10) - } val paymentId = UUID.randomUUID() val order = orderRepository.findById(orderId)?.let { orderRepository.save(it.copy(status = OrderStatus.PAYMENT_IN_PROGRESS)) From 91a02126044ffd75b61cb7fc3193edf47ae3f778 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Fri, 13 Mar 2026 19:42:57 +0300 Subject: [PATCH 31/41] fix: fixed time for blocking --- src/main/kotlin/ru/quipy/apigateway/APIController.kt | 4 ++++ .../ru/quipy/payments/logic/PaymentExternalServiceImpl.kt | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/main/kotlin/ru/quipy/apigateway/APIController.kt b/src/main/kotlin/ru/quipy/apigateway/APIController.kt index 1ead611bc..8b4d7ee3f 100644 --- a/src/main/kotlin/ru/quipy/apigateway/APIController.kt +++ b/src/main/kotlin/ru/quipy/apigateway/APIController.kt @@ -6,6 +6,7 @@ import org.slf4j.Logger import org.slf4j.LoggerFactory import org.springframework.web.bind.annotation.* import ru.quipy.common.utils.LeakingBucketRateLimiter +import ru.quipy.exceptions.TooManyRequestsException import ru.quipy.orders.repository.OrderRepository import ru.quipy.payments.logic.OrderPayer import ru.quipy.payments.logic.PaymentAccountProperties @@ -66,6 +67,9 @@ class APIController( @PostMapping("/orders/{orderId}/payment") fun payOrder(@PathVariable orderId: UUID, @RequestParam deadline: Long): PaymentSubmissionDto { controllerRequests.incrementAndGet() + if (!rateLimiter.tickBlocking(10)) { + throw TooManyRequestsException(10) + } val paymentId = UUID.randomUUID() val order = orderRepository.findById(orderId)?.let { orderRepository.save(it.copy(status = OrderStatus.PAYMENT_IN_PROGRESS)) diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index 7f3dc149a..16982e02f 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -45,7 +45,7 @@ class PaymentExternalSystemAdapterImpl( private val serviceName = properties.serviceName private val accountName = properties.accountName private val requestAverageProcessingTime = properties.averageProcessingTime - private val time95Percentile = 40 + private val time95Percentile = 40L private val rateLimitPerSec = properties.rateLimitPerSec private val parallelRequests = properties.parallelRequests private val inFlightRequests = AtomicInteger(0) @@ -119,7 +119,7 @@ class PaymentExternalSystemAdapterImpl( ) { inFlightRequests.incrementAndGet() val timeBeforeCall = now() - if (!rateLimiter.tickBlocking(timeout = deadline - now() - time95Percentile)) { + if (!rateLimiter.tickBlocking(time95Percentile)) { parallelLimiter.release() throw TooManyRequestsException(10) } @@ -217,10 +217,10 @@ class PaymentExternalSystemAdapterImpl( logger.error("[$accountName] Failed to record retry failure for $paymentId", e) } } else { - //val newRequestTimeout = remainingTime.coerceIn(time95Percentile, requestAverageProcessingTime.toMillis() * 2) + val newRequestTimeout = remainingTime.coerceIn(time95Percentile, requestAverageProcessingTime.toMillis() * 2) val newRequest = HttpRequest.newBuilder() .uri(request.uri()) - //.timeout(Duration.ofMillis(newRequestTimeout)) + .timeout(Duration.ofMillis(50)) .POST(HttpRequest.BodyPublishers.noBody()) .build() parallelLimiter.acquire() From 3a2635467abae546b266de4eb693f65e17afac52 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Fri, 13 Mar 2026 19:51:17 +0300 Subject: [PATCH 32/41] fix: removed timeout --- .../kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index 16982e02f..25667a5c9 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -220,7 +220,6 @@ class PaymentExternalSystemAdapterImpl( val newRequestTimeout = remainingTime.coerceIn(time95Percentile, requestAverageProcessingTime.toMillis() * 2) val newRequest = HttpRequest.newBuilder() .uri(request.uri()) - .timeout(Duration.ofMillis(50)) .POST(HttpRequest.BodyPublishers.noBody()) .build() parallelLimiter.acquire() From 17824847e623fa2c8590c423bd5fb7b8f15e2085 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Sat, 14 Mar 2026 19:57:51 +0300 Subject: [PATCH 33/41] fix: removed db calling --- .../logic/PaymentAggregateCommands.kt | 4 ++- .../logic/PaymentExternalServiceImpl.kt | 26 +++++++++---------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentAggregateCommands.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentAggregateCommands.kt index 26132f1c2..8aafdaa33 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentAggregateCommands.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentAggregateCommands.kt @@ -28,10 +28,12 @@ fun PaymentAggregateState.logProcessing( transactionId: UUID? = null, reason: String? = null ): PaymentProcessedEvent { + val startCallInMap = now() val submission = transactionId?.let { this.submissions[it] } + val endCallInMap = now() val submittedAt = submission?.timeStarted ?: 0 val spentInQueueDuration = submission?.spentInQueue ?: Duration.ofMillis(0) - Metrics.timer("bombardier.in.queue.latency").record(spentInQueueDuration) + Metrics.timer("bombardier.in.queue.latency").record(Duration.ofMillis(endCallInMap-startCallInMap)) return PaymentProcessedEvent( this.getId(), success, this.orderId, submittedAt, processedAt, this.amount!!, transactionId, reason, spentInQueueDuration ) diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index 25667a5c9..394ca2255 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -45,7 +45,7 @@ class PaymentExternalSystemAdapterImpl( private val serviceName = properties.serviceName private val accountName = properties.accountName private val requestAverageProcessingTime = properties.averageProcessingTime - private val time95Percentile = 40L + private val time95Percentile = 100L private val rateLimitPerSec = properties.rateLimitPerSec private val parallelRequests = properties.parallelRequests private val inFlightRequests = AtomicInteger(0) @@ -83,18 +83,18 @@ class PaymentExternalSystemAdapterImpl( // Вне зависимости от исхода оплаты важно отметить что она была отправлена. // Это требуется сделать ВО ВСЕХ СЛУЧАЯХ, поскольку эта информация используется сервисом тестирования. - try { - paymentESService.update(paymentId) { - it.logSubmission( - success = true, - transactionId, - now(), - Duration.ofMillis(now() - paymentStartedAt) - ) - } - } catch (e: Exception) { - logger.error("[$accountName] Failed to record log submission for $paymentId", e) - } +// try { +// paymentESService.update(paymentId) { +// it.logSubmission( +// success = true, +// transactionId, +// now(), +// Duration.ofMillis(now() - paymentStartedAt) +// ) +// } +// } catch (e: Exception) { +// logger.error("[$accountName] Failed to record log submission for $paymentId", e) +// } val request = HttpRequest.newBuilder() .uri(URI("http://$paymentProviderHostPort/external/process?serviceName=$serviceName&token=$token&accountName=$accountName&transactionId=$transactionId&paymentId=$paymentId&amount=$amount")) //.timeout(Duration.ofMillis(time95Percentile)) From e0dae4f244abae6b3f4e493cdc3c73ce32a7cb7a Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Sat, 14 Mar 2026 20:18:44 +0300 Subject: [PATCH 34/41] fix: fixed timeout --- .../ru/quipy/payments/config/PaymentAccountsConfig.kt | 2 +- src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt | 2 +- .../quipy/payments/logic/PaymentExternalServiceImpl.kt | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt index 008052d3d..c5f506c6a 100644 --- a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt +++ b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt @@ -92,7 +92,7 @@ class PaymentAccountsConfig { @Value("#{'\${payment.processingTimeMillis}'.split(',')}") processingTimeMillis: Int ): LeakingBucketRateLimiter { - val bucketSize = 5000 + val bucketSize = 6000 val rate = accountProperties.minOf { it.rateLimitPerSec }.toLong() logger.info("Burst Rate Limiter Properties: bucket size - {}, rate - {}", bucketSize, rate) return LeakingBucketRateLimiter( diff --git a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt index f1d928379..4e464460d 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt @@ -45,7 +45,7 @@ class OrderPayer( paymentExecutor.submit { startedCounter.increment() try { - val createdEvent = paymentESService.create { + paymentESService.create { it.create( paymentId, orderId, diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index 394ca2255..15e00345d 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -45,7 +45,8 @@ class PaymentExternalSystemAdapterImpl( private val serviceName = properties.serviceName private val accountName = properties.accountName private val requestAverageProcessingTime = properties.averageProcessingTime - private val time95Percentile = 100L + private val processingTimeMillis = 1000L + private val time95Percentile = processingTimeMillis / 3 private val rateLimitPerSec = properties.rateLimitPerSec private val parallelRequests = properties.parallelRequests private val inFlightRequests = AtomicInteger(0) @@ -65,7 +66,6 @@ class PaymentExternalSystemAdapterImpl( ) { it.toDouble() } } - private val processingTimeMillis = 1000L private val timer = meterRegistry.timer("payment.external.system.request.latency", "accountName", properties.accountName) private val retryCounter = @@ -97,7 +97,7 @@ class PaymentExternalSystemAdapterImpl( // } val request = HttpRequest.newBuilder() .uri(URI("http://$paymentProviderHostPort/external/process?serviceName=$serviceName&token=$token&accountName=$accountName&transactionId=$transactionId&paymentId=$paymentId&amount=$amount")) - //.timeout(Duration.ofMillis(time95Percentile)) + .timeout(Duration.ofMillis(time95Percentile)) .POST(HttpRequest.BodyPublishers.noBody()) .build() parallelLimiter.acquire() @@ -119,8 +119,7 @@ class PaymentExternalSystemAdapterImpl( ) { inFlightRequests.incrementAndGet() val timeBeforeCall = now() - if (!rateLimiter.tickBlocking(time95Percentile)) { - parallelLimiter.release() + if (!rateLimiter.tick()) { throw TooManyRequestsException(10) } httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString()) @@ -221,6 +220,7 @@ class PaymentExternalSystemAdapterImpl( val newRequest = HttpRequest.newBuilder() .uri(request.uri()) .POST(HttpRequest.BodyPublishers.noBody()) + .timeout(Duration.ofMillis(newRequestTimeout)) .build() parallelLimiter.acquire() completeAction(retryCount + 1, newRequest, paymentId, transactionId, deadline) From 0289539e135977324606ce2200305da877c0eaf9 Mon Sep 17 00:00:00 2001 From: vaycheslav Date: Sat, 14 Mar 2026 20:18:44 +0300 Subject: [PATCH 35/41] =?UTF-8?q?First=20start:=20=E2=96=84=EF=B8=BB?= =?UTF-8?q?=E3=83=87=E2=95=90=E2=95=90=E2=94=81=E4=B8=80=20++++?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../payments/config/PaymentAccountsConfig.kt | 8 +- .../logic/PaymentExternalServiceImpl.kt | 162 ++++++++++++++---- .../ru/quipy/payments/logic/PaymentService.kt | 2 + src/main/resources/application.properties | 2 +- test-local-run.http | 31 +--- 5 files changed, 144 insertions(+), 61 deletions(-) diff --git a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt index c5f506c6a..0e5ddee6c 100644 --- a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt +++ b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt @@ -143,7 +143,13 @@ class PaymentAccountsConfig { rateLimiter: SlidingWindowRateLimiter ): List { return accountProperties - .map { it.copy(enabled = true) } + .map { + if (it.accountName == "acc-22") { + it.copy(enabled = true, hedgingEnabled = true, hedgeDelayMillis = 400L) + } else { + it.copy(enabled = true) + } + } .onEach(::println) .map { PaymentExternalSystemAdapterImpl( diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index 15e00345d..38a0789e1 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -26,6 +26,7 @@ import java.util.concurrent.Executors import java.util.concurrent.ScheduledExecutorService import java.util.concurrent.Semaphore import java.util.concurrent.TimeUnit +import java.util.concurrent.atomic.AtomicBoolean import java.util.concurrent.atomic.AtomicInteger import kotlin.math.pow import kotlin.random.Random @@ -45,14 +46,16 @@ class PaymentExternalSystemAdapterImpl( private val serviceName = properties.serviceName private val accountName = properties.accountName private val requestAverageProcessingTime = properties.averageProcessingTime - private val processingTimeMillis = 1000L - private val time95Percentile = processingTimeMillis / 3 + private val connectTimeoutMillis = (requestAverageProcessingTime.toMillis() / 2).coerceAtLeast(100L) private val rateLimitPerSec = properties.rateLimitPerSec private val parallelRequests = properties.parallelRequests private val inFlightRequests = AtomicInteger(0) private val retryRequests = AtomicInteger(0) private val scope = CoroutineScope(Dispatchers.Default) + private val hedgeEnabled = properties.hedgingEnabled + private val hedgeDelayMillis = properties.hedgeDelayMillis ?: (requestAverageProcessingTime.toMillis() / 2).coerceAtLeast(50L) + init { meterRegistry.gauge( "payment.account.inflight.requests", @@ -75,7 +78,7 @@ class PaymentExternalSystemAdapterImpl( .newBuilder() .executor(Executors.newFixedThreadPool(parallelRequests)) .version(HttpClient.Version.HTTP_2) - .connectTimeout(Duration.ofMillis(processingTimeMillis / 2)) + .connectTimeout(Duration.ofMillis(connectTimeoutMillis)) .build() override fun performPaymentAsync(paymentId: UUID, amount: Int, paymentStartedAt: Long, deadline: Long) { @@ -95,13 +98,61 @@ class PaymentExternalSystemAdapterImpl( // } catch (e: Exception) { // logger.error("[$accountName] Failed to record log submission for $paymentId", e) // } + val remainingTimeAtStart = deadline - now() + val initialTimeout = when { + remainingTimeAtStart <= 50L -> 50L + else -> remainingTimeAtStart.coerceAtMost(requestAverageProcessingTime.toMillis() * 2) + } + val request = HttpRequest.newBuilder() .uri(URI("http://$paymentProviderHostPort/external/process?serviceName=$serviceName&token=$token&accountName=$accountName&transactionId=$transactionId&paymentId=$paymentId&amount=$amount")) - .timeout(Duration.ofMillis(time95Percentile)) + .timeout(Duration.ofMillis(initialTimeout)) .POST(HttpRequest.BodyPublishers.noBody()) .build() + val completed = AtomicBoolean(false) + parallelLimiter.acquire() - completeAction(0, request, paymentId, transactionId, deadline) + sendAttempt(0, request, paymentId, transactionId, deadline, completed, allowHedge = true) + + if (hedgeEnabled) { + try { + val nowForHedge = now() + var remaining = deadline - nowForHedge + val desiredDelay = remaining - requestAverageProcessingTime.toMillis() - 5L + val computedDelay = when { + desiredDelay <= 0L -> 0L + else -> desiredDelay + } + val scheduleDelay = hedgeDelayMillis.coerceAtMost(computedDelay) + + retryExecutor.schedule({ + try { + if (completed.get()) return@schedule + val nowBeforeHedge = now() + val remainingBeforeHedge = deadline - nowBeforeHedge + if (remainingBeforeHedge <= 50L) return@schedule + + val hedgeTimeout = remainingBeforeHedge.coerceAtMost(requestAverageProcessingTime.toMillis() * 2) + val hedgeRequest = HttpRequest.newBuilder() + .uri(request.uri()) + .POST(HttpRequest.BodyPublishers.noBody()) + .timeout(Duration.ofMillis(hedgeTimeout)) + .build() + + if (!rateLimiter.tick()) { + return@schedule + } + + parallelLimiter.acquire() + sendAttempt(0, hedgeRequest, paymentId, transactionId, deadline, completed, allowHedge = false) + } catch (e: Exception) { + logger.warn("[$accountName] Failed to send hedged request for $paymentId", e) + } + }, scheduleDelay, TimeUnit.MILLISECONDS) + } catch (e: Exception) { + logger.warn("[$accountName] Failed to schedule hedged request", e) + } + } } override fun price() = properties.price @@ -110,16 +161,20 @@ class PaymentExternalSystemAdapterImpl( override fun name() = properties.accountName - fun completeAction( + fun sendAttempt( retryCount: Long, request: HttpRequest, paymentId: UUID, transactionId: UUID, - deadline: Long + deadline: Long, + completed: AtomicBoolean, + allowHedge: Boolean ) { inFlightRequests.incrementAndGet() val timeBeforeCall = now() if (!rateLimiter.tick()) { + parallelLimiter.release() + inFlightRequests.decrementAndGet() throw TooManyRequestsException(10) } httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString()) @@ -130,6 +185,11 @@ class PaymentExternalSystemAdapterImpl( retryRequests.decrementAndGet() } timer.record(now() - timeBeforeCall, TimeUnit.MILLISECONDS) + + if (completed.get()) { + return@whenComplete + } + if (throwable != null) { val e = throwable.cause var isRetriable = true @@ -153,26 +213,46 @@ class PaymentExternalSystemAdapterImpl( } if (retryCount + 1 >= 3) { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "Max attempts reached") + if (completed.compareAndSet(false, true)) { + try { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Max attempts reached") + } + } catch (ex: Exception) { + logger.error("[$accountName] Failed to record max attempts for $paymentId", ex) } + } } else { val backoff = ((2.0.pow(retryCount.toDouble()) * 25).toLong() + Random.nextLong(10)) val capped = backoff.coerceAtMost(deadline - now() - 5) if (capped <= 0) { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "Deadline expired") + if (completed.compareAndSet(false, true)) { + try { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Deadline expired") + } + } catch (ex: Exception) { + logger.error("[$accountName] Failed to record deadline expiry for $paymentId", ex) + } } } else { if (isRetriable) { - retryRequests.incrementAndGet() - retryCounter.increment() - scheduleRetry( - retryCount, request, paymentId, transactionId, deadline, capped - ) + if (!completed.get()) { + retryRequests.incrementAndGet() + retryCounter.increment() + scheduleRetry( + retryCount, request, paymentId, transactionId, deadline, capped, completed + ) + } } else { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "Non-retriable exception") + if (completed.compareAndSet(false, true)) { + try { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Non-retriable exception") + } + } catch (ex: Exception) { + logger.error("[$accountName] Failed to record non-retriable for $paymentId", ex) + } } } } @@ -185,8 +265,14 @@ class PaymentExternalSystemAdapterImpl( } catch (ex: Exception) { ExternalSysResponse(transactionId.toString(), paymentId.toString(), false, ex.message) } - paymentESService.update(paymentId) { - it.logProcessing(parsed.result, now(), transactionId, parsed.message) + if (completed.compareAndSet(false, true)) { + try { + paymentESService.update(paymentId) { + it.logProcessing(parsed.result, now(), transactionId, parsed.message) + } + } catch (e: Exception) { + logger.error("[$accountName] Error updating ES for payment $paymentId", e) + } } } catch (e: Exception) { logger.error("[$accountName] Error processing payment $paymentId", e) @@ -197,33 +283,47 @@ class PaymentExternalSystemAdapterImpl( private fun scheduleRetry( retryCount: Long, request: HttpRequest, paymentId: UUID, - transactionId: UUID, deadline: Long, delay: Long + transactionId: UUID, deadline: Long, delay: Long, completed: AtomicBoolean ) { retryExecutor.schedule({ + if (completed.get()) return@schedule val remainingTime = deadline - now() if (remainingTime < requestAverageProcessingTime.toMillis()) { - retryRequests.decrementAndGet() try { - paymentESService.update(paymentId) { - it.logProcessing( - false, - now(), - transactionId, - "Not enough time for retry" - ) + if (completed.compareAndSet(false, true)) { + paymentESService.update(paymentId) { + it.logProcessing( + false, + now(), + transactionId, + "Not enough time for retry" + ) + } } } catch (e: Exception) { logger.error("[$accountName] Failed to record retry failure for $paymentId", e) } } else { - val newRequestTimeout = remainingTime.coerceIn(time95Percentile, requestAverageProcessingTime.toMillis() * 2) + val newRequestTimeout = remainingTime.coerceIn(50L, requestAverageProcessingTime.toMillis() * 2) val newRequest = HttpRequest.newBuilder() .uri(request.uri()) .POST(HttpRequest.BodyPublishers.noBody()) .timeout(Duration.ofMillis(newRequestTimeout)) .build() + if (!rateLimiter.tick()) { + if (deadline - now() <= 0 && completed.compareAndSet(false, true)) { + try { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Rate limit prevented retry and deadline expired") + } + } catch (e: Exception) { + logger.error("[$accountName] Failed to record rate-limited retry for $paymentId", e) + } + } + return@schedule + } parallelLimiter.acquire() - completeAction(retryCount + 1, newRequest, paymentId, transactionId, deadline) + sendAttempt(retryCount + 1, newRequest, paymentId, transactionId, deadline, completed, allowHedge = false) } }, delay, TimeUnit.MILLISECONDS) } diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentService.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentService.kt index 255db77dd..abe4ee41c 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentService.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentService.kt @@ -37,6 +37,8 @@ data class PaymentAccountProperties( val price: Int, val averageProcessingTime: Duration = Duration.ofSeconds(11), val enabled: Boolean, + val hedgingEnabled: Boolean = false, + val hedgeDelayMillis: Long? = null ) /** diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index ad9001958..80e0a0369 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -33,7 +33,7 @@ payment.hostPort=${PAYMENT_HOST:localhost}:${PAYMENT_PORT:1234} # For 8 case #payment.accounts=${PAYMENT_ACCOUNTS:acc-9} -payment.accounts=${PAYMENT_ACCOUNTS:acc-13} +payment.accounts=${PAYMENT_ACCOUNTS:acc-22} payment.processingTimeMillis=1000 payment.maximumPoolSize=50_000 diff --git a/test-local-run.http b/test-local-run.http index 9cde39e20..7e8744fd6 100644 --- a/test-local-run.http +++ b/test-local-run.http @@ -1,4 +1,4 @@ -### Test 6 run... +### Test 11 run... POST http://localhost:1234/test/run Content-Type: application/json @@ -6,35 +6,10 @@ Content-Type: application/json "serviceName": "{{serviceName}}", "token": "{{token}}", "ratePerSecond": 100, - "testCount": 5000, - "processingTimeMillis": 20000 + "testCount": 20000, + "processingTimeMillis": 1500 } -### Test 7 run... -POST http://localhost:1234/test/run -Content-Type: application/json - -{ - "serviceName": "{{serviceName}}", - "token": "{{token}}", - "ratePerSecond": 100, - "testCount": 5000, - "processingTimeMillis": 20000 -} - -### Test 8 run... -POST http://localhost:1234/test/run -Content-Type: application/json - -{ - "serviceName": "{{serviceName}}", - "token": "{{token}}", - "ratePerSecond": 100, - "testCount": 5000, - "processingTimeMillis": 20000 -} - - ### Stop running test to save time and resources # @timeout 120 From e6664231f86a656f15c69d6b6702d2b24eb77b71 Mon Sep 17 00:00:00 2001 From: 21092004Goda Date: Sun, 15 Mar 2026 15:07:39 +0300 Subject: [PATCH 36/41] =?UTF-8?q?First=20start:=20=E2=96=84=EF=B8=BB?= =?UTF-8?q?=E3=83=87=E2=95=90=E2=95=90=E2=94=81=E4=B8=80=20-=20-=20-?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 2 +- test-on-prem-run.http | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index de31b9fe7..e8458bcec 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ RUN mvn dependency:go-offline COPY src src RUN mvn package -FROM eclipse-temurin:17-alpine-3.22 +FROM openjdk:17-jdk-slim-bullseye COPY --from=build /app/target/*.jar /high-load-course.jar diff --git a/test-on-prem-run.http b/test-on-prem-run.http index dd5765987..ca62a0174 100644 --- a/test-on-prem-run.http +++ b/test-on-prem-run.http @@ -7,10 +7,10 @@ Content-Type: application/json "serviceName": "{{serviceName}}", "token": "{{token}}", "branch": "main", - "accounts": "acc-12,acc-20", - "ratePerSecond": 2, - "testCount": 10, - "processingTimeMillis": 80000, + "accounts": "acc-22", + "ratePerSecond": 100, + "testCount": 20000, + "processingTimeMillis": 1500, "onPremises": true } From 4ee670986ff40fccc5be8786bf80cb0ec5e3c65f Mon Sep 17 00:00:00 2001 From: 21092004Goda Date: Sun, 15 Mar 2026 15:12:49 +0300 Subject: [PATCH 37/41] =?UTF-8?q?First=20start:=20=E2=96=84=EF=B8=BB?= =?UTF-8?q?=E3=83=87=E2=95=90=E2=95=90=E2=94=81=E4=B8=80=20-=20-=20-=20+?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index e8458bcec..de31b9fe7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,7 +6,7 @@ RUN mvn dependency:go-offline COPY src src RUN mvn package -FROM openjdk:17-jdk-slim-bullseye +FROM eclipse-temurin:17-alpine-3.22 COPY --from=build /app/target/*.jar /high-load-course.jar From 0b28277f16e8eb2ab47b322e399e5b2eeb64fea2 Mon Sep 17 00:00:00 2001 From: 21092004Goda Date: Fri, 20 Mar 2026 00:35:16 +0300 Subject: [PATCH 38/41] =?UTF-8?q?First=20start:=20=E2=96=84=EF=B8=BB?= =?UTF-8?q?=E3=83=87=E2=95=90=E2=95=90=E2=94=81=E4=B8=80=20-=20-=20-=20?= =?UTF-8?q?=D0=BC=D0=B8=D0=BC=D0=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ru/quipy/apigateway/APIController.kt | 8 +- .../apigateway/GlobalExceptionHandler.kt | 2 +- .../payments/config/PaymentAccountsConfig.kt | 36 +++--- .../logic/PaymentExternalServiceImpl.kt | 110 +++++------------- 4 files changed, 47 insertions(+), 109 deletions(-) diff --git a/src/main/kotlin/ru/quipy/apigateway/APIController.kt b/src/main/kotlin/ru/quipy/apigateway/APIController.kt index 8b4d7ee3f..875f3dc00 100644 --- a/src/main/kotlin/ru/quipy/apigateway/APIController.kt +++ b/src/main/kotlin/ru/quipy/apigateway/APIController.kt @@ -5,8 +5,6 @@ import io.micrometer.core.instrument.Tag import org.slf4j.Logger import org.slf4j.LoggerFactory import org.springframework.web.bind.annotation.* -import ru.quipy.common.utils.LeakingBucketRateLimiter -import ru.quipy.exceptions.TooManyRequestsException import ru.quipy.orders.repository.OrderRepository import ru.quipy.payments.logic.OrderPayer import ru.quipy.payments.logic.PaymentAccountProperties @@ -18,8 +16,7 @@ class APIController( private val orderRepository: OrderRepository, private val orderPayer: OrderPayer, meterRegistry: MeterRegistry, - accountProperties: List, - val rateLimiter: LeakingBucketRateLimiter + accountProperties: List ) { val logger: Logger = LoggerFactory.getLogger(APIController::class.java) @@ -67,9 +64,6 @@ class APIController( @PostMapping("/orders/{orderId}/payment") fun payOrder(@PathVariable orderId: UUID, @RequestParam deadline: Long): PaymentSubmissionDto { controllerRequests.incrementAndGet() - if (!rateLimiter.tickBlocking(10)) { - throw TooManyRequestsException(10) - } val paymentId = UUID.randomUUID() val order = orderRepository.findById(orderId)?.let { orderRepository.save(it.copy(status = OrderStatus.PAYMENT_IN_PROGRESS)) diff --git a/src/main/kotlin/ru/quipy/apigateway/GlobalExceptionHandler.kt b/src/main/kotlin/ru/quipy/apigateway/GlobalExceptionHandler.kt index 5679ea372..30c28fbca 100644 --- a/src/main/kotlin/ru/quipy/apigateway/GlobalExceptionHandler.kt +++ b/src/main/kotlin/ru/quipy/apigateway/GlobalExceptionHandler.kt @@ -28,6 +28,6 @@ class GlobalExceptionHandler( } @ExceptionHandler(TooManyRequestsException::class) fun handleTooManyRequestsRetriable(exception: TooManyRequestsException): ResponseEntity { - return ResponseEntity.status(HttpStatus.TOO_MANY_REQUESTS).header("Retry-After", "1").body("to many requests") + return ResponseEntity.status(HttpStatus.TOO_MANY_REQUESTS).header("Retry-After", "1000").body("to many requests") } } \ No newline at end of file diff --git a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt index 0e5ddee6c..8a7d36243 100644 --- a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt +++ b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt @@ -86,21 +86,21 @@ class PaymentAccountsConfig { return Semaphore(parallelRequests) } - @Bean - fun burstRateLimiter( - accountProperties: List, - @Value("#{'\${payment.processingTimeMillis}'.split(',')}") - processingTimeMillis: Int - ): LeakingBucketRateLimiter { - val bucketSize = 6000 - val rate = accountProperties.minOf { it.rateLimitPerSec }.toLong() - logger.info("Burst Rate Limiter Properties: bucket size - {}, rate - {}", bucketSize, rate) - return LeakingBucketRateLimiter( - rate = rate, - window = rateCheckWindow, - bucketSize = bucketSize - ) - } +// @Bean +// fun burstRateLimiter( +// accountProperties: List, +// @Value("#{'\${payment.processingTimeMillis}'.split(',')}") +// processingTimeMillis: Int +// ): LeakingBucketRateLimiter { +// val bucketSize = 6000 +// val rate = accountProperties.minOf { it.rateLimitPerSec }.toLong() +// logger.info("Burst Rate Limiter Properties: bucket size - {}, rate - {}", bucketSize, rate) +// return LeakingBucketRateLimiter( +// rate = rate, +// window = rateCheckWindow, +// bucketSize = bucketSize +// ) +// } @Bean fun smoothOutIncoming( @@ -139,8 +139,7 @@ class PaymentAccountsConfig { fun accountAdapters( paymentService: EventSourcingService, meterRegistry: io.micrometer.core.instrument.MeterRegistry, - accountProperties: List, - rateLimiter: SlidingWindowRateLimiter + accountProperties: List ): List { return accountProperties .map { @@ -158,8 +157,7 @@ class PaymentAccountsConfig { paymentProviderHostPort, token, meterRegistry, - parallelLimiter(accountProperties), - rateLimiter + parallelLimiter(accountProperties) ) } } diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index 38a0789e1..81f1f4b40 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -39,8 +39,7 @@ class PaymentExternalSystemAdapterImpl( private val paymentProviderHostPort: String, private val token: String, meterRegistry: MeterRegistry, - private val parallelLimiter: Semaphore, - private val rateLimiter: SlidingWindowRateLimiter + private val parallelLimiter: Semaphore ) : PaymentExternalSystemAdapter { private val serviceName = properties.serviceName @@ -139,10 +138,6 @@ class PaymentExternalSystemAdapterImpl( .timeout(Duration.ofMillis(hedgeTimeout)) .build() - if (!rateLimiter.tick()) { - return@schedule - } - parallelLimiter.acquire() sendAttempt(0, hedgeRequest, paymentId, transactionId, deadline, completed, allowHedge = false) } catch (e: Exception) { @@ -172,88 +167,54 @@ class PaymentExternalSystemAdapterImpl( ) { inFlightRequests.incrementAndGet() val timeBeforeCall = now() - if (!rateLimiter.tick()) { - parallelLimiter.release() - inFlightRequests.decrementAndGet() - throw TooManyRequestsException(10) - } httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString()) .whenComplete { response, throwable -> parallelLimiter.release() inFlightRequests.decrementAndGet() - if (retryCount > 0) { - retryRequests.decrementAndGet() - } + if (retryCount > 0) retryRequests.decrementAndGet() timer.record(now() - timeBeforeCall, TimeUnit.MILLISECONDS) - if (completed.get()) { - return@whenComplete - } + if (completed.get()) return@whenComplete if (throwable != null) { val e = throwable.cause - var isRetriable = true - when (throwable.cause) { - is SocketTimeoutException -> { - } - - is InterruptedIOException -> { - } - - is EOFException -> { - } - - is IOException -> { - } - - else -> { - logger.warn("[$accountName] io error: $paymentId", e) - isRetriable = false - } + val isRetriable = when (throwable.cause) { + is SocketTimeoutException, is InterruptedIOException, is EOFException, is IOException -> true + else -> false } - if (retryCount + 1 >= 3) { + if (retryCount + 1 >= 2) { // после 2 попыток бросаем исключение if (completed.compareAndSet(false, true)) { try { paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "Max attempts reached") + it.logProcessing(false, now(), transactionId, "Too many attempts") } } catch (ex: Exception) { logger.error("[$accountName] Failed to record max attempts for $paymentId", ex) } + throw TooManyRequestsException(1000L) } - } else { + return@whenComplete + } + + if (isRetriable) { + retryRequests.incrementAndGet() + retryCounter.increment() val backoff = ((2.0.pow(retryCount.toDouble()) * 25).toLong() + Random.nextLong(10)) val capped = backoff.coerceAtMost(deadline - now() - 5) - if (capped <= 0) { + if (capped > 0) { + scheduleRetry(retryCount, request, paymentId, transactionId, deadline, capped, completed) + } else { if (completed.compareAndSet(false, true)) { - try { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "Deadline expired") - } - } catch (ex: Exception) { - logger.error("[$accountName] Failed to record deadline expiry for $paymentId", ex) + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Deadline expired") } } - } else { - if (isRetriable) { - if (!completed.get()) { - retryRequests.incrementAndGet() - retryCounter.increment() - scheduleRetry( - retryCount, request, paymentId, transactionId, deadline, capped, completed - ) - } - } else { - if (completed.compareAndSet(false, true)) { - try { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "Non-retriable exception") - } - } catch (ex: Exception) { - logger.error("[$accountName] Failed to record non-retriable for $paymentId", ex) - } - } + } + } else { + if (completed.compareAndSet(false, true)) { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Non-retriable exception") } } } @@ -266,12 +227,8 @@ class PaymentExternalSystemAdapterImpl( ExternalSysResponse(transactionId.toString(), paymentId.toString(), false, ex.message) } if (completed.compareAndSet(false, true)) { - try { - paymentESService.update(paymentId) { - it.logProcessing(parsed.result, now(), transactionId, parsed.message) - } - } catch (e: Exception) { - logger.error("[$accountName] Error updating ES for payment $paymentId", e) + paymentESService.update(paymentId) { + it.logProcessing(parsed.result, now(), transactionId, parsed.message) } } } catch (e: Exception) { @@ -310,18 +267,7 @@ class PaymentExternalSystemAdapterImpl( .POST(HttpRequest.BodyPublishers.noBody()) .timeout(Duration.ofMillis(newRequestTimeout)) .build() - if (!rateLimiter.tick()) { - if (deadline - now() <= 0 && completed.compareAndSet(false, true)) { - try { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "Rate limit prevented retry and deadline expired") - } - } catch (e: Exception) { - logger.error("[$accountName] Failed to record rate-limited retry for $paymentId", e) - } - } - return@schedule - } + parallelLimiter.acquire() sendAttempt(retryCount + 1, newRequest, paymentId, transactionId, deadline, completed, allowHedge = false) } From dd3d818d972068b383cdcc5dead39b0542456d57 Mon Sep 17 00:00:00 2001 From: 21092004Goda Date: Fri, 20 Mar 2026 01:20:11 +0300 Subject: [PATCH 39/41] fix: optimize hedging and eliminate silent payment losses for 95%+ success rate --- .../payments/config/PaymentAccountsConfig.kt | 5 +- .../ru/quipy/payments/logic/OrderPayer.kt | 18 +- .../logic/PaymentExternalServiceImpl.kt | 231 +++++++++++------- 3 files changed, 163 insertions(+), 91 deletions(-) diff --git a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt index 8a7d36243..97b0bf95c 100644 --- a/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt +++ b/src/main/kotlin/ru/quipy/payments/config/PaymentAccountsConfig.kt @@ -144,7 +144,7 @@ class PaymentAccountsConfig { return accountProperties .map { if (it.accountName == "acc-22") { - it.copy(enabled = true, hedgingEnabled = true, hedgeDelayMillis = 400L) + it.copy(enabled = true, hedgingEnabled = true, hedgeDelayMillis = 150L) } else { it.copy(enabled = true) } @@ -157,7 +157,8 @@ class PaymentAccountsConfig { paymentProviderHostPort, token, meterRegistry, - parallelLimiter(accountProperties) + parallelLimiter(accountProperties), + rateLimiter = smoothOutIncoming(accountProperties) ) } } diff --git a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt index 4e464460d..929e2863e 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt @@ -45,12 +45,18 @@ class OrderPayer( paymentExecutor.submit { startedCounter.increment() try { - paymentESService.create { - it.create( - paymentId, - orderId, - amount - ) + val esFuture = java.util.concurrent.CompletableFuture.runAsync { + try { + paymentESService.create { + it.create( + paymentId, + orderId, + amount + ) + } + } catch (e: Exception) { + logger.error("Failed to create payment ES event for $paymentId", e) + } } inExecTimer.record(now()-createdAt, TimeUnit.MILLISECONDS) paymentService.submitPaymentRequest(paymentId, amount, createdAt, deadline) diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index 81f1f4b40..66a23f710 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -10,7 +10,7 @@ import org.slf4j.Logger import org.slf4j.LoggerFactory import ru.quipy.common.utils.SlidingWindowRateLimiter import ru.quipy.core.EventSourcingService -import ru.quipy.exceptions.TooManyRequestsException + import ru.quipy.payments.api.PaymentAggregate import java.io.EOFException import java.io.IOException @@ -39,13 +39,14 @@ class PaymentExternalSystemAdapterImpl( private val paymentProviderHostPort: String, private val token: String, meterRegistry: MeterRegistry, - private val parallelLimiter: Semaphore + private val parallelLimiter: Semaphore, + private val rateLimiter: SlidingWindowRateLimiter ) : PaymentExternalSystemAdapter { private val serviceName = properties.serviceName private val accountName = properties.accountName private val requestAverageProcessingTime = properties.averageProcessingTime - private val connectTimeoutMillis = (requestAverageProcessingTime.toMillis() / 2).coerceAtLeast(100L) + private val connectTimeoutMillis = 200L private val rateLimitPerSec = properties.rateLimitPerSec private val parallelRequests = properties.parallelRequests private val inFlightRequests = AtomicInteger(0) @@ -72,7 +73,7 @@ class PaymentExternalSystemAdapterImpl( meterRegistry.timer("payment.external.system.request.latency", "accountName", properties.accountName) private val retryCounter = meterRegistry.counter("payment.external.retry.count", "accountName", properties.accountName) - private val retryExecutor: ScheduledExecutorService = Executors.newScheduledThreadPool(10) + private val retryExecutor: ScheduledExecutorService = Executors.newScheduledThreadPool(30) private val httpClient = HttpClient .newBuilder() .executor(Executors.newFixedThreadPool(parallelRequests)) @@ -83,69 +84,81 @@ class PaymentExternalSystemAdapterImpl( override fun performPaymentAsync(paymentId: UUID, amount: Int, paymentStartedAt: Long, deadline: Long) { val transactionId = UUID.randomUUID() - // Вне зависимости от исхода оплаты важно отметить что она была отправлена. - // Это требуется сделать ВО ВСЕХ СЛУЧАЯХ, поскольку эта информация используется сервисом тестирования. -// try { -// paymentESService.update(paymentId) { -// it.logSubmission( -// success = true, -// transactionId, -// now(), -// Duration.ofMillis(now() - paymentStartedAt) -// ) -// } -// } catch (e: Exception) { -// logger.error("[$accountName] Failed to record log submission for $paymentId", e) -// } val remainingTimeAtStart = deadline - now() - val initialTimeout = when { - remainingTimeAtStart <= 50L -> 50L - else -> remainingTimeAtStart.coerceAtMost(requestAverageProcessingTime.toMillis() * 2) + if (remainingTimeAtStart <= 50L) { + try { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Deadline already expired") + } + } catch (e: Exception) { + logger.error("[$accountName] Failed to record deadline expiry for $paymentId", e) + } + return } + val initialTimeout = remainingTimeAtStart + + val baseUri = + "http://$paymentProviderHostPort/external/process?serviceName=$serviceName&token=$token&accountName=$accountName&transactionId=$transactionId&paymentId=$paymentId&amount=$amount" val request = HttpRequest.newBuilder() - .uri(URI("http://$paymentProviderHostPort/external/process?serviceName=$serviceName&token=$token&accountName=$accountName&transactionId=$transactionId&paymentId=$paymentId&amount=$amount")) + .uri(URI(baseUri)) .timeout(Duration.ofMillis(initialTimeout)) .POST(HttpRequest.BodyPublishers.noBody()) .build() val completed = AtomicBoolean(false) + if (!rateLimiter.tickBlocking(50)) { + try { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Rate limit exceeded at start") + } + } catch (e: Exception) { + logger.error("[$accountName] Failed to record rate limit for $paymentId", e) + } + return + } + parallelLimiter.acquire() sendAttempt(0, request, paymentId, transactionId, deadline, completed, allowHedge = true) if (hedgeEnabled) { - try { - val nowForHedge = now() - var remaining = deadline - nowForHedge - val desiredDelay = remaining - requestAverageProcessingTime.toMillis() - 5L - val computedDelay = when { - desiredDelay <= 0L -> 0L - else -> desiredDelay - } - val scheduleDelay = hedgeDelayMillis.coerceAtMost(computedDelay) + val hedgeDelays = listOf(hedgeDelayMillis, hedgeDelayMillis * 2) + for (delay in hedgeDelays) { + try { + retryExecutor.schedule({ + try { + if (completed.get()) return@schedule + val remainingBeforeHedge = deadline - now() + if (remainingBeforeHedge <= 100L) return@schedule - retryExecutor.schedule({ - try { - if (completed.get()) return@schedule - val nowBeforeHedge = now() - val remainingBeforeHedge = deadline - nowBeforeHedge - if (remainingBeforeHedge <= 50L) return@schedule - - val hedgeTimeout = remainingBeforeHedge.coerceAtMost(requestAverageProcessingTime.toMillis() * 2) - val hedgeRequest = HttpRequest.newBuilder() - .uri(request.uri()) - .POST(HttpRequest.BodyPublishers.noBody()) - .timeout(Duration.ofMillis(hedgeTimeout)) - .build() - - parallelLimiter.acquire() - sendAttempt(0, hedgeRequest, paymentId, transactionId, deadline, completed, allowHedge = false) - } catch (e: Exception) { - logger.warn("[$accountName] Failed to send hedged request for $paymentId", e) - } - }, scheduleDelay, TimeUnit.MILLISECONDS) - } catch (e: Exception) { - logger.warn("[$accountName] Failed to schedule hedged request", e) + val hedgeTimeout = remainingBeforeHedge + val hedgeRequest = HttpRequest.newBuilder() + .uri(URI(baseUri)) + .POST(HttpRequest.BodyPublishers.noBody()) + .timeout(Duration.ofMillis(hedgeTimeout)) + .build() + + if (!rateLimiter.tickBlocking(15)) { + return@schedule + } + + parallelLimiter.acquire() + sendAttempt( + 0, + hedgeRequest, + paymentId, + transactionId, + deadline, + completed, + allowHedge = false + ) + } catch (e: Exception) { + logger.warn("[$accountName] Failed to send hedged request for $paymentId", e) + } + }, delay, TimeUnit.MILLISECONDS) + } catch (e: Exception) { + logger.warn("[$accountName] Failed to schedule hedged request", e) + } } } } @@ -171,50 +184,79 @@ class PaymentExternalSystemAdapterImpl( .whenComplete { response, throwable -> parallelLimiter.release() inFlightRequests.decrementAndGet() - if (retryCount > 0) retryRequests.decrementAndGet() + if (retryCount > 0) { + retryRequests.decrementAndGet() + } timer.record(now() - timeBeforeCall, TimeUnit.MILLISECONDS) - if (completed.get()) return@whenComplete + if (completed.get()) { + return@whenComplete + } if (throwable != null) { val e = throwable.cause - val isRetriable = when (throwable.cause) { - is SocketTimeoutException, is InterruptedIOException, is EOFException, is IOException -> true - else -> false + var isRetriable = true + when (throwable.cause) { + is SocketTimeoutException -> { + } + + is InterruptedIOException -> { + } + + is EOFException -> { + } + + is IOException -> { + } + + else -> { + logger.warn("[$accountName] io error: $paymentId", e) + isRetriable = false + } } - if (retryCount + 1 >= 2) { // после 2 попыток бросаем исключение + if (retryCount + 1 >= 3) { if (completed.compareAndSet(false, true)) { try { paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "Too many attempts") + it.logProcessing(false, now(), transactionId, "Max attempts reached") } } catch (ex: Exception) { logger.error("[$accountName] Failed to record max attempts for $paymentId", ex) } - throw TooManyRequestsException(1000L) } - return@whenComplete - } - - if (isRetriable) { - retryRequests.incrementAndGet() - retryCounter.increment() + } else { val backoff = ((2.0.pow(retryCount.toDouble()) * 25).toLong() + Random.nextLong(10)) val capped = backoff.coerceAtMost(deadline - now() - 5) - if (capped > 0) { - scheduleRetry(retryCount, request, paymentId, transactionId, deadline, capped, completed) - } else { + if (capped <= 0) { if (completed.compareAndSet(false, true)) { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "Deadline expired") + try { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Deadline expired") + } + } catch (ex: Exception) { + logger.error("[$accountName] Failed to record deadline expiry for $paymentId", ex) } } - } - } else { - if (completed.compareAndSet(false, true)) { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "Non-retriable exception") + } else { + if (isRetriable) { + if (!completed.get()) { + retryRequests.incrementAndGet() + retryCounter.increment() + scheduleRetry( + retryCount, request, paymentId, transactionId, deadline, capped, completed + ) + } + } else { + if (completed.compareAndSet(false, true)) { + try { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Non-retriable exception") + } + } catch (ex: Exception) { + logger.error("[$accountName] Failed to record non-retriable for $paymentId", ex) + } + } } } } @@ -227,8 +269,12 @@ class PaymentExternalSystemAdapterImpl( ExternalSysResponse(transactionId.toString(), paymentId.toString(), false, ex.message) } if (completed.compareAndSet(false, true)) { - paymentESService.update(paymentId) { - it.logProcessing(parsed.result, now(), transactionId, parsed.message) + try { + paymentESService.update(paymentId) { + it.logProcessing(parsed.result, now(), transactionId, parsed.message) + } + } catch (e: Exception) { + logger.error("[$accountName] Error updating ES for payment $paymentId", e) } } } catch (e: Exception) { @@ -245,7 +291,7 @@ class PaymentExternalSystemAdapterImpl( retryExecutor.schedule({ if (completed.get()) return@schedule val remainingTime = deadline - now() - if (remainingTime < requestAverageProcessingTime.toMillis()) { + if (remainingTime < 100L) { try { if (completed.compareAndSet(false, true)) { paymentESService.update(paymentId) { @@ -261,15 +307,34 @@ class PaymentExternalSystemAdapterImpl( logger.error("[$accountName] Failed to record retry failure for $paymentId", e) } } else { - val newRequestTimeout = remainingTime.coerceIn(50L, requestAverageProcessingTime.toMillis() * 2) + val newRequestTimeout = remainingTime.coerceAtLeast(50L) val newRequest = HttpRequest.newBuilder() .uri(request.uri()) .POST(HttpRequest.BodyPublishers.noBody()) .timeout(Duration.ofMillis(newRequestTimeout)) .build() - + if (!rateLimiter.tick()) { + if (completed.compareAndSet(false, true)) { + try { + paymentESService.update(paymentId) { + it.logProcessing(false, now(), transactionId, "Rate limit prevented retry") + } + } catch (e: Exception) { + logger.error("[$accountName] Failed to record rate-limited retry for $paymentId", e) + } + } + return@schedule + } parallelLimiter.acquire() - sendAttempt(retryCount + 1, newRequest, paymentId, transactionId, deadline, completed, allowHedge = false) + sendAttempt( + retryCount + 1, + newRequest, + paymentId, + transactionId, + deadline, + completed, + allowHedge = false + ) } }, delay, TimeUnit.MILLISECONDS) } From 213bd8157f9b0c7f52ccd8bb8561f1b738d7fe00 Mon Sep 17 00:00:00 2001 From: 21092004Goda Date: Fri, 20 Mar 2026 13:49:30 +0300 Subject: [PATCH 40/41] fix: optimize hedging and eliminate silent payment losses for 99%+ success rate --- .../ru/quipy/payments/logic/OrderPayer.kt | 2 +- .../logic/PaymentExternalServiceImpl.kt | 73 +++++++++++-------- test-on-prem-run.http | 4 +- 3 files changed, 46 insertions(+), 33 deletions(-) diff --git a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt index 929e2863e..9d9440f48 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt @@ -55,7 +55,7 @@ class OrderPayer( ) } } catch (e: Exception) { - logger.error("Failed to create payment ES event for $paymentId", e) + logger.warn("Failed to create payment ES event for $paymentId", e) } } inExecTimer.record(now()-createdAt, TimeUnit.MILLISECONDS) diff --git a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt index 66a23f710..6590e20c7 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/PaymentExternalServiceImpl.kt @@ -10,6 +10,7 @@ import org.slf4j.Logger import org.slf4j.LoggerFactory import ru.quipy.common.utils.SlidingWindowRateLimiter import ru.quipy.core.EventSourcingService +import ru.quipy.exceptions.TooManyRequestsException import ru.quipy.payments.api.PaymentAggregate import java.io.EOFException @@ -56,6 +57,18 @@ class PaymentExternalSystemAdapterImpl( private val hedgeEnabled = properties.hedgingEnabled private val hedgeDelayMillis = properties.hedgeDelayMillis ?: (requestAverageProcessingTime.toMillis() / 2).coerceAtLeast(50L) + private val timer = + meterRegistry.timer("payment.external.system.request.latency", "accountName", properties.accountName) + private val retryCounter = + meterRegistry.counter("payment.external.retry.count", "accountName", properties.accountName) + private val retryExecutor: ScheduledExecutorService = Executors.newScheduledThreadPool(50) + private val httpClient = HttpClient + .newBuilder() + .executor(Executors.newFixedThreadPool(500)) + .version(HttpClient.Version.HTTP_2) + .connectTimeout(Duration.ofMillis(connectTimeoutMillis)) + .build() + init { meterRegistry.gauge( "payment.account.inflight.requests", @@ -67,19 +80,26 @@ class PaymentExternalSystemAdapterImpl( listOf(Tag.of("accountName", properties.accountName)), retryRequests ) { it.toDouble() } + + try { + val warmupUri = URI("http://$paymentProviderHostPort/external/accounts?serviceName=$serviceName&token=$token") + val warmupRequest = HttpRequest.newBuilder() + .uri(warmupUri) + .timeout(Duration.ofSeconds(5)) + .GET() + .build() + val futures = (1..10).map { + httpClient.sendAsync(warmupRequest, HttpResponse.BodyHandlers.ofString()) + } + futures.forEach { future -> + try { future.join() } catch (_: Exception) {} + } + logger.info("[$accountName] HTTP client warmup complete") + } catch (e: Exception) { + logger.warn("[$accountName] HTTP warmup failed (non-fatal)", e) + } } - private val timer = - meterRegistry.timer("payment.external.system.request.latency", "accountName", properties.accountName) - private val retryCounter = - meterRegistry.counter("payment.external.retry.count", "accountName", properties.accountName) - private val retryExecutor: ScheduledExecutorService = Executors.newScheduledThreadPool(30) - private val httpClient = HttpClient - .newBuilder() - .executor(Executors.newFixedThreadPool(parallelRequests)) - .version(HttpClient.Version.HTTP_2) - .connectTimeout(Duration.ofMillis(connectTimeoutMillis)) - .build() override fun performPaymentAsync(paymentId: UUID, amount: Int, paymentStartedAt: Long, deadline: Long) { val transactionId = UUID.randomUUID() @@ -108,21 +128,14 @@ class PaymentExternalSystemAdapterImpl( val completed = AtomicBoolean(false) if (!rateLimiter.tickBlocking(50)) { - try { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "Rate limit exceeded at start") - } - } catch (e: Exception) { - logger.error("[$accountName] Failed to record rate limit for $paymentId", e) - } - return + throw TooManyRequestsException(deadline) } parallelLimiter.acquire() sendAttempt(0, request, paymentId, transactionId, deadline, completed, allowHedge = true) if (hedgeEnabled) { - val hedgeDelays = listOf(hedgeDelayMillis, hedgeDelayMillis * 2) + val hedgeDelays = listOf(100L, 250L, 450L, 700L, 950L) for (delay in hedgeDelays) { try { retryExecutor.schedule({ @@ -313,16 +326,16 @@ class PaymentExternalSystemAdapterImpl( .POST(HttpRequest.BodyPublishers.noBody()) .timeout(Duration.ofMillis(newRequestTimeout)) .build() - if (!rateLimiter.tick()) { - if (completed.compareAndSet(false, true)) { - try { - paymentESService.update(paymentId) { - it.logProcessing(false, now(), transactionId, "Rate limit prevented retry") - } - } catch (e: Exception) { - logger.error("[$accountName] Failed to record rate-limited retry for $paymentId", e) - } - } + if (!rateLimiter.tickBlocking(15)) { +// if (completed.compareAndSet(false, true)) { +// try { +// paymentESService.update(paymentId) { +// it.logProcessing(false, now(), transactionId, "Rate limit prevented retry") +// } +// } catch (e: Exception) { +// logger.error("[$accountName] Failed to record rate-limited retry for $paymentId", e) +// } +// } return@schedule } parallelLimiter.acquire() diff --git a/test-on-prem-run.http b/test-on-prem-run.http index ca62a0174..071c60802 100644 --- a/test-on-prem-run.http +++ b/test-on-prem-run.http @@ -6,7 +6,7 @@ Content-Type: application/json { "serviceName": "{{serviceName}}", "token": "{{token}}", - "branch": "main", + "branch": "feature/kuro-11-lab", "accounts": "acc-22", "ratePerSecond": 100, "testCount": 20000, @@ -16,4 +16,4 @@ Content-Type: application/json ### Stop running test to save credits # @timeout 120 -POST http://77.234.215.138:31234/test/stop/"{{serviceName}}" \ No newline at end of file +POST http://77.234.215.138:31234/test/stop/{{serviceName}} \ No newline at end of file From 75844a6f92c5d2ca48a51f4b7e9459288685b6f6 Mon Sep 17 00:00:00 2001 From: 21092004Goda Date: Fri, 20 Mar 2026 17:17:48 +0300 Subject: [PATCH 41/41] fix: deadlock --- .../ru/quipy/payments/logic/OrderPayer.kt | 41 +++++++++++-------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt index 9d9440f48..33df792f6 100644 --- a/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt +++ b/src/main/kotlin/ru/quipy/payments/logic/OrderPayer.kt @@ -11,6 +11,7 @@ import ru.quipy.common.utils.SlidingWindowRateLimiter import ru.quipy.core.EventSourcingService import ru.quipy.payments.api.PaymentAggregate import java.util.* +import java.util.concurrent.CompletableFuture import java.util.concurrent.ThreadPoolExecutor import java.util.concurrent.TimeUnit @@ -42,28 +43,32 @@ class OrderPayer( fun processPayment(orderId: UUID, amount: Int, paymentId: UUID, deadline: Long): Long { val createdAt = System.currentTimeMillis() plannedCounter.increment() - paymentExecutor.submit { - startedCounter.increment() - try { - val esFuture = java.util.concurrent.CompletableFuture.runAsync { - try { - paymentESService.create { - it.create( - paymentId, - orderId, - amount - ) - } - } catch (e: Exception) { - logger.warn("Failed to create payment ES event for $paymentId", e) + + CompletableFuture + .runAsync( + { + startedCounter.increment() + paymentESService.create { + it.create(paymentId, orderId, amount) } + }, + paymentExecutor, + ) + .thenRunAsync( + { + inExecTimer.record(now() - createdAt, TimeUnit.MILLISECONDS) + paymentService.submitPaymentRequest(paymentId, amount, createdAt, deadline) + }, + paymentExecutor, + ) + .whenComplete { _, ex -> + if (ex != null) { + logger.warn("process stopped before submit for payment: $paymentId", ex) } - inExecTimer.record(now()-createdAt, TimeUnit.MILLISECONDS) - paymentService.submitPaymentRequest(paymentId, amount, createdAt, deadline) - } finally { completedCounter.increment() } - } + return createdAt } + } \ No newline at end of file