From d114008eb619a8f9cc919d17ea0a63120a48099f Mon Sep 17 00:00:00 2001 From: Andrey Antukh Date: Wed, 11 Feb 2026 15:40:15 +0100 Subject: [PATCH] :sparkles: Add rlimit notifications and reports Also improve the current mattermost reporting for make it more easily to filter --- .../resources/app/templates/error-list.tmpl | 1 + .../app/templates/error-report.v5.tmpl | 40 ++++++++++ backend/resources/rlimit.edn | 4 +- backend/src/app/http/debug.clj | 10 ++- backend/src/app/loggers/audit.clj | 14 ++-- backend/src/app/loggers/database.clj | 52 +++++++++++-- backend/src/app/loggers/mattermost.clj | 77 +++++++++++++------ backend/src/app/main.clj | 8 +- backend/src/app/rpc.clj | 20 ++--- backend/src/app/rpc/rlimit.clj | 50 +++++++----- backend/test/backend_tests/helpers.clj | 14 ++-- 11 files changed, 217 insertions(+), 73 deletions(-) create mode 100644 backend/resources/app/templates/error-report.v5.tmpl diff --git a/backend/resources/app/templates/error-list.tmpl b/backend/resources/app/templates/error-list.tmpl index 3008acda9c..043a29034e 100644 --- a/backend/resources/app/templates/error-list.tmpl +++ b/backend/resources/app/templates/error-list.tmpl @@ -12,6 +12,7 @@ penpot - error list [BACKEND ERRORS] [FRONTEND ERRORS] + [RLIMIT REPORTS]
diff --git a/backend/resources/app/templates/error-report.v5.tmpl b/backend/resources/app/templates/error-report.v5.tmpl new file mode 100644 index 0000000000..64daf2ca2c --- /dev/null +++ b/backend/resources/app/templates/error-report.v5.tmpl @@ -0,0 +1,40 @@ + {% extends "app/templates/base.tmpl" %} + +{% block title %} +Report: {{hint|abbreviate:150}} - {{id}} - Penpot Rate Limit Report +{% endblock %} + +{% block content %} + +
+
+
+ +
+

Hint:
{{hint}}

+

Reported at:
{{created-at}}

+

Report ID:
{{id}}

+
+
+ +
+
CONTEXT:
+
+
{{context}}
+
+
+ +
+
RESULT:
+
+
{{result}}
+
+
+
+
+{% endblock %} diff --git a/backend/resources/rlimit.edn b/backend/resources/rlimit.edn index 2574f60f9b..118f30f70a 100644 --- a/backend/resources/rlimit.edn +++ b/backend/resources/rlimit.edn @@ -3,9 +3,9 @@ {:default [[:default :window "200000/h"]] - ;; #{:command/get-teams} + ;; #{:main/get-teams} ;; [[:burst :bucket "5/5/5s"]] - ;; #{:command/get-profile} + ;; #{:main/get-profile} ;; [[:burst :bucket "60/60/1m"]] } diff --git a/backend/src/app/http/debug.clj b/backend/src/app/http/debug.clj index 8005a82784..35650f9a10 100644 --- a/backend/src/app/http/debug.clj +++ b/backend/src/app/http/debug.clj @@ -240,6 +240,13 @@ (tmpl/render (-> content (assoc :id id) (assoc :version 4) + (assoc :created-at (ct/format-inst created-at :rfc1123)))))) + + (render-template-v5 [{:keys [content id created-at]}] + (-> (io/resource "app/templates/error-report.v5.tmpl") + (tmpl/render (-> content + (assoc :id id) + (assoc :version 5) (assoc :created-at (ct/format-inst created-at :rfc1123))))))] (if-let [report (get-report request)] @@ -247,7 +254,8 @@ 1 (render-template-v1 report) 2 (render-template-v2 report) 3 (render-template-v3 report) - 4 (render-template-v4 report))] + 4 (render-template-v4 report) + 5 (render-template-v5 report))] {::yres/status 200 ::yres/body result ::yres/headers {"content-type" "text/html; charset=utf-8" diff --git a/backend/src/app/loggers/audit.clj b/backend/src/app/loggers/audit.clj index cfe3a1769e..c374b432f9 100644 --- a/backend/src/app/loggers/audit.clj +++ b/backend/src/app/loggers/audit.clj @@ -151,20 +151,22 @@ uuid/zero) props (-> (or (::replace-props resultm) - (-> params - (merge (::props resultm)) - (dissoc :profile-id) - (dissoc :type))) + (merge params (::props resultm))) (clean-props)) context (merge (::context resultm) (prepare-context-from-request request)) - ip-addr (inet/parse-request request)] + ip-addr (inet/parse-request request) + module (get cfg ::rpc/module)] {::type (or (::type resultm) (::rpc/type cfg)) ::name (or (::name resultm) - (::sv/name mdata)) + (let [sname (::sv/name mdata)] + (if (not= module "main") + (str module "-" sname) + sname))) + ::profile-id profile-id ::ip-addr ip-addr ::props props diff --git a/backend/src/app/loggers/database.clj b/backend/src/app/loggers/database.clj index 9dc02febad..5f4704168f 100644 --- a/backend/src/app/loggers/database.clj +++ b/backend/src/app/loggers/database.clj @@ -15,6 +15,7 @@ [app.config :as cf] [app.db :as db] [app.loggers.audit :as audit] + [app.rpc.rlimit :as-alias rlimit] [clojure.spec.alpha :as s] [integrant.core :as ig] [promesa.exec :as px] @@ -41,7 +42,7 @@ (or (instance? java.util.concurrent.CompletionException cause) (instance? java.util.concurrent.ExecutionException cause))) -(defn record->report +(defn- log-record->report [{:keys [::l/context ::l/message ::l/props ::l/logger ::l/level ::l/cause] :as record}] (assert (l/valid-record? record) "expectd valid log record") (let [data (if (concurrent-exception? cause) @@ -86,16 +87,16 @@ [{:keys [::db/pool]} {:keys [::l/id] :as record}] (try (let [uri (cf/get :public-uri) - report (-> record record->report d/without-nils)] + report (-> record log-record->report d/without-nils)] (l/dbg :hint "registering error on database" - :id id + :id (str id) :src "logging" :uri (str uri "/dbg/error/" id)) (persist-on-database! pool id 3 report)) (catch Throwable cause (l/warn :hint "unexpected exception on database error logger" :cause cause)))) -(defn- event->report +(defn- audit-event->report [{:keys [::audit/context ::audit/props ::audit/ip-addr] :as record}] (let [context (reduce-kv (fn [context k v] @@ -125,15 +126,51 @@ [{:keys [::db/pool]} {:keys [::audit/id] :as event}] (try (let [uri (cf/get :public-uri) - report (-> event event->report d/without-nils)] + report (-> event audit-event->report d/without-nils)] (l/dbg :hint "registering error on database" - :id id + :id (str id) :src "audit-log" :uri (str uri "/dbg/error/" id)) (persist-on-database! pool id 4 report)) (catch Throwable cause (l/warn :hint "unexpected exception on database error logger" :cause cause)))) +(defn- rlimit-event->report + [event] + (let [context + (-> {} + (assoc :rlimit/uid (::rlimit/uid event)) + (assoc :rlimit/method (::rlimit/method event)) + (assoc :backend/tenant (cf/get :tenant)) + (assoc :backend/host (cf/get :host)) + (assoc :backend/public-uri (str (cf/get :public-uri))) + (assoc :backend/version (:full cf/version))) + + result + (->> (::rlimit/results event) + (mapv (fn [result] + (-> (into (sorted-map) result) + (dissoc ::rlimit/method)))))] + + {:hint (str "Rate Limit Rejection: " (::rlimit/method event) " for " (::rlimit/uid event)) + :context (-> (into (sorted-map) context) + (pp/pprint-str :length 50)) + :result (pp/pprint-str result :length 50)})) + +(defn- handle-rlimit-event + "Convert the log record into a report object and persist it on the database" + [{:keys [::db/pool]} {:keys [::rlimit/id] :as event}] + (try + (let [uri (cf/get :public-uri) + report (-> event rlimit-event->report d/without-nils)] + (l/dbg :hint "registering rate limit rejection" + :id (str id) + :src "rlimit" + :uri (str uri "/dbg/error/" id)) + (persist-on-database! pool id 5 report)) + (catch Throwable cause + (l/warn :hint "unexpected exception on database error logger" :cause cause)))) + (defmethod ig/assert-key ::reporter [_ params] (assert (db/pool? (::db/pool params)) "expect valid database pool")) @@ -154,6 +191,9 @@ (::audit/id item) (handle-audit-event cfg item) + (::rlimit/id item) + (handle-rlimit-event cfg item) + :else (l/warn :hint "received unexpected item" :item item)) diff --git a/backend/src/app/loggers/mattermost.clj b/backend/src/app/loggers/mattermost.clj index 73fa239ec1..c2fd3d1dfb 100644 --- a/backend/src/app/loggers/mattermost.clj +++ b/backend/src/app/loggers/mattermost.clj @@ -9,10 +9,12 @@ (:require [app.common.exceptions :as ex] [app.common.logging :as l] + [app.common.pprint :as pp] [app.common.uri :as u] [app.config :as cf] [app.http.client :as http] [app.loggers.audit :as audit] + [app.rpc.rlimit :as-alias rlimit] [app.util.json :as json] [integrant.core :as ig] [promesa.exec :as px] @@ -22,21 +24,28 @@ (defn- send-mattermost-notification! [cfg {:keys [id] :as report}] + (let [type (get report :type) + text (str "#" type " | " (get report :hint) "\n" + (when id + (str (u/join (cf/get :public-uri) "/dbg/error/" id) " ")) - - (let [url (u/join (cf/get :public-uri) "/dbg/error/" id) - - text (str "Exception: " url " " (when-let [pid (:profile-id report)] - (str "(pid: #uuid-" pid ")")) + (if (uuid? pid) + (str "(pid: #uuid-" pid ")") + (str "(pid: #ip-" pid ")"))) "\n" "- host: #" (:host report) "\n" "- tenant: #" (:tenant report) "\n" "- origin: #" (:origin report) "\n" - "- href: `" (:href report) "`\n" - "- frontend-version: `" (:frontend-version report) "`\n" - "- backend-version: `" (:backend-version report) "`\n" + (when-let [href (get report :href)] + (str "- href: `" href "`\n")) + (when-let [version (get report :frontend-version)] + (str "- frontend-version: `" version "`\n")) + (when-let [version (get report :backend-version)] + (str "- backend-version: `" version "`\n")) "\n" + (when-let [info (:info report)] + (str "```\n" info "```")) (when-let [trace (:trace report)] (str "```\n" "Trace:\n" @@ -54,13 +63,15 @@ (l/warn :hint "error on sending data" :response (pr-str resp))))) -(defn- record->report - [{:keys [::l/context ::l/id ::l/cause] :as record}] +(defn- log-record->report + [{:keys [::l/context ::l/id ::l/cause ::l/message] :as record}] (assert (l/valid-record? record) "expectd valid log record") (let [public-uri (cf/get :public-uri)] {:id id + :type "exception" :origin "logging" + :hint (or (some-> cause ex-message) @message) :tenant (cf/get :tenant) :host (cf/get :host) :backend-version (:full cf/version) @@ -74,7 +85,9 @@ (defn- audit-event->report [{:keys [::audit/context ::audit/props ::audit/id] :as event}] {:id id + :type "exception" :origin "audit-log" + :hint (get props :hint) :tenant (cf/get :tenant) :host (cf/get :host) :backend-version (:full cf/version) @@ -82,18 +95,35 @@ :profile-id (:audit/profile-id event) :href (get props :href)}) -(defn- handle-log-record - [cfg record] - (try - (let [report (record->report record)] - (send-mattermost-notification! cfg report)) - (catch Throwable cause - (l/warn :hint "unhandled error" :cause cause)))) +(defn- rlimit-event->report + [event] + {:id (::rlimit/id event) + :type "notification" + :origin "rlimit" + :hint (str "rlimit reject of " + (::rlimit/method event) + " for " + (::rlimit/uid event)) + :tenant (cf/get :tenant) + :host (cf/get :host) + :backend-version (:full cf/version) + :profile-id (::rlimit/profile-id event) + :info (with-out-str + (println "Rejected by:") + (println "------------") + (println "Method: " (::rlimit/method event)) + (println "Limit Name: " (::rlimit/name event)) + (println "Limit Strategy:" (::rlimit/strategy event)) + (println) + (println "Results & Config:") + (println "-----------------") + (doseq [result (::rlimit/results event)] + (pp/pprint (into (sorted-map) result))))}) -(defn- handle-audit-event - [cfg record] +(defn- handle-event + [cfg event event->report] (try - (let [report (audit-event->report record)] + (let [report (event->report event)] (send-mattermost-notification! cfg report)) (catch Throwable cause (l/warn :hint "unhandled error" :cause cause)))) @@ -116,10 +146,13 @@ (when @enabled (cond (::l/id item) - (handle-log-record cfg item) + (handle-event cfg item log-record->report) (::audit/id item) - (handle-audit-event cfg item) + (handle-event cfg item audit-event->report) + + (::rlimit/id item) + (handle-event cfg item rlimit-event->report) :else (l/warn :hint "received unexpected item" :item item))) diff --git a/backend/src/app/main.clj b/backend/src/app/main.clj index a872be3658..a5027bd190 100644 --- a/backend/src/app/main.clj +++ b/backend/src/app/main.clj @@ -317,7 +317,13 @@ ::climit/enabled (contains? cf/flags :rpc-climit)} :app.rpc/rlimit - {::wrk/executor (ig/ref ::wrk/netty-executor)} + {::wrk/executor (ig/ref ::wrk/netty-executor) + + :app.loggers.mattermost/reporter + (ig/ref :app.loggers.mattermost/reporter) + + :app.loggers.database/reporter + (ig/ref :app.loggers.database/reporter)} :app.rpc/methods {::http.client/client (ig/ref ::http.client/client) diff --git a/backend/src/app/rpc.clj b/backend/src/app/rpc.clj index a9cef88868..31d4660e04 100644 --- a/backend/src/app/rpc.clj +++ b/backend/src/app/rpc.clj @@ -90,7 +90,7 @@ [methods] (let [methods (update-vals methods peek)] (fn [{:keys [params path-params method] :as request}] - (let [handler-name (:type path-params) + (let [handler-name (:method-name path-params) etag (yreq/get-header request "if-none-match") key-id (get request ::http/auth-key-id) @@ -227,8 +227,8 @@ (wrap-authentication cfg $ mdata))) (defn- process-method - [cfg module wrap-fn [f mdata]] - (l/trc :hint "add method" :module module :name (::sv/name mdata)) + [cfg wrap-fn [f mdata]] + (l/wrn :hint "add method" :module (::module cfg) :type (::type cfg) :name (::sv/name mdata)) (let [f (wrap-fn cfg f mdata) k (keyword (::sv/name mdata))] [k [mdata (partial f cfg)]])) @@ -239,7 +239,7 @@ (defn- resolve-methods [cfg] - (let [cfg (assoc cfg ::type "command" ::metrics-id :rpc-command-timing)] + (let [cfg (assoc cfg ::module "main" ::type "command" ::metrics-id :rpc-main-timing)] (->> (sv/scan-ns 'app.rpc.commands.access-token 'app.rpc.commands.audit @@ -266,7 +266,7 @@ 'app.rpc.commands.verify-token 'app.rpc.commands.viewer 'app.rpc.commands.webhooks) - (map (partial process-method cfg "rpc" wrap)) + (map (partial process-method cfg wrap)) (into {})))) (def ^:private schema:methods-params @@ -298,13 +298,13 @@ (defn- resolve-management-methods [cfg] - (let [cfg (assoc cfg ::type "management" ::metrics-id :rpc-management-timing) + (let [cfg (assoc cfg ::module "management" ::type "command" ::metrics-id :rpc-management-timing) mods (cond->> (list 'app.rpc.management.exporter) (contains? cf/flags :nitrate) (cons 'app.rpc.management.nitrate))] (->> (apply sv/scan-ns mods) - (map (partial process-method cfg "management" wrap-management)) + (map (partial process-method cfg wrap-management)) (into {})))) (def ^:private schema:management-methods-params @@ -359,7 +359,7 @@ (let [public-uri (cf/get :public-uri)] ["/api" ["/management" - ["/methods/:type" + ["/methods/:method-name" {:middleware [[mw/shared-key-auth shared-keys] [session/authz cfg]] :handler (make-rpc-handler management-methods)}] @@ -370,7 +370,7 @@ :description "MANAGEMENT API")] ["/main" - ["/methods/:type" + ["/methods/:method-name" {:middleware [[mw/cors] [sec/client-header-check] [session/authz cfg] @@ -388,7 +388,7 @@ ["/openapi" {:handler (redirect (u/join public-uri "/api/main/doc/openapi"))}] ["/openapi.join" {:handler (redirect (u/join public-uri "/api/main/doc/openapi.json"))}] - ["/rpc/command/:type" + ["/rpc/command/:method-name" {:middleware [[mw/cors] [sec/client-header-check] [session/authz cfg] diff --git a/backend/src/app/rpc/rlimit.clj b/backend/src/app/rpc/rlimit.clj index 03ae35831f..2111d95e1e 100644 --- a/backend/src/app/rpc/rlimit.clj +++ b/backend/src/app/rpc/rlimit.clj @@ -52,6 +52,8 @@ [app.common.uuid :as uuid] [app.config :as cf] [app.http :as-alias http] + [app.loggers.database :as loggers.db] + [app.loggers.mattermost :as loggers.mm] [app.redis :as rds] [app.redis.script :as-alias rscript] [app.rpc :as-alias rpc] @@ -171,9 +173,9 @@ :hint (str/ffmt "looks like '%' does not have a valid format" opts)))) (defmethod process-limit :bucket - [rconn profile-id now {:keys [::key ::params ::service ::capacity ::interval ::rate] :as limit}] + [rconn profile-id now {:keys [::key ::params ::method ::capacity ::interval ::rate] :as limit}] (let [script (-> bucket-rate-limit-script - (assoc ::rscript/keys [(str key "." service "." profile-id)]) + (assoc ::rscript/keys [(str key "." method "." profile-id)]) (assoc ::rscript/vals (conj params (->seconds now)))) result (rds/eval rconn script) allowed? (boolean (nth result 0)) @@ -181,7 +183,7 @@ reset (* (/ (inst-ms interval) rate) (- capacity remaining))] (l/trace :hint "limit processed" - :service service + :method method :limit (name (::name limit)) :strategy (name (::strategy limit)) :opts (::opts limit) @@ -193,17 +195,17 @@ (assoc ::lresult/remaining remaining)))) (defmethod process-limit :window - [rconn profile-id now {:keys [::permits ::unit ::key ::service] :as limit}] + [rconn uid now {:keys [::permits ::unit ::key ::method] :as limit}] (let [ts (ct/truncate now unit) ttl (ct/diff now (ct/plus ts {unit 1})) script (-> window-rate-limit-script - (assoc ::rscript/keys [(str key "." service "." profile-id "." (ct/format-inst ts))]) + (assoc ::rscript/keys [(str key "." method "." uid "." (ct/format-inst ts))]) (assoc ::rscript/vals [permits (->seconds ttl)])) result (rds/eval rconn script) allowed? (boolean (nth result 0)) remaining (nth result 1)] (l/trace :hint "limit processed" - :service service + :method method :name (name (::name limit)) :strategy (name (::strategy limit)) :opts (::opts limit) @@ -211,12 +213,13 @@ :remaining remaining) (-> limit (assoc ::lresult/allowed allowed?) + (assoc ::lresult/timestamp ts) (assoc ::lresult/remaining remaining) (assoc ::lresult/reset (ct/plus ts {unit 1}))))) (defn- process-limits - [rconn profile-id limits now] - (let [results (into [] (map (partial process-limit rconn profile-id now)) limits) + [{:keys [::rds/conn] :as cfg} uid limits now] + (let [results (into [] (map (partial process-limit conn uid now)) limits) remaining (->> results (d/index-by ::name ::lresult/remaining) (uri/map->query-string)) @@ -227,11 +230,22 @@ rejected (d/seek (complement ::lresult/allowed) results)] (when rejected - (l/warn :hint "rejected rate limit" - :profile-id (str profile-id) - :limit-service (-> rejected ::service name) - :limit-name (-> rejected ::name name) - :limit-strategy (-> rejected ::strategy name))) + (let [event {::id (uuid/next) + ::uid uid + ::method (-> rejected ::method name) + ::name (-> rejected ::name name) + ::strategy (-> rejected ::strategy name) + ::results results}] + + (l/warn :hint "rejected rate limit" + :method (-> rejected ::method name) + :name (-> rejected ::name name) + :strategy (-> rejected ::strategy name) + :uid (str uid) + :report-id (:id event)) + + (loggers.mm/emit cfg event) + (loggers.db/emit cfg event))) {::enabled true ::allowed (not (some? rejected)) @@ -244,7 +258,7 @@ [state skey sname] (when-let [limits (or (get-in @state [::limits skey]) (get-in @state [::limits :default]))] - (into [] (map #(assoc % ::service sname)) limits))) + (into [] (map #(assoc % ::method sname)) limits))) (defn- get-uid [{:keys [::rpc/profile-id] :as params}] @@ -254,10 +268,10 @@ uuid/zero))) (defn- process-request' - [{:keys [::rds/conn] :as cfg} limits params] + [cfg limits params] (try (let [uid (get-uid params) - result (process-limits conn uid limits (ct/now))] + result (process-limits cfg uid limits (ct/now))] (if (contains? cf/flags :soft-rpc-rlimit) {::enabled false} result)) @@ -275,8 +289,8 @@ (assert (or (nil? rlimit) (valid-rlimit-instance? rlimit)) "expected a valid rlimit instance") (if rlimit - (let [skey (keyword (::rpc/type cfg) (->> mdata ::sv/spec name)) - sname (str (::rpc/type cfg) "." (->> mdata ::sv/spec name)) + (let [skey (keyword (::rpc/module cfg) (->> mdata ::sv/spec name)) + sname (str (::rpc/module cfg) "." (->> mdata ::sv/spec name)) cfg (-> cfg (assoc ::skey skey) (assoc ::sname sname))] diff --git a/backend/test/backend_tests/helpers.clj b/backend/test/backend_tests/helpers.clj index 74815650ec..8ddb3448a2 100644 --- a/backend/test/backend_tests/helpers.clj +++ b/backend/test/backend_tests/helpers.clj @@ -104,13 +104,13 @@ (assoc-in [::db/pool ::db/password] (:database-password config)) (assoc-in [:app.rpc/methods :app.setup/templates] templates) (assoc-in [:app.rpc/methods :app.setup/templates] templates) - (update :app.rpc/methods - (fn [state] - (-> state - (assoc :app.setup/templates templates) - (assoc :app.loggers.mattermost/reporter nil) - (assoc :app.loggers.database/reporter nil)))) - + (update :app.rpc/rlimit assoc + :app.loggers.mattermost/reporter nil + :app.loggers.database/reporter nil) + (update :app.rpc/methods assoc + :app.setup/templates templates + :app.loggers.mattermost/reporter nil + :app.loggers.database/reporter nil) (dissoc :app.srepl/server :app.http/server :app.http/route