diff --git a/api/bun.lock b/api/bun.lock index d63ee02..5d09c2e 100644 --- a/api/bun.lock +++ b/api/bun.lock @@ -5,6 +5,16 @@ "": { "dependencies": { "@fastify/cors": "^10.0.0", + "@opentelemetry/api": "^1.9.1", + "@opentelemetry/api-logs": "^0.215.0", + "@opentelemetry/exporter-logs-otlp-http": "^0.215.0", + "@opentelemetry/exporter-metrics-otlp-http": "^0.215.0", + "@opentelemetry/exporter-trace-otlp-http": "^0.215.0", + "@opentelemetry/resources": "^2.7.0", + "@opentelemetry/sdk-logs": "^0.215.0", + "@opentelemetry/sdk-metrics": "^2.7.0", + "@opentelemetry/sdk-trace-node": "^2.7.0", + "@opentelemetry/semantic-conventions": "^1.40.0", "@sinclair/typebox": "^0.34.48", "cache-manager": "^7.2.8", "cache-manager-fs-hash": "^3.0.0", @@ -36,8 +46,58 @@ "@keyv/serialize": ["@keyv/serialize@1.1.1", "", {}, "sha512-dXn3FZhPv0US+7dtJsIi2R+c7qWYiReoEh5zUntWCf4oSpMNib8FDhSoed6m3QyZdx5hK7iLFkYk3rNxwt8vTA=="], + "@opentelemetry/api": ["@opentelemetry/api@1.9.1", "", {}, "sha512-gLyJlPHPZYdAk1JENA9LeHejZe1Ti77/pTeFm/nMXmQH/HFZlcS/O2XJB+L8fkbrNSqhdtlvjBVjxwUYanNH5Q=="], + + "@opentelemetry/api-logs": ["@opentelemetry/api-logs@0.215.0", "", { "dependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-xrFlqhdhUyO8wSRn6DjE0145/HPWSJ5Nm0C7vWua6TdL/FSEAZvEyvdsa9CRXuxo9ebb7j/NEPhEcO62IJ0qUA=="], + + "@opentelemetry/context-async-hooks": ["@opentelemetry/context-async-hooks@2.7.0", "", { "peerDependencies": { "@opentelemetry/api": ">=1.0.0 <1.10.0" } }, "sha512-MWXggArM+Y11mPS8VOrqxOj+YMGQSRuvhM91eSBX4xFpJa05mpkeVvM8pPux5ElkEjV5RMgrkisrlP/R83SpBQ=="], + + "@opentelemetry/core": ["@opentelemetry/core@2.7.0", "", { "dependencies": { "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.0.0 <1.10.0" } }, "sha512-DT12SXVwV2eoJrGf4nnsvZojxxeQo+LlNAsoYGRRObPWTeN6APiqZ2+nqDCQDvQX40eLi1AePONS0onoASp3yQ=="], + + "@opentelemetry/exporter-logs-otlp-http": ["@opentelemetry/exporter-logs-otlp-http@0.215.0", "", { "dependencies": { 
"@opentelemetry/api-logs": "0.215.0", "@opentelemetry/core": "2.7.0", "@opentelemetry/otlp-exporter-base": "0.215.0", "@opentelemetry/otlp-transformer": "0.215.0", "@opentelemetry/sdk-logs": "0.215.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-U7Qb+TVX2GZH5RSC+Gx9aE5zChKP1kPg87X3PlI/41lWVPJdBIzmgMmuE28MmQlrK84nLHCIqUOOben8YkSzBw=="], + + "@opentelemetry/exporter-metrics-otlp-http": ["@opentelemetry/exporter-metrics-otlp-http@0.215.0", "", { "dependencies": { "@opentelemetry/core": "2.7.0", "@opentelemetry/otlp-exporter-base": "0.215.0", "@opentelemetry/otlp-transformer": "0.215.0", "@opentelemetry/resources": "2.7.0", "@opentelemetry/sdk-metrics": "2.7.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-FRydO5j7MWnXK9ghfykKxiSM8I5UeiicK/UNl3/mv86xoEKkb+LKz1I3WXgkuYVOQf22VNqbPO58s2W1mVWtEQ=="], + + "@opentelemetry/exporter-trace-otlp-http": ["@opentelemetry/exporter-trace-otlp-http@0.215.0", "", { "dependencies": { "@opentelemetry/core": "2.7.0", "@opentelemetry/otlp-exporter-base": "0.215.0", "@opentelemetry/otlp-transformer": "0.215.0", "@opentelemetry/resources": "2.7.0", "@opentelemetry/sdk-trace-base": "2.7.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-k4J9ISeGpb0Bm/wCrlcrbroMFTkiWMrdhNxQGrlktxLy127Yzd4/7nrTawn5d/ApktYTknvdixsE6++34Qfi1w=="], + + "@opentelemetry/otlp-exporter-base": ["@opentelemetry/otlp-exporter-base@0.215.0", "", { "dependencies": { "@opentelemetry/core": "2.7.0", "@opentelemetry/otlp-transformer": "0.215.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-lHrfbmeLSmesGSkkHiqDwOzfaEMSWXdc7q6UoLfbW8byONCb+bE/zkAr0kapN4US1baT/2nbpNT7Cn9XoB96Vg=="], + + "@opentelemetry/otlp-transformer": ["@opentelemetry/otlp-transformer@0.215.0", "", { "dependencies": { "@opentelemetry/api-logs": "0.215.0", "@opentelemetry/core": "2.7.0", "@opentelemetry/resources": "2.7.0", "@opentelemetry/sdk-logs": "0.215.0", "@opentelemetry/sdk-metrics": "2.7.0", 
"@opentelemetry/sdk-trace-base": "2.7.0", "protobufjs": "^8.0.1" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-cWwBvaV+vkXHkSoTYR8hGw+AW03UlgTr6xtrUKOMeum3T+8vffYXIfXu6KY5MLu8O9QtoBKqaKWw9I5xoOepng=="], + + "@opentelemetry/resources": ["@opentelemetry/resources@2.7.0", "", { "dependencies": { "@opentelemetry/core": "2.7.0", "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, "sha512-K+oi0hNMv94EpZbnW3eyu2X6SGVpD3O5DhG2NIp65Hc7lhAj9brRXTAVzh3wB82+q3ThakEf7Zd7RsFUqcTc7A=="], + + "@opentelemetry/sdk-logs": ["@opentelemetry/sdk-logs@0.215.0", "", { "dependencies": { "@opentelemetry/api-logs": "0.215.0", "@opentelemetry/core": "2.7.0", "@opentelemetry/resources": "2.7.0", "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.4.0 <1.10.0" } }, "sha512-y3ucOmphzc4vgBTyIGchs+N/1rkACmoka8QalT2z1LBNM232Z17zMYayHcMl+dgMoOadZ0b72UZv7mDtqy1cFA=="], + + "@opentelemetry/sdk-metrics": ["@opentelemetry/sdk-metrics@2.7.0", "", { "dependencies": { "@opentelemetry/core": "2.7.0", "@opentelemetry/resources": "2.7.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.9.0 <1.10.0" } }, "sha512-Vd7h95av/LYRsAVN7wbprvvJnHkq7swMXAo7Uad0Uxf9jl6NSReLa0JNivrcc5BVIx/vl2t+cgdVQQbnVhsR9w=="], + + "@opentelemetry/sdk-trace-base": ["@opentelemetry/sdk-trace-base@2.7.0", "", { "dependencies": { "@opentelemetry/core": "2.7.0", "@opentelemetry/resources": "2.7.0", "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, "sha512-Yg9zEXJB50DLVLpsKPk7NmNqlPlS+OvqhJGh0A8oawIOTPOwlm4eXs9BMJV7L79lvEwI+dWtAj+YjTyddV336A=="], + + "@opentelemetry/sdk-trace-node": ["@opentelemetry/sdk-trace-node@2.7.0", "", { "dependencies": { "@opentelemetry/context-async-hooks": "2.7.0", "@opentelemetry/core": "2.7.0", "@opentelemetry/sdk-trace-base": "2.7.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.0.0 <1.10.0" } }, 
"sha512-RrFHOXw0IYp/OThew6QORdybnnLitUAUMCJKcQNBYS0hDkCYarO2vTkVxfrGxCIqd5XHSMvbCpBd/T8ZMw8oSg=="], + + "@opentelemetry/semantic-conventions": ["@opentelemetry/semantic-conventions@1.40.0", "", {}, "sha512-cifvXDhcqMwwTlTK04GBNeIe7yyo28Mfby85QXFe1Yk8nmi36Ab/5UQwptOx84SsoGNRg+EVSjwzfSZMy6pmlw=="], + "@pinojs/redact": ["@pinojs/redact@0.4.0", "", {}, "sha512-k2ENnmBugE/rzQfEcdWHcCY+/FM3VLzH9cYEsbdsoqrvzAKRhUZeRNhAZvB8OitQJ1TBed3yqWtdjzS6wJKBwg=="], + "@protobufjs/aspromise": ["@protobufjs/aspromise@1.1.2", "", {}, "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ=="], + + "@protobufjs/base64": ["@protobufjs/base64@1.1.2", "", {}, "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg=="], + + "@protobufjs/codegen": ["@protobufjs/codegen@2.0.4", "", {}, "sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg=="], + + "@protobufjs/eventemitter": ["@protobufjs/eventemitter@1.1.0", "", {}, "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q=="], + + "@protobufjs/fetch": ["@protobufjs/fetch@1.1.0", "", { "dependencies": { "@protobufjs/aspromise": "^1.1.1", "@protobufjs/inquire": "^1.1.0" } }, "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ=="], + + "@protobufjs/float": ["@protobufjs/float@1.0.2", "", {}, "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ=="], + + "@protobufjs/inquire": ["@protobufjs/inquire@1.1.0", "", {}, "sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q=="], + + "@protobufjs/path": ["@protobufjs/path@1.1.2", "", {}, "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA=="], + + "@protobufjs/pool": ["@protobufjs/pool@1.1.0", "", {}, 
"sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw=="], + + "@protobufjs/utf8": ["@protobufjs/utf8@1.1.0", "", {}, "sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw=="], + "@sinclair/typebox": ["@sinclair/typebox@0.34.48", "", {}, "sha512-kKJTNuK3AQOrgjjotVxMrCn1sUJwM76wMszfq1kdU4uYVJjvEWuFQ6HgvLt4Xz3fSmZlTOxJ/Ie13KnIcWQXFA=="], "@types/cache-manager": ["@types/cache-manager@3.4.3", "", {}, "sha512-71aBXoFYXZW4TnDHHH8gExw2lS28BZaWeKefgsiJI7QYZeJfUEbMKw6CQtzGjlYQcGIWwB76hcCrkVA3YHSvsw=="], @@ -112,6 +172,8 @@ "lockfile": ["lockfile@1.0.4", "", { "dependencies": { "signal-exit": "^3.0.2" } }, "sha512-cvbTwETRfsFh4nHsL1eGWapU1XFi5Ot9E85sWAwia7Y7EgB7vfqcZhTKZ+l7hCGxSPoushMv5GKhT5PdLv03WA=="], + "long": ["long@5.3.2", "", {}, "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA=="], + "minimist": ["minimist@1.2.8", "", {}, "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA=="], "mnemonist": ["mnemonist@0.40.0", "", { "dependencies": { "obliterator": "^2.0.4" } }, "sha512-kdd8AFNig2AD5Rkih7EPCXhu/iMvwevQFX/uEiGhZyPZi7fHqOoF4V4kHLpCfysxXMgQ4B52kdPMCwARshKvEg=="], @@ -132,6 +194,8 @@ "process-warning": ["process-warning@5.0.0", "", {}, "sha512-a39t9ApHNx2L4+HBnQKqxxHNs1r7KF+Intd8Q/g1bUh6q0WIp9voPXJ/x0j+ZL45KF1pJd9+q2jLIRMfvEshkA=="], + "protobufjs": ["protobufjs@8.0.1", "", { "dependencies": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", "@protobufjs/codegen": "^2.0.4", "@protobufjs/eventemitter": "^1.1.0", "@protobufjs/fetch": "^1.1.0", "@protobufjs/float": "^1.0.2", "@protobufjs/inquire": "^1.1.0", "@protobufjs/path": "^1.1.2", "@protobufjs/pool": "^1.1.0", "@protobufjs/utf8": "^1.1.0", "@types/node": ">=13.7.0", "long": "^5.0.0" } }, "sha512-NWWCCscLjs+cOKF/s/XVNFRW7Yih0fdH+9brffR5NZCy8k42yRdl5KlWKMVXuI1vfCoy4o1z80XR/W/QUb3V3w=="], + "pump": ["pump@3.0.3", "", { 
"dependencies": { "end-of-stream": "^1.1.0", "once": "^1.3.1" } }, "sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA=="], "quick-format-unescaped": ["quick-format-unescaped@4.0.4", "", {}, "sha512-tYC1Q1hgyRuHgloV/YXs2w15unPVh8qfu/qCTfhTYamaw7fyhumKa2yGpdSo87vY32rIclj+4fWYQXUMs9EHvg=="], diff --git a/api/otelcol/config.yaml b/api/otelcol/config.yaml new file mode 100644 index 0000000..2b1bf21 --- /dev/null +++ b/api/otelcol/config.yaml @@ -0,0 +1,68 @@ +# OpenTelemetry Collector configuration for deflock-api +# Receives traces and logs from the API, forwards to Grafana Cloud via the +# unified OTLP gateway (routes traces → Tempo, logs → Loki automatically). +# +# Required environment variables (set in /etc/systemd/system/otelcol.service): +# GRAFANA_OTLP_ENDPOINT e.g. https://otlp-gateway-prod-us-east-0.grafana.net/otlp +# Found in Grafana Cloud portal → OpenTelemetry +# GRAFANA_INSTANCE_ID numeric stack ID (same for all signals) +# GRAFANA_API_KEY Grafana Cloud API key (Metrics Publisher role is sufficient) + +receivers: + otlp: + protocols: + http: + endpoint: 127.0.0.1:4318 + +processors: + batch: + timeout: 5s + send_batch_size: 256 + + # Drop healthcheck spans to reduce noise + filter/drop_healthcheck: + error_mode: ignore + traces: + span: + - 'attributes["http.route"] == "/healthcheck"' + + # Only forward spans that resulted in an error + filter/errors_only: + error_mode: ignore + traces: + span: + - 'status.code != STATUS_CODE_ERROR' + +exporters: + otlp_http/grafana: + endpoint: ${env:GRAFANA_OTLP_ENDPOINT} + auth: + authenticator: basicauth/grafana + +extensions: + basicauth/grafana: + client_auth: + username: ${env:GRAFANA_INSTANCE_ID} + password: ${env:GRAFANA_API_KEY} + + health_check: + endpoint: 127.0.0.1:13133 + +service: + telemetry: + metrics: + level: none + extensions: [basicauth/grafana, health_check] + pipelines: + traces: + receivers: [otlp] + processors: [filter/drop_healthcheck, 
filter/errors_only, batch] + exporters: [otlp_http/grafana] + logs: + receivers: [otlp] + processors: [batch] + exporters: [otlp_http/grafana] + metrics: + receivers: [otlp] + processors: [batch] + exporters: [otlp_http/grafana] diff --git a/api/otelcol/otelcol.service b/api/otelcol/otelcol.service new file mode 100644 index 0000000..0963a95 --- /dev/null +++ b/api/otelcol/otelcol.service @@ -0,0 +1,25 @@ +[Unit] +Description=OpenTelemetry Collector (deflock) +After=network.target +Wants=network.target + +[Service] +Type=simple +User=otelcol +Group=deflock + +ExecStart=/usr/local/bin/otelcol-contrib --config /etc/otelcol/config.yaml + +EnvironmentFile=/home/nullplate/secrets/api/.env + +Restart=on-failure +RestartSec=5s + +# Harden the service +NoNewPrivileges=true +ProtectSystem=strict +ProtectHome=true +ReadWritePaths=/var/lib/otelcol + +[Install] +WantedBy=multi-user.target diff --git a/api/package.json b/api/package.json index 8a9e0c9..6a31ef4 100644 --- a/api/package.json +++ b/api/package.json @@ -9,6 +9,16 @@ }, "dependencies": { "@fastify/cors": "^10.0.0", + "@opentelemetry/api": "^1.9.1", + "@opentelemetry/api-logs": "^0.215.0", + "@opentelemetry/exporter-logs-otlp-http": "^0.215.0", + "@opentelemetry/exporter-metrics-otlp-http": "^0.215.0", + "@opentelemetry/exporter-trace-otlp-http": "^0.215.0", + "@opentelemetry/resources": "^2.7.0", + "@opentelemetry/sdk-logs": "^0.215.0", + "@opentelemetry/sdk-metrics": "^2.7.0", + "@opentelemetry/sdk-trace-node": "^2.7.0", + "@opentelemetry/semantic-conventions": "^1.40.0", "@sinclair/typebox": "^0.34.48", "cache-manager": "^7.2.8", "cache-manager-fs-hash": "^3.0.0", diff --git a/api/server.ts b/api/server.ts index d3082e7..d6d3610 100644 --- a/api/server.ts +++ b/api/server.ts @@ -1,5 +1,34 @@ +import './telemetry'; +import { tracer, otelLogger, SeverityNumber, meter } from './telemetry'; +import { type Span, SpanKind, SpanStatusCode, context, trace } from '@opentelemetry/api'; import Fastify, { FastifyInstance, 
FastifyError } from 'fastify'; + +declare module 'fastify' { + interface FastifyRequest { + span?: Span; + traceId?: string; + errorHandled?: boolean; + } +} + +function classifyError(error: FastifyError): string { + if (error.code === 'FST_ERR_VALIDATION') return 'validation_error'; + const msg = error.message.toLowerCase(); + if (msg.includes('geocode') || msg.includes('nominatim')) return 'upstream_error:nominatim'; + if (msg.includes('sponsors') || msg.includes('github')) return 'upstream_error:github'; + if (msg.includes('zammad') || msg.includes('ticket')) return 'upstream_error:zammad'; + if (msg.includes('turnstile') || msg.includes('siteverify')) return 'upstream_error:turnstile'; + return 'internal_error'; +} + +function classifyByStatus(statusCode: number): string { + if (statusCode === 404) return 'not_found'; + if (statusCode === 400) return 'client_error'; + if (statusCode === 401 || statusCode === 403) return 'auth_error'; + if (statusCode >= 400 && statusCode < 500) return 'client_error'; + return 'internal_error'; +} import cors from '@fastify/cors'; import { NominatimClient, NominatimResultSchema } from './services/NominatimClient'; import { GithubClient, SponsorsResponseSchema } from './services/GithubClient'; @@ -24,16 +53,42 @@ const start = async () => { // Global error handler server.setErrorHandler((error: FastifyError, request, reply) => { + const errorType = classifyError(error); + const statusCode = error.statusCode ?? 500; + const { span } = request; + + if (span) { + span.setAttribute('error.type', errorType); + span.setStatus({ code: SpanStatusCode.ERROR, message: error.message }); + span.recordException(error); + } + + otelLogger.emit({ + severityNumber: SeverityNumber.ERROR, + severityText: 'ERROR', + body: error.message, + attributes: { + 'error.type': errorType, + 'http.route': (request.routeOptions as { url?: string })?.url ?? 
'', + 'http.request.method': request.method, + 'http.response.status_code': statusCode, + 'exception.message': error.message, + 'exception.stacktrace': error.stack ?? '', + 'trace.id': request.traceId ?? '', + }, + }); + + request.errorHandled = true; + server.log.error({ url: request.url, method: request.method, + traceId: request.traceId, error: error.message, stack: error.stack, }, 'Request error'); - - reply.status(error.statusCode || 500).send({ - error: 'Internal Server Error', - }); + + reply.status(statusCode).send({ error: 'Internal Server Error' }); }); // Coors Banquet Config @@ -56,6 +111,59 @@ const start = async () => { methods: ['GET', 'HEAD', 'POST'], }); + server.addHook('onRequest', (request, _reply, done) => { + const route = (request.routeOptions as { url?: string })?.url ?? request.url.split('?')[0]; + const span = tracer.startSpan(`${request.method} ${route}`, { + kind: SpanKind.SERVER, + attributes: { + 'http.request.method': request.method, + 'http.route': route, + 'http.url': request.url, + 'network.peer.address': request.ip, + }, + }); + request.span = span; + request.traceId = span.spanContext().traceId; + done(); + }); + + server.addHook('onResponse', (request, reply, done) => { + const { span } = request; + if (span) { + const statusCode = reply.statusCode; + const route = (request.routeOptions as { url?: string })?.url ?? request.url.split('?')[0]; + requestCounter.add(1, { + 'http.route': route, + 'http.request.method': request.method, + 'http.response.status_code': statusCode, + }); + span.setAttribute('http.response.status_code', statusCode); + if (statusCode >= 500) { + span.setStatus({ code: SpanStatusCode.ERROR }); + } + if (!request.errorHandled && statusCode >= 400) { + otelLogger.emit({ + severityNumber: statusCode >= 500 ? SeverityNumber.ERROR : SeverityNumber.WARN, + severityText: statusCode >= 500 ? 'ERROR' : 'WARN', + body: `HTTP ${statusCode} ${request.method} ${(request.routeOptions as { url?: string })?.url ?? 
request.url}`, + attributes: { + 'error.type': classifyByStatus(statusCode), + 'http.route': (request.routeOptions as { url?: string })?.url ?? '', + 'http.request.method': request.method, + 'http.response.status_code': statusCode, + 'trace.id': request.traceId ?? '', + }, + }); + } + span.end(); + } + done(); + }); + + const requestCounter = meter.createCounter('http.server.requests.total', { + description: 'Total number of HTTP requests, by route, method, and status code', + }); + const nominatim = new NominatimClient(); const githubClient = new GithubClient(); const turnstileClient = new TurnstileClient(); @@ -88,7 +196,7 @@ const start = async () => { }, async (request, reply) => { const { query } = request.query as { query: string }; reply.header('Cache-Control', 'public, max-age=86400, s-maxage=86400'); - const result = await nominatim.geocodeSingleResult(query); + const result = await nominatim.geocodeSingleResult(query, request.span); if (!result) { return reply.status(404).send({ error: 'No results found' }); } @@ -115,7 +223,7 @@ const start = async () => { }, async (request, reply) => { const { query } = request.query as { query: string }; reply.header('Cache-Control', 'public, max-age=86400, s-maxage=86400'); - const result = await nominatim.geocodePhrase(query); + const result = await nominatim.geocodePhrase(query, false, request.span); return result; }); @@ -135,7 +243,7 @@ const start = async () => { }, async (request, reply) => { const { username } = request.query as { username?: string }; reply.header('Cache-Control', 'public, max-age=60, s-maxage=600'); - const result = await githubClient.getSponsors(username || 'frillweeman'); + const result = await githubClient.getSponsors(username || 'frillweeman', request.span); return result; }); @@ -152,12 +260,12 @@ const start = async () => { const { name, email, topic, subject, message, turnstileToken } = request.body as ContactMessageBody; const remoteIp = request.ip; - const valid = await 
turnstileClient.verify(turnstileToken, remoteIp); + const valid = await turnstileClient.verify(turnstileToken, remoteIp, request.span); if (!valid) { return reply.status(400).send({ error: 'Invalid captcha' }); } - await zammadClient.createTicket({ name, email, topic, subject, message }); + await zammadClient.createTicket({ name, email, topic, subject, message }, request.span); return reply.status(201).send({}); }); diff --git a/api/services/GithubClient.ts b/api/services/GithubClient.ts index a268c15..346134c 100644 --- a/api/services/GithubClient.ts +++ b/api/services/GithubClient.ts @@ -1,5 +1,7 @@ import { Type, Static } from '@sinclair/typebox'; +import { type Span, SpanKind, SpanStatusCode, context, trace } from '@opentelemetry/api'; +import { tracer } from '../telemetry'; const GITHUB_TOKEN = process.env.GITHUB_TOKEN || ''; const graphQLEndpoint = 'https://api.github.com/graphql'; @@ -18,22 +20,38 @@ export type Sponsor = Static; export const SponsorsResponseSchema = Type.Array(SponsorSchema); export class GithubClient { - async getSponsors(username: string): Promise { - const query = `query { user(login: \"${username}\") { sponsorshipsAsMaintainer(first: 100) { nodes { sponsor { login name avatarUrl url } } } } }`; + /** Fetches up to 100 sponsors of `username` from the GitHub GraphQL API. The login is sent as a GraphQL variable so caller-supplied input is never interpolated into the query text. When `parentSpan` is given, the client span is started as its child. Throws if GitHub answers with a non-OK status. */ + async getSponsors(username: string, parentSpan?: Span): Promise { + const query = `query($login: String!) { user(login: $login) { sponsorshipsAsMaintainer(first: 100) { nodes { sponsor { login name avatarUrl url } } } } }`; - const body = JSON.stringify({ query, variables: '' }); + const body = JSON.stringify({ query, variables: { login: username } }); - const response = await fetch(graphQLEndpoint, { - method: 'POST', - headers: { - 'Authorization': `Bearer ${GITHUB_TOKEN}`, - 'User-Agent': 'Shotgun', - 'Content-Type': 'application/json', - }, - body, - }); - if (!response.ok) { - throw new Error(`Failed to get sponsors: ${response.status}`); + const ctx = parentSpan ? 
trace.setSpan(context.active(), parentSpan) : context.active(); + const span = tracer.startSpan('github.getSponsors', { + kind: SpanKind.CLIENT, + attributes: { 'peer.service': 'github', 'http.request.method': 'POST' }, + }, ctx); + try { + const response = await fetch(graphQLEndpoint, { + method: 'POST', + headers: { + 'Authorization': `Bearer ${GITHUB_TOKEN}`, + 'User-Agent': 'Shotgun', + 'Content-Type': 'application/json', + }, + body, + }); + span.setAttribute('http.response.status_code', response.status); + if (!response.ok) { + throw new Error(`Failed to get sponsors: ${response.status}`); + } + const json = await response.json(); + return json?.data?.user?.sponsorshipsAsMaintainer?.nodes || []; + } catch (err) { + span.recordException(err as Error); + span.setStatus({ code: SpanStatusCode.ERROR, message: (err as Error).message }); + span.setAttribute('error.type', 'upstream_error'); + throw err; + } finally { + span.end(); } - const json = await response.json(); - return json?.data?.user?.sponsorshipsAsMaintainer?.nodes || []; } } diff --git a/api/services/NominatimClient.ts b/api/services/NominatimClient.ts index 2880e6a..636e23a 100644 --- a/api/services/NominatimClient.ts +++ b/api/services/NominatimClient.ts @@ -1,6 +1,8 @@ import { createCache, Cache } from 'cache-manager'; import { Type, Static } from '@sinclair/typebox'; +import { type Span, SpanKind, SpanStatusCode, context, trace } from '@opentelemetry/api'; +import { tracer, otelLogger, SeverityNumber } from '../telemetry'; const { DiskStore } = require('cache-manager-fs-hash'); export const NominatimResultSchema = Type.Object({ @@ -53,7 +55,7 @@ const cache: Cache = createCache({ export class NominatimClient { baseUrl = 'https://nominatim.openstreetmap.org/search'; - async geocodePhrase(query: string, includeGeoJson: boolean = false): Promise { + async geocodePhrase(query: string, includeGeoJson: boolean = false, parentSpan?: Span): Promise { const cacheKey = `geocode:${query}`; const cached = 
await cache.get(cacheKey); if (cached) { @@ -61,21 +63,46 @@ export class NominatimClient { } const geojsonParam = includeGeoJson ? '&polygon_geojson=1' : ''; const url = `${this.baseUrl}?q=${encodeURIComponent(query)}&format=json&addressdetails=1&limit=8&countrycodes=us&dedupe=1${geojsonParam}`; - const response = await fetch(url, { - headers: { - 'User-Agent': 'DeFlock/1.2', - }, - }); - if (!response.ok) { - throw new Error(`Failed to geocode phrase: ${response.status}`); + const ctx = parentSpan ? trace.setSpan(context.active(), parentSpan) : context.active(); + const span = tracer.startSpan('nominatim.geocode', { + kind: SpanKind.CLIENT, + attributes: { 'peer.service': 'nominatim', 'http.request.method': 'GET' }, + }, ctx); + try { + const response = await fetch(url, { + headers: { 'User-Agent': 'DeFlock/1.2' }, + }); + span.setAttribute('http.response.status_code', response.status); + if (!response.ok) { + const body = await response.text(); + otelLogger.emit({ + severityNumber: SeverityNumber.ERROR, + severityText: 'ERROR', + body: `Nominatim error: ${response.status}`, + attributes: { + 'nominatim.status_code': response.status, + 'nominatim.response_body': body, + 'http.url': url, + }, + context: trace.setSpan(context.active(), span), + }); + throw new Error(`Failed to geocode phrase: ${response.status}`); + } + const json = await response.json(); + await cache.set(cacheKey, json); + return json; + } catch (err) { + span.recordException(err as Error); + span.setStatus({ code: SpanStatusCode.ERROR, message: (err as Error).message }); + span.setAttribute('error.type', 'upstream_error'); + throw err; + } finally { + span.end(); } - const json = await response.json(); - await cache.set(cacheKey, json); - return json; } - async geocodeSingleResult(query: string): Promise { - const results = await this.geocodePhrase(query, true); + async geocodeSingleResult(query: string, parentSpan?: Span): Promise { + const results = await this.geocodePhrase(query, true, 
parentSpan); if (!results.length) return null; diff --git a/api/services/TurnstileClient.ts b/api/services/TurnstileClient.ts index 856a017..7fcfd4d 100644 --- a/api/services/TurnstileClient.ts +++ b/api/services/TurnstileClient.ts @@ -1,25 +1,47 @@ +import { type Span, SpanKind, SpanStatusCode, context, trace } from '@opentelemetry/api'; +import { tracer } from '../telemetry'; + const TURNSTILE_SECRET_KEY = process.env.TURNSTILE_SECRET_KEY || ''; const SITEVERIFY_URL = 'https://challenges.cloudflare.com/turnstile/v0/siteverify'; export class TurnstileClient { - async verify(token: string, remoteIp?: string): Promise { + async verify(token: string, remoteIp?: string, parentSpan?: Span): Promise { const body = new URLSearchParams({ secret: TURNSTILE_SECRET_KEY, response: token, ...(remoteIp ? { remoteip: remoteIp } : {}), }); - - const response = await fetch(SITEVERIFY_URL, { - method: 'POST', - headers: { 'Content-Type': 'application/x-www-form-urlencoded' }, - body: body.toString(), - }); - - if (!response.ok) { - throw new Error(`Turnstile siteverify request failed: ${response.status}`); + const ctx = parentSpan ? 
trace.setSpan(context.active(), parentSpan) : context.active(); + const span = tracer.startSpan('turnstile.verify', { + kind: SpanKind.CLIENT, + attributes: { 'peer.service': 'turnstile', 'http.request.method': 'POST' }, + }, ctx); + try { + const response = await fetch(SITEVERIFY_URL, { + method: 'POST', + headers: { 'Content-Type': 'application/x-www-form-urlencoded' }, + body: body.toString(), + }); + span.setAttribute('http.response.status_code', response.status); + if (!response.ok) { + throw new Error(`Turnstile siteverify request failed: ${response.status}`); + } + const json = await response.json() as { success: boolean }; + const success = json.success === true; + if (!success) { + span.setAttribute('error.type', 'captcha_failure'); + span.setStatus({ code: SpanStatusCode.ERROR, message: 'Captcha verification failed' }); + } + return success; + } catch (err) { + span.recordException(err as Error); + span.setStatus({ code: SpanStatusCode.ERROR, message: (err as Error).message }); + if (!(err instanceof Error && err.message.startsWith('Turnstile'))) { + span.setAttribute('error.type', 'upstream_error'); + } + throw err; + } finally { + span.end(); } - - const json = await response.json() as { success: boolean }; - return json.success === true; } } diff --git a/api/services/ZammadClient.ts b/api/services/ZammadClient.ts index f1e5937..9a6beea 100644 --- a/api/services/ZammadClient.ts +++ b/api/services/ZammadClient.ts @@ -1,4 +1,6 @@ import { Type, Static } from '@sinclair/typebox'; +import { type Span, SpanKind, SpanStatusCode, context, trace } from '@opentelemetry/api'; +import { tracer } from '../telemetry'; const ZAMMAD_URL = process.env.ZAMMAD_URL || ''; const ZAMMAD_TOKEN = process.env.ZAMMAD_TOKEN || ''; @@ -44,76 +46,114 @@ export interface CreateTicketPayload { } export class ZammadClient { - private async upsertCustomer(name: string, email: string): Promise { + private async upsertCustomer(name: string, email: string, parentSpan: Span): Promise { 
const normalizedEmail = email.toLowerCase(); + const ctx = trace.setSpan(context.active(), parentSpan); // Search for existing user by email - const searchResponse = await fetch( - `${ZAMMAD_URL}/api/v1/users/search?query=${encodeURIComponent(normalizedEmail)}&limit=1`, - { - headers: { 'Authorization': `Token token=${ZAMMAD_TOKEN}` }, + const searchSpan = tracer.startSpan('zammad.upsertCustomer.search', { + kind: SpanKind.CLIENT, + attributes: { 'peer.service': 'zammad', 'http.request.method': 'GET' }, + }, ctx); + try { + const searchResponse = await fetch( + `${ZAMMAD_URL}/api/v1/users/search?query=${encodeURIComponent(normalizedEmail)}&limit=1`, + { + headers: { 'Authorization': `Token token=${ZAMMAD_TOKEN}` }, + } + ); + searchSpan.setAttribute('http.response.status_code', searchResponse.status); + if (searchResponse.ok) { + const users = await searchResponse.json() as Array<{ id: number; email: string }>; + const match = users.find(u => u.email?.toLowerCase() === normalizedEmail); + if (match) return match.id; } - ); - - if (searchResponse.ok) { - const users = await searchResponse.json() as Array<{ id: number; email: string }>; - const match = users.find(u => u.email?.toLowerCase() === normalizedEmail); - if (match) return match.id; + } catch (err) { + searchSpan.recordException(err as Error); + searchSpan.setStatus({ code: SpanStatusCode.ERROR, message: (err as Error).message }); + throw err; + } finally { + searchSpan.end(); } // Create the customer if not found - const createResponse = await fetch(`${ZAMMAD_URL}/api/v1/users`, { - method: 'POST', - headers: { - 'Authorization': `Token token=${ZAMMAD_TOKEN}`, - 'Content-Type': 'application/json', - }, - body: JSON.stringify({ firstname: name, email: normalizedEmail, roles: ['Customer'] }), - }); - - if (!createResponse.ok) { - const text = await createResponse.text(); - throw new Error(`Zammad customer creation failed: ${createResponse.status} ${text}`); + const createSpan = 
tracer.startSpan('zammad.upsertCustomer.create', { + kind: SpanKind.CLIENT, + attributes: { 'peer.service': 'zammad', 'http.request.method': 'POST' }, + }, ctx); + try { + const createResponse = await fetch(`${ZAMMAD_URL}/api/v1/users`, { + method: 'POST', + headers: { + 'Authorization': `Token token=${ZAMMAD_TOKEN}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ firstname: name, email: normalizedEmail, roles: ['Customer'] }), + }); + createSpan.setAttribute('http.response.status_code', createResponse.status); + if (!createResponse.ok) { + const text = await createResponse.text(); + throw new Error(`Zammad customer creation failed: ${createResponse.status} ${text}`); + } + const user = await createResponse.json() as { id: number }; + return user.id; + } catch (err) { + createSpan.recordException(err as Error); + createSpan.setStatus({ code: SpanStatusCode.ERROR, message: (err as Error).message }); + throw err; + } finally { + createSpan.end(); } - - const user = await createResponse.json() as { id: number }; - return user.id; } - async createTicket(payload: CreateTicketPayload): Promise { + /** Creates a Zammad ticket from `payload`, upserting the customer first. `parentSpan` (optional) is the span the 'zammad.createTicket' client span is parented to; when omitted the active context is used. Any failure is recorded on the span and rethrown. */ async createTicket(payload: CreateTicketPayload, parentSpan?: Span): Promise<void> { const { name, email, topic, subject, message } = payload; const group = TOPIC_GROUP_MAP[topic]; - const customerId = await this.upsertCustomer(name, email); - - const body = JSON.stringify({ - title: subject, - group, - priority: topic === 'media' ? 
'3 high' : '2 normal', - customer_id: customerId, - article: { - subject, - body: message, - type: 'email', - sender: 'Customer', - from: `${name} <${email}>`, - to: 'contact@deflock.org', - internal: false, - }, + const span = tracer.startSpan('zammad.createTicket', { + kind: SpanKind.CLIENT, + attributes: { 'peer.service': 'zammad', 'http.request.method': 'POST' }, - }); + }, parentSpan ? trace.setSpan(context.active(), parentSpan) : context.active()); + try { + const customerId = await this.upsertCustomer(name, email, span); - const response = await fetch(`${ZAMMAD_URL}/api/v1/tickets`, { - method: 'POST', - headers: { - 'Authorization': `Token token=${ZAMMAD_TOKEN}`, - 'Content-Type': 'application/json', - }, - body, - }); + const body = JSON.stringify({ + title: subject, + group, + priority: topic === 'media' ? '3 high' : '2 normal', + customer_id: customerId, + article: { + subject, + body: message, + type: 'email', + sender: 'Customer', + from: `${name} <${email}>`, + to: 'contact@deflock.org', + internal: false, + }, + }); - if (!response.ok) { - const text = await response.text(); - throw new Error(`Zammad ticket creation failed: ${response.status} ${text}`); + const ctx = trace.setSpan(context.active(), span); + const response = await context.with(ctx, () => fetch(`${ZAMMAD_URL}/api/v1/tickets`, { + method: 'POST', + headers: { + 'Authorization': `Token token=${ZAMMAD_TOKEN}`, + 'Content-Type': 'application/json', + }, + body, + })); + span.setAttribute('http.response.status_code', response.status); + if (!response.ok) { + const text = await response.text(); + throw new Error(`Zammad ticket creation failed: ${response.status} ${text}`); + } + } catch (err) { + span.recordException(err as Error); + span.setStatus({ code: SpanStatusCode.ERROR, message: (err as Error).message }); + span.setAttribute('error.type', 'upstream_error'); + throw err; + } finally { + span.end(); } } } diff --git a/api/telemetry.ts b/api/telemetry.ts new file mode 100644 index 0000000..2d6469a --- /dev/null +++ b/api/telemetry.ts @@ -0,0 +1,49 @@ +import { 
NodeTracerProvider, BatchSpanProcessor } from '@opentelemetry/sdk-trace-node'; +import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http'; +import { LoggerProvider, BatchLogRecordProcessor } from '@opentelemetry/sdk-logs'; +import { OTLPLogExporter } from '@opentelemetry/exporter-logs-otlp-http'; +import { MeterProvider, PeriodicExportingMetricReader } from '@opentelemetry/sdk-metrics'; +import { OTLPMetricExporter } from '@opentelemetry/exporter-metrics-otlp-http'; +import { resourceFromAttributes } from '@opentelemetry/resources'; +import { trace, metrics } from '@opentelemetry/api'; +import { logs, SeverityNumber } from '@opentelemetry/api-logs'; + +export { SeverityNumber }; + +const OTEL_ENDPOINT = process.env.OTEL_EXPORTER_OTLP_ENDPOINT ?? 'http://127.0.0.1:4318'; + +const resource = resourceFromAttributes({ + 'service.name': 'deflock-api', + 'deployment.environment': process.env.NODE_ENV ?? 'production', +}); + +const tracerProvider = new NodeTracerProvider({ + resource, + spanProcessors: [ + new BatchSpanProcessor(new OTLPTraceExporter({ url: `${OTEL_ENDPOINT}/v1/traces` })), + ], +}); +tracerProvider.register(); + +const loggerProvider = new LoggerProvider({ + resource, + processors: [ + new BatchLogRecordProcessor(new OTLPLogExporter({ url: `${OTEL_ENDPOINT}/v1/logs` })), + ], +}); +logs.setGlobalLoggerProvider(loggerProvider); + +const meterProvider = new MeterProvider({ + resource, + readers: [ + new PeriodicExportingMetricReader({ + exporter: new OTLPMetricExporter({ url: `${OTEL_ENDPOINT}/v1/metrics` }), + exportIntervalMillis: 10_000, + }), + ], +}); +metrics.setGlobalMeterProvider(meterProvider); + +export const tracer = trace.getTracer('deflock-api', '1.0.0'); +export const otelLogger = logs.getLogger('deflock-api', '1.0.0'); +export const meter = metrics.getMeter('deflock-api', '1.0.0');