mirror of
https://github.com/Control-D-Inc/ctrld.git
synced 2026-05-15 00:50:25 +02:00
committed by
Cuong Manh Le
parent
44352f8006
commit
71f26a6d81
+12
-1
@@ -719,6 +719,10 @@ NOTE: Uninstalling will set DNS to values provided by DHCP.`,
|
||||
sort.Strings(s)
|
||||
return s
|
||||
}
|
||||
// If metrics is enabled, server set this for all clients, so we can check only the first one.
|
||||
// Ideally, we may have a field in response to indicate that query count should be shown, but
|
||||
// it would break earlier version of ctrld, which only look list of clients in response.
|
||||
withQueryCount := len(clients) > 0 && clients[0].IncludeQueryCount
|
||||
data := make([][]string, len(clients))
|
||||
for i, c := range clients {
|
||||
row := []string{
|
||||
@@ -727,10 +731,17 @@ NOTE: Uninstalling will set DNS to values provided by DHCP.`,
|
||||
c.Mac,
|
||||
strings.Join(map2Slice(c.Source), ","),
|
||||
}
|
||||
if withQueryCount {
|
||||
row = append(row, strconv.FormatInt(c.QueryCount, 10))
|
||||
}
|
||||
data[i] = row
|
||||
}
|
||||
table := tablewriter.NewWriter(os.Stdout)
|
||||
table.SetHeader([]string{"IP", "Hostname", "Mac", "Discovered"})
|
||||
headers := []string{"IP", "Hostname", "Mac", "Discovered"}
|
||||
if withQueryCount {
|
||||
headers = append(headers, "Queries")
|
||||
}
|
||||
table.SetHeader(headers)
|
||||
table.SetAutoFormatHeaders(false)
|
||||
table.AppendBulk(data)
|
||||
table.Render()
|
||||
|
||||
@@ -10,6 +10,8 @@ import (
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
dto "github.com/prometheus/client_model/go"
|
||||
|
||||
"github.com/Control-D-Inc/ctrld"
|
||||
)
|
||||
|
||||
@@ -66,6 +68,25 @@ func (p *prog) registerControlServerHandler() {
|
||||
sort.Slice(clients, func(i, j int) bool {
|
||||
return clients[i].IP.Less(clients[j].IP)
|
||||
})
|
||||
if p.cfg.Service.MetricsQueryStats {
|
||||
for _, client := range clients {
|
||||
client.IncludeQueryCount = true
|
||||
dm := &dto.Metric{}
|
||||
m, err := statsClientQueriesCount.MetricVec.GetMetricWithLabelValues(
|
||||
client.IP.String(),
|
||||
client.Mac,
|
||||
client.Hostname,
|
||||
)
|
||||
if err != nil {
|
||||
mainLog.Load().Debug().Err(err).Msgf("could not get metrics for client: %v", client)
|
||||
continue
|
||||
}
|
||||
if err := m.Write(dm); err == nil {
|
||||
client.QueryCount = int64(dm.Counter.GetValue())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err := json.NewEncoder(w).Encode(&clients); err != nil {
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
|
||||
+55
-11
@@ -54,6 +54,14 @@ type proxyRequest struct {
|
||||
ufr *upstreamForResult
|
||||
}
|
||||
|
||||
// proxyResponse contains data for proxying a DNS response from upstream.
|
||||
type proxyResponse struct {
|
||||
answer *dns.Msg
|
||||
cached bool
|
||||
clientInfo bool
|
||||
upstream string
|
||||
}
|
||||
|
||||
// upstreamForResult represents the result of processing rules for a request.
|
||||
type upstreamForResult struct {
|
||||
upstreams []string
|
||||
@@ -101,26 +109,49 @@ func (p *prog) serveDNS(listenerNum string) error {
|
||||
fmtSrcToDest := fmtRemoteToLocal(listenerNum, ci.Hostname, remoteAddr.String())
|
||||
t := time.Now()
|
||||
ctrld.Log(ctx, mainLog.Load().Info(), "QUERY: %s: %s %s", fmtSrcToDest, dns.TypeToString[q.Qtype], domain)
|
||||
res := p.upstreamFor(ctx, listenerNum, listenerConfig, remoteAddr, ci.Mac, domain)
|
||||
ur := p.upstreamFor(ctx, listenerNum, listenerConfig, remoteAddr, ci.Mac, domain)
|
||||
|
||||
labelValues := make([]string, 0, len(statsQueriesCountLabels))
|
||||
labelValues = append(labelValues, net.JoinHostPort(listenerConfig.IP, strconv.Itoa(listenerConfig.Port)))
|
||||
labelValues = append(labelValues, ci.IP)
|
||||
labelValues = append(labelValues, ci.Mac)
|
||||
labelValues = append(labelValues, ci.Hostname)
|
||||
|
||||
var answer *dns.Msg
|
||||
if !res.matched && listenerConfig.Restricted {
|
||||
if !ur.matched && listenerConfig.Restricted {
|
||||
ctrld.Log(ctx, mainLog.Load().Info(), "query refused, %s does not match any network policy", remoteAddr.String())
|
||||
answer = new(dns.Msg)
|
||||
answer.SetRcode(m, dns.RcodeRefused)
|
||||
labelValues = append(labelValues, "") // no upstream
|
||||
} else {
|
||||
var failoverRcode []int
|
||||
if listenerConfig.Policy != nil {
|
||||
failoverRcode = listenerConfig.Policy.FailoverRcodeNumbers
|
||||
}
|
||||
answer = p.proxy(ctx, &proxyRequest{
|
||||
pr := p.proxy(ctx, &proxyRequest{
|
||||
msg: m,
|
||||
ci: ci,
|
||||
failoverRcodes: failoverRcode,
|
||||
ufr: res,
|
||||
ufr: ur,
|
||||
})
|
||||
answer = pr.answer
|
||||
rtt := time.Since(t)
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "received response of %d bytes in %s", answer.Len(), rtt)
|
||||
upstream := pr.upstream
|
||||
switch {
|
||||
case pr.cached:
|
||||
upstream = "cache"
|
||||
case pr.clientInfo:
|
||||
upstream = "client_info_table"
|
||||
}
|
||||
labelValues = append(labelValues, upstream)
|
||||
}
|
||||
labelValues = append(labelValues, dns.TypeToString[q.Qtype])
|
||||
labelValues = append(labelValues, dns.RcodeToString[answer.Rcode])
|
||||
go func() {
|
||||
p.WithLabelValuesInc(statsQueriesCount, labelValues...)
|
||||
p.WithLabelValuesInc(statsClientQueriesCount, []string{ci.IP, ci.Mac, ci.Hostname}...)
|
||||
}()
|
||||
if err := w.WriteMsg(answer); err != nil {
|
||||
ctrld.Log(ctx, mainLog.Load().Error().Err(err), "serveDNS: failed to send DNS response to client")
|
||||
}
|
||||
@@ -360,7 +391,7 @@ func (p *prog) proxyLanHostnameQuery(ctx context.Context, msg *dns.Msg) *dns.Msg
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *prog) proxy(ctx context.Context, req *proxyRequest) *dns.Msg {
|
||||
func (p *prog) proxy(ctx context.Context, req *proxyRequest) *proxyResponse {
|
||||
var staleAnswer *dns.Msg
|
||||
upstreams := req.ufr.upstreams
|
||||
serveStaleCache := p.cache != nil && p.cfg.Service.CacheServeStale
|
||||
@@ -370,6 +401,8 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *dns.Msg {
|
||||
upstreams = []string{upstreamOS}
|
||||
}
|
||||
|
||||
res := &proxyResponse{}
|
||||
|
||||
// LAN/PTR lookup flow:
|
||||
//
|
||||
// 1. If there's matching rule, follow it.
|
||||
@@ -384,14 +417,18 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *dns.Msg {
|
||||
case isPrivatePtrLookup(req.msg):
|
||||
isLanOrPtrQuery = true
|
||||
if answer := p.proxyPrivatePtrLookup(ctx, req.msg); answer != nil {
|
||||
return answer
|
||||
res.answer = answer
|
||||
res.clientInfo = true
|
||||
return res
|
||||
}
|
||||
upstreams, upstreamConfigs = p.upstreamsAndUpstreamConfigForLanAndPtr(upstreams, upstreamConfigs)
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "private PTR lookup, using upstreams: %v", upstreams)
|
||||
case isLanHostnameQuery(req.msg):
|
||||
isLanOrPtrQuery = true
|
||||
if answer := p.proxyLanHostnameQuery(ctx, req.msg); answer != nil {
|
||||
return answer
|
||||
res.answer = answer
|
||||
res.clientInfo = true
|
||||
return res
|
||||
}
|
||||
upstreams, upstreamConfigs = p.upstreamsAndUpstreamConfigForLanAndPtr(upstreams, upstreamConfigs)
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "lan hostname lookup, using upstreams: %v", upstreams)
|
||||
@@ -413,7 +450,9 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *dns.Msg {
|
||||
if cachedValue.Expire.After(now) {
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "hit cached response")
|
||||
setCachedAnswerTTL(answer, now, cachedValue.Expire)
|
||||
return answer
|
||||
res.answer = answer
|
||||
res.cached = true
|
||||
return res
|
||||
}
|
||||
staleAnswer = answer
|
||||
}
|
||||
@@ -475,7 +514,9 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *dns.Msg {
|
||||
ctrld.Log(ctx, mainLog.Load().Debug(), "serving stale cached response")
|
||||
now := time.Now()
|
||||
setCachedAnswerTTL(staleAnswer, now, now.Add(staleTTL))
|
||||
return staleAnswer
|
||||
res.answer = staleAnswer
|
||||
res.cached = true
|
||||
return res
|
||||
}
|
||||
continue
|
||||
}
|
||||
@@ -509,12 +550,15 @@ func (p *prog) proxy(ctx context.Context, req *proxyRequest) *dns.Msg {
|
||||
hostname = req.ci.Hostname
|
||||
}
|
||||
ctrld.Log(ctx, mainLog.Load().Info(), "REPLY: %s -> %s (%s): %s", upstreams[n], req.ufr.srcAddr, hostname, dns.RcodeToString[answer.Rcode])
|
||||
return answer
|
||||
res.answer = answer
|
||||
res.upstream = upstreamConfig.Endpoint
|
||||
return res
|
||||
}
|
||||
ctrld.Log(ctx, mainLog.Load().Error(), "all %v endpoints failed", upstreams)
|
||||
answer := new(dns.Msg)
|
||||
answer.SetRcode(req.msg, dns.RcodeServerFailure)
|
||||
return answer
|
||||
res.answer = answer
|
||||
return res
|
||||
}
|
||||
|
||||
func (p *prog) upstreamsAndUpstreamConfigForLanAndPtr(upstreams []string, upstreamConfigs []*ctrld.UpstreamConfig) ([]string, []*ctrld.UpstreamConfig) {
|
||||
|
||||
@@ -187,8 +187,8 @@ func TestCache(t *testing.T) {
|
||||
got1 := prog.proxy(context.Background(), req1)
|
||||
got2 := prog.proxy(context.Background(), req2)
|
||||
assert.NotSame(t, got1, got2)
|
||||
assert.Equal(t, answer1.Rcode, got1.Rcode)
|
||||
assert.Equal(t, answer2.Rcode, got2.Rcode)
|
||||
assert.Equal(t, answer1.Rcode, got1.answer.Rcode)
|
||||
assert.Equal(t, answer2.Rcode, got2.answer.Rcode)
|
||||
}
|
||||
|
||||
func Test_ipAndMacFromMsg(t *testing.T) {
|
||||
|
||||
@@ -0,0 +1,150 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net"
|
||||
"net/http"
|
||||
"runtime"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/collectors"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"github.com/prometheus/prom2json"
|
||||
)
|
||||
|
||||
// metricsServer represents a server to expose Prometheus metrics via HTTP.
|
||||
type metricsServer struct {
|
||||
server *http.Server
|
||||
mux *http.ServeMux
|
||||
reg *prometheus.Registry
|
||||
addr string
|
||||
started bool
|
||||
}
|
||||
|
||||
// newMetricsServer returns new metrics server.
|
||||
func newMetricsServer(addr string, reg *prometheus.Registry) (*metricsServer, error) {
|
||||
mux := http.NewServeMux()
|
||||
ms := &metricsServer{
|
||||
server: &http.Server{Handler: mux},
|
||||
mux: mux,
|
||||
reg: reg,
|
||||
}
|
||||
ms.addr = addr
|
||||
ms.registerMetricsServerHandler()
|
||||
return ms, nil
|
||||
}
|
||||
|
||||
// register adds handlers for given pattern.
|
||||
func (ms *metricsServer) register(pattern string, handler http.Handler) {
|
||||
ms.mux.Handle(pattern, handler)
|
||||
}
|
||||
|
||||
// registerMetricsServerHandler adds handlers for metrics server.
|
||||
func (ms *metricsServer) registerMetricsServerHandler() {
|
||||
ms.register("/metrics", promhttp.HandlerFor(
|
||||
ms.reg,
|
||||
promhttp.HandlerOpts{
|
||||
EnableOpenMetrics: true,
|
||||
Timeout: 10 * time.Second,
|
||||
},
|
||||
))
|
||||
ms.register("/metrics/json", jsonResponse(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
g := prometheus.ToTransactionalGatherer(ms.reg)
|
||||
mfs, done, err := g.Gather()
|
||||
defer done()
|
||||
if err != nil {
|
||||
msg := "could not gather metrics"
|
||||
mainLog.Load().Warn().Err(err).Msg(msg)
|
||||
http.Error(w, msg, http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
result := make([]*prom2json.Family, 0, len(mfs))
|
||||
for _, mf := range mfs {
|
||||
result = append(result, prom2json.NewFamily(mf))
|
||||
}
|
||||
if err := json.NewEncoder(w).Encode(result); err != nil {
|
||||
msg := "could not marshal metrics result"
|
||||
mainLog.Load().Warn().Err(err).Msg(msg)
|
||||
http.Error(w, msg, http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
})))
|
||||
}
|
||||
|
||||
// start runs the metricsServer.
|
||||
func (ms *metricsServer) start() error {
|
||||
listener, err := net.Listen("tcp", ms.addr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
go ms.server.Serve(listener)
|
||||
ms.started = true
|
||||
return nil
|
||||
}
|
||||
|
||||
// stop shutdowns the metricsServer within 2 seconds timeout.
|
||||
func (ms *metricsServer) stop() error {
|
||||
if !ms.started {
|
||||
return nil
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Second*1)
|
||||
defer cancel()
|
||||
return ms.server.Shutdown(ctx)
|
||||
}
|
||||
|
||||
// runMetricsServer initializes metrics stats and runs the metrics server if enabled.
|
||||
func (p *prog) runMetricsServer(ctx context.Context, reloadCh chan struct{}) {
|
||||
if !p.metricsEnabled() {
|
||||
return
|
||||
}
|
||||
|
||||
// Reset all stats.
|
||||
statsVersion.Reset()
|
||||
statsQueriesCount.Reset()
|
||||
statsClientQueriesCount.Reset()
|
||||
|
||||
reg := prometheus.NewRegistry()
|
||||
// Register queries count stats if enabled.
|
||||
if cfg.Service.MetricsQueryStats {
|
||||
reg.MustRegister(statsQueriesCount)
|
||||
reg.MustRegister(statsClientQueriesCount)
|
||||
}
|
||||
|
||||
addr := p.cfg.Service.MetricsListener
|
||||
ms, err := newMetricsServer(addr, reg)
|
||||
if err != nil {
|
||||
mainLog.Load().Warn().Err(err).Msg("could not create new metrics server")
|
||||
return
|
||||
}
|
||||
// Only start listener address if defined.
|
||||
if addr != "" {
|
||||
// Go runtime stats.
|
||||
reg.MustRegister(collectors.NewBuildInfoCollector())
|
||||
reg.MustRegister(collectors.NewGoCollector(
|
||||
collectors.WithGoCollectorRuntimeMetrics(collectors.MetricsAll),
|
||||
))
|
||||
// ctrld stats.
|
||||
reg.MustRegister(statsVersion)
|
||||
statsVersion.WithLabelValues(commit, runtime.Version(), curVersion()).Inc()
|
||||
reg.MustRegister(statsTimeStart)
|
||||
statsTimeStart.Set(float64(time.Now().Unix()))
|
||||
mainLog.Load().Debug().Msgf("starting metrics server on: %s", addr)
|
||||
if err := ms.start(); err != nil {
|
||||
mainLog.Load().Warn().Err(err).Msg("could not start metrics server")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
select {
|
||||
case <-p.stopCh:
|
||||
case <-ctx.Done():
|
||||
case <-reloadCh:
|
||||
}
|
||||
|
||||
if err := ms.stop(); err != nil {
|
||||
mainLog.Load().Warn().Err(err).Msg("could not stop metrics server")
|
||||
return
|
||||
}
|
||||
}
|
||||
@@ -348,6 +348,13 @@ func (p *prog) run(reload bool, reloadCh chan struct{}) {
|
||||
p.checkDnsLoopTicker(ctx)
|
||||
}()
|
||||
|
||||
wg.Add(1)
|
||||
// Prometheus exporter goroutine.
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
p.runMetricsServer(ctx, reloadCh)
|
||||
}()
|
||||
|
||||
if !reload {
|
||||
// Stop writing log to unix socket.
|
||||
consoleWriter.Out = os.Stdout
|
||||
@@ -365,6 +372,11 @@ func (p *prog) run(reload bool, reloadCh chan struct{}) {
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
// metricsEnabled reports whether prometheus exporter is enabled/disabled.
|
||||
func (p *prog) metricsEnabled() bool {
|
||||
return p.cfg.Service.MetricsQueryStats || p.cfg.Service.MetricsListener != ""
|
||||
}
|
||||
|
||||
func (p *prog) Stop(s service.Service) error {
|
||||
mainLog.Load().Info().Msg("Service stopped")
|
||||
close(p.stopCh)
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
package cli
|
||||
|
||||
import "github.com/prometheus/client_golang/prometheus"
|
||||
|
||||
const (
|
||||
metricsLabelListener = "listener"
|
||||
metricsLabelClientSourceIP = "client_source_ip"
|
||||
metricsLabelClientMac = "client_mac"
|
||||
metricsLabelClientHostname = "client_hostname"
|
||||
metricsLabelUpstream = "upstream"
|
||||
metricsLabelRRType = "rr_type"
|
||||
metricsLabelRCode = "rcode"
|
||||
)
|
||||
|
||||
// statsVersion represent ctrld version.
|
||||
var statsVersion = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "ctrld_build_info",
|
||||
Help: "Version of ctrld process.",
|
||||
}, []string{"gitref", "goversion", "version"})
|
||||
|
||||
// statsTimeStart represents start time of ctrld service.
|
||||
var statsTimeStart = prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "ctrld_time_seconds",
|
||||
Help: "Start time of the ctrld process since unix epoch in seconds.",
|
||||
})
|
||||
|
||||
var statsQueriesCountLabels = []string{
|
||||
metricsLabelListener,
|
||||
metricsLabelClientSourceIP,
|
||||
metricsLabelClientMac,
|
||||
metricsLabelClientHostname,
|
||||
metricsLabelUpstream,
|
||||
metricsLabelRRType,
|
||||
metricsLabelRCode,
|
||||
}
|
||||
|
||||
// statsQueriesCount counts total number of queries.
|
||||
var statsQueriesCount = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "ctrld_queries_count",
|
||||
Help: "Total number of queries.",
|
||||
}, statsQueriesCountLabels)
|
||||
|
||||
// statsClientQueriesCount counts total number of queries of a client.
|
||||
//
|
||||
// The labels "client_source_ip", "client_mac", "client_hostname" are unbounded,
|
||||
// thus this stat is highly inefficient if there are many devices.
|
||||
var statsClientQueriesCount = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: "ctrld_client_queries_count",
|
||||
Help: "Total number queries of a client.",
|
||||
}, []string{metricsLabelClientSourceIP, metricsLabelClientMac, metricsLabelClientHostname})
|
||||
|
||||
// WithLabelValuesInc increases prometheus counter by 1 if query stats is enabled.
|
||||
func (p *prog) WithLabelValuesInc(c *prometheus.CounterVec, lvs ...string) {
|
||||
if p.cfg.Service.MetricsQueryStats {
|
||||
c.WithLabelValues(lvs...).Inc()
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user