This repository has been archived by the owner on Feb 20, 2022. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 38
/
Copy pathExporterLogic.cs
110 lines (94 loc) · 4.08 KB
/
ExporterLogic.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
using Axinom.Toolkit;
using Prometheus;
using System;
using System.Diagnostics;
using System.Threading;
using System.Threading.Tasks;
namespace DockerExporter
{
/// <summary>
/// Top-level logic of the exporter: creates a <c>DockerTracker</c> for the configured Docker endpoint,
/// publishes Prometheus metrics over HTTP and refreshes them before every collection.
/// </summary>
public sealed class ExporterLogic
{
    /// <summary>
    /// TCP port on which the Prometheus metric server listens.
    /// </summary>
    private const int MetricsPort = 9417;

    private static readonly LogSource _log = Log.Default;

    // Created by RunAsync() before the collection callback is registered.
    private DockerTracker? _tracker;

    /// <summary>
    /// URL of the Docker endpoint to probe. Parsed into a <see cref="Uri"/> when <see cref="RunAsync"/> starts.
    /// </summary>
    public string DockerUrl { get; set; }

    /// <summary>
    /// Initializes the instance with a platform-specific default for <see cref="DockerUrl"/>.
    /// </summary>
    public ExporterLogic()
    {
        // Default value only valid if not running as container.
        // This is intended for development purposes only.
        DockerUrl = Helpers.Environment.IsMicrosoftOperatingSystem()
            ? "npipe://./pipe/docker_engine"
            : "unix:///var/run/docker.sock";
    }

    /// <summary>
    /// Starts the metric server and keeps it running until <paramref name="cancel"/> is signaled,
    /// then stops the server.
    /// </summary>
    /// <param name="cancel">Signals that the exporter should shut down.</param>
    /// <returns>A task that completes once the metric server has been stopped.</returns>
    public async Task RunAsync(CancellationToken cancel)
    {
        _log.Info($"Configured to probe Docker on {DockerUrl}");

        _tracker = new DockerTracker(new Uri(DockerUrl));

        // The tracker must exist before this registration, as the callback dereferences it.
        Metrics.DefaultRegistry.AddBeforeCollectCallback(UpdateMetricsAsync);

        var server = new MetricServer(MetricsPort);
#if DEBUG
        _log.Info($"Open http://localhost:{MetricsPort}/metrics to initiate a probe.");
#endif
        server.Start();

        try
        {
            // An infinite delay can only ever end through cancellation, so no loop is needed.
            await Task.Delay(Timeout.Infinite, cancel);
        }
        catch (OperationCanceledException) when (cancel.IsCancellationRequested)
        {
            // Totally normal - we are exiting.
        }

        await server.StopAsync();
    }

    /// <summary>
    /// Called before every Prometheus collection in order to update metrics.
    /// </summary>
    /// <remarks>
    /// The Docker API can be very slow at times, so there is a risk that the scrape will
    /// just time out under load. To avoid that, we enforce a maximum update duration and
    /// will give up on fetching new values if the update takes longer than that. If the
    /// threshold is crossed, we simply allow the scrape to proceed with stale data, while
    /// the update keeps running in the background, hopefully eventually succeeding.
    ///
    /// If multiple parallel scrapes are made, the results from the first one will be used
    /// to satisfy all requests that come in while the data loading triggered by the first
    /// scrape is still being performed (even if we give up with the scrape before loading finishes).
    /// This acts as a primitive form of rate control to avoid overloading the fragile Docker API.
    /// The implementation for this is in DockerTracker.
    ///
    /// If the caller cancels the collection via <paramref name="cancel"/>, the wait is abandoned
    /// quietly; this is not treated as an error.
    /// </remarks>
    /// <param name="cancel">Cancellation token supplied by the caller performing the collection.</param>
    private async Task UpdateMetricsAsync(CancellationToken cancel)
    {
        _log.Debug("Probing Docker.");

        // Stop waiting when either the inline time budget runs out or the caller cancels.
        using var inlineCancellation = new CancellationTokenSource(Constants.MaxInlineUpdateDuration);
        using var combinedCancellation = CancellationTokenSource.CreateLinkedTokenSource(inlineCancellation.Token, cancel);

        var updateTask = _tracker!.TryUpdateAsync()
            .WithAbandonment(combinedCancellation.Token);

        try
        {
            await updateTask;
        }
        catch (OperationCanceledException) when (inlineCancellation.IsCancellationRequested)
        {
            _log.Debug("Probe took too long - will return stale results and finish probe in background.");

            // This is expected if it goes above the inline threshold, and will be ignored.
            // Per the original author's note, other (non-fatal) exceptions are caught, logged
            // and ignored further down in the Docker probing code.
            ExporterLogicMetrics.InlineTimeouts.Inc();
        }
        catch (OperationCanceledException) when (cancel.IsCancellationRequested)
        {
            // The caller gave up on this collection (e.g. shutdown in progress). That is a normal
            // event and must not fall through to the fatal handler below, which ends the process.
            _log.Debug("Probe abandoned because the collection was cancelled by the caller.");
        }
        catch (Exception ex)
        {
            // Errors that reach this point are fatal errors that we should never hide.
            _log.Error(Helpers.Debug.GetAllExceptionMessages(ex));
            _log.Error(ex.ToString());

            Environment.Exit(-1);
        }
    }
}
}