Skip to content

Commit 83dec2e

Browse files
committed
add metrics for managed LBs
1 parent ca777d9 commit 83dec2e

File tree

5 files changed

+127
-22
lines changed

5 files changed

+127
-22
lines changed

Diff for: docs/install/iam_policy.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,8 @@
4242
"elasticloadbalancing:DescribeTags",
4343
"elasticloadbalancing:DescribeTrustStores",
4444
"elasticloadbalancing:DescribeListenerAttributes",
45-
"elasticloadbalancing:DescribeCapacityReservation"
45+
"elasticloadbalancing:DescribeCapacityReservation",
46+
"tag:GetResources"
4647
],
4748
"Resource": "*"
4849
},

Diff for: main.go

+18-3
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,14 @@ func main() {
106106
}
107107

108108
// for open-source LBC, we track the resources whose finalizers contain "k8s.aws"
109-
lbcMetricsCollector := lbcmetrics.NewCollector(metrics.Registry, mgr.GetClient(), "k8s.aws")
109+
lbcMetricsCollector := lbcmetrics.NewCollector(
110+
metrics.Registry,
111+
mgr.GetClient(),
112+
cloud.RGT(),
113+
"k8s.aws",
114+
"elbv2.k8s.aws/cluster",
115+
controllerCFG.ClusterName,
116+
)
110117

111118
clientSet, err := kubernetes.NewForConfig(mgr.GetConfig())
112119
if err != nil {
@@ -166,17 +173,25 @@ func main() {
166173

167174
// update the managed resource every 30s
168175
go func() {
169-
ticker := time.NewTicker(30 * time.Second) // Call every 30 seconds
176+
ticker := time.NewTicker(30 * time.Second)
170177
defer ticker.Stop()
171178

172179
for {
173180
select {
174181
case <-ticker.C:
175-
// Update metrics
182+
// Update managed resource metrics
176183
err := lbcMetricsCollector.UpdateManagedK8sResourceMetrics(context.Background())
177184
if err != nil {
178185
setupLog.Error(err, "failed to update managed Kubernetes resource metrics")
179186
}
187+
err = lbcMetricsCollector.UpdateManagedALBMetrics(context.Background())
188+
if err != nil {
189+
setupLog.Error(err, "failed to update managed ALB metrics")
190+
}
191+
err = lbcMetricsCollector.UpdateManagedNLBMetrics(context.Background())
192+
if err != nil {
193+
setupLog.Error(err, "failed to update managed NLB metrics")
194+
}
180195
}
181196
}
182197
}()

Diff for: pkg/metrics/lbc/collector.go

+69-6
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,48 @@ package lbc
22

33
import (
44
"context"
5+
awssdk "github.com/aws/aws-sdk-go-v2/aws"
6+
rgtsdk "github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi"
7+
rgttypes "github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi/types"
58
"github.com/prometheus/client_golang/prometheus"
69
corev1 "k8s.io/api/core/v1"
710
networkingv1 "k8s.io/api/networking/v1"
811
elbv2api "sigs.k8s.io/aws-load-balancer-controller/apis/elbv2/v1beta1"
12+
"sigs.k8s.io/aws-load-balancer-controller/pkg/aws/services"
913
"strings"
1014

1115
"sigs.k8s.io/controller-runtime/pkg/client"
1216
"time"
1317
)
1418

15-
const networkLoadBalancerStr = "nlb"
19+
const (
20+
networkLoadBalancerStr = "nlb"
21+
resourceTypeALB = "elasticloadbalancing:loadbalancer/app"
22+
resourceTypeNLB = "elasticloadbalancing:loadbalancer/net"
23+
)
1624

1725
type MetricCollector interface {
1826
// ObservePodReadinessGateReady this metric is useful to determine how fast pods are becoming ready in the load balancer.
1927
// Due to some architectural constraints, we can only emit this metric for pods that are using readiness gates.
2028
ObservePodReadinessGateReady(namespace string, tgbName string, duration time.Duration)
2129

22-
// UpdateMetrics fetches and updates all relevant metrics.
30+
// UpdateManagedK8sResourceMetrics fetches and updates managed k8s resources metrics.
2331
UpdateManagedK8sResourceMetrics(ctx context.Context) error
32+
33+
// UpdateManagedALBMetrics updates managed ALB count metrics
34+
UpdateManagedALBMetrics(ctx context.Context) error
35+
36+
// UpdateManagedNLBMetrics updates managed NLB count metrics.
37+
UpdateManagedNLBMetrics(ctx context.Context) error
2438
}
2539

2640
type collector struct {
2741
instruments *instruments
2842
runtimeClient client.Client
43+
rgt services.RGT
2944
finalizerKeyWord string
45+
clusterTagKey string
46+
clusterTagVal string
3047
}
3148

3249
type noOpCollector struct{}
@@ -38,7 +55,15 @@ func (n *noOpCollector) UpdateManagedK8sResourceMetrics(_ context.Context) error
3855
return nil
3956
}
4057

41-
func NewCollector(registerer prometheus.Registerer, runtimeClient client.Client, finalizerKeyWord string) MetricCollector {
58+
func (n *noOpCollector) UpdateManagedALBMetrics(_ context.Context) error {
59+
return nil
60+
}
61+
62+
func (n *noOpCollector) UpdateManagedNLBMetrics(_ context.Context) error {
63+
return nil
64+
}
65+
66+
func NewCollector(registerer prometheus.Registerer, runtimeClient client.Client, rgt services.RGT, finalizerKeyWord string, clusterTagKey string, clusterTagVal string) MetricCollector {
4267
if registerer == nil || runtimeClient == nil {
4368
return &noOpCollector{}
4469
}
@@ -47,7 +72,10 @@ func NewCollector(registerer prometheus.Registerer, runtimeClient client.Client,
4772
return &collector{
4873
instruments: instruments,
4974
runtimeClient: runtimeClient,
75+
rgt: rgt,
5076
finalizerKeyWord: finalizerKeyWord,
77+
clusterTagKey: clusterTagKey,
78+
clusterTagVal: clusterTagVal,
5179
}
5280
}
5381

@@ -77,7 +105,7 @@ func (c *collector) UpdateManagedK8sResourceMetrics(ctx context.Context) error {
77105
}
78106
}
79107
}
80-
c.instruments.ingressCount.Set(float64(ingressCount))
108+
c.instruments.managedIngressCount.Set(float64(ingressCount))
81109

82110
// Fetch service count
83111
serviceList := &corev1.ServiceList{}
@@ -98,7 +126,7 @@ func (c *collector) UpdateManagedK8sResourceMetrics(ctx context.Context) error {
98126
serviceCount++
99127
}
100128
}
101-
c.instruments.serviceCount.Set(float64(serviceCount))
129+
c.instruments.managedServiceCount.Set(float64(serviceCount))
102130

103131
// Fetch TargetGroupBinding count
104132
tgbList := &elbv2api.TargetGroupBindingList{}
@@ -114,7 +142,42 @@ func (c *collector) UpdateManagedK8sResourceMetrics(ctx context.Context) error {
114142
}
115143
}
116144
}
117-
c.instruments.tgbCount.Set(float64(tgbCount))
145+
c.instruments.managedTGBCount.Set(float64(tgbCount))
146+
147+
return nil
148+
}
149+
150+
func (c *collector) UpdateManagedALBMetrics(ctx context.Context) error {
151+
count, err := c.getManagedAWSResourceMetrics(ctx, resourceTypeALB)
152+
if err != nil {
153+
return err
154+
}
155+
c.instruments.managedALBCount.Set(float64(count))
156+
return nil
157+
}
118158

159+
func (c *collector) UpdateManagedNLBMetrics(ctx context.Context) error {
160+
count, err := c.getManagedAWSResourceMetrics(ctx, resourceTypeNLB)
161+
if err != nil {
162+
return err
163+
}
164+
c.instruments.managedNLBCount.Set(float64(count))
119165
return nil
120166
}
167+
168+
func (c *collector) getManagedAWSResourceMetrics(ctx context.Context, resourceType string) (count int, err error) {
169+
req := &rgtsdk.GetResourcesInput{
170+
ResourceTypeFilters: []string{resourceType},
171+
TagFilters: []rgttypes.TagFilter{
172+
{
173+
Key: awssdk.String(c.clusterTagKey),
174+
Values: []string{c.clusterTagVal},
175+
},
176+
},
177+
}
178+
resources, err := c.rgt.GetResourcesAsList(ctx, req)
179+
if err != nil {
180+
return 0, err
181+
}
182+
return len(resources), nil
183+
}

Diff for: pkg/metrics/lbc/instruments.go

+26-12
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,11 @@ const (
2121

2222
type instruments struct {
2323
podReadinessFlipSeconds *prometheus.HistogramVec
24-
ingressCount prometheus.Gauge
25-
serviceCount prometheus.Gauge
26-
tgbCount prometheus.Gauge
24+
managedIngressCount prometheus.Gauge
25+
managedServiceCount prometheus.Gauge
26+
managedTGBCount prometheus.Gauge
27+
managedALBCount prometheus.Gauge
28+
managedNLBCount prometheus.Gauge
2729
}
2830

2931
// newInstruments allocates new metrics and registers them with registerer
@@ -34,28 +36,40 @@ func newInstruments(registerer prometheus.Registerer) *instruments {
3436
Help: "Latency from pod getting added to the load balancer until the readiness gate is flipped to healthy.",
3537
Buckets: []float64{10, 30, 60, 120, 180, 240, 300, 360, 420, 480, 540, 600},
3638
}, []string{labelNamespace, labelName})
37-
ingressCount := prometheus.NewGauge(prometheus.GaugeOpts{
39+
managedIngressCount := prometheus.NewGauge(prometheus.GaugeOpts{
3840
Name: "lb_controller_managed_ingress_count",
3941
Help: "Number of ingresses managed by the AWS Load Balancer Controller.",
4042
})
41-
serviceCount := prometheus.NewGauge(prometheus.GaugeOpts{
43+
managedServiceCount := prometheus.NewGauge(prometheus.GaugeOpts{
4244
Name: "lb_controller_managed_service_count",
4345
Help: "Number of service type Load Balancers (NLBs) managed by the AWS Load Balancer Controller.",
4446
})
45-
tgbCount := prometheus.NewGauge(prometheus.GaugeOpts{
47+
managedTGBCount := prometheus.NewGauge(prometheus.GaugeOpts{
4648
Name: "lb_controller_managed_targetgroupbinding_count",
4749
Help: "Number of targetgroupbindings managed by the AWS Load Balancer Controller.",
4850
})
51+
managedALBCount := prometheus.NewGauge(prometheus.GaugeOpts{
52+
Name: "lb_controller_managed_albs_total",
53+
Help: "Current number of ALBs managed by the controller",
54+
})
55+
managedNLBCount := prometheus.NewGauge(prometheus.GaugeOpts{
56+
Name: "lb_controller_managed_nlbs_total",
57+
Help: "Current number of NLBs managed by the controller",
58+
})
4959
registerer.MustRegister(
5060
podReadinessFlipSeconds,
51-
ingressCount,
52-
serviceCount,
53-
tgbCount,
61+
managedIngressCount,
62+
managedServiceCount,
63+
managedTGBCount,
64+
managedALBCount,
65+
managedNLBCount,
5466
)
5567
return &instruments{
5668
podReadinessFlipSeconds: podReadinessFlipSeconds,
57-
ingressCount: ingressCount,
58-
serviceCount: serviceCount,
59-
tgbCount: tgbCount,
69+
managedIngressCount: managedIngressCount,
70+
managedServiceCount: managedServiceCount,
71+
managedTGBCount: managedTGBCount,
72+
managedALBCount: managedALBCount,
73+
managedNLBCount: managedNLBCount,
6074
}
6175
}

Diff for: pkg/metrics/lbc/mockcollector.go

+12
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,18 @@ func (m *MockCollector) UpdateManagedK8sResourceMetrics(ctx context.Context) err
2626
return nil // No-op for the mock
2727
}
2828

29+
// UpdateManagedALBMetrics mocks updating managed ALB resource metrics.
30+
func (m *MockCollector) UpdateManagedALBMetrics(ctx context.Context) error {
31+
m.recordInvocation("UpdateManagedALBMetrics", ctx)
32+
return nil // No-op for the mock
33+
}
34+
35+
// UpdateManagedNLBMetrics mocks updating managed NLB resource metrics.
36+
func (m *MockCollector) UpdateManagedNLBMetrics(ctx context.Context) error {
37+
m.recordInvocation("UpdateManagedNLBMetrics", ctx)
38+
return nil // No-op for the mock
39+
}
40+
2941
// recordHistogram adds a histogram metric invocation.
3042
func (m *MockCollector) recordHistogram(metricName string, namespace string, name string, d time.Duration) {
3143
if _, exists := m.Invocations[metricName]; !exists {

0 commit comments

Comments
 (0)