Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[vSphere] add triggered alarm to existing metricsets. #40714

Merged
merged 15 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff]
- Add new metrics for the vSphere Virtualmachine metricset. {pull}40485[40485]
- Add support for snapshot in vSphere virtualmachine metricset {pull}40683[40683]
- Update fields to use mapstr in vSphere virtualmachine metricset {pull}40707[40707]
- Add metrics related to triggered alarms in all the vSphere metricsets. {pull}40714[40714]
- Add support for period based intervalID in vSphere host and datastore metricsets {pull}40678[40678]

*Metricbeat*
Expand Down
70 changes: 70 additions & 0 deletions metricbeat/docs/fields.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -66955,6 +66955,16 @@ type: keyword

--

*`vsphere.cluster.triggerd_alarms.*`*::
+
--
List of all the triggered alarms.


type: object

--

[float]
=== datastore

Expand Down Expand Up @@ -67092,6 +67102,16 @@ type: keyword

--

*`vsphere.datastore.triggerd_alarms.*`*::
+
--
List of all the triggered alarms.


type: object

--

*`vsphere.datastore.vm.count`*::
+
--
Expand Down Expand Up @@ -67195,6 +67215,16 @@ type: long

--

*`vsphere.datastorecluster.triggerd_alarms.*`*::
+
--
List of all the triggered alarms.


type: object

--

[float]
=== host

Expand Down Expand Up @@ -67552,6 +67582,16 @@ type: keyword

--

*`vsphere.host.triggerd_alarms.*`*::
+
--
List of all the triggered alarms.


type: object

--

*`vsphere.host.uptime`*::
+
--
Expand Down Expand Up @@ -67681,6 +67721,16 @@ type: long

--

*`vsphere.network.triggerd_alarms.*`*::
+
--
List of all the triggered alarms.


type: object

--

[float]
=== resourcepool

Expand Down Expand Up @@ -67907,6 +67957,16 @@ type: keyword

--

*`vsphere.resourcepool.triggerd_alarms.*`*::
+
--
List of all the triggered alarms.


type: object

--

[float]
=== virtualmachine

Expand Down Expand Up @@ -68134,6 +68194,16 @@ type: long

--

*`vsphere.virtualmachine.triggerd_alarms.*`*::
+
--
List of all the triggered alarms.


type: object

--

[[exported-fields-windows]]
== Windows fields

Expand Down
7 changes: 6 additions & 1 deletion metricbeat/module/vsphere/cluster/_meta/fields.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,9 @@
- name: names
type: keyword
description: >
List of all the Network names associated with the cluster.
List of all the Network names associated with the cluster.
- name: triggerd_alarms.*
type: object
object_type: keyword
description: >
List of all the triggered alarms.
53 changes: 47 additions & 6 deletions metricbeat/module/vsphere/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"context"
"fmt"
"strings"
"time"

"github.com/elastic/beats/v7/metricbeat/mb"
"github.com/elastic/beats/v7/metricbeat/module/vsphere"
Expand All @@ -29,6 +30,7 @@ import (
"github.com/vmware/govmomi/property"
"github.com/vmware/govmomi/view"
"github.com/vmware/govmomi/vim25/mo"
"github.com/vmware/govmomi/vim25/types"
)

// init registers the MetricSet with the central registry as soon as the program
Expand All @@ -53,6 +55,19 @@ type assetNames struct {
outputHostNames []string
}

// triggerdAlarm is the per-alarm record attached to a cluster event. One value
// is built by getTriggerdAlarm for each entry of a cluster's triggered alarm
// state.
type triggerdAlarm struct {
// Name is copied from the Info.Name of the referenced Alarm object.
Name string `json:"name"`
// ID is copied from the Key of the alarm state entry.
ID string `json:"id"`
// Status is the string form of the alarm state's OverallStatus.
Status string `json:"status"`
// Time is copied from the Time of the alarm state entry.
Time time.Time `json:"time"`
// Description is copied from the Info.Description of the referenced Alarm object.
Description string `json:"description"`
}

// metricData bundles the per-cluster values that Fetch gathers and hands to
// mapEvent when building the event.
type metricData struct {
// assetNames holds the host, datastore and network names of the cluster.
assetNames assetNames
// triggerdAlarms holds the alarms returned by getTriggerdAlarm; mapEvent only
// adds them to the event when the slice is non-empty.
triggerdAlarms []triggerdAlarm
}

// New creates a new instance of the MetricSet.
func New(base mb.BaseMetricSet) (mb.MetricSet, error) {
ms, err := vsphere.NewMetricSet(base)
Expand Down Expand Up @@ -97,7 +112,7 @@ func (m *ClusterMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) er

// Retrieve summary property for all Clusters
var clt []mo.ClusterComputeResource
err = v.Retrieve(ctx, []string{"ClusterComputeResource"}, []string{"name", "host", "network", "datastore", "configuration"}, &clt)
err = v.Retrieve(ctx, []string{"ClusterComputeResource"}, []string{"name", "host", "network", "datastore", "configuration", "triggeredAlarmState"}, &clt)
if err != nil {
return fmt.Errorf("error in Retrieve: %w", err)
}
Expand All @@ -120,24 +135,29 @@ func (m *ClusterMetricSet) Fetch(ctx context.Context, reporter mb.ReporterV2) er
m.Logger().Warn("Metric das_config.enabled not found")
}

triggerdAlarm, err := getTriggerdAlarm(ctx, pc, clt[i].TriggeredAlarmState)
if err != nil {
m.Logger().Errorf("Failed to retrieve alerts from cluster %s: %w", clt[i].Name, err)
}

reporter.Event(mb.Event{
MetricSetFields: m.mapEvent(clt[i], assetNames),
MetricSetFields: m.mapEvent(clt[i], &metricData{assetNames: assetNames, triggerdAlarms: triggerdAlarm}),
})
}
}
return nil

}

func getAssetNames(ctx context.Context, pc *property.Collector, cl *mo.ClusterComputeResource) (*assetNames, error) {
func getAssetNames(ctx context.Context, pc *property.Collector, cl *mo.ClusterComputeResource) (assetNames, error) {
referenceList := append(cl.Datastore, cl.Host...)

outputDatastoreNames := make([]string, 0, len(cl.Datastore))
outputHostNames := make([]string, 0, len(cl.Host))
if len(referenceList) > 0 {
var objects []mo.ManagedEntity
if err := pc.Retrieve(ctx, referenceList, []string{"name"}, &objects); err != nil {
return nil, fmt.Errorf("failed to retrieve managed entities: %w", err)
return assetNames{}, fmt.Errorf("failed to retrieve managed entities: %w", err)
}

for _, ob := range objects {
Expand All @@ -157,7 +177,7 @@ func getAssetNames(ctx context.Context, pc *property.Collector, cl *mo.ClusterCo
if len(cl.Network) > 0 {
var netObjects []mo.Network
if err := pc.Retrieve(ctx, cl.Network, []string{"name"}, &netObjects); err != nil {
return nil, fmt.Errorf("failed to retrieve network objects: %w", err)
return assetNames{}, fmt.Errorf("failed to retrieve network objects: %w", err)
}

for _, ob := range netObjects {
Expand All @@ -166,9 +186,30 @@ func getAssetNames(ctx context.Context, pc *property.Collector, cl *mo.ClusterCo
}
}

return &assetNames{
return assetNames{
outputNetworkNames: outputNetworkNames,
outputDatastoreNames: outputDatastoreNames,
outputHostNames: outputHostNames,
}, nil
}

func getTriggerdAlarm(ctx context.Context, pc *property.Collector, triggeredAlarmState []types.AlarmState) ([]triggerdAlarm, error) {
var triggeredAlarms []triggerdAlarm
for _, alarmState := range triggeredAlarmState {
var triggeredAlarm triggerdAlarm
var alarm mo.Alarm
err := pc.RetrieveOne(ctx, alarmState.Alarm, nil, &alarm)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is the alarm state being compared to red "critical alarms ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ishleenk17 As we discussed yesterday, we will fetch all the triggered alarms and ingest them, filtering will be done in pipeline of kibana.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we able to access triggered_alarms.state in the triggered alarms object, if user wants to get only red alarms there?
Can you share here the processor
We will be able to do it in ingest pipeline in Integrations.
Are we mentioning somewhere which processors can they use to achieve the same result in beats ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes i have tried to access triggered_alarm.state and fetch red alarms.

In beats also Processors are supported which can be used to fetch red alarms. user can use something like script processor.

if err != nil {
return nil, err
}
triggeredAlarm.Name = alarm.Info.Name
triggeredAlarm.Description = alarm.Info.Description
triggeredAlarm.ID = alarmState.Key
triggeredAlarm.Status = string(alarmState.OverallStatus)
triggeredAlarm.Time = alarmState.Time

triggeredAlarms = append(triggeredAlarms, triggeredAlarm)
}

return triggeredAlarms, nil
}
18 changes: 11 additions & 7 deletions metricbeat/module/vsphere/cluster/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,27 @@ import (
"github.com/elastic/elastic-agent-libs/mapstr"
)

func (m *ClusterMetricSet) mapEvent(cl mo.ClusterComputeResource, data *assetNames) mapstr.M {
func (m *ClusterMetricSet) mapEvent(cl mo.ClusterComputeResource, data *metricData) mapstr.M {
event := mapstr.M{
"host": mapstr.M{
"count": len(data.outputHostNames),
"names": data.outputHostNames,
"count": len(data.assetNames.outputHostNames),
"names": data.assetNames.outputHostNames,
},
"datastore": mapstr.M{
"count": len(data.outputDatastoreNames),
"names": data.outputDatastoreNames,
"count": len(data.assetNames.outputDatastoreNames),
"names": data.assetNames.outputDatastoreNames,
},
"network": mapstr.M{
"count": len(data.outputNetworkNames),
"names": data.outputNetworkNames,
"count": len(data.assetNames.outputNetworkNames),
"names": data.assetNames.outputNetworkNames,
},
"name": cl.Name,
}

if len(data.triggerdAlarms) > 0 {
event.Put("triggerd_alarms", data.triggerdAlarms)
}

if cl.Configuration.DasConfig.Enabled != nil {
event.Put("das_config.enabled", *cl.Configuration.DasConfig.Enabled)
}
Expand Down
6 changes: 5 additions & 1 deletion metricbeat/module/vsphere/cluster/data_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,11 @@ func TestEventMapping(t *testing.T) {
outputNetworkNames: []string{"Network_0"},
}

outputEvent := m.mapEvent(ClusterTest, &assetNames)
var metricDataTest = metricData{
assetNames: assetNames,
}

outputEvent := m.mapEvent(ClusterTest, &metricDataTest)
testEvent := mapstr.M{
"das_config": mapstr.M{
"enabled": false,
Expand Down
5 changes: 5 additions & 0 deletions metricbeat/module/vsphere/datastore/_meta/fields.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@
type: keyword
description: >
Status of the datastore.
- name: triggerd_alarms.*
type: object
object_type: keyword
description: >
List of all the triggered alarms.
- name: vm.count
type: long
description: >
Expand Down
4 changes: 4 additions & 0 deletions metricbeat/module/vsphere/datastore/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ func (m *DataStoreMetricSet) mapEvent(ds mo.Datastore, data *metricData) mapstr.
},
}

if len(data.triggerdAlarms) > 0 {
event.Put("triggerd_alarms", data.triggerdAlarms)
}

if ds.Summary.Capacity > 0 {
usedSpacePercent := float64(ds.Summary.Capacity-ds.Summary.FreeSpace) / float64(ds.Summary.Capacity)
event.Put("capacity.used.pct", usedSpacePercent)
Expand Down
Loading
Loading