Skip to content

Commit

Permalink
Added message retries and acquire timer metrics (#1841)
Browse files Browse the repository at this point in the history
* Added message retries and acquire timer metrics

* Removed unused metric property from subscription view

* CR fixes
  • Loading branch information
szczygiel-m authored Apr 8, 2024
1 parent 82996cf commit a58f62e
Show file tree
Hide file tree
Showing 32 changed files with 310 additions and 333 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ public class SubscriptionMetrics {
private MetricDecimalValue codes2xx;
private MetricDecimalValue codes4xx;
private MetricDecimalValue codes5xx;
private MetricDecimalValue retries;
private MetricLongValue lag;
private Subscription.State state;
private MetricDecimalValue rate;
Expand All @@ -30,6 +31,7 @@ public SubscriptionMetrics(@JsonProperty("delivered") long delivered,
@JsonProperty("codes2xx") MetricDecimalValue codes2xx,
@JsonProperty("codes4xx") MetricDecimalValue codes4xx,
@JsonProperty("codes5xx") MetricDecimalValue codes5xx,
@JsonProperty("retries") MetricDecimalValue retries,
@JsonProperty("Subscription") Subscription.State state,
@JsonProperty("rate") MetricDecimalValue rate,
@JsonProperty("throughput") MetricDecimalValue throughput,
Expand All @@ -42,6 +44,7 @@ public SubscriptionMetrics(@JsonProperty("delivered") long delivered,
this.codes2xx = codes2xx;
this.codes4xx = codes4xx;
this.codes5xx = codes5xx;
this.retries = retries;
this.state = state;
this.rate = rate;
this.throughput = throughput;
Expand Down Expand Up @@ -84,6 +87,10 @@ public MetricDecimalValue getCodes5xx() {
return codes5xx;
}

/**
 * Returns the retries metric held by this object.
 *
 * @return the value of the {@code retries} field, exactly as it was assigned
 */
public MetricDecimalValue getRetries() {
    return this.retries;
}

/**
 * Returns the subscription state held by this object.
 *
 * @return the value of the {@code state} field, exactly as it was assigned
 */
public Subscription.State getState() {
    return this.state;
}
Expand Down Expand Up @@ -147,6 +154,11 @@ public Builder withCodes5xx(MetricDecimalValue count) {
return this;
}

/**
 * Stores the given retries metric in the builder's {@code subscriptionMetrics} object.
 *
 * @param retries retries metric value to store
 * @return this builder, so that calls can be chained
 */
public Builder withRetries(MetricDecimalValue retries) {
subscriptionMetrics.retries = retries;
return this;
}

public Builder withRate(MetricDecimalValue rate) {
subscriptionMetrics.rate = rate;
return this;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ public class Counters {
public static final String PUBLISHED = "published." + GROUP + "." + TOPIC;
public static final String DELIVERED = "delivered." + GROUP + "." + TOPIC + "." + SUBSCRIPTION;
public static final String DISCARDED = "discarded." + GROUP + "." + TOPIC + "." + SUBSCRIPTION;
public static final String RETRIES = "retries." + GROUP + "." + TOPIC + "." + SUBSCRIPTION;
public static final String MAXRATE_RATE_HISTORY_FAILURES =
"consumers-rate.max-rate.node." + GROUP + "." + TOPIC + "." + SUBSCRIPTION + ".history.failures";
public static final String MAXRATE_FETCH_FAILURES =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ public class Meters {
public static final String DISCARDED_METER = "discarded-meter";
public static final String DISCARDED_TOPIC_METER = DISCARDED_METER + "." + GROUP + "." + TOPIC;
public static final String DISCARDED_SUBSCRIPTION_METER = DISCARDED_TOPIC_METER + "." + SUBSCRIPTION;

public static final String RETRIES_METER = "retries-meter";
public static final String RETRIES_TOPIC_METER = RETRIES_METER + "." + GROUP + "." + TOPIC;
public static final String RETRIES_SUBSCRIPTION_METER = RETRIES_TOPIC_METER + "." + SUBSCRIPTION;
public static final String DELAYED_PROCESSING = "delayed-processing";
public static final String TOPIC_DELAYED_PROCESSING = DELAYED_PROCESSING + "." + GROUP + "." + TOPIC;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,20 @@
import static pl.allegro.tech.hermes.common.metric.Counters.DISCARDED;
import static pl.allegro.tech.hermes.common.metric.Counters.MAXRATE_FETCH_FAILURES;
import static pl.allegro.tech.hermes.common.metric.Counters.MAXRATE_RATE_HISTORY_FAILURES;
import static pl.allegro.tech.hermes.common.metric.Counters.RETRIES;
import static pl.allegro.tech.hermes.common.metric.Gauges.MAX_RATE_ACTUAL_RATE_VALUE;
import static pl.allegro.tech.hermes.common.metric.Gauges.MAX_RATE_VALUE;
import static pl.allegro.tech.hermes.common.metric.Gauges.OUTPUT_RATE;
import static pl.allegro.tech.hermes.common.metric.Meters.DISCARDED_SUBSCRIPTION_METER;
import static pl.allegro.tech.hermes.common.metric.Meters.FAILED_METER_SUBSCRIPTION;
import static pl.allegro.tech.hermes.common.metric.Meters.FILTERED_METER;
import static pl.allegro.tech.hermes.common.metric.Meters.RETRIES_SUBSCRIPTION_METER;
import static pl.allegro.tech.hermes.common.metric.Meters.SUBSCRIPTION_BATCH_METER;
import static pl.allegro.tech.hermes.common.metric.Meters.SUBSCRIPTION_METER;
import static pl.allegro.tech.hermes.common.metric.Meters.SUBSCRIPTION_THROUGHPUT_BYTES;
import static pl.allegro.tech.hermes.common.metric.SubscriptionTagsFactory.subscriptionTags;
import static pl.allegro.tech.hermes.common.metric.Timers.CONSUMER_IDLE_TIME;
import static pl.allegro.tech.hermes.common.metric.Timers.RATE_LIMITER_ACQUIRE;
import static pl.allegro.tech.hermes.common.metric.Timers.SUBSCRIPTION_LATENCY;

public class MetricsFacade {
Expand Down Expand Up @@ -132,11 +135,13 @@ public void unregisterAllMetricsRelatedTo(SubscriptionName subscription) {
meterRegistry.remove(meter);
}
hermesMetrics.unregister(DISCARDED_SUBSCRIPTION_METER, subscription);
hermesMetrics.unregister(RETRIES_SUBSCRIPTION_METER, subscription);
hermesMetrics.unregister(FAILED_METER_SUBSCRIPTION, subscription);
hermesMetrics.unregister(SUBSCRIPTION_BATCH_METER, subscription);
hermesMetrics.unregister(SUBSCRIPTION_METER, subscription);
hermesMetrics.unregister(DELIVERED, subscription);
hermesMetrics.unregister(DISCARDED, subscription);
hermesMetrics.unregister(RETRIES, subscription);
hermesMetrics.unregisterInflightGauge(subscription);
hermesMetrics.unregisterInflightTimeHistogram(subscription);
hermesMetrics.unregisterConsumerErrorsTimeoutMeter(subscription);
Expand All @@ -150,6 +155,7 @@ public void unregisterAllMetricsRelatedTo(SubscriptionName subscription) {
hermesMetrics.unregister(CONSUMER_IDLE_TIME, subscription);
hermesMetrics.unregister(FILTERED_METER, subscription);
hermesMetrics.unregister(SUBSCRIPTION_LATENCY, subscription);
hermesMetrics.unregister(RATE_LIMITER_ACQUIRE, subscription);
hermesMetrics.unregister(SUBSCRIPTION_THROUGHPUT_BYTES, subscription);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,30 @@ public HermesCounter discarded(SubscriptionName subscription) {
};
}

/**
 * Creates a counter that records retries for the given subscription.
 *
 * <p>Every increment passed to the returned counter is forwarded to five metrics:
 * the {@code Meters.RETRIES_METER} meter, the topic-level and subscription-level
 * retries meters, the {@code Counters.RETRIES} counter, and the Micrometer counter
 * named by {@code SubscriptionMetricsNames.SUBSCRIPTION_RETRIES}.
 *
 * @param subscription subscription whose topic name and name select the metrics to update
 * @return counter that fans each increment out to all of the metrics listed above
 */
public HermesCounter retries(SubscriptionName subscription) {
    return count -> {
        // Meters obtained from hermesMetrics: overall, per topic, per subscription.
        hermesMetrics.meter(Meters.RETRIES_METER).mark(count);
        hermesMetrics.meter(Meters.RETRIES_TOPIC_METER, subscription.getTopicName()).mark(count);
        hermesMetrics
                .meter(Meters.RETRIES_SUBSCRIPTION_METER, subscription.getTopicName(), subscription.getName())
                .mark(count);

        // Per-subscription counter obtained from hermesMetrics.
        hermesMetrics.counter(Counters.RETRIES, subscription.getTopicName(), subscription.getName()).inc(count);

        // Micrometer counter for the same subscription.
        micrometerCounter(SubscriptionMetricsNames.SUBSCRIPTION_RETRIES, subscription).increment(count);
    };
}

/**
 * Builds the latency timer for the given subscription.
 *
 * <p>The result combines two timers via {@code HermesTimer.from}: one obtained from
 * {@code meterRegistry} under {@code SubscriptionMetricsNames.SUBSCRIPTION_LATENCY} with the
 * tags produced by {@code subscriptionTags(subscription)}, and one obtained from
 * {@code hermesMetrics} under {@code Timers.SUBSCRIPTION_LATENCY} for the subscription's
 * topic name and name.
 *
 * @param subscription subscription the timer is created for
 * @return timer wrapping both underlying timers
 */
public HermesTimer latency(SubscriptionName subscription) {
return HermesTimer.from(
meterRegistry.timer(SubscriptionMetricsNames.SUBSCRIPTION_LATENCY, subscriptionTags(subscription)),
hermesMetrics.timer(Timers.SUBSCRIPTION_LATENCY, subscription.getTopicName(), subscription.getName())
);
}

/**
 * Builds the rate-limiter-acquire timer for the given subscription.
 *
 * <p>The result combines two timers via {@code HermesTimer.from}: one obtained from
 * {@code meterRegistry} under {@code SubscriptionMetricsNames.SUBSCRIPTION_RATE_LIMITER_ACQUIRE}
 * with the tags produced by {@code subscriptionTags(subscription)}, and one obtained from
 * {@code hermesMetrics} under {@code Timers.RATE_LIMITER_ACQUIRE} for the subscription's
 * topic name and name.
 *
 * <p>NOTE(review): presumably measures the time spent acquiring a permit from the
 * subscription's rate limiter; confirm against the call site.
 *
 * @param subscription subscription the timer is created for
 * @return timer wrapping both underlying timers
 */
public HermesTimer rateLimiterAcquire(SubscriptionName subscription) {
return HermesTimer.from(
meterRegistry.timer(SubscriptionMetricsNames.SUBSCRIPTION_RATE_LIMITER_ACQUIRE, subscriptionTags(subscription)),
hermesMetrics.timer(Timers.RATE_LIMITER_ACQUIRE, subscription.getTopicName(), subscription.getName())
);
}

public <T> void registerInflightGauge(SubscriptionName subscription, T obj, ToDoubleFunction<T> f) {
hermesMetrics.registerInflightGauge(subscription, () -> (int) f.applyAsDouble(obj));
meterRegistry.gauge(SubscriptionMetricsNames.SUBSCRIPTION_INFLIGHT, subscriptionTags(subscription), obj, f);
Expand Down Expand Up @@ -133,7 +150,9 @@ public static class SubscriptionMetricsNames {
public static final String SUBSCRIPTION_THROUGHPUT = "subscription.throughput-bytes";
public static final String SUBSCRIPTION_BATCHES = "subscription.batches";
public static final String SUBSCRIPTION_DISCARDED = "subscription.discarded";
public static final String SUBSCRIPTION_RETRIES = "subscription.retries";
public static final String SUBSCRIPTION_LATENCY = "subscription.latency";
public static final String SUBSCRIPTION_RATE_LIMITER_ACQUIRE = "subscription.rate-limiter-acquire";
public static final String SUBSCRIPTION_INFLIGHT = "subscription.inflight";
public static final String SUBSCRIPTION_IDLE_DURATION = "subscription.idle-duration";
public static final String SUBSCRIPTION_FILTERED_OUT = "subscription.filtered-out";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ public class Timers {

public static final String LATENCY = "latency";
public static final String SUBSCRIPTION_LATENCY = LATENCY + "." + GROUP + "." + TOPIC + "." + SUBSCRIPTION;
public static final String RATE_LIMITER_ACQUIRE = "rate-limiter-acquire" + "." + GROUP + "." + TOPIC + "." + SUBSCRIPTION;

public static final String SCHEMA = "schema." + SCHEMA_REPO_TYPE;
public static final String GET_SCHEMA_LATENCY = SCHEMA + ".get-schema";
Expand Down
14 changes: 8 additions & 6 deletions hermes-console/json-server/db.json
Original file line number Diff line number Diff line change
Expand Up @@ -501,12 +501,13 @@
"delivered": 39099,
"discarded": 2137086,
"volume": 1288032256,
"timeouts": "0.0",
"otherErrors": "0.0",
"codes2xx": "0",
"codes4xx": "0.0",
"codes5xx": "0.01",
"rate": "0",
"timeouts": "12.3028857479387",
"otherErrors": "16.3028857479387",
"codes2xx": "1236.3028857479387",
"codes4xx": "123.3028857479387",
"codes5xx": "6.3028857479387",
"retries": "24.3028857479387",
"rate": "1319.6064543974392",
"throughput": "8.31",
"batchRate": "0.0",
"lag": "9055513"
Expand All @@ -521,6 +522,7 @@
"codes2xx": "0",
"codes4xx": "0.0",
"codes5xx": "0.01",
"retries": "0.01",
"rate": "0",
"throughput": "8.36",
"batchRate": "0.0",
Expand Down
1 change: 1 addition & 0 deletions hermes-console/src/api/subscription-metrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export interface SubscriptionMetrics {
codes2xx: string;
codes4xx: string;
codes5xx: string;
retries: string;
lag: string;
rate: string;
throughput: string;
Expand Down
9 changes: 5 additions & 4 deletions hermes-console/src/dummy/subscription.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,11 @@ export const dummySubscriptionMetrics: SubscriptionMetrics = {
discarded: 2137086,
volume: 1288032256,
timeouts: '0.0',
otherErrors: '0.0',
codes2xx: '0',
codes4xx: '0.0',
codes5xx: '0.01',
otherErrors: '1.4',
codes2xx: '123',
codes4xx: '2.0',
codes5xx: '1.32',
retries: '2.03',
rate: '0',
throughput: '8.31',
batchRate: '0.0',
Expand Down
13 changes: 8 additions & 5 deletions hermes-console/src/i18n/en-US/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -472,19 +472,22 @@ const en_US = {
subscriberLatency: 'Subscriber latency',
delivered: 'Delivered',
discarded: 'Discarded',
timeouts: 'Timeouts',
otherErrors: 'Other errors',
codes2xx: 'Codes 2xx',
codes4xx: 'Codes 4xx',
codes5xx: 'Codes 5xx',
retries: 'Retries',
lag: 'Lag',
outputRate: 'Output rate',
tooltips: {
subscriberLatency:
'Latency of acknowledging messages by subscribing service as ' +
'measured by Hermes.',
lag:
'Total number of events waiting to be delivered. Each subscription ' +
'has a "natural" lag, which depends on production rate.',
outputRate:
'Maximum sending rate calculated based on receiving service ' +
'performance. For well-performing service output rate should be ' +
'equal to rate limit.',
retries:
'Total number of message sending retries. Retrying messages significantly reduces the rate on subscriptions.',
},
},
propertiesCard: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,4 +99,79 @@ describe('MetricsCard', () => {
)!;
expect(within(deliveryRateRow).getByText('9,055,513')).toBeVisible();
});

it('should render subscription otherErrors', () => {
  // given: the metrics composable is stubbed for this render
  vi.mocked(useMetrics).mockReturnValueOnce(useMetricsStub);

  // when
  const { getByText } = render(MetricsCard, {
    props,
    testPinia: createTestingPiniaWithState(),
  });

  // then: the row labelled "otherErrors" shows the value with two decimals
  const otherErrorsLabel = getByText('subscription.metricsCard.otherErrors');
  const otherErrorsRow = otherErrorsLabel.closest('tr')!;
  expect(within(otherErrorsRow).getByText('1.40')).toBeVisible();
});

it('should render subscription codes2xx', () => {
  // given: the metrics composable is stubbed for this render
  vi.mocked(useMetrics).mockReturnValueOnce(useMetricsStub);

  // when
  const { getByText } = render(MetricsCard, {
    props,
    testPinia: createTestingPiniaWithState(),
  });

  // then: the row labelled "codes2xx" shows the value with two decimals
  const codes2xxLabel = getByText('subscription.metricsCard.codes2xx');
  const codes2xxRow = codes2xxLabel.closest('tr')!;
  expect(within(codes2xxRow).getByText('123.00')).toBeVisible();
});

it('should render subscription codes4xx', () => {
  // given: the metrics composable is stubbed for this render
  vi.mocked(useMetrics).mockReturnValueOnce(useMetricsStub);

  // when
  const { getByText } = render(MetricsCard, {
    props,
    testPinia: createTestingPiniaWithState(),
  });

  // then: the row labelled "codes4xx" shows the value with two decimals
  const codes4xxLabel = getByText('subscription.metricsCard.codes4xx');
  const codes4xxRow = codes4xxLabel.closest('tr')!;
  expect(within(codes4xxRow).getByText('2.00')).toBeVisible();
});

it('should render subscription codes5xx', () => {
  // given: the metrics composable is stubbed for this render
  vi.mocked(useMetrics).mockReturnValueOnce(useMetricsStub);

  // when
  const { getByText } = render(MetricsCard, {
    props,
    testPinia: createTestingPiniaWithState(),
  });

  // then: the row labelled "codes5xx" shows the value with two decimals
  const codes5xxLabel = getByText('subscription.metricsCard.codes5xx');
  const codes5xxRow = codes5xxLabel.closest('tr')!;
  expect(within(codes5xxRow).getByText('1.32')).toBeVisible();
});

it('should render subscription retries', () => {
  // given: the metrics composable is stubbed for this render
  vi.mocked(useMetrics).mockReturnValueOnce(useMetricsStub);

  // when
  const { getByText } = render(MetricsCard, {
    props,
    testPinia: createTestingPiniaWithState(),
  });

  // then: the row labelled "retries" shows the value with two decimals
  const retriesLabel = getByText('subscription.metricsCard.retries');
  const retriesRow = retriesLabel.closest('tr')!;
  expect(within(retriesRow).getByText('2.03')).toBeVisible();
});
});
35 changes: 25 additions & 10 deletions hermes-console/src/views/subscription/metrics-card/MetricsCard.vue
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,6 @@
:name="$t('subscription.metricsCard.deliveryRate')"
:value="formatNumber(props.subscriptionMetrics.rate, 2)"
/>
<key-value-card-item
:name="$t('subscription.metricsCard.subscriberLatency')"
value="?"
:tooltip="$t('subscription.metricsCard.tooltips.subscriberLatency')"
/>
<key-value-card-item
:name="$t('subscription.metricsCard.delivered')"
:value="formatNumber(props.subscriptionMetrics.delivered)"
Expand All @@ -42,16 +37,36 @@
:name="$t('subscription.metricsCard.discarded')"
:value="formatNumber(props.subscriptionMetrics.discarded)"
/>
<key-value-card-item
:name="$t('subscription.metricsCard.timeouts')"
:value="formatNumber(props.subscriptionMetrics.timeouts, 2)"
/>
<key-value-card-item
:name="$t('subscription.metricsCard.otherErrors')"
:value="formatNumber(props.subscriptionMetrics.otherErrors, 2)"
/>
<key-value-card-item
:name="$t('subscription.metricsCard.codes2xx')"
:value="formatNumber(props.subscriptionMetrics.codes2xx, 2)"
/>
<key-value-card-item
:name="$t('subscription.metricsCard.codes4xx')"
:value="formatNumber(props.subscriptionMetrics.codes4xx, 2)"
/>
<key-value-card-item
:name="$t('subscription.metricsCard.codes5xx')"
:value="formatNumber(props.subscriptionMetrics.codes5xx, 2)"
/>
<key-value-card-item
:name="$t('subscription.metricsCard.retries')"
:value="formatNumber(props.subscriptionMetrics.retries, 2)"
:tooltip="$t('subscription.metricsCard.tooltips.retries')"
/>
<key-value-card-item
:name="$t('subscription.metricsCard.lag')"
:value="formatNumber(props.subscriptionMetrics.lag)"
:tooltip="$t('subscription.metricsCard.tooltips.lag')"
/>
<key-value-card-item
:name="$t('subscription.metricsCard.outputRate')"
value="?"
:tooltip="$t('subscription.metricsCard.tooltips.outputRate')"
/>
</key-value-card>
</template>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ describe('MetricsList', () => {
{ property: 'topicView.metrics.rate', value: '3.40' },
{ property: 'topicView.metrics.deliveryRate', value: '3.50' },
{ property: 'topicView.metrics.published', value: 100 },
{ property: 'topicView.metrics.latency', value: '?' },
{ property: 'topicView.metrics.messageSize', value: '?' },
])('should render all metrics properties %s', ({ property, value }) => {
// given
vi.mocked(useMetrics).mockReturnValueOnce(useMetricsStub);
Expand Down
5 changes: 0 additions & 5 deletions hermes-console/src/views/topic/metrics-list/MetricsList.vue
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,6 @@
:name="$t('topicView.metrics.published')"
:value="formatNumber(props.metrics.published)"
/>
<key-value-card-item :name="$t('topicView.metrics.latency')" value="?" />
<key-value-card-item
:name="$t('topicView.metrics.messageSize')"
value="?"
/>
</key-value-card>
</template>

Expand Down
Loading

0 comments on commit a58f62e

Please sign in to comment.