From f3d778a0862eee92466886e67503a2424ca0ab53 Mon Sep 17 00:00:00 2001
From: Graham Herceg <gherceg@dimagi.com>
Date: Mon, 30 Sep 2024 08:23:58 -0400
Subject: [PATCH 1/2] Add metric for no matching case in dedupe

---
 corehq/apps/data_interfaces/models.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/corehq/apps/data_interfaces/models.py b/corehq/apps/data_interfaces/models.py
index 9d95eb74a6cc..e7a8a9d62222 100644
--- a/corehq/apps/data_interfaces/models.py
+++ b/corehq/apps/data_interfaces/models.py
@@ -72,6 +72,7 @@
 )
 from corehq import toggles
 from corehq.util.log import with_progress_bar
+from corehq.util.metrics import metrics_counter
 from corehq.util.metrics.load_counters import dedupe_load_counter
 from corehq.util.quickcache import quickcache
 from corehq.util.test_utils import unit_testing_only
@@ -1178,6 +1179,7 @@ def _handle_case_duplicate(self, case, rule):
                 # but disabling this to avoid further quota issues.
                 # raise ValueError(f'Unable to find current ElasticSearch data for: {case.case_id}')
                 # Ignore this result for now
+                metrics_counter('commcare.dedupe.no_matching_case', tags={'domain': case.domain})
                 return CaseRuleActionResult(num_errors=1)
             else:
                 # Normal processing can involve latency between when a case is written to the database and when

From 86b7d991f3679fccbbd6c2f6d376964155653942 Mon Sep 17 00:00:00 2001
From: Graham Herceg <gherceg@dimagi.com>
Date: Tue, 1 Oct 2024 07:11:06 -0400
Subject: [PATCH 2/2] Track dedupe load for each result

---
 corehq/apps/data_interfaces/models.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/corehq/apps/data_interfaces/models.py b/corehq/apps/data_interfaces/models.py
index e7a8a9d62222..f29c7cd6c959 100644
--- a/corehq/apps/data_interfaces/models.py
+++ b/corehq/apps/data_interfaces/models.py
@@ -72,7 +72,6 @@
 )
 from corehq import toggles
 from corehq.util.log import with_progress_bar
-from corehq.util.metrics import metrics_counter
 from corehq.util.metrics.load_counters import dedupe_load_counter
 from corehq.util.quickcache import quickcache
 from corehq.util.test_utils import unit_testing_only
@@ -1164,8 +1163,6 @@ def _handle_case_duplicate(self, case, rule):
         if is_copied_case(case):
             return CaseRuleActionResult()
 
-        dedupe_load_counter('unknown', case.domain)()
-
         if not case_matching_rule_criteria_exists_in_es(case, rule):
             ALLOWED_ES_DELAY = timedelta(hours=1)
             if datetime.utcnow() - case.server_modified_on > ALLOWED_ES_DELAY:
@@ -1179,7 +1176,7 @@ def _handle_case_duplicate(self, case, rule):
                 # but disabling this to avoid further quota issues.
                 # raise ValueError(f'Unable to find current ElasticSearch data for: {case.case_id}')
                 # Ignore this result for now
-                metrics_counter('commcare.dedupe.no_matching_case', tags={'domain': case.domain})
+                dedupe_load_counter('unknown', case.domain, {'result': 'errored'})()
                 return CaseRuleActionResult(num_errors=1)
             else:
                 # Normal processing can involve latency between when a case is written to the database and when
@@ -1191,9 +1188,12 @@ def _handle_case_duplicate(self, case, rule):
                 # inserts into ElasticSearch are asychronous, we can receive cases here that will not yet be
                 # present in ElasticSearch but will never be processed later. In the short-term, we're avoiding
                 # this by resaving the case, with the intention to use a more stable approach in the future
+                dedupe_load_counter('unknown', case.domain, {'result': 'retried'})()
                 resave_case(rule.domain, case, send_post_save_signal=False)
                 return CaseRuleActionResult(num_updates=0)
 
+        dedupe_load_counter('unknown', case.domain, {'result': 'processed'})()
+
         try:
             existing_duplicate = CaseDuplicateNew.objects.get(case_id=case.case_id, action=self)
         except CaseDuplicateNew.DoesNotExist: