summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTomer Tayar <ttayar@habana.ai>2024-03-12 16:48:42 +0200
committerOfir Bitton <obitton@habana.ai>2024-06-23 09:53:04 +0300
commit3d613b0cb5ef349ce93ae7e28f74ef8a75dc10ac (patch)
tree0b132a14bbba97e66ea94acd4579801603e889a2
parent340dd775d7fde09e5784fe411cd87d5b398785ae (diff)
accel/habanalabs: add a common handler for clock change events
As the new dynamic EQ includes clock change events which are common and not ASIC-specific, add a common handler for these events. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Ofir Bitton <obitton@habana.ai> Signed-off-by: Ofir Bitton <obitton@habana.ai>
-rw-r--r--drivers/accel/habanalabs/common/device.c46
-rw-r--r--drivers/accel/habanalabs/common/habanalabs.h1
2 files changed, 47 insertions, 0 deletions
diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c
index fd117489a05a..31daa9184537 100644
--- a/drivers/accel/habanalabs/common/device.c
+++ b/drivers/accel/habanalabs/common/device.c
@@ -2860,3 +2860,49 @@ void hl_eq_heartbeat_event_handle(struct hl_device *hdev)
hdev->heartbeat_debug_info.heartbeat_event_counter++;
hdev->eq_heartbeat_received = true;
}
+
+void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *event_mask)
+{
+ struct hl_clk_throttle *clk_throttle = &hdev->clk_throttling;
+ ktime_t zero_time = ktime_set(0, 0);
+
+ mutex_lock(&clk_throttle->lock);
+
+ switch (event_type) {
+ case EQ_EVENT_POWER_EVT_START:
+ clk_throttle->current_reason |= HL_CLK_THROTTLE_POWER;
+ clk_throttle->aggregated_reason |= HL_CLK_THROTTLE_POWER;
+ clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
+ clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
+ dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
+ break;
+
+ case EQ_EVENT_POWER_EVT_END:
+ clk_throttle->current_reason &= ~HL_CLK_THROTTLE_POWER;
+ clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
+ dev_dbg_ratelimited(hdev->dev, "Power envelop is safe, back to optimal clock\n");
+ break;
+
+ case EQ_EVENT_THERMAL_EVT_START:
+ clk_throttle->current_reason |= HL_CLK_THROTTLE_THERMAL;
+ clk_throttle->aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
+ clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
+ clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
+ *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
+ dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
+ break;
+
+ case EQ_EVENT_THERMAL_EVT_END:
+ clk_throttle->current_reason &= ~HL_CLK_THROTTLE_THERMAL;
+ clk_throttle->timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
+ *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
+ dev_info_ratelimited(hdev->dev, "Thermal envelop is safe, back to optimal clock\n");
+ break;
+
+ default:
+ dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
+ break;
+ }
+
+ mutex_unlock(&clk_throttle->lock);
+}
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 0d16b5310add..3ea1b131cd42 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -4063,6 +4063,7 @@ void hl_enable_err_info_capture(struct hl_error_info *captured_err_info);
void hl_init_cpu_for_irq(struct hl_device *hdev);
void hl_set_irq_affinity(struct hl_device *hdev, int irq);
void hl_eq_heartbeat_event_handle(struct hl_device *hdev);
+void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *event_mask);
#ifdef CONFIG_DEBUG_FS