/**************************************************************************** * * Copyright (c) 2019 Samsung Electronics Co., Ltd. All rights reserved * ****************************************************************************/ #include #include #include #include #include #include #include #include #include "scsc_mx_impl.h" #include "miframman.h" #include "mifmboxman.h" #include "mxman.h" #include "srvman.h" #include "mxmgmt_transport.h" #include "mxlog.h" #include "mxlogger.h" #include "fw_panic_record.h" #include "panicmon.h" #include "mxproc.h" #include "mxsyserr.h" #include "scsc/scsc_log_collector.h" #include #include #include /* If limits below are exceeded, a service level reset will be raised to level 7 */ #define SYSERR_RESET_HISTORY_SIZE (4) /* Minimum time between system error service resets (ms) */ #define SYSERR_RESET_MIN_INTERVAL (300000) /* No more then SYSERR_RESET_HISTORY_SIZE system error service resets in this period (ms)*/ #define SYSERR_RESET_MONITOR_PERIOD (3600000) /* Time stamps of last service resets in jiffies */ static unsigned long syserr_reset_history[SYSERR_RESET_HISTORY_SIZE] = {0}; static int syserr_reset_history_index; static uint syserr_reset_min_interval = SYSERR_RESET_MIN_INTERVAL; module_param(syserr_reset_min_interval, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(syserr_reset_min_interval, "Minimum time between system error service resets (ms)"); static uint syserr_reset_monitor_period = SYSERR_RESET_MONITOR_PERIOD; module_param(syserr_reset_monitor_period, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(syserr_reset_monitor_period, "No more then 4 system error service resets in this period (ms)"); void mx_syserr_init(void) { SCSC_TAG_INFO(MXMAN, "MM_SYSERR_INIT: syserr_reset_min_interval %lu syserr_reset_monitor_period %lu\n", syserr_reset_min_interval, syserr_reset_monitor_period); } void mx_syserr_handler(struct mxman *mxman, const void *message) { const struct mx_syserr_msg *msg = (const struct mx_syserr_msg *)message; struct srvman *srvman; struct mx_syserr_decode decode; srvman = scsc_mx_get_srvman(mxman->mx); SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND len: %u, ts: 0x%08X, tf: 0x%08X, str: 0x%x, code: 0x%08x, p0: 0x%x, p1: 0x%x\n", msg->syserr.length, msg->syserr.slow_clock, msg->syserr.fast_clock, msg->syserr.string_index, msg->syserr.syserr_code, msg->syserr.param[0], msg->syserr.param[1]); decode.subsys = (u8) ((msg->syserr.syserr_code >> SYSERR_SUB_SYSTEM_POSN) & SYSERR_SUB_SYSTEM_MASK); decode.level = (u8) ((msg->syserr.syserr_code >> SYSERR_LEVEL_POSN) & SYSERR_LEVEL_MASK); decode.type = (u8) ((msg->syserr.syserr_code >> SYSERR_TYPE_POSN) & SYSERR_TYPE_MASK); decode.subcode = (u16) ((msg->syserr.syserr_code >> SYSERR_SUB_CODE_POSN) & SYSERR_SUB_CODE_MASK); SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND Subsys %d, Level %d, Type %d, Subcode 0x%04x\n", decode.subsys, decode.level, decode.type, decode.subcode); /* Level 1 just gets logged without bothering anyone else */ if (decode.level == MX_SYSERR_LEVEL_1) { SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x log only\n", msg->syserr.syserr_code); return; } /* Ignore if panic reset in progress */ if ((srvman->error) || (mxman->mxman_state == MXMAN_STATE_FAILED)) { SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x ignored (reset in progess)\n", msg->syserr.syserr_code); return; } /* Ignore any system errors for the same sub-system if recovery is in progress */ if ((mxman->syserr_recovery_in_progress) && (mxman->last_syserr.subsys == decode.subsys)) { SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x ignored (recovery in progess)\n", msg->syserr.syserr_code); return; } /* Let affected sevices escalate if needed - this also checks if only one sub-system is running * and handles race conditions with shutting down service */ decode.level = srvman_notify_sub_system(srvman, &decode); if (decode.level >= MX_SYSERR_LEVEL_5) { unsigned long now = jiffies; int i; /* We use 0 as a NULL timestamp so avoid this */ now = (now) ? now : 1; if ((decode.level >= MX_SYSERR_LEVEL_7) || (mxman->syserr_recovery_in_progress)) { /* If full reset has been requested or a service restart is needed and one is * already in progress, trigger a full reset */ SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x triggered full reset\n", msg->syserr.syserr_code); mxman_fail(mxman, SCSC_PANIC_CODE_HOST << 15, __func__); return; } /* last_syserr_recovery_time is always zero-ed before we restart the chip */ if (mxman->last_syserr_recovery_time) { /* Have we had a too recent system error service reset * Chance of false positive here is low enough to be acceptable */ if ((syserr_reset_min_interval) && (time_in_range(now, mxman->last_syserr_recovery_time, mxman->last_syserr_recovery_time + msecs_to_jiffies(syserr_reset_min_interval)))) { SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x triggered full reset (less than %dms after last)\n", msg->syserr.syserr_code, syserr_reset_min_interval); mxman_fail(mxman, SCSC_PANIC_CODE_HOST << 15, __func__); return; } else if (syserr_reset_monitor_period) { /* Have we had too many system error service resets in one period? */ /* This will be the case if all our stored history was in this period */ bool out_of_danger_period_found = false; for (i = 0; (i < SYSERR_RESET_HISTORY_SIZE) && (!out_of_danger_period_found); i++) out_of_danger_period_found = ((!syserr_reset_history[i]) || (!time_in_range(now, syserr_reset_history[i], syserr_reset_history[i] + msecs_to_jiffies(syserr_reset_monitor_period)))); if (!out_of_danger_period_found) { SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x triggered full reset (too many within %dms)\n", msg->syserr.syserr_code, syserr_reset_monitor_period); mxman_fail(mxman, SCSC_PANIC_CODE_HOST << 15, __func__); return; } } } else /* First syserr service reset since chip was (re)started - zap history */ for (i = 0; i < SYSERR_RESET_HISTORY_SIZE; i++) syserr_reset_history[i] = 0; /* Otherwise trigger recovery of the affected subservices */ SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x triggered service recovery\n", msg->syserr.syserr_code); syserr_reset_history[syserr_reset_history_index++ % SYSERR_RESET_HISTORY_SIZE] = now; mxman->last_syserr_recovery_time = now; mxman_syserr(mxman, &decode); } #ifdef CONFIG_SCSC_WLBTD #if IS_ENABLED(CONFIG_SCSC_LOG_COLLECTION) /* Trigger sable log collection */ SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x requested log collection\n", msg->syserr.syserr_code); scsc_log_collector_schedule_collection(SCSC_LOG_SYS_ERR, decode.subcode); #endif #endif }