182 lines
6.8 KiB
C
Executable File
182 lines
6.8 KiB
C
Executable File
/****************************************************************************
|
|
*
|
|
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All rights reserved
|
|
*
|
|
****************************************************************************/
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/init.h>
|
|
#include <linux/firmware.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/version.h>
|
|
#include <linux/kmod.h>
|
|
#include <linux/notifier.h>
|
|
|
|
#include "scsc_mx_impl.h"
|
|
#include "miframman.h"
|
|
#include "mifmboxman.h"
|
|
#include "mxman.h"
|
|
#include "srvman.h"
|
|
#include "mxmgmt_transport.h"
|
|
#include "mxlog.h"
|
|
#include "mxlogger.h"
|
|
#include "fw_panic_record.h"
|
|
#include "panicmon.h"
|
|
#include "mxproc.h"
|
|
#include "mxsyserr.h"
|
|
#include "scsc/scsc_log_collector.h"
|
|
|
|
#include <scsc/scsc_release.h>
|
|
#include <scsc/scsc_mx.h>
|
|
#include <scsc/scsc_logring.h>
|
|
|
|
/* If limits below are exceeded, a service level reset will be raised to level 7 */
|
|
#define SYSERR_RESET_HISTORY_SIZE (4)
|
|
/* Minimum time between system error service resets (ms) */
|
|
#define SYSERR_RESET_MIN_INTERVAL (300000)
|
|
/* No more then SYSERR_RESET_HISTORY_SIZE system error service resets in this period (ms)*/
|
|
#define SYSERR_RESET_MONITOR_PERIOD (3600000)
|
|
|
|
|
|
/* Time stamps of last service resets in jiffies */
|
|
static unsigned long syserr_reset_history[SYSERR_RESET_HISTORY_SIZE] = {0};
|
|
static int syserr_reset_history_index;
|
|
|
|
static uint syserr_reset_min_interval = SYSERR_RESET_MIN_INTERVAL;
|
|
module_param(syserr_reset_min_interval, uint, S_IRUGO | S_IWUSR);
|
|
MODULE_PARM_DESC(syserr_reset_min_interval, "Minimum time between system error service resets (ms)");
|
|
|
|
static uint syserr_reset_monitor_period = SYSERR_RESET_MONITOR_PERIOD;
|
|
module_param(syserr_reset_monitor_period, uint, S_IRUGO | S_IWUSR);
|
|
MODULE_PARM_DESC(syserr_reset_monitor_period, "No more then 4 system error service resets in this period (ms)");
|
|
|
|
|
|
void mx_syserr_init(void)
|
|
{
|
|
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_INIT: syserr_reset_min_interval %lu syserr_reset_monitor_period %lu\n",
|
|
syserr_reset_min_interval, syserr_reset_monitor_period);
|
|
}
|
|
|
|
void mx_syserr_handler(struct mxman *mxman, const void *message)
|
|
{
|
|
const struct mx_syserr_msg *msg = (const struct mx_syserr_msg *)message;
|
|
struct srvman *srvman;
|
|
|
|
struct mx_syserr_decode decode;
|
|
|
|
srvman = scsc_mx_get_srvman(mxman->mx);
|
|
|
|
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND len: %u, ts: 0x%08X, tf: 0x%08X, str: 0x%x, code: 0x%08x, p0: 0x%x, p1: 0x%x\n",
|
|
msg->syserr.length,
|
|
msg->syserr.slow_clock,
|
|
msg->syserr.fast_clock,
|
|
msg->syserr.string_index,
|
|
msg->syserr.syserr_code,
|
|
msg->syserr.param[0],
|
|
msg->syserr.param[1]);
|
|
|
|
decode.subsys = (u8) ((msg->syserr.syserr_code >> SYSERR_SUB_SYSTEM_POSN) & SYSERR_SUB_SYSTEM_MASK);
|
|
decode.level = (u8) ((msg->syserr.syserr_code >> SYSERR_LEVEL_POSN) & SYSERR_LEVEL_MASK);
|
|
decode.type = (u8) ((msg->syserr.syserr_code >> SYSERR_TYPE_POSN) & SYSERR_TYPE_MASK);
|
|
decode.subcode = (u16) ((msg->syserr.syserr_code >> SYSERR_SUB_CODE_POSN) & SYSERR_SUB_CODE_MASK);
|
|
|
|
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND Subsys %d, Level %d, Type %d, Subcode 0x%04x\n",
|
|
decode.subsys, decode.level, decode.type, decode.subcode);
|
|
|
|
/* Level 1 just gets logged without bothering anyone else */
|
|
if (decode.level == MX_SYSERR_LEVEL_1) {
|
|
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x log only\n",
|
|
msg->syserr.syserr_code);
|
|
return;
|
|
}
|
|
|
|
/* Ignore if panic reset in progress */
|
|
if ((srvman->error) || (mxman->mxman_state == MXMAN_STATE_FAILED)) {
|
|
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x ignored (reset in progess)\n",
|
|
msg->syserr.syserr_code);
|
|
return;
|
|
}
|
|
|
|
/* Ignore any system errors for the same sub-system if recovery is in progress */
|
|
if ((mxman->syserr_recovery_in_progress) && (mxman->last_syserr.subsys == decode.subsys)) {
|
|
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x ignored (recovery in progess)\n",
|
|
msg->syserr.syserr_code);
|
|
return;
|
|
}
|
|
|
|
|
|
/* Let affected sevices escalate if needed - this also checks if only one sub-system is running
|
|
* and handles race conditions with shutting down service
|
|
*/
|
|
decode.level = srvman_notify_sub_system(srvman, &decode);
|
|
|
|
if (decode.level >= MX_SYSERR_LEVEL_5) {
|
|
unsigned long now = jiffies;
|
|
int i;
|
|
|
|
/* We use 0 as a NULL timestamp so avoid this */
|
|
now = (now) ? now : 1;
|
|
|
|
if ((decode.level >= MX_SYSERR_LEVEL_7) || (mxman->syserr_recovery_in_progress)) {
|
|
/* If full reset has been requested or a service restart is needed and one is
|
|
* already in progress, trigger a full reset
|
|
*/
|
|
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x triggered full reset\n",
|
|
msg->syserr.syserr_code);
|
|
|
|
mxman_fail(mxman, SCSC_PANIC_CODE_HOST << 15, __func__);
|
|
return;
|
|
}
|
|
|
|
/* last_syserr_recovery_time is always zero-ed before we restart the chip */
|
|
if (mxman->last_syserr_recovery_time) {
|
|
/* Have we had a too recent system error service reset
|
|
* Chance of false positive here is low enough to be acceptable
|
|
*/
|
|
if ((syserr_reset_min_interval) && (time_in_range(now, mxman->last_syserr_recovery_time,
|
|
mxman->last_syserr_recovery_time + msecs_to_jiffies(syserr_reset_min_interval)))) {
|
|
|
|
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x triggered full reset (less than %dms after last)\n",
|
|
msg->syserr.syserr_code, syserr_reset_min_interval);
|
|
mxman_fail(mxman, SCSC_PANIC_CODE_HOST << 15, __func__);
|
|
return;
|
|
} else if (syserr_reset_monitor_period) {
|
|
/* Have we had too many system error service resets in one period? */
|
|
/* This will be the case if all our stored history was in this period */
|
|
bool out_of_danger_period_found = false;
|
|
|
|
for (i = 0; (i < SYSERR_RESET_HISTORY_SIZE) && (!out_of_danger_period_found); i++)
|
|
out_of_danger_period_found = ((!syserr_reset_history[i]) ||
|
|
(!time_in_range(now, syserr_reset_history[i],
|
|
syserr_reset_history[i] + msecs_to_jiffies(syserr_reset_monitor_period))));
|
|
|
|
if (!out_of_danger_period_found) {
|
|
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x triggered full reset (too many within %dms)\n",
|
|
msg->syserr.syserr_code, syserr_reset_monitor_period);
|
|
mxman_fail(mxman, SCSC_PANIC_CODE_HOST << 15, __func__);
|
|
return;
|
|
}
|
|
}
|
|
} else
|
|
/* First syserr service reset since chip was (re)started - zap history */
|
|
for (i = 0; i < SYSERR_RESET_HISTORY_SIZE; i++)
|
|
syserr_reset_history[i] = 0;
|
|
|
|
/* Otherwise trigger recovery of the affected subservices */
|
|
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x triggered service recovery\n",
|
|
msg->syserr.syserr_code);
|
|
syserr_reset_history[syserr_reset_history_index++ % SYSERR_RESET_HISTORY_SIZE] = now;
|
|
mxman->last_syserr_recovery_time = now;
|
|
mxman_syserr(mxman, &decode);
|
|
}
|
|
|
|
#ifdef CONFIG_SCSC_WLBTD
|
|
#if IS_ENABLED(CONFIG_SCSC_LOG_COLLECTION)
|
|
/* Trigger sable log collection */
|
|
SCSC_TAG_INFO(MXMAN, "MM_SYSERR_IND code: 0x%08x requested log collection\n", msg->syserr.syserr_code);
|
|
scsc_log_collector_schedule_collection(SCSC_LOG_SYS_ERR, decode.subcode);
|
|
#endif
|
|
#endif
|
|
}
|