Skip to content

Commit

Permalink
crash log: safe code
Browse files Browse the repository at this point in the history
  • Loading branch information
facetosea committed Jan 9, 2025
1 parent 852f5b6 commit 2d99f18
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 32 deletions.
1 change: 1 addition & 0 deletions include/util/tlog.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ void taosReleaseCrashLogFile(TdFilePtr pFile, bool truncateFile);

int32_t initCrashLogWriter();
void checkAndPrepareCrashInfo();
bool reportThreadSetQuit();
void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime);

// clang-format off
Expand Down
4 changes: 2 additions & 2 deletions source/client/src/clientEnv.c
Original file line number Diff line number Diff line change
Expand Up @@ -839,9 +839,9 @@ static void *tscCrashReportThreadFp(void *param) {

while (1) {
checkAndPrepareCrashInfo();
if (clientStop > 0) break;
if (clientStop > 0 && reportThreadSetQuit()) break;
if (loopTimes++ < reportPeriodNum) {
if(loopTimes < 0) loopTimes = reportPeriodNum;
if (loopTimes < 0) loopTimes = reportPeriodNum;
taosMsleep(sleepTime);
continue;
}
Expand Down
4 changes: 3 additions & 1 deletion source/dnode/mgmt/mgmt_dnode/src/dmWorker.c
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,9 @@ static void *dmCrashReportThreadFp(void *param) {

while (1) {
checkAndPrepareCrashInfo();
if (pMgmt->pData->dropped || pMgmt->pData->stopped) break;
if ((pMgmt->pData->dropped || pMgmt->pData->stopped) && reportThreadSetQuit()) {
break;
}
if (loopTimes++ < reportPeriodNum) {
taosMsleep(sleepTime);
if(loopTimes < 0) loopTimes = reportPeriodNum;
Expand Down
91 changes: 62 additions & 29 deletions source/util/src/tlog.c
Original file line number Diff line number Diff line change
Expand Up @@ -1280,9 +1280,15 @@ void taosLogCrashInfo(char *nodeType, char *pMsg, int64_t msgLen, int signum, vo
taosMemoryFree(pMsg);
}

/*
 * States of the crash-log writer, stored in gCrashBasicInfo.status.
 * The numeric values are spelled out explicitly because the state is kept
 * in an int8_t field and manipulated through 8-bit atomic helpers.
 */
typedef enum {
  CRASH_LOG_WRITER_UNKNOWN = 0, /* value of the zero-initialised gCrashBasicInfo */
  CRASH_LOG_WRITER_INIT    = 1, /* set by initCrashLogWriter() and again after a crash log was written */
  CRASH_LOG_WRITER_WAIT    = 2, /* entered from INIT by setReportThreadWait() */
  CRASH_LOG_WRITER_RUNNING = 3, /* entered from WAIT by setReportThreadRunning() */
  CRASH_LOG_WRITER_QUIT    = 4  /* entered from INIT by reportThreadSetQuit() */
} CrashStatus;
typedef struct crashBasicInfo {
bool init;
bool isCrash;
int8_t status;
int64_t clusterId;
int64_t startTime;
char *nodeType;
Expand All @@ -1292,24 +1298,55 @@ typedef struct crashBasicInfo {
} crashBasicInfo;

crashBasicInfo gCrashBasicInfo = {0};
static void writeCrashLogToFileInNewThead() {
if (!gCrashBasicInfo.init || !gCrashBasicInfo.isCrash) return;
char *pMsg = NULL;
const char *flags = "UTL FATAL ";
ELogLevel level = DEBUG_FATAL;
int32_t dflag = 255;
int64_t msgLen = -1;

if (tsEnableCrashReport) {
if (taosGenCrashJsonMsg(gCrashBasicInfo.signum, &pMsg, gCrashBasicInfo.clusterId, gCrashBasicInfo.startTime)) {
taosPrintLog(flags, level, dflag, "failed to generate crash json msg");
} else {
msgLen = strlen(pMsg);
/*
 * Unconditionally overwrite the crash-log writer state.
 *
 * status: new state (a CrashStatus value) stored into gCrashBasicInfo.status
 *         through the 8-bit atomic store helper.
 */
void setCrashWriterStatus(int8_t status) {
  atomic_store_8(&gCrashBasicInfo.status, status);
}
/*
 * Attempt the INIT -> QUIT transition of the crash-log writer state.
 *
 * The compare-exchange only stores CRASH_LOG_WRITER_QUIT when the current
 * state is CRASH_LOG_WRITER_INIT; in any other state nothing is changed.
 *
 * Returns true when the value handed back by the compare-exchange is
 * CRASH_LOG_WRITER_INIT (i.e. the swap took place), false otherwise.
 */
bool reportThreadSetQuit() {
  const CrashStatus prev =
      atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_INIT, CRASH_LOG_WRITER_QUIT);
  return prev == CRASH_LOG_WRITER_INIT;
}

/*
 * Attempt the INIT -> WAIT transition of the crash-log writer state.
 *
 * The compare-exchange only stores CRASH_LOG_WRITER_WAIT when the current
 * state is CRASH_LOG_WRITER_INIT; in any other state nothing is changed.
 *
 * Returns true when the value handed back by the compare-exchange is
 * CRASH_LOG_WRITER_INIT (i.e. the swap took place), false otherwise.
 */
bool setReportThreadWait() {
  const CrashStatus prev =
      atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_INIT, CRASH_LOG_WRITER_WAIT);
  return prev == CRASH_LOG_WRITER_INIT;
}
/*
 * Attempt the WAIT -> RUNNING transition of the crash-log writer state.
 *
 * The compare-exchange only stores CRASH_LOG_WRITER_RUNNING when the current
 * state is CRASH_LOG_WRITER_WAIT; in any other state nothing is changed.
 *
 * Returns true when this call performed the transition, false otherwise.
 */
bool setReportThreadRunning() {
  const CrashStatus prev =
      atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_WAIT, CRASH_LOG_WRITER_RUNNING);
  /*
   * The helper hands back the value seen before the exchange, so success is
   * signalled by the expected old value (WAIT), exactly as reportThreadSetQuit()
   * and setReportThreadWait() test for INIT. Comparing against RUNNING, the new
   * value, would report failure on every successful transition.
   */
  return prev == CRASH_LOG_WRITER_WAIT;
}
static void writeCrashLogToFileInNewThead() {
if (setReportThreadRunning()) {
char *pMsg = NULL;
const char *flags = "UTL FATAL ";
ELogLevel level = DEBUG_FATAL;
int32_t dflag = 255;
int64_t msgLen = -1;

if (tsEnableCrashReport) {
if (taosGenCrashJsonMsg(gCrashBasicInfo.signum, &pMsg, gCrashBasicInfo.clusterId, gCrashBasicInfo.startTime)) {
taosPrintLog(flags, level, dflag, "failed to generate crash json msg");
} else {
msgLen = strlen(pMsg);
}
}
taosLogCrashInfo(gCrashBasicInfo.nodeType, pMsg, msgLen, gCrashBasicInfo.signum, gCrashBasicInfo.sigInfo);
setCrashWriterStatus(CRASH_LOG_WRITER_INIT);
tsem_post(&gCrashBasicInfo.sem);
}
taosLogCrashInfo(gCrashBasicInfo.nodeType, pMsg, msgLen, gCrashBasicInfo.signum, gCrashBasicInfo.sigInfo);
gCrashBasicInfo.isCrash = false;
tsem_post(&gCrashBasicInfo.sem);
}

void checkAndPrepareCrashInfo() {
Expand All @@ -1322,23 +1359,19 @@ int32_t initCrashLogWriter() {
uError("failed to init sem for crashLogWriter, code:%d", code);
return code;
}
gCrashBasicInfo.isCrash = false;
gCrashBasicInfo.init = true;
setCrashWriterStatus(CRASH_LOG_WRITER_INIT);
return code;
}

void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime) {
if (!gCrashBasicInfo.init) {
return;
if (setReportThreadWait()) {
gCrashBasicInfo.clusterId = clusterId;
gCrashBasicInfo.startTime = startTime;
gCrashBasicInfo.nodeType = nodeType;
gCrashBasicInfo.signum = signum;
gCrashBasicInfo.sigInfo = sigInfo;
tsem_wait(&gCrashBasicInfo.sem);
}
gCrashBasicInfo.clusterId = clusterId;
gCrashBasicInfo.startTime = startTime;
gCrashBasicInfo.nodeType = nodeType;
gCrashBasicInfo.signum = signum;
gCrashBasicInfo.sigInfo = sigInfo;
gCrashBasicInfo.isCrash = true;

tsem_wait(&gCrashBasicInfo.sem);
}

void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd) {
Expand Down

0 comments on commit 2d99f18

Please sign in to comment.