最近在开发中遇到一个链路追踪的需求,先看下最终效果:
发起请求:
$ curl -H "Content-Type:application/json" -X POST -d '{"username":"admin","password":"123456"}' http://127.0.0.1:8000/login
{"code":200,"msg":"ok","data":{"token":"xxx","userinfo":{"id":1,"roles":["admin"],"username":"admin","avatar":"xxx.gif","introduction":"user..","email":"admin@123.com","status":true,"isReset":true,"phone":"xxx","nickName":"taven","createdAt":"2021-12-18T20:20:58+08:00","roleIds":[1]}},"trace_id":"2f2dd12e-7fa0-4978-96c8-b1b010031adb"}
后端日志输出:
即从请求到达后端、再到业务逻辑处理、数据层执行 SQL 等一系列过程都有一个 trace_id
把它们关联起来,通过这一个 trace_id
就可以总览到该次请求的全链路执行情况。
我这里项目使用到有如下库:
- Web 处理层:gin
- 数据层 OR/M:gorm
- 权限校验:casbin
- 日志处理:logrus
由于此时项目很多接口已经成型,手动为每个方法加 trace_id
形参显然工作量太大,这时候我就想到能不能针对每个请求生成一个针对请求上下文全局能够随地获取到的 trace_id
。
对于 Java 而言,它可以通过 ThreadLocal 来获取当前的线程上下文信息,那么对于 Golang,是不是也能通过类似的方式获取到当前的 goroutine 信息?
果然略加搜索后,发现有同道中人和我有相同的想法,参考博文:
在该博文中提到了 Golang 开源的 gls 库来实现类似 Java 中 ThreadLocal 的效果,虽然有提到它性能好像不是很好,但是我这里只是针对公司内部系统的开发,请求量和并发也不高,所以我这里就选定了下面这个 gls 的实现:
下面说明一下具体的实现步骤。
为 Gin 添加 Middleware
创建一个 Middleware 用来生成 trace_id
:
package middleware
import (
"bytes"
"github.com/gin-gonic/gin"
"github.com/jtolds/gls"
uuid "github.com/satori/go.uuid"
"zze/loggers"
"io/ioutil"
)
var LoggerContextMgr = gls.NewContextManager()
func TraceMiddleware() gin.HandlerFunc {
return func(c *gin.Context) {
data, err := c.GetRawData()
if err != nil {
loggers.Logger.Errorf("get request raw data failed, err: %v", err)
c.Abort()
return
}
c.Request.Body = ioutil.NopCloser(bytes.NewBuffer(data))
requestBody := string(data)
traceId := uuid.NewV4().String()
traceContext := &loggers.CmdbLoggerContext{
TraceId: traceId,
RequestBody: requestBody,
}
gls.EnsureGoroutineId(func(gid uint) {
loggers.LoggerContextMgr.SetValues(gls.Values{gid: traceContext}, func() {
c.Next()
})
})
}
}
这里我这个 Middleware 同时把请求体字符串保存到了 goroutine 上下文中。
配置 Gin 使用该 Middleware:
engine := gin.New()
engine.Use(middleware.TraceMiddleware()) // 第一个
engine.Use(middleware.JWTAuthMiddleware())
engine.Use(middleware.Cors()) // 配置跨域
engine.Use(loggers.NewGinLogger())
完成上述配置后,在一个请求到达 Gin 框架后会立马生成一个 trace_id
保存到 goroutine 上下文。
初始化 logrus
logrus 可以通过 hook 的方式为日志扩展添加额外的字段,如下通过 hook 回调方法添加了 trace_id
等字段:
package loggers
import (
"fmt"
"github.com/jtolds/gls"
"github.com/natefinch/lumberjack"
"github.com/sirupsen/logrus"
"zze/app"
"io"
"os"
"path"
"time"
)
type CmdbLoggerContext struct {
Username string
UserId int64
TraceId string
RequestBody string
}
type GlsHook struct{}
func (h *GlsHook) Levels() []logrus.Level {
return logrus.AllLevels
}
func (h *GlsHook) Fire(e *logrus.Entry) error {
gls.EnsureGoroutineId(func(gid uint) {
value, ok := LoggerContextMgr.GetValue(gid)
if ok {
context := value.(*CmdbLoggerContext)
if context.UserId != 0 {
e.Data["user_identifier"] = fmt.Sprintf("%s(%d)", context.Username, context.UserId)
}
if context.TraceId != "" {
e.Data["trace_id"] = context.TraceId
}
if context.RequestBody != "" {
e.Data["request_body"] = context.RequestBody
}
}
})
return nil
}
// gls 全局上下文管理器
var LoggerContextMgr = gls.NewContextManager()
var Logger *logrus.Logger
func GetLogLevel(levelStr string) (level logrus.Level) {
//PANIC FATAL ERROR WARN INFO DEBUG TRACE
switch levelStr {
case "TRACE":
level = logrus.TraceLevel
case "DEBUG":
level = logrus.DebugLevel
case "INFO":
level = logrus.InfoLevel
case "WARN":
level = logrus.WarnLevel
case "ERROR":
level = logrus.ErrorLevel
case "FATAL":
level = logrus.FatalLevel
case "PANIC":
level = logrus.PanicLevel
default:
level = logrus.InfoLevel
}
return
}
func InitLogger() {
//var logFile *os.File
logDir := app.Configs.App.LogDir
nowTime := time.Now()
//日志文件
logFileName := fmt.Sprintf("%s-%s.log", app.Configs.Server.AppName, nowTime.Format("2006-01-02"))
logFilePath := path.Join(logDir, logFileName)
//日志同时输出到标准输出和文件
multiWriter := io.MultiWriter(os.Stdout, &lumberjack.Logger{
Filename: logFilePath, //日志文件位置
MaxSize: 1, // 单文件最大容量,单位是MB
MaxBackups: 3, // 最大保留过期文件个数
MaxAge: 1, // 保留过期文件的最大时间间隔,单位是天
Compress: true, // 是否需要压缩滚动日志, 使用的 gzip 压缩
})
Logger = logrus.New()
//设置日志级别
Logger.SetLevel(GetLogLevel(app.Configs.App.LogLevel))
if app.IsDebug() {
Logger.SetOutput(os.Stdout)
// 日志格式
Logger.SetFormatter(&logrus.TextFormatter{
//TimestampFormat: "2006-01-02 15:04:05",
ForceColors: true,
ForceQuote: true,
DisableTimestamp: true,
})
} else {
Logger.SetOutput(multiWriter)
// 日志格式
Logger.SetFormatter(&logrus.TextFormatter{
TimestampFormat: "2006-01-02 15:04:05",
ForceQuote: true,
QuoteEmptyFields: true,
})
}
Logger.AddHook(&GlsHook{})
}
除了初始化 logrus 实例外,上面还初始化了一个全局 gls 上下文管理器 LoggerContextMgr
。
重写 Gin Logger
由于项目需要统一使用 logrus 做日志输出,而 Gin 默认是有自己的 Logger,为了让 Gin 请求日志通过 logrus 输出,可以基于默认的 Logger 重写一个使用 logrus 做输出的 Logger。
Gin Logger 也是使用 Middleware 的方式嵌入,编写如下 Logger Middleware:
package loggers
import (
"fmt"
"github.com/gin-gonic/gin"
"zze/app"
"strings"
"time"
)
var defaultLogFormatter = func(param gin.LogFormatterParams) string {
var statusColor, methodColor, resetColor string
if app.IsDebug() {
statusColor = param.StatusCodeColor()
methodColor = param.MethodColor()
resetColor = param.ResetColor()
}
if param.Latency > time.Minute {
param.Latency = param.Latency - param.Latency%time.Second
}
return fmt.Sprintf("[GIN] %v |%s %3d %s| %13v | %15s |%s %-7s %s %#v\n%s",
param.TimeStamp.Format("2006/01/02 - 15:04:05"),
statusColor, param.StatusCode, resetColor,
param.Latency,
param.ClientIP,
methodColor, param.Method, resetColor,
param.Path,
param.ErrorMessage,
)
}
func NewGinLogger() gin.HandlerFunc {
conf := gin.LoggerConfig{}
formatter := conf.Formatter
if formatter == nil {
formatter = defaultLogFormatter
}
out := conf.Output
if out == nil {
out = gin.DefaultWriter
}
notlogged := conf.SkipPaths
var skip map[string]struct{}
if length := len(notlogged); length > 0 {
skip = make(map[string]struct{}, length)
for _, path := range notlogged {
skip[path] = struct{}{}
}
}
return func(c *gin.Context) {
// Start timer
start := time.Now()
path := c.Request.URL.Path
raw := c.Request.URL.RawQuery
// Process request
c.Next()
// Log only when path is not being skipped
if _, ok := skip[path]; !ok {
param := gin.LogFormatterParams{
Request: c.Request,
Keys: c.Keys,
}
// Stop timer
param.TimeStamp = time.Now()
param.Latency = param.TimeStamp.Sub(start)
param.ClientIP = c.ClientIP()
param.Method = c.Request.Method
param.StatusCode = c.Writer.Status()
param.ErrorMessage = c.Errors.ByType(gin.ErrorTypePrivate).String()
param.BodySize = c.Writer.Size()
if raw != "" {
path = path + "?" + raw
}
param.Path = path
if strings.HasPrefix(param.Path, "/ping") {
return
}
requestInfoStr := strings.ReplaceAll(strings.TrimSpace(formatter(param)), "\"", "'")
Logger.Infof(requestInfoStr)
}
}
}
配置使用自定义的 Logger:
engine.Use(loggers.NewGinLogger())
重写 Gorm Logger
同理,为了让 Gorm 使用 logrus,也需要重写它的 Logger:
package loggers
import (
"context"
"errors"
"fmt"
"github.com/sirupsen/logrus"
"github.com/tangwu2010/DomeOps/app"
"gorm.io/gorm/logger"
"gorm.io/gorm/utils"
"strings"
"time"
)
type GormLogger struct {
slowThreshold time.Duration
ignoreRecordNotFoundError bool
logLevel logrus.Level
infoStr, warnStr, errStr string
traceStr, traceErrStr, traceWarnStr string
colorInfoStr, colorWarnStr, colorErrStr string
colorTraceStr, colorTraceErrStr, colorTraceWarnStr string
}
func NewGormLogger(logLevel logrus.Level) *GormLogger {
var (
infoStr = "%s\n[info] "
warnStr = "%s\n[warn] "
errStr = "%s\n[error] "
traceStr = "%s\n[%.3fms] [rows:%v] %s"
traceWarnStr = "%s %s\n[%.3fms] [rows:%v] %s"
traceErrStr = "%s %s\n[%.3fms] [rows:%v] %s"
colorInfoStr = "%s\n[info] "
colorWarnStr = "%s\n[warn] "
colorErrStr = "%s\n[error] "
colorTraceStr = "%s\n[%.3fms] [rows:%v] %s"
colorTraceWarnStr = "%s %s\n[%.3fms] [rows:%v] %s"
colorTraceErrStr = "%s %s\n[%.3fms] [rows:%v] %s"
)
colorInfoStr = logger.Green + "%s\n" + logger.Reset + logger.Green + "[info] " + logger.Reset
colorWarnStr = logger.BlueBold + "%s\n" + logger.Reset + logger.Magenta + "[warn] " + logger.Reset
colorErrStr = logger.Magenta + "%s\n" + logger.Reset + logger.Red + "[error] " + logger.Reset
colorTraceStr = logger.Green + "%s\n" + logger.Reset + logger.Yellow + "[%.3fms] " + logger.BlueBold + "[rows:%v]" + logger.Reset + " %s"
colorTraceWarnStr = logger.Green + "%s " + logger.Yellow + "%s\n" + logger.Reset + logger.RedBold + "[%.3fms] " + logger.Yellow + "[rows:%v]" + logger.Magenta + " %s" + logger.Reset
colorTraceErrStr = logger.RedBold + "%s " + logger.MagentaBold + "%s\n" + logger.Reset + logger.Yellow + "[%.3fms] " + logger.BlueBold + "[rows:%v]" + logger.Reset + " %s"
return &GormLogger{
logLevel: logLevel,
slowThreshold: 200 * time.Millisecond,
ignoreRecordNotFoundError: true,
infoStr: infoStr,
warnStr: warnStr,
errStr: errStr,
traceStr: traceStr,
traceWarnStr: traceWarnStr,
traceErrStr: traceErrStr,
colorInfoStr: colorInfoStr,
colorWarnStr: colorWarnStr,
colorErrStr: colorErrStr,
colorTraceStr: colorTraceStr,
colorTraceWarnStr: colorTraceWarnStr,
colorTraceErrStr: colorTraceErrStr,
}
}
// LogMode log mode
func (l *GormLogger) LogMode(level logger.LogLevel) logger.Interface {
l.logLevel = logrus.Level(level)
return l
}
// Info print info
func (l GormLogger) Info(ctx context.Context, msg string, data ...interface{}) {
if l.logLevel >= logrus.InfoLevel {
l.printf(logrus.InfoLevel, l.infoStr+msg, l.colorInfoStr+msg, append([]interface{}{utils.FileWithLineNum()}, data...)...)
}
}
// Warn print warn messages
func (l GormLogger) Warn(ctx context.Context, msg string, data ...interface{}) {
if l.logLevel >= logrus.WarnLevel {
l.printf(logrus.WarnLevel, l.warnStr+msg, l.colorWarnStr+msg, append([]interface{}{utils.FileWithLineNum()}, data...)...)
}
}
// Error print error messages
func (l GormLogger) Error(ctx context.Context, msg string, data ...interface{}) {
if l.logLevel >= logrus.ErrorLevel {
l.printf(logrus.ErrorLevel, l.errStr+msg, l.colorErrStr+msg, append([]interface{}{utils.FileWithLineNum()}, data...)...)
}
}
// Trace print sql message
func (l GormLogger) Trace(ctx context.Context, begin time.Time, fc func() (string, int64), err error) {
elapsed := time.Since(begin)
switch {
case err != nil && l.logLevel >= logrus.ErrorLevel && (!errors.Is(err, logger.ErrRecordNotFound) || !l.ignoreRecordNotFoundError):
sql, rows := fc()
if rows == -1 {
l.printf(logrus.ErrorLevel, l.traceErrStr, l.colorTraceErrStr, utils.FileWithLineNum(), err, float64(elapsed.Nanoseconds())/1e6, "-", sql)
} else {
l.printf(logrus.ErrorLevel, l.traceErrStr, l.colorTraceErrStr, utils.FileWithLineNum(), err, float64(elapsed.Nanoseconds())/1e6, rows, sql)
}
case elapsed > l.slowThreshold && l.slowThreshold != 0 && l.logLevel >= logrus.WarnLevel:
sql, rows := fc()
slowLog := fmt.Sprintf("SLOW SQL >= %v", l.slowThreshold)
if rows == -1 {
l.printf(logrus.WarnLevel, l.traceWarnStr, l.colorTraceWarnStr, utils.FileWithLineNum(), slowLog, float64(elapsed.Nanoseconds())/1e6, "-", sql)
} else {
l.printf(logrus.WarnLevel, l.traceWarnStr, l.colorTraceWarnStr, utils.FileWithLineNum(), slowLog, float64(elapsed.Nanoseconds())/1e6, rows, sql)
}
case l.logLevel >= logrus.InfoLevel:
sql, rows := fc()
if rows == -1 {
l.printf(logrus.InfoLevel, l.traceStr, l.colorTraceStr, utils.FileWithLineNum(), float64(elapsed.Nanoseconds())/1e6, "-", sql)
} else {
l.printf(logrus.InfoLevel, l.traceStr, l.colorTraceStr, utils.FileWithLineNum(), float64(elapsed.Nanoseconds())/1e6, rows, sql)
}
}
}
func (l GormLogger) printf(level logrus.Level, msg string, colorfulMsg string, data ...interface{}) {
if app.IsDebug() {
msg = colorfulMsg
}
msg = strings.ReplaceAll(msg, "\n", " ")
switch level {
case logrus.TraceLevel:
Logger.Tracef(msg, data...)
case logrus.DebugLevel:
Logger.Debugf(msg, data...)
case logrus.InfoLevel:
Logger.Infof(msg, data...)
case logrus.WarnLevel:
Logger.Warnf(msg, data...)
case logrus.ErrorLevel:
logrus.Errorf(msg, data...)
}
}
配置 Gorm 使用自定义的 Logger:
package models
import (
"fmt"
"zze/app"
"zze/loggers"
"strings"
"gorm.io/driver/mysql"
"gorm.io/gorm"
"log"
)
var Orm *gorm.DB
func InitDB() {
var err error
// 创建自定义 Logger 实例
gormLogger := loggers.NewGormLogger(loggers.Logger.GetLevel())
dsn := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?charset=utf8&parseTime=True&loc=Local",
app.Configs.Mysql.User,
app.Configs.Mysql.Passwd,
app.Configs.Mysql.Host,
app.Configs.Mysql.Port,
app.Configs.Mysql.Name,
)
loggers.Logger.Infof("init dsn: %s", strings.ReplaceAll(dsn, app.Configs.Mysql.Passwd, "***"))
Orm, err = gorm.Open(mysql.New(mysql.Config{
DSN: dsn, // DSN data source name
DefaultStringSize: 256, // string 类型字段的默认长度
DisableDatetimePrecision: true, // 禁用 datetime 精度,MySQL 5.6 之前的数据库不支持
DontSupportRenameIndex: true, // 重命名索引时采用删除并新建的方式,MySQL 5.7 之前的数据库和 MariaDB 不支持重命名索引
DontSupportRenameColumn: true, // 用 `change` 重命名列,MySQL 8 之前的数据库和 MariaDB 不支持重命名列
SkipInitializeWithVersion: false, // 根据当前 MySQL 版本自动配置
}), &gorm.Config{
// 禁止创建物理外键
DisableForeignKeyConstraintWhenMigrating: true,
Logger: gormLogger,
})
if err != nil {
loggers.Logger.Fatalf("connect to db server failed, err: %v", err)
}
sqlDB, err := Orm.DB()
if err != nil {
loggers.Logger.Fatalf("create database instance failed, err: %v", err)
}
sqlDB.SetMaxOpenConns(50)
sqlDB.SetMaxIdleConns(10)
// 修改字段类型
//db.Model(&AlertDB{}).ModifyColumn("ends_at", "datetime")
//db.Model(&AlertDB{}).ModifyColumn("starts_at", "datetime")
err = Orm.AutoMigrate(&Project{}, &Application{}, &DeployEnv{}, &K8sConfig{})
if err != nil {
loggers.Logger.Fatalf("init database failed, err: %v", err)
}
}
给 Gin 响应添加 trace_id
我这里响应请求统一使用了如下结构体方法:
import (
"github.com/gin-gonic/gin"
"github.com/jtolds/gls"
"zze/loggers"
"net/http"
)
// swagger:model
type Response struct {
c *gin.Context
// 0: 失败 1: 成功
Code int `json:"code"`
// 错误时返回错误信息
Msg string `json:"msg"`
Data interface{} `json:"data"`
TraceId string `json:"trace_id"`
}
func NewResponse(c *gin.Context) *Response {
return &Response{c: c}
}
func (r *Response) SuccessResponse(msg string, data interface{}) {
r.Response(1, msg, data)
}
func (r *Response) FailedResponse(msg string, data interface{}) {
r.Response(0, msg, data)
}
func (r *Response) Response(code int, msg string, data interface{}) {
r.Code = code
r.Msg = msg
r.Data = data
gls.EnsureGoroutineId(func(gid uint) {
value, ok := loggers.LoggerContextMgr.GetValue(gid)
if ok {
context := value.(*loggers.CmdbLoggerContext)
r.TraceId = context.TraceId
}
r.c.JSON(http.StatusOK, r)
})
}
至此各个组件就都统一使用 logrus 做日志输出了,而 logrus 会取当前 goroutine 上下文中的 trace_id
并将其添加到每条日志的 trace_id
字段,就实现了同一个请求相关联的日志都有一个相同的 trace_id
,即完成了日志链路追踪的效果。
评论区