Merge branch 'master' of github.com:didi/nightingale
This commit is contained in:
commit
d92ca5f2a9
|
@ -4,16 +4,19 @@ about: Report a bug encountered while operating Nightingale
|
||||||
labels: kind/bug
|
labels: kind/bug
|
||||||
---
|
---
|
||||||
|
|
||||||
**What happened**:
|
**问题现象**:
|
||||||
|
|
||||||
**What you expected to happen**:
|
|
||||||
|
|
||||||
**How to reproduce it (as minimally and precisely as possible)**:
|
**复现方法**:
|
||||||
|
|
||||||
**Anything else we need to know?**:
|
|
||||||
|
|
||||||
**Environment**:
|
**环境信息**:
|
||||||
- Nightingale Version (`./n9e-rdb -v`):
|
|
||||||
- OS Version (`uname -a`):
|
- 夜莺服务端版本 (通过`./n9e-server -v`可得知版本):
|
||||||
- Logs (stdout/error/warning/http.request/http.response):
|
- 夜莺客户端版本 (通过`./n9e-agentd -v`可得知版本):
|
||||||
- Others:
|
- 操作系统版本 (通过`uname -a`可得知OS版本):
|
||||||
|
|
||||||
|
**日志线索**:
|
||||||
|
|
||||||
|
*日志分两部分,一个是logs目录下,重点查看ERROR日志;另一部分是stdout,如果是systemd托管的,可以通过 `journalctl -u <n9e-server|n9e-agentd> -f` 查看*
|
||||||
|
|
||||||
|
|
|
@ -86,7 +86,6 @@ func configRoutes(r *gin.Engine) {
|
||||||
pages.DELETE("/user-group/:id", login(), userGroupDel)
|
pages.DELETE("/user-group/:id", login(), userGroupDel)
|
||||||
|
|
||||||
pages.GET("/classpaths", login(), classpathListGets)
|
pages.GET("/classpaths", login(), classpathListGets)
|
||||||
pages.GET("/classpaths/tree", login(), classpathListNodeGets)
|
|
||||||
pages.GET("/classpaths/tree-node/:id", login(), classpathListNodeGetsById)
|
pages.GET("/classpaths/tree-node/:id", login(), classpathListNodeGetsById)
|
||||||
pages.POST("/classpaths", login(), classpathAdd)
|
pages.POST("/classpaths", login(), classpathAdd)
|
||||||
pages.PUT("/classpath/:id", login(), classpathPut)
|
pages.PUT("/classpath/:id", login(), classpathPut)
|
||||||
|
@ -159,6 +158,7 @@ func configRoutes(r *gin.Engine) {
|
||||||
pages.DELETE("/alert-events", login(), alertEventsDel)
|
pages.DELETE("/alert-events", login(), alertEventsDel)
|
||||||
pages.GET("/alert-event/:id", login(), alertEventGet)
|
pages.GET("/alert-event/:id", login(), alertEventGet)
|
||||||
pages.DELETE("/alert-event/:id", login(), alertEventDel)
|
pages.DELETE("/alert-event/:id", login(), alertEventDel)
|
||||||
|
pages.PUT("/alert-event/:id", login(), alertEventNotePut)
|
||||||
|
|
||||||
pages.GET("/history-alert-events", login(), historyAlertEventGets)
|
pages.GET("/history-alert-events", login(), historyAlertEventGets)
|
||||||
pages.GET("/history-alert-event/:id", login(), historyAlertEventGet)
|
pages.GET("/history-alert-event/:id", login(), historyAlertEventGet)
|
||||||
|
@ -205,7 +205,7 @@ func configRoutes(r *gin.Engine) {
|
||||||
|
|
||||||
v1.GET("/can-do-op-by-name", login(), canDoOpByName)
|
v1.GET("/can-do-op-by-name", login(), canDoOpByName)
|
||||||
v1.GET("/can-do-op-by-token", login(), canDoOpByToken)
|
v1.GET("/can-do-op-by-token", login(), canDoOpByToken)
|
||||||
|
v1.GET("/get-user-by-name", login(), getUserByName)
|
||||||
}
|
}
|
||||||
|
|
||||||
push := r.Group("/v1/n9e/series").Use(gzip.Gzip(gzip.DefaultCompression))
|
push := r.Group("/v1/n9e/series").Use(gzip.Gzip(gzip.DefaultCompression))
|
||||||
|
|
|
@ -54,6 +54,20 @@ func alertEventGet(c *gin.Context) {
|
||||||
renderData(c, ae, nil)
|
renderData(c, ae, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// alertEventNoteForm is the request payload bound by alertEventNotePut.
// It carries the note text under the JSON key "event_note".
type alertEventNoteForm struct {
|
||||||
|
EventNote string `json:"event_note"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// alertEventNotePut handles PUT /alert-event/:id. It binds the request into an
// alertEventNoteForm, requires the logged-in user to hold the
// "alert_event_modify" permission, resolves the alert event from the ":id" URL
// parameter, and passes the event's Id and HashId, the submitted note and the
// current user's Id to models.AlertEventUpdateEventNote. The result of that
// call is rendered as the response message.
func alertEventNotePut(c *gin.Context) {
|
||||||
|
var f alertEventNoteForm
|
||||||
|
bind(c, &f)
|
||||||
|
|
||||||
|
// The user returned by MustPerm is kept because its Id is passed along with the note.
me := loginUser(c).MustPerm("alert_event_modify")
|
||||||
|
ae := AlertEvent(urlParamInt64(c, "id"))
|
||||||
|
|
||||||
|
renderMessage(c, models.AlertEventUpdateEventNote(ae.Id, ae.HashId, f.EventNote, me.Id))
|
||||||
|
}
|
||||||
|
|
||||||
func alertEventDel(c *gin.Context) {
|
func alertEventDel(c *gin.Context) {
|
||||||
loginUser(c).MustPerm("alert_event_delete")
|
loginUser(c).MustPerm("alert_event_delete")
|
||||||
renderMessage(c, AlertEvent(urlParamInt64(c, "id")).Del())
|
renderMessage(c, AlertEvent(urlParamInt64(c, "id")).Del())
|
||||||
|
|
|
@ -90,3 +90,8 @@ func canDoOpByToken(c *gin.Context) {
|
||||||
can, err := user.CanDo(queryStr(c, "op"))
|
can, err := user.CanDo(queryStr(c, "op"))
|
||||||
renderData(c, can, err)
|
renderData(c, can, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getUserByName looks up a user through models.UserGetByUsername using the
// "name" query-string parameter and renders the returned user and error.
// NOTE(review): what is rendered when no user matches depends on
// UserGetByUsername and renderData, neither of which is visible here — confirm.
func getUserByName(c *gin.Context) {
|
||||||
|
user, err := models.UserGetByUsername(queryStr(c, "name"))
|
||||||
|
renderData(c, user, err)
|
||||||
|
}
|
||||||
|
|
|
@ -24,6 +24,7 @@ func classpathListGets(c *gin.Context) {
|
||||||
}, nil)
|
}, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This API is not exposed externally for now.
|
||||||
func classpathListNodeGets(c *gin.Context) {
|
func classpathListNodeGets(c *gin.Context) {
|
||||||
query := queryStr(c, "query", "")
|
query := queryStr(c, "query", "")
|
||||||
|
|
||||||
|
|
|
@ -527,6 +527,12 @@ func sendEventIfNeed(status []bool, event *models.AlertEvent, stra *models.Alert
|
||||||
func SendEvent(event *models.AlertEvent) {
|
func SendEvent(event *models.AlertEvent) {
|
||||||
// update last event
|
// update last event
|
||||||
LastEvents.Set(event)
|
LastEvents.Set(event)
|
||||||
|
ae, err := models.AlertEventGet("hash_id = ?", event.HashId)
|
||||||
|
if err == nil && ae != nil {
|
||||||
|
logger.Debugf("[event exists do not send again][type:%+v][event:%+v]", event.IsPromePull, event)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
ok := EventQueue.PushFront(event)
|
ok := EventQueue.PushFront(event)
|
||||||
if !ok {
|
if !ok {
|
||||||
logger.Errorf("push event:%v err", event)
|
logger.Errorf("push event:%v err", event)
|
||||||
|
|
|
@ -17,6 +17,9 @@ type AlertEvent struct {
|
||||||
RuleId int64 `json:"rule_id"`
|
RuleId int64 `json:"rule_id"`
|
||||||
RuleName string `json:"rule_name"`
|
RuleName string `json:"rule_name"`
|
||||||
RuleNote string `json:"rule_note"`
|
RuleNote string `json:"rule_note"`
|
||||||
|
ProcessorUid int64 `json:"processor_uid"`
|
||||||
|
ProcessorObj User `json:"processor_user_obj" xorm:"-"`
|
||||||
|
EventNote string `json:"event_note"`
|
||||||
HashId string `json:"hash_id"` // 唯一标识
|
HashId string `json:"hash_id"` // 唯一标识
|
||||||
IsPromePull int `json:"is_prome_pull"` // 代表是否是prometheus pull告警,为1时前端使用 ReadableExpression 拉取最近1小时数据
|
IsPromePull int `json:"is_prome_pull"` // 代表是否是prometheus pull告警,为1时前端使用 ReadableExpression 拉取最近1小时数据
|
||||||
LastSend bool `json:"last_sent" xorm:"-"` // true 代表上次发了,false代表还没发:给prometheus做for判断的
|
LastSend bool `json:"last_sent" xorm:"-"` // true 代表上次发了,false代表还没发:给prometheus做for判断的
|
||||||
|
@ -114,6 +117,14 @@ func (ae *AlertEvent) FillObjs() error {
|
||||||
ae.NotifyUserObjs = users
|
ae.NotifyUserObjs = users
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ae.ProcessorUid != 0 {
|
||||||
|
processor, err := UserGetById(ae.ProcessorUid)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
ae.ProcessorObj = *processor
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -245,6 +256,7 @@ func AlertEventGets(stime, etime int64, query string, status, priority int, limi
|
||||||
func AlertEventGet(where string, args ...interface{}) (*AlertEvent, error) {
|
func AlertEventGet(where string, args ...interface{}) (*AlertEvent, error) {
|
||||||
var obj AlertEvent
|
var obj AlertEvent
|
||||||
has, err := DB.Where(where, args...).Get(&obj)
|
has, err := DB.Where(where, args...).Get(&obj)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Errorf("mysql.error: query alert_event(%s)%+v fail: %s", where, args, err)
|
logger.Errorf("mysql.error: query alert_event(%s)%+v fail: %s", where, args, err)
|
||||||
return nil, internalServerError
|
return nil, internalServerError
|
||||||
|
@ -256,3 +268,24 @@ func AlertEventGet(where string, args ...interface{}) (*AlertEvent, error) {
|
||||||
|
|
||||||
return &obj, nil
|
return &obj, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// AlertEventUpdateEventNote stores note and uid as the event_note and
// processor_uid of two rows inside one database transaction:
//   - the alert_event row whose id equals id, and
//   - the history_alert_event row with the highest id among those whose
//     hash_id equals hashId.
//
// It returns the error from Begin, from either UPDATE (after logging it), or
// from Commit; nil means both statements ran and the transaction committed.
//
// NOTE(review): there is no explicit Rollback on the failure paths; presumably
// the deferred session.Close() discards the uncommitted transaction — confirm
// against the xorm version in use.
func AlertEventUpdateEventNote(id int64, hashId string, note string, uid int64) error {
|
||||||
|
session := DB.NewSession()
|
||||||
|
defer session.Close()
|
||||||
|
|
||||||
|
if err := session.Begin(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the active event, addressed by its primary key.
if _, err := session.Exec("UPDATE alert_event SET event_note = ?, processor_uid = ? WHERE id = ?", note, uid, id); err != nil {
|
||||||
|
logger.Errorf("mysql.error: update alert_event event_note fail: %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only the newest history row for this hash_id is touched (ORDER BY id DESC LIMIT 1).
// NOTE(review): UPDATE ... ORDER BY ... LIMIT is MySQL-style syntax — confirm the target database supports it.
if _, err := session.Exec("UPDATE history_alert_event SET event_note = ?, processor_uid = ? WHERE hash_id = ? ORDER BY id DESC LIMIT 1", note, uid, hashId); err != nil {
|
||||||
|
logger.Errorf("mysql.error: update history_alert_event event_note fail: %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return session.Commit()
|
||||||
|
}
|
||||||
|
|
|
@ -28,7 +28,7 @@ type CollectRule struct {
|
||||||
type PortConfig struct {
|
type PortConfig struct {
|
||||||
Port int `json:"port"`
|
Port int `json:"port"`
|
||||||
Protocol string `json:"protocol"` // tcp or udp
|
Protocol string `json:"protocol"` // tcp or udp
|
||||||
Timeout int `json:"timeout "` // second
|
Timeout int `json:"timeout"` // second
|
||||||
}
|
}
|
||||||
|
|
||||||
type ProcConfig struct {
|
type ProcConfig struct {
|
||||||
|
@ -41,7 +41,7 @@ type ScriptConfig struct {
|
||||||
Params string `json:"params"`
|
Params string `json:"params"`
|
||||||
Stdin string `json:"stdin"`
|
Stdin string `json:"stdin"`
|
||||||
Env map[string]string `json:"env"`
|
Env map[string]string `json:"env"`
|
||||||
Timeout int `json:"timeout "` // second
|
Timeout int `json:"timeout"` // second
|
||||||
}
|
}
|
||||||
|
|
||||||
type LogConfig struct {
|
type LogConfig struct {
|
||||||
|
|
|
@ -15,6 +15,9 @@ type HistoryAlertEvent struct {
|
||||||
RuleId int64 `json:"rule_id"`
|
RuleId int64 `json:"rule_id"`
|
||||||
RuleName string `json:"rule_name"`
|
RuleName string `json:"rule_name"`
|
||||||
RuleNote string `json:"rule_note"`
|
RuleNote string `json:"rule_note"`
|
||||||
|
ProcessorUid int64 `json:"processor_uid"`
|
||||||
|
ProcessorObj User `json:"processor_user_objs" xorm:"-"`
|
||||||
|
EventNote string `json:"event_note"`
|
||||||
HashId string `json:"hash_id"` // 唯一标识
|
HashId string `json:"hash_id"` // 唯一标识
|
||||||
IsPromePull int `json:"is_prome_pull"` // 代表是否是prometheus pull告警,为1时前端使用 ReadableExpression 拉取最近1小时数据
|
IsPromePull int `json:"is_prome_pull"` // 代表是否是prometheus pull告警,为1时前端使用 ReadableExpression 拉取最近1小时数据
|
||||||
ResClasspaths string `json:"res_classpaths"`
|
ResClasspaths string `json:"res_classpaths"`
|
||||||
|
@ -79,6 +82,14 @@ func (hae *HistoryAlertEvent) FillObjs() error {
|
||||||
hae.NotifyUserObjs = users
|
hae.NotifyUserObjs = users
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if hae.ProcessorUid != 0 {
|
||||||
|
processor, err := UserGetById(hae.ProcessorUid)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
hae.ProcessorObj = *processor
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
39
sql/n9e.sql
39
sql/n9e.sql
|
@ -99,6 +99,7 @@ insert into `role_operation`(role_name, operation) values('Standard', 'alert_rul
|
||||||
insert into `role_operation`(role_name, operation) values('Standard', 'alert_rule_modify');
|
insert into `role_operation`(role_name, operation) values('Standard', 'alert_rule_modify');
|
||||||
insert into `role_operation`(role_name, operation) values('Standard', 'alert_rule_delete');
|
insert into `role_operation`(role_name, operation) values('Standard', 'alert_rule_delete');
|
||||||
insert into `role_operation`(role_name, operation) values('Standard', 'alert_event_delete');
|
insert into `role_operation`(role_name, operation) values('Standard', 'alert_event_delete');
|
||||||
|
insert into `role_operation`(role_name, operation) values('Standard', 'alert_event_modify');
|
||||||
insert into `role_operation`(role_name, operation) values('Standard', 'collect_rule_create');
|
insert into `role_operation`(role_name, operation) values('Standard', 'collect_rule_create');
|
||||||
insert into `role_operation`(role_name, operation) values('Standard', 'collect_rule_modify');
|
insert into `role_operation`(role_name, operation) values('Standard', 'collect_rule_modify');
|
||||||
insert into `role_operation`(role_name, operation) values('Standard', 'collect_rule_delete');
|
insert into `role_operation`(role_name, operation) values('Standard', 'collect_rule_delete');
|
||||||
|
@ -295,6 +296,8 @@ CREATE TABLE `alert_event` (
|
||||||
`rule_id` bigint unsigned not null,
|
`rule_id` bigint unsigned not null,
|
||||||
`rule_name` varchar(255) not null,
|
`rule_name` varchar(255) not null,
|
||||||
`rule_note` varchar(512) not null default 'alert rule note',
|
`rule_note` varchar(512) not null default 'alert rule note',
|
||||||
|
`processor_uid` bigint NOT NULL default 0,
|
||||||
|
`event_note` varchar(512) not null default 'alert event note',
|
||||||
`res_classpaths` varchar(1024) not null default '' comment 'belong classpaths',
|
`res_classpaths` varchar(1024) not null default '' comment 'belong classpaths',
|
||||||
`priority` tinyint(1) not null,
|
`priority` tinyint(1) not null,
|
||||||
`status` tinyint(1) not null,
|
`status` tinyint(1) not null,
|
||||||
|
@ -314,24 +317,26 @@ CREATE TABLE `alert_event` (
|
||||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
||||||
|
|
||||||
CREATE TABLE `history_alert_event` (
|
CREATE TABLE `history_alert_event` (
|
||||||
`id` bigint unsigned NOT NULL AUTO_INCREMENT,
|
`id` bigint unsigned not null AUTO_INCREMENT,
|
||||||
`hash_id` varchar(255) NOT NULL COMMENT 'rule_id + point_pk',
|
`hash_id` varchar(255) not null COMMENT 'rule_id + point_pk',
|
||||||
`rule_id` bigint unsigned NOT NULL,
|
`rule_id` bigint unsigned not null,
|
||||||
`rule_name` varchar(255) NOT NULL,
|
`rule_name` varchar(255) not null,
|
||||||
`rule_note` varchar(512) NOT NULL DEFAULT 'alert rule note',
|
`rule_note` varchar(512) not null default 'alert rule note',
|
||||||
`res_classpaths` varchar(1024) NOT NULL DEFAULT '' COMMENT 'belong classpaths',
|
`processor_uid` bigint not null default 0,
|
||||||
`priority` tinyint(1) NOT NULL,
|
`event_note` varchar(512) not null default 'alert event note',
|
||||||
`status` tinyint(1) NOT NULL,
|
`res_classpaths` varchar(1024) not null default '' COMMENT 'belong classpaths',
|
||||||
`is_prome_pull` tinyint(1) NOT NULL,
|
`priority` tinyint(1) not null,
|
||||||
`is_recovery` tinyint(1) NOT NULL,
|
`status` tinyint(1) not null,
|
||||||
|
`is_prome_pull` tinyint(1) not null,
|
||||||
|
`is_recovery` tinyint(1) not null,
|
||||||
`history_points` text COMMENT 'metric, history points',
|
`history_points` text COMMENT 'metric, history points',
|
||||||
`trigger_time` bigint NOT NULL,
|
`trigger_time` bigint not null,
|
||||||
`notify_channels` varchar(255) NOT NULL DEFAULT '',
|
`notify_channels` varchar(255) not null default '',
|
||||||
`notify_groups` varchar(255) NOT NULL DEFAULT '',
|
`notify_groups` varchar(255) not null default '',
|
||||||
`notify_users` varchar(255) NOT NULL DEFAULT '',
|
`notify_users` varchar(255) not null default '',
|
||||||
`runbook_url` varchar(255) DEFAULT NULL,
|
`runbook_url` varchar(255) default NULL,
|
||||||
`readable_expression` varchar(1024) NOT NULL COMMENT 'e.g. mem.bytes.used.percent(all,60s) > 0',
|
`readable_expression` varchar(1024) not null COMMENT 'e.g. mem.bytes.used.percent(all,60s) > 0',
|
||||||
`tags` varchar(1024) NOT NULL DEFAULT 'merge data_tags rule_tags and res_tags',
|
`tags` varchar(1024) not null default 'merge data_tags rule_tags and res_tags',
|
||||||
PRIMARY KEY (`id`),
|
PRIMARY KEY (`id`),
|
||||||
KEY `hash_id` (`hash_id`),
|
KEY `hash_id` (`hash_id`),
|
||||||
KEY `rule_id` (`rule_id`),
|
KEY `rule_id` (`rule_id`),
|
||||||
|
|
|
@ -153,6 +153,11 @@ func changeCollectRule(rule *models.CollectRule) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
tags := strings.Fields(rule.AppendTags)
|
||||||
|
for i := 0; i < len(tags); i++ {
|
||||||
|
tags[i] = strings.Replace(tags[i], "=", ":", 1)
|
||||||
|
}
|
||||||
|
|
||||||
config := ScriptCollectFormat{
|
config := ScriptCollectFormat{
|
||||||
Instances: []struct {
|
Instances: []struct {
|
||||||
MinCollectionInterval int `json:"min_collection_interval,omitempty"`
|
MinCollectionInterval int `json:"min_collection_interval,omitempty"`
|
||||||
|
@ -162,6 +167,7 @@ func changeCollectRule(rule *models.CollectRule) error {
|
||||||
Env map[string]string `json:"env"`
|
Env map[string]string `json:"env"`
|
||||||
Stdin string `json:"stdin"`
|
Stdin string `json:"stdin"`
|
||||||
Timeout int `json:"timeout"`
|
Timeout int `json:"timeout"`
|
||||||
|
Tags []string `json:"tags,omitempty"`
|
||||||
}{{
|
}{{
|
||||||
MinCollectionInterval: rule.Step,
|
MinCollectionInterval: rule.Step,
|
||||||
FilePath: conf.Path,
|
FilePath: conf.Path,
|
||||||
|
@ -169,6 +175,7 @@ func changeCollectRule(rule *models.CollectRule) error {
|
||||||
Env: conf.Env,
|
Env: conf.Env,
|
||||||
Stdin: conf.Stdin,
|
Stdin: conf.Stdin,
|
||||||
Timeout: conf.Timeout,
|
Timeout: conf.Timeout,
|
||||||
|
Tags: tags,
|
||||||
}},
|
}},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -257,6 +264,7 @@ type ScriptCollectFormat struct {
|
||||||
Env map[string]string `json:"env"`
|
Env map[string]string `json:"env"`
|
||||||
Stdin string `json:"stdin"`
|
Stdin string `json:"stdin"`
|
||||||
Timeout int `json:"timeout"`
|
Timeout int `json:"timeout"`
|
||||||
|
Tags []string `json:"tags,omitempty"`
|
||||||
} `json:"instances"`
|
} `json:"instances"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue