Merge branch 'master' of github.com:didi/nightingale
This commit is contained in:
commit
d92ca5f2a9
|
@ -4,16 +4,19 @@ about: Report a bug encountered while operating Nightingale
|
|||
labels: kind/bug
|
||||
---
|
||||
|
||||
**What happened**:
|
||||
**问题现象**:
|
||||
|
||||
**What you expected to happen**:
|
||||
|
||||
**How to reproduce it (as minimally and precisely as possible)**:
|
||||
**复现方法**:
|
||||
|
||||
**Anything else we need to know?**:
|
||||
|
||||
**Environment**:
|
||||
- Nightingale Version (`./n9e-rdb -v`):
|
||||
- OS Version (`uname -a`):
|
||||
- Logs (stdout/error/warning/http.request/http.response):
|
||||
- Others:
|
||||
**环境信息**:
|
||||
|
||||
- 夜莺服务端版本 (通过`./n9e-server -v`可得知版本):
|
||||
- 夜莺客户端版本 (通过`./n9e-agentd -v`可得知版本):
|
||||
- 操作系统版本 (通过`uname -a`可得知OS版本):
|
||||
|
||||
**日志线索**:
|
||||
|
||||
*日志分两部分,一个是logs目录下,重点查看ERROR日志;另一部分是stdout,如果是systemd托管的,可以通过 `journalctl -u <n9e-server|n9e-agentd> -f` 查看*
|
||||
|
||||
|
|
|
@ -86,7 +86,6 @@ func configRoutes(r *gin.Engine) {
|
|||
pages.DELETE("/user-group/:id", login(), userGroupDel)
|
||||
|
||||
pages.GET("/classpaths", login(), classpathListGets)
|
||||
pages.GET("/classpaths/tree", login(), classpathListNodeGets)
|
||||
pages.GET("/classpaths/tree-node/:id", login(), classpathListNodeGetsById)
|
||||
pages.POST("/classpaths", login(), classpathAdd)
|
||||
pages.PUT("/classpath/:id", login(), classpathPut)
|
||||
|
@ -159,6 +158,7 @@ func configRoutes(r *gin.Engine) {
|
|||
pages.DELETE("/alert-events", login(), alertEventsDel)
|
||||
pages.GET("/alert-event/:id", login(), alertEventGet)
|
||||
pages.DELETE("/alert-event/:id", login(), alertEventDel)
|
||||
pages.PUT("/alert-event/:id", login(), alertEventNotePut)
|
||||
|
||||
pages.GET("/history-alert-events", login(), historyAlertEventGets)
|
||||
pages.GET("/history-alert-event/:id", login(), historyAlertEventGet)
|
||||
|
@ -205,7 +205,7 @@ func configRoutes(r *gin.Engine) {
|
|||
|
||||
v1.GET("/can-do-op-by-name", login(), canDoOpByName)
|
||||
v1.GET("/can-do-op-by-token", login(), canDoOpByToken)
|
||||
|
||||
v1.GET("/get-user-by-name", login(), getUserByName)
|
||||
}
|
||||
|
||||
push := r.Group("/v1/n9e/series").Use(gzip.Gzip(gzip.DefaultCompression))
|
||||
|
|
|
@ -54,6 +54,20 @@ func alertEventGet(c *gin.Context) {
|
|||
renderData(c, ae, nil)
|
||||
}
|
||||
|
||||
// alertEventNoteForm is the JSON request body accepted by alertEventNotePut.
type alertEventNoteForm struct {
	// EventNote is the free-form note to attach to the alert event.
	EventNote string `json:"event_note"`
}
|
||||
|
||||
func alertEventNotePut(c *gin.Context) {
|
||||
var f alertEventNoteForm
|
||||
bind(c, &f)
|
||||
|
||||
me := loginUser(c).MustPerm("alert_event_modify")
|
||||
ae := AlertEvent(urlParamInt64(c, "id"))
|
||||
|
||||
renderMessage(c, models.AlertEventUpdateEventNote(ae.Id, ae.HashId, f.EventNote, me.Id))
|
||||
}
|
||||
|
||||
func alertEventDel(c *gin.Context) {
|
||||
loginUser(c).MustPerm("alert_event_delete")
|
||||
renderMessage(c, AlertEvent(urlParamInt64(c, "id")).Del())
|
||||
|
|
|
@ -90,3 +90,8 @@ func canDoOpByToken(c *gin.Context) {
|
|||
can, err := user.CanDo(queryStr(c, "op"))
|
||||
renderData(c, can, err)
|
||||
}
|
||||
|
||||
func getUserByName(c *gin.Context) {
|
||||
user, err := models.UserGetByUsername(queryStr(c, "name"))
|
||||
renderData(c, user, err)
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ func classpathListGets(c *gin.Context) {
|
|||
}, nil)
|
||||
}
|
||||
|
||||
//此api暂时不对外开放
|
||||
func classpathListNodeGets(c *gin.Context) {
|
||||
query := queryStr(c, "query", "")
|
||||
|
||||
|
|
|
@ -527,6 +527,12 @@ func sendEventIfNeed(status []bool, event *models.AlertEvent, stra *models.Alert
|
|||
func SendEvent(event *models.AlertEvent) {
|
||||
// update last event
|
||||
LastEvents.Set(event)
|
||||
ae, err := models.AlertEventGet("hash_id = ?", event.HashId)
|
||||
if err == nil && ae != nil {
|
||||
logger.Debugf("[event exists do not send again][type:%+v][event:%+v]", event.IsPromePull, event)
|
||||
return
|
||||
}
|
||||
|
||||
ok := EventQueue.PushFront(event)
|
||||
if !ok {
|
||||
logger.Errorf("push event:%v err", event)
|
||||
|
|
|
@ -17,6 +17,9 @@ type AlertEvent struct {
|
|||
RuleId int64 `json:"rule_id"`
|
||||
RuleName string `json:"rule_name"`
|
||||
RuleNote string `json:"rule_note"`
|
||||
ProcessorUid int64 `json:"processor_uid"`
|
||||
ProcessorObj User `json:"processor_user_obj" xorm:"-"`
|
||||
EventNote string `json:"event_note"`
|
||||
HashId string `json:"hash_id"` // 唯一标识
|
||||
IsPromePull int `json:"is_prome_pull"` // 代表是否是prometheus pull告警,为1时前端使用 ReadableExpression 拉取最近1小时数据
|
||||
LastSend bool `json:"last_sent" xorm:"-"` // true 代表上次发了,false代表还没发:给prometheus做for判断的
|
||||
|
@ -114,6 +117,14 @@ func (ae *AlertEvent) FillObjs() error {
|
|||
ae.NotifyUserObjs = users
|
||||
}
|
||||
|
||||
if ae.ProcessorUid != 0 {
|
||||
processor, err := UserGetById(ae.ProcessorUid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ae.ProcessorObj = *processor
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -245,6 +256,7 @@ func AlertEventGets(stime, etime int64, query string, status, priority int, limi
|
|||
func AlertEventGet(where string, args ...interface{}) (*AlertEvent, error) {
|
||||
var obj AlertEvent
|
||||
has, err := DB.Where(where, args...).Get(&obj)
|
||||
|
||||
if err != nil {
|
||||
logger.Errorf("mysql.error: query alert_event(%s)%+v fail: %s", where, args, err)
|
||||
return nil, internalServerError
|
||||
|
@ -256,3 +268,24 @@ func AlertEventGet(where string, args ...interface{}) (*AlertEvent, error) {
|
|||
|
||||
return &obj, nil
|
||||
}
|
||||
|
||||
func AlertEventUpdateEventNote(id int64, hashId string, note string, uid int64) error {
|
||||
session := DB.NewSession()
|
||||
defer session.Close()
|
||||
|
||||
if err := session.Begin(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := session.Exec("UPDATE alert_event SET event_note = ?, processor_uid = ? WHERE id = ?", note, uid, id); err != nil {
|
||||
logger.Errorf("mysql.error: update alert_event event_note fail: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err := session.Exec("UPDATE history_alert_event SET event_note = ?, processor_uid = ? WHERE hash_id = ? ORDER BY id DESC LIMIT 1", note, uid, hashId); err != nil {
|
||||
logger.Errorf("mysql.error: update history_alert_event event_note fail: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
return session.Commit()
|
||||
}
|
||||
|
|
|
@ -28,7 +28,7 @@ type CollectRule struct {
|
|||
// PortConfig describes a port-probe collect rule.
type PortConfig struct {
	// Port is the port number to probe.
	Port int `json:"port"`
	// Protocol is the transport to probe with: "tcp" or "udp".
	Protocol string `json:"protocol"`
	// Timeout is the probe timeout in seconds. The JSON key must be exactly
	// "timeout": a trailing space inside the tag would become part of the key
	// and the field would never be populated from a normal payload.
	Timeout int `json:"timeout"`
}
|
||||
|
||||
type ProcConfig struct {
|
||||
|
@ -41,7 +41,7 @@ type ScriptConfig struct {
|
|||
Params string `json:"params"`
|
||||
Stdin string `json:"stdin"`
|
||||
Env map[string]string `json:"env"`
|
||||
Timeout int `json:"timeout "` // second
|
||||
Timeout int `json:"timeout"` // second
|
||||
}
|
||||
|
||||
type LogConfig struct {
|
||||
|
|
|
@ -15,6 +15,9 @@ type HistoryAlertEvent struct {
|
|||
RuleId int64 `json:"rule_id"`
|
||||
RuleName string `json:"rule_name"`
|
||||
RuleNote string `json:"rule_note"`
|
||||
ProcessorUid int64 `json:"processor_uid"`
|
||||
ProcessorObj User `json:"processor_user_objs" xorm:"-"`
|
||||
EventNote string `json:"event_note"`
|
||||
HashId string `json:"hash_id"` // 唯一标识
|
||||
IsPromePull int `json:"is_prome_pull"` // 代表是否是prometheus pull告警,为1时前端使用 ReadableExpression 拉取最近1小时数据
|
||||
ResClasspaths string `json:"res_classpaths"`
|
||||
|
@ -79,6 +82,14 @@ func (hae *HistoryAlertEvent) FillObjs() error {
|
|||
hae.NotifyUserObjs = users
|
||||
}
|
||||
|
||||
if hae.ProcessorUid != 0 {
|
||||
processor, err := UserGetById(hae.ProcessorUid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
hae.ProcessorObj = *processor
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
39
sql/n9e.sql
39
sql/n9e.sql
|
@ -99,6 +99,7 @@ insert into `role_operation`(role_name, operation) values('Standard', 'alert_rul
|
|||
insert into `role_operation`(role_name, operation) values('Standard', 'alert_rule_modify');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', 'alert_rule_delete');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', 'alert_event_delete');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', 'alert_event_modify');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', 'collect_rule_create');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', 'collect_rule_modify');
|
||||
insert into `role_operation`(role_name, operation) values('Standard', 'collect_rule_delete');
|
||||
|
@ -295,6 +296,8 @@ CREATE TABLE `alert_event` (
|
|||
`rule_id` bigint unsigned not null,
|
||||
`rule_name` varchar(255) not null,
|
||||
`rule_note` varchar(512) not null default 'alert rule note',
|
||||
`processor_uid` bigint NOT NULL default 0,
|
||||
`event_note` varchar(512) not null default 'alert event note',
|
||||
`res_classpaths` varchar(1024) not null default '' comment 'belong classpaths',
|
||||
`priority` tinyint(1) not null,
|
||||
`status` tinyint(1) not null,
|
||||
|
@ -314,24 +317,26 @@ CREATE TABLE `alert_event` (
|
|||
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
||||
|
||||
CREATE TABLE `history_alert_event` (
|
||||
`id` bigint unsigned NOT NULL AUTO_INCREMENT,
|
||||
`hash_id` varchar(255) NOT NULL COMMENT 'rule_id + point_pk',
|
||||
`rule_id` bigint unsigned NOT NULL,
|
||||
`rule_name` varchar(255) NOT NULL,
|
||||
`rule_note` varchar(512) NOT NULL DEFAULT 'alert rule note',
|
||||
`res_classpaths` varchar(1024) NOT NULL DEFAULT '' COMMENT 'belong classpaths',
|
||||
`priority` tinyint(1) NOT NULL,
|
||||
`status` tinyint(1) NOT NULL,
|
||||
`is_prome_pull` tinyint(1) NOT NULL,
|
||||
`is_recovery` tinyint(1) NOT NULL,
|
||||
`id` bigint unsigned not null AUTO_INCREMENT,
|
||||
`hash_id` varchar(255) not null COMMENT 'rule_id + point_pk',
|
||||
`rule_id` bigint unsigned not null,
|
||||
`rule_name` varchar(255) not null,
|
||||
`rule_note` varchar(512) not null default 'alert rule note',
|
||||
`processor_uid` bigint not null default 0,
|
||||
`event_note` varchar(512) not null default 'alert event note',
|
||||
`res_classpaths` varchar(1024) not null default '' COMMENT 'belong classpaths',
|
||||
`priority` tinyint(1) not null,
|
||||
`status` tinyint(1) not null,
|
||||
`is_prome_pull` tinyint(1) not null,
|
||||
`is_recovery` tinyint(1) not null,
|
||||
`history_points` text COMMENT 'metric, history points',
|
||||
`trigger_time` bigint NOT NULL,
|
||||
`notify_channels` varchar(255) NOT NULL DEFAULT '',
|
||||
`notify_groups` varchar(255) NOT NULL DEFAULT '',
|
||||
`notify_users` varchar(255) NOT NULL DEFAULT '',
|
||||
`runbook_url` varchar(255) DEFAULT NULL,
|
||||
`readable_expression` varchar(1024) NOT NULL COMMENT 'e.g. mem.bytes.used.percent(all,60s) > 0',
|
||||
`tags` varchar(1024) NOT NULL DEFAULT 'merge data_tags rule_tags and res_tags',
|
||||
`trigger_time` bigint not null,
|
||||
`notify_channels` varchar(255) not null default '',
|
||||
`notify_groups` varchar(255) not null default '',
|
||||
`notify_users` varchar(255) not null default '',
|
||||
`runbook_url` varchar(255) default NULL,
|
||||
`readable_expression` varchar(1024) not null COMMENT 'e.g. mem.bytes.used.percent(all,60s) > 0',
|
||||
`tags` varchar(1024) not null default 'merge data_tags rule_tags and res_tags',
|
||||
PRIMARY KEY (`id`),
|
||||
KEY `hash_id` (`hash_id`),
|
||||
KEY `rule_id` (`rule_id`),
|
||||
|
|
|
@ -153,6 +153,11 @@ func changeCollectRule(rule *models.CollectRule) error {
|
|||
return err
|
||||
}
|
||||
|
||||
tags := strings.Fields(rule.AppendTags)
|
||||
for i := 0; i < len(tags); i++ {
|
||||
tags[i] = strings.Replace(tags[i], "=", ":", 1)
|
||||
}
|
||||
|
||||
config := ScriptCollectFormat{
|
||||
Instances: []struct {
|
||||
MinCollectionInterval int `json:"min_collection_interval,omitempty"`
|
||||
|
@ -162,6 +167,7 @@ func changeCollectRule(rule *models.CollectRule) error {
|
|||
Env map[string]string `json:"env"`
|
||||
Stdin string `json:"stdin"`
|
||||
Timeout int `json:"timeout"`
|
||||
Tags []string `json:"tags,omitempty"`
|
||||
}{{
|
||||
MinCollectionInterval: rule.Step,
|
||||
FilePath: conf.Path,
|
||||
|
@ -169,6 +175,7 @@ func changeCollectRule(rule *models.CollectRule) error {
|
|||
Env: conf.Env,
|
||||
Stdin: conf.Stdin,
|
||||
Timeout: conf.Timeout,
|
||||
Tags: tags,
|
||||
}},
|
||||
}
|
||||
|
||||
|
@ -257,6 +264,7 @@ type ScriptCollectFormat struct {
|
|||
Env map[string]string `json:"env"`
|
||||
Stdin string `json:"stdin"`
|
||||
Timeout int `json:"timeout"`
|
||||
Tags []string `json:"tags,omitempty"`
|
||||
} `json:"instances"`
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue