Merge branch 'master' of github.com:didi/nightingale

This commit is contained in:
UlricQin 2021-08-24 10:35:17 +08:00
commit d92ca5f2a9
11 changed files with 116 additions and 30 deletions

View File

@ -4,16 +4,19 @@ about: Report a bug encountered while operating Nightingale
labels: kind/bug
---
**What happened**:
**问题现象**:
**What you expected to happen**:
**How to reproduce it (as minimally and precisely as possible)**:
**复现方法**:
**Anything else we need to know?**:
**Environment**:
- Nightingale Version (`./n9e-rdb -v`):
- OS Version (`uname -a`):
- Logs (stdout/error/warning/http.request/http.response):
- Others:
**环境信息**:
- 夜莺服务端版本 (通过`./n9e-server -v`可得知版本):
- 夜莺客户端版本 (通过`./n9e-agentd -v`可得知版本):
- 操作系统版本 (通过`uname -a`可得知OS版本):
**日志线索**:
*日志分两部分：一个是logs目录下，重点查看ERROR日志；另一部分是stdout，如果是systemd托管的，可以通过 `journalctl -u <n9e-server|n9e-agentd> -f` 查看*

View File

@ -86,7 +86,6 @@ func configRoutes(r *gin.Engine) {
pages.DELETE("/user-group/:id", login(), userGroupDel)
pages.GET("/classpaths", login(), classpathListGets)
pages.GET("/classpaths/tree", login(), classpathListNodeGets)
pages.GET("/classpaths/tree-node/:id", login(), classpathListNodeGetsById)
pages.POST("/classpaths", login(), classpathAdd)
pages.PUT("/classpath/:id", login(), classpathPut)
@ -159,6 +158,7 @@ func configRoutes(r *gin.Engine) {
pages.DELETE("/alert-events", login(), alertEventsDel)
pages.GET("/alert-event/:id", login(), alertEventGet)
pages.DELETE("/alert-event/:id", login(), alertEventDel)
pages.PUT("/alert-event/:id", login(), alertEventNotePut)
pages.GET("/history-alert-events", login(), historyAlertEventGets)
pages.GET("/history-alert-event/:id", login(), historyAlertEventGet)
@ -205,7 +205,7 @@ func configRoutes(r *gin.Engine) {
v1.GET("/can-do-op-by-name", login(), canDoOpByName)
v1.GET("/can-do-op-by-token", login(), canDoOpByToken)
v1.GET("/get-user-by-name", login(), getUserByName)
}
push := r.Group("/v1/n9e/series").Use(gzip.Gzip(gzip.DefaultCompression))

View File

@ -54,6 +54,20 @@ func alertEventGet(c *gin.Context) {
renderData(c, ae, nil)
}
// alertEventNoteForm is the request body bound by alertEventNotePut.
type alertEventNoteForm struct {
	// EventNote is the note text, read from the "event_note" JSON key.
	EventNote string `json:"event_note"`
}
// alertEventNotePut updates the note of the alert event identified by the
// "id" URL parameter. The caller must hold the "alert_event_modify"
// permission; the caller's user id is passed along with the note.
func alertEventNotePut(c *gin.Context) {
	var form alertEventNoteForm
	bind(c, &form)

	operator := loginUser(c).MustPerm("alert_event_modify")
	event := AlertEvent(urlParamInt64(c, "id"))

	// The model call receives both the event id and its hash id.
	err := models.AlertEventUpdateEventNote(event.Id, event.HashId, form.EventNote, operator.Id)
	renderMessage(c, err)
}
func alertEventDel(c *gin.Context) {
loginUser(c).MustPerm("alert_event_delete")
renderMessage(c, AlertEvent(urlParamInt64(c, "id")).Del())

View File

@ -90,3 +90,8 @@ func canDoOpByToken(c *gin.Context) {
can, err := user.CanDo(queryStr(c, "op"))
renderData(c, can, err)
}
// getUserByName looks up a user by the "name" query parameter and renders
// the lookup result together with any error.
func getUserByName(c *gin.Context) {
	name := queryStr(c, "name")
	found, err := models.UserGetByUsername(name)
	renderData(c, found, err)
}

View File

@ -24,6 +24,7 @@ func classpathListGets(c *gin.Context) {
}, nil)
}
// This API is not exposed externally for now.
func classpathListNodeGets(c *gin.Context) {
query := queryStr(c, "query", "")

View File

@ -527,6 +527,12 @@ func sendEventIfNeed(status []bool, event *models.AlertEvent, stra *models.Alert
func SendEvent(event *models.AlertEvent) {
// update last event
LastEvents.Set(event)
ae, err := models.AlertEventGet("hash_id = ?", event.HashId)
if err == nil && ae != nil {
logger.Debugf("[event exists do not send again][type:%+v][event:%+v]", event.IsPromePull, event)
return
}
ok := EventQueue.PushFront(event)
if !ok {
logger.Errorf("push event:%v err", event)

View File

@ -17,6 +17,9 @@ type AlertEvent struct {
RuleId int64 `json:"rule_id"`
RuleName string `json:"rule_name"`
RuleNote string `json:"rule_note"`
ProcessorUid int64 `json:"processor_uid"`
ProcessorObj User `json:"processor_user_obj" xorm:"-"`
EventNote string `json:"event_note"`
HashId string `json:"hash_id"` // 唯一标识
IsPromePull int `json:"is_prome_pull"` // 代表是否是prometheus pull告警为1时前端使用 ReadableExpression 拉取最近1小时数据
LastSend bool `json:"last_sent" xorm:"-"` // true 代表上次发了false代表还没发:给prometheus做for判断的
@ -114,6 +117,14 @@ func (ae *AlertEvent) FillObjs() error {
ae.NotifyUserObjs = users
}
if ae.ProcessorUid != 0 {
processor, err := UserGetById(ae.ProcessorUid)
if err != nil {
return err
}
ae.ProcessorObj = *processor
}
return nil
}
@ -245,6 +256,7 @@ func AlertEventGets(stime, etime int64, query string, status, priority int, limi
func AlertEventGet(where string, args ...interface{}) (*AlertEvent, error) {
var obj AlertEvent
has, err := DB.Where(where, args...).Get(&obj)
if err != nil {
logger.Errorf("mysql.error: query alert_event(%s)%+v fail: %s", where, args, err)
return nil, internalServerError
@ -256,3 +268,24 @@ func AlertEventGet(where string, args ...interface{}) (*AlertEvent, error) {
return &obj, nil
}
// AlertEventUpdateEventNote writes a note and the processing user's id to an
// alert event and to its latest history record inside a single transaction.
//
// id selects the alert_event row. hashId selects history_alert_event rows, of
// which only the one with the highest id is updated. note and uid are stored
// in the event_note and processor_uid columns of both tables.
//
// It returns the first error hit while beginning, executing or committing the
// transaction; on an execution failure the transaction is rolled back.
func AlertEventUpdateEventNote(id int64, hashId string, note string, uid int64) error {
	session := DB.NewSession()
	defer session.Close()

	if err := session.Begin(); err != nil {
		logger.Errorf("mysql.error: begin event_note transaction fail: %v", err)
		return err
	}

	// rollback undoes the pending statements explicitly so a failed update
	// never depends on session cleanup to be discarded.
	rollback := func() {
		if rbErr := session.Rollback(); rbErr != nil {
			logger.Errorf("mysql.error: rollback event_note transaction fail: %v", rbErr)
		}
	}

	if _, err := session.Exec("UPDATE alert_event SET event_note = ?, processor_uid = ? WHERE id = ?", note, uid, id); err != nil {
		logger.Errorf("mysql.error: update alert_event event_note fail: %v", err)
		rollback()
		return err
	}

	// Only the newest history row for this hash_id carries the note.
	if _, err := session.Exec("UPDATE history_alert_event SET event_note = ?, processor_uid = ? WHERE hash_id = ? ORDER BY id DESC LIMIT 1", note, uid, hashId); err != nil {
		logger.Errorf("mysql.error: update history_alert_event event_note fail: %v", err)
		rollback()
		return err
	}

	if err := session.Commit(); err != nil {
		logger.Errorf("mysql.error: commit event_note transaction fail: %v", err)
		return err
	}
	return nil
}

View File

@ -28,7 +28,7 @@ type CollectRule struct {
// PortConfig holds the port, protocol and timeout settings decoded from JSON.
type PortConfig struct {
	Port     int    `json:"port"`
	Protocol string `json:"protocol"` // tcp or udp
	// Timeout is in seconds. The JSON key is "timeout" with no trailing
	// space, so it matches the key clients actually send.
	Timeout int `json:"timeout"`
}
type ProcConfig struct {
@ -41,7 +41,7 @@ type ScriptConfig struct {
Params string `json:"params"`
Stdin string `json:"stdin"`
Env map[string]string `json:"env"`
Timeout int `json:"timeout "` // second
Timeout int `json:"timeout"` // second
}
type LogConfig struct {

View File

@ -15,6 +15,9 @@ type HistoryAlertEvent struct {
RuleId int64 `json:"rule_id"`
RuleName string `json:"rule_name"`
RuleNote string `json:"rule_note"`
ProcessorUid int64 `json:"processor_uid"`
ProcessorObj User `json:"processor_user_objs" xorm:"-"`
EventNote string `json:"event_note"`
HashId string `json:"hash_id"` // 唯一标识
IsPromePull int `json:"is_prome_pull"` // 代表是否是prometheus pull告警为1时前端使用 ReadableExpression 拉取最近1小时数据
ResClasspaths string `json:"res_classpaths"`
@ -79,6 +82,14 @@ func (hae *HistoryAlertEvent) FillObjs() error {
hae.NotifyUserObjs = users
}
if hae.ProcessorUid != 0 {
processor, err := UserGetById(hae.ProcessorUid)
if err != nil {
return err
}
hae.ProcessorObj = *processor
}
return nil
}

View File

@ -99,6 +99,7 @@ insert into `role_operation`(role_name, operation) values('Standard', 'alert_rul
-- Alert-rule, alert-event and collect-rule operations granted to the 'Standard' role.
insert into `role_operation`(role_name, operation) values('Standard', 'alert_rule_modify');
insert into `role_operation`(role_name, operation) values('Standard', 'alert_rule_delete');
insert into `role_operation`(role_name, operation) values('Standard', 'alert_event_delete');
insert into `role_operation`(role_name, operation) values('Standard', 'alert_event_modify');
insert into `role_operation`(role_name, operation) values('Standard', 'collect_rule_create');
insert into `role_operation`(role_name, operation) values('Standard', 'collect_rule_modify');
insert into `role_operation`(role_name, operation) values('Standard', 'collect_rule_delete');
@ -295,6 +296,8 @@ CREATE TABLE `alert_event` (
`rule_id` bigint unsigned not null,
`rule_name` varchar(255) not null,
`rule_note` varchar(512) not null default 'alert rule note',
`processor_uid` bigint NOT NULL default 0,
`event_note` varchar(512) not null default 'alert event note',
`res_classpaths` varchar(1024) not null default '' comment 'belong classpaths',
`priority` tinyint(1) not null,
`status` tinyint(1) not null,
@ -314,24 +317,26 @@ CREATE TABLE `alert_event` (
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
CREATE TABLE `history_alert_event` (
`id` bigint unsigned NOT NULL AUTO_INCREMENT,
`hash_id` varchar(255) NOT NULL COMMENT 'rule_id + point_pk',
`rule_id` bigint unsigned NOT NULL,
`rule_name` varchar(255) NOT NULL,
`rule_note` varchar(512) NOT NULL DEFAULT 'alert rule note',
`res_classpaths` varchar(1024) NOT NULL DEFAULT '' COMMENT 'belong classpaths',
`priority` tinyint(1) NOT NULL,
`status` tinyint(1) NOT NULL,
`is_prome_pull` tinyint(1) NOT NULL,
`is_recovery` tinyint(1) NOT NULL,
`id` bigint unsigned not null AUTO_INCREMENT,
`hash_id` varchar(255) not null COMMENT 'rule_id + point_pk',
`rule_id` bigint unsigned not null,
`rule_name` varchar(255) not null,
`rule_note` varchar(512) not null default 'alert rule note',
`processor_uid` bigint not null default 0,
`event_note` varchar(512) not null default 'alert event note',
`res_classpaths` varchar(1024) not null default '' COMMENT 'belong classpaths',
`priority` tinyint(1) not null,
`status` tinyint(1) not null,
`is_prome_pull` tinyint(1) not null,
`is_recovery` tinyint(1) not null,
`history_points` text COMMENT 'metric, history points',
`trigger_time` bigint NOT NULL,
`notify_channels` varchar(255) NOT NULL DEFAULT '',
`notify_groups` varchar(255) NOT NULL DEFAULT '',
`notify_users` varchar(255) NOT NULL DEFAULT '',
`runbook_url` varchar(255) DEFAULT NULL,
`readable_expression` varchar(1024) NOT NULL COMMENT 'e.g. mem.bytes.used.percent(all,60s) > 0',
`tags` varchar(1024) NOT NULL DEFAULT 'merge data_tags rule_tags and res_tags',
`trigger_time` bigint not null,
`notify_channels` varchar(255) not null default '',
`notify_groups` varchar(255) not null default '',
`notify_users` varchar(255) not null default '',
`runbook_url` varchar(255) default NULL,
`readable_expression` varchar(1024) not null COMMENT 'e.g. mem.bytes.used.percent(all,60s) > 0',
`tags` varchar(1024) not null default 'merge data_tags rule_tags and res_tags',
PRIMARY KEY (`id`),
KEY `hash_id` (`hash_id`),
KEY `rule_id` (`rule_id`),

View File

@ -153,6 +153,11 @@ func changeCollectRule(rule *models.CollectRule) error {
return err
}
tags := strings.Fields(rule.AppendTags)
for i := 0; i < len(tags); i++ {
tags[i] = strings.Replace(tags[i], "=", ":", 1)
}
config := ScriptCollectFormat{
Instances: []struct {
MinCollectionInterval int `json:"min_collection_interval,omitempty"`
@ -162,6 +167,7 @@ func changeCollectRule(rule *models.CollectRule) error {
Env map[string]string `json:"env"`
Stdin string `json:"stdin"`
Timeout int `json:"timeout"`
Tags []string `json:"tags,omitempty"`
}{{
MinCollectionInterval: rule.Step,
FilePath: conf.Path,
@ -169,6 +175,7 @@ func changeCollectRule(rule *models.CollectRule) error {
Env: conf.Env,
Stdin: conf.Stdin,
Timeout: conf.Timeout,
Tags: tags,
}},
}
@ -257,6 +264,7 @@ type ScriptCollectFormat struct {
Env map[string]string `json:"env"`
Stdin string `json:"stdin"`
Timeout int `json:"timeout"`
Tags []string `json:"tags,omitempty"`
} `json:"instances"`
}