diff --git a/dockerfiles/n9e/Dockerfile b/dockerfiles/n9e/Dockerfile index 8a846109..acff95d2 100644 --- a/dockerfiles/n9e/Dockerfile +++ b/dockerfiles/n9e/Dockerfile @@ -1,6 +1,6 @@ FROM golang as builder WORKDIR /home/n9e/builder -ENV GOPROXY=https://mirrors.aliyun.com/goproxy/ +ENV GOPROXY=https://goproxy.cn,direct RUN git clone https://hub.fastgit.org/didi/nightingale.git RUN nightingale/control build diff --git a/etc/dict.json b/etc/dict.json index 9a30346d..2b057680 100644 --- a/etc/dict.json +++ b/etc/dict.json @@ -59,15 +59,6 @@ "cannot delete root user": "root用户不能删除", "user not found": "用户未找到", - "Repositories": "Repositories", - "List of repositories to monitor": "List of repositories to monitor", - "Access token": "Access token", - "Github API access token. Unauthenticated requests are limited to 60 per hour": "Github API access token. Unauthenticated requests are limited to 60 per hour", - "Enterprise base url": "Enterprise base url", - "Github API enterprise url. Github Enterprise accounts must specify their base url": "Github API enterprise url. 
Github Enterprise accounts must specify their base url", - "HTTP timeout": "HTTP timeout", - "Timeout for HTTP requests": "Timeout for HTTP requests", - "Unable to get captcha": "无法获得验证码", "Invalid captcha answer": "错误的验证码", "Username %s is invalid": "用户名 %s 不符合规范", diff --git a/etc/plugins/github.yml b/etc/plugins/github.yml new file mode 100644 index 00000000..65b8c083 --- /dev/null +++ b/etc/plugins/github.yml @@ -0,0 +1,9 @@ +mode: whitelist # whitelist(default),all +metrics: + - name: github_repository_forks + - name: github_repository_networks + - name: github_repository_open_issues + - name: github_repository_size + - name: github_repository_stars + - name: github_repository_subscribers + - name: github_repository_watchers diff --git a/etc/plugins/mongodb.yml b/etc/plugins/mongodb.yml index 6dd5467d..9dcf9fae 100644 --- a/etc/plugins/mongodb.yml +++ b/etc/plugins/mongodb.yml @@ -1 +1,183 @@ -mode: overlay +mode: whitelist # whitelist(default),all + +metrics: + - name: mongodb_assert_msg + type: COUNTER + - name: mongodb_assert_regular + type: COUNTER + - name: mongodb_assert_rollovers + type: COUNTER + - name: mongodb_assert_user + type: COUNTER + - name: mongodb_assert_warning + type: COUNTER + - name: mongodb_commands + type: COUNTER + - name: mongodb_count_command_failed + type: COUNTER + - name: mongodb_count_command_total + type: COUNTER + - name: mongodb_connections_available + - name: mongodb_connections_current + - name: mongodb_connections_total_created + type: COUNTER + +trash: + - name: mongodb_active_reads + type: COUNTER + - name: mongodb_active_writes + type: COUNTER + - name: mongodb_aggregate_command_failed + type: COUNTER + - name: mongodb_aggregate_command_total + type: COUNTER + - name: mongodb_available_reads + - name: mongodb_available_writes + - name: mongodb_col_stats_avg_obj_size + - name: mongodb_col_stats_count + - name: mongodb_col_stats_ok + - name: mongodb_col_stats_size + - name: mongodb_col_stats_storage_size + - name: 
mongodb_col_stats_total_index_size + - name: mongodb_commands_per_sec + - name: mongodb_cursor_no_timeout + - name: mongodb_cursor_no_timeout_count + - name: mongodb_cursor_pinned + - name: mongodb_cursor_pinned_count + - name: mongodb_cursor_timed_out + - name: mongodb_cursor_timed_out_count + - name: mongodb_cursor_total + - name: mongodb_cursor_total_count + - name: mongodb_db_stats_avg_obj_size + - name: mongodb_db_stats_collections + - name: mongodb_db_stats_data_size + - name: mongodb_db_stats_index_size + - name: mongodb_db_stats_indexes + - name: mongodb_db_stats_num_extents + - name: mongodb_db_stats_objects + - name: mongodb_db_stats_ok + - name: mongodb_db_stats_storage_size + - name: mongodb_delete_command_failed + type: COUNTER + - name: mongodb_delete_command_total + type: COUNTER + - name: mongodb_deletes + - name: mongodb_deletes_per_sec + - name: mongodb_distinct_command_failed + type: COUNTER + - name: mongodb_distinct_command_total + type: COUNTER + - name: mongodb_document_deleted + - name: mongodb_document_inserted + - name: mongodb_document_returned + - name: mongodb_document_updated + - name: mongodb_find_and_modify_command_failed + type: COUNTER + - name: mongodb_find_and_modify_command_total + type: COUNTER + - name: mongodb_find_command_failed + type: COUNTER + - name: mongodb_find_command_total + type: COUNTER + - name: mongodb_flushes + type: COUNTER + - name: mongodb_flushes_per_sec + - name: mongodb_flushes_total_time_ns + type: COUNTER + - name: mongodb_get_more_command_failed + type: COUNTER + - name: mongodb_get_more_command_total + type: COUNTER + - name: mongodb_getmores + - name: mongodb_getmores_per_sec + - name: mongodb_insert_command_failed + type: COUNTER + - name: mongodb_insert_command_total + type: COUNTER + - name: mongodb_inserts + - name: mongodb_inserts_per_sec + - name: mongodb_jumbo_chunks + - name: mongodb_latency_commands + type: COUNTER + - name: mongodb_latency_commands_count + type: COUNTER + - name: 
mongodb_latency_reads + - name: mongodb_latency_reads_count + - name: mongodb_latency_writes + - name: mongodb_latency_writes_count + - name: mongodb_net_in_bytes + - name: mongodb_net_in_bytes_count + - name: mongodb_net_out_bytes + - name: mongodb_net_out_bytes_count + - name: mongodb_open_connections + - name: mongodb_operation_scan_and_order + - name: mongodb_operation_write_conflicts + - name: mongodb_page_faults + type: COUNTER + - name: mongodb_percent_cache_dirty + - name: mongodb_percent_cache_used + - name: mongodb_resident_megabytes + - name: mongodb_storage_freelist_search_bucket_exhausted + - name: mongodb_storage_freelist_search_requests + - name: mongodb_storage_freelist_search_scanned + - name: mongodb_tcmalloc_central_cache_free_bytes + - name: mongodb_tcmalloc_current_allocated_bytes + - name: mongodb_tcmalloc_current_total_thread_cache_bytes + - name: mongodb_tcmalloc_heap_size + - name: mongodb_tcmalloc_max_total_thread_cache_bytes + - name: mongodb_tcmalloc_pageheap_commit_count + - name: mongodb_tcmalloc_pageheap_committed_bytes + - name: mongodb_tcmalloc_pageheap_decommit_count + - name: mongodb_tcmalloc_pageheap_free_bytes + - name: mongodb_tcmalloc_pageheap_reserve_count + - name: mongodb_tcmalloc_pageheap_scavenge_count + - name: mongodb_tcmalloc_pageheap_total_commit_bytes + - name: mongodb_tcmalloc_pageheap_total_decommit_bytes + - name: mongodb_tcmalloc_pageheap_total_reserve_bytes + - name: mongodb_tcmalloc_pageheap_unmapped_bytes + - name: mongodb_tcmalloc_spinlock_total_delay_ns + - name: mongodb_tcmalloc_thread_cache_free_bytes + - name: mongodb_tcmalloc_total_free_bytes + - name: mongodb_tcmalloc_transfer_cache_free_bytes + - name: mongodb_total_available + - name: mongodb_total_created + type: COUNTER + - name: mongodb_total_docs_scanned + - name: mongodb_total_in_use + - name: mongodb_total_keys_scanned + - name: mongodb_total_refreshing + - name: mongodb_total_tickets_reads + - name: mongodb_total_tickets_writes + - name: 
mongodb_ttl_deletes + - name: mongodb_ttl_deletes_per_sec + - name: mongodb_ttl_passes + - name: mongodb_ttl_passes_per_sec + - name: mongodb_update_command_failed + type: COUNTER + - name: mongodb_update_command_total + type: COUNTER + - name: mongodb_updates + - name: mongodb_updates_per_sec + - name: mongodb_uptime_ns + - name: mongodb_vsize_megabytes + - name: mongodb_wtcache_app_threads_page_read_count + type: COUNTER + - name: mongodb_wtcache_app_threads_page_read_time + type: COUNTER + - name: mongodb_wtcache_app_threads_page_write_count + type: COUNTER + - name: mongodb_wtcache_bytes_read_into + - name: mongodb_wtcache_bytes_written_from + - name: mongodb_wtcache_current_bytes + - name: mongodb_wtcache_internal_pages_evicted + - name: mongodb_wtcache_max_bytes_configured + - name: mongodb_wtcache_modified_pages_evicted + - name: mongodb_wtcache_pages_evicted_by_app_thread + - name: mongodb_wtcache_pages_queued_for_eviction + - name: mongodb_wtcache_pages_read_into + - name: mongodb_wtcache_pages_requested_from + - name: mongodb_wtcache_pages_written_from + - name: mongodb_wtcache_server_evicting_pages + - name: mongodb_wtcache_tracked_dirty_bytes + - name: mongodb_wtcache_unmodified_pages_evicted + - name: mongodb_wtcache_worker_thread_evictingpages diff --git a/etc/plugins/mysql.yml b/etc/plugins/mysql.yml index afa2298e..07f34de3 100644 --- a/etc/plugins/mysql.yml +++ b/etc/plugins/mysql.yml @@ -1,4 +1,4 @@ -mode: whitelist # whitelist(default),overlay +mode: whitelist # whitelist(default),all metrics: - name: mysql_queries type: COUNTER diff --git a/etc/plugins/redis.yml b/etc/plugins/redis.yml index 6dd5467d..45964710 100644 --- a/etc/plugins/redis.yml +++ b/etc/plugins/redis.yml @@ -1 +1,82 @@ -mode: overlay +mode: whitelist # whitelist(default),all + +metrics: + - name: redis_maxmemory + - name: redis_used_memory + - name: redis_used_memory_peak + - name: redis_used_memory_rss + - name: redis_mem_fragmentation_ratio + - name: 
redis_total_commands_processed + type: COUNTER + - name: redis_total_connections_received + type: COUNTER + - name: redis_expired_keys + - name: mongodb_queries + - name: mongodb_queries_per_sec + - name: mongodb_queued_reads + - name: mongodb_queued_writes + +trash: + - name: redis_aof_current_rewrite_time_sec + - name: redis_aof_enabled + - name: redis_aof_last_rewrite_time_sec + - name: redis_aof_rewrite_in_progress + - name: redis_aof_rewrite_scheduled + - name: redis_blocked_clients + - name: redis_client_biggest_input_buf + - name: redis_client_longest_output_list + - name: redis_clients + - name: redis_cluster_enabled + - name: redis_cmdstat_calls + - name: redis_cmdstat_usec + - name: redis_cmdstat_usec_per_call + - name: redis_connected_slaves + - name: redis_evicted_keys + - name: redis_instantaneous_input_kbps + - name: redis_instantaneous_ops_per_sec + - name: redis_instantaneous_output_kbps + - name: redis_keyspace_avg_ttl + - name: redis_keyspace_expires + - name: redis_keyspace_hitrate + - name: redis_keyspace_hits + type: COUNTER + - name: redis_keyspace_keys + - name: redis_keyspace_misses + type: COUNTER + - name: redis_latest_fork_usec + - name: redis_loading + - name: redis_lru_clock + type: COUNTER + - name: redis_master_repl_offset + - name: redis_migrate_cached_sockets + - name: redis_pubsub_channels + - name: redis_pubsub_patterns + - name: redis_rdb_bgsave_in_progress + - name: redis_rdb_changes_since_last_save + - name: redis_rdb_current_bgsave_time_sec + - name: redis_rdb_last_bgsave_time_sec + - name: redis_rdb_last_save_time + type: COUNTER + - name: redis_rdb_last_save_time_elapsed + - name: redis_rejected_connections + - name: redis_repl_backlog_active + - name: redis_repl_backlog_first_byte_offset + - name: redis_repl_backlog_histlen + - name: redis_repl_backlog_size + - name: redis_sync_full + - name: redis_sync_partial_err + - name: redis_sync_partial_ok + - name: redis_total_net_input_bytes + type: COUNTER + - name: 
redis_total_net_output_bytes + type: COUNTER + - name: redis_total_system_memory + type: COUNTER + - name: redis_uptime + type: COUNTER + - name: redis_used_cpu_sys + - name: redis_used_cpu_sys_children + - name: redis_used_cpu_user + - name: redis_used_cpu_user_children + - name: redis_used_memory_lua + diff --git a/etc/prober.yml b/etc/prober.yml index 9ef20f1d..19518c0c 100644 --- a/etc/prober.yml +++ b/etc/prober.yml @@ -1,8 +1,14 @@ -region: default workerProcesses: 5 + logger: dir: logs/prober - level: DEBUG + level: INFO keepHours: 24 pluginsConfig: etc/plugins -ignoreConfig: false + +report: + enabled: true + region: default + interval: 4000 + timeout: 3000 + api: api/hbs/heartbeat diff --git a/etc/screen/linux_host b/etc/screen/linux_host index ef8c5adc..222026c9 100644 --- a/etc/screen/linux_host +++ b/etc/screen/linux_host @@ -1,38 +1,38 @@ [ - { - "name": "巡检大盘", - "node_path": "", - "tags": [ - { - "name": "系统指标", - "weight": 0, - "charts": [ - { - "configs": "{\"title\":\"\",\"type\":\"chart\",\"now\":\"1602317728057\",\"start\":\"1602314128057\",\"end\":\"1602317728057\",\"comparisonOptions\":[{\"label\":\"1小时\",\"labelEn\":\"1 hour\",\"value\":\"3600000\"},{\"label\":\"2小时\",\"labelEn\":\"2 hours\",\"value\":\"7200000\"},{\"label\":\"1天\",\"labelEn\":\"1 day\",\"value\":\"86400000\"},{\"label\":\"2天\",\"labelEn\":\"2 days\",\"value\":\"172800000\"},{\"label\":\"7天\",\"labelEn\":\"7 days\",\"value\":\"604800000\"}],\"threshold\":50,\"legend\":false,\"shared\":true,\"linkVisible\":true,\"metrics\":[{\"selectedNid\":38,\"endpointsKey\":\"endpoints\",\"endpoints\":[\"10.179.23.35\",\"10.86.76.13\",\"10.86.92.17\",\"172.25.79.3\"],\"selectedEndpoint\":[\"=all\"],\"selectedMetric\":\"cpu.idle\",\"selectedTagkv\":[],\"counterListCount\":2,\"aggrFunc\":\"min\"}],\"id\":2}", - "weight": 0 - }, - { - "configs": 
"{\"title\":\"\",\"type\":\"chart\",\"now\":\"1607929052831\",\"start\":\"1607925452831\",\"end\":\"1607929052831\",\"comparisonOptions\":[{\"label\":\"1小时\",\"labelEn\":\"1 hour\",\"value\":\"3600000\"},{\"label\":\"2小时\",\"labelEn\":\"2 hours\",\"value\":\"7200000\"},{\"label\":\"1天\",\"labelEn\":\"1 day\",\"value\":\"86400000\"},{\"label\":\"2天\",\"labelEn\":\"2 days\",\"value\":\"172800000\"},{\"label\":\"7天\",\"labelEn\":\"7 days\",\"value\":\"604800000\"}],\"legend\":false,\"shared\":true,\"linkVisible\":true,\"metrics\":[{\"selectedNid\":38,\"endpointsKey\":\"endpoints\",\"endpoints\":[\"10.178.25.123\",\"10.86.92.17\",\"208181fc148d4548b61641421f2cddcb\"],\"selectedEndpoint\":[\"=all\"],\"selectedMetric\":\"mem.bytes.used.percent\",\"aggrFunc\":\"max\",\"selectedTagkv\":[],\"counterListCount\":1}],\"chartTypeOptions\":{\"chartType\":\"singleValue\",\"targetValue\":\"avg\",\"subType\":\"liquidFillGauge\",\"valueMap\":\"range\",\"mapConf\":[{\"from\":0,\"to\":40,\"color\":\"#38c75c\"},{\"from\":40,\"to\":80,\"color\":\"#e7b836\"},{\"from\":80,\"to\":100,\"color\":\"#e52a2a\"}],\"suffix\":\"\"},\"id\":15}", - "weight": 1 - }, - { - "configs": "{\"title\":\"\",\"type\":\"chart\",\"now\":\"1607929254144\",\"start\":\"1607925654144\",\"end\":\"1607929254144\",\"comparisonOptions\":[{\"label\":\"1小时\",\"labelEn\":\"1 hour\",\"value\":\"3600000\"},{\"label\":\"2小时\",\"labelEn\":\"2 hours\",\"value\":\"7200000\"},{\"label\":\"1天\",\"labelEn\":\"1 day\",\"value\":\"86400000\"},{\"label\":\"2天\",\"labelEn\":\"2 days\",\"value\":\"172800000\"},{\"label\":\"7天\",\"labelEn\":\"7 
days\",\"value\":\"604800000\"}],\"legend\":false,\"shared\":true,\"linkVisible\":true,\"metrics\":[{\"selectedNid\":38,\"endpointsKey\":\"endpoints\",\"endpoints\":[\"10.178.25.123\",\"10.86.92.17\",\"208181fc148d4548b61641421f2cddcb\"],\"selectedEndpoint\":[\"=all\"],\"selectedMetric\":\"mem.bytes.used.percent\",\"selectedTagkv\":[],\"counterListCount\":1,\"aggrFunc\":\"max\"}],\"chartTypeOptions\":{\"chartType\":\"singleValue\",\"targetValue\":\"avg\",\"subType\":\"solidGauge\",\"valueMap\":\"range\",\"mapConf\":[{\"from\":0,\"to\":90,\"color\":\"#2ebd4a\"}]}}", - "weight": 2 - }, - { - "configs": "{\"title\":\"agent状态\",\"type\":\"chart\",\"now\":\"1607929022984\",\"start\":\"1607925422984\",\"end\":\"1607929022984\",\"comparisonOptions\":[{\"label\":\"1小时\",\"labelEn\":\"1 hour\",\"value\":\"3600000\"},{\"label\":\"2小时\",\"labelEn\":\"2 hours\",\"value\":\"7200000\"},{\"label\":\"1天\",\"labelEn\":\"1 day\",\"value\":\"86400000\"},{\"label\":\"2天\",\"labelEn\":\"2 days\",\"value\":\"172800000\"},{\"label\":\"7天\",\"labelEn\":\"7 days\",\"value\":\"604800000\"}],\"legend\":false,\"shared\":true,\"linkVisible\":true,\"metrics\":[{\"selectedNid\":38,\"endpointsKey\":\"endpoints\",\"endpoints\":[\"10.178.25.123\",\"10.86.92.17\",\"208181fc148d4548b61641421f2cddcb\"],\"selectedEndpoint\":[\"=all\"],\"selectedMetric\":\"proc.agent.alive\",\"selectedTagkv\":[],\"counterListCount\":1,\"aggrFunc\":\"max\"}],\"chartTypeOptions\":{\"chartType\":\"singleValue\",\"targetValue\":\"avg\",\"subType\":\"normal\",\"valueMap\":\"value\",\"mapConf\":[{\"value\":1,\"color\":\"#34ef41\",\"text\":\"OK\"},{\"value\":0,\"color\":\"#f52e2e\",\"text\":\"ERR\"}]}}", - "weight": 3 - }, - { - "configs": "{\"title\":\"扇形\",\"type\":\"chart\",\"now\":\"1607929254144\",\"start\":\"1607925654144\",\"end\":\"1607929254144\",\"comparisonOptions\":[{\"label\":\"1小时\",\"labelEn\":\"1 hour\",\"value\":\"3600000\"},{\"label\":\"2小时\",\"labelEn\":\"2 
hours\",\"value\":\"7200000\"},{\"label\":\"1天\",\"labelEn\":\"1 day\",\"value\":\"86400000\"},{\"label\":\"2天\",\"labelEn\":\"2 days\",\"value\":\"172800000\"},{\"label\":\"7天\",\"labelEn\":\"7 days\",\"value\":\"604800000\"}],\"legend\":false,\"shared\":true,\"linkVisible\":true,\"metrics\":[{\"selectedNid\":38,\"endpointsKey\":\"endpoints\",\"endpoints\":[\"10.178.25.123\",\"10.86.92.17\",\"208181fc148d4548b61641421f2cddcb\"],\"selectedEndpoint\":[\"=all\"],\"selectedMetric\":\"disk.bytes.used.percent\",\"selectedTagkv\":[{\"tagk\":\"endpoint\",\"tagv\":[\"10.178.25.123\",\"10.86.92.17\"]},{\"tagk\":\"mount\",\"tagv\":[\"=all\"]}],\"counterListCount\":1}],\"chartTypeOptions\":{\"chartType\":\"pie\",\"pieType\":\"pie\",\"targetValue\":\"avg\"},\"id\":27}", - "weight": 4 - }, - { - "configs": "{\"title\":\"\",\"type\":\"chart\",\"now\":\"1602317048253\",\"start\":\"1602313448253\",\"end\":\"1602317048253\",\"comparisonOptions\":[{\"label\":\"1小时\",\"labelEn\":\"1 hour\",\"value\":\"3600000\"},{\"label\":\"2小时\",\"labelEn\":\"2 hours\",\"value\":\"7200000\"},{\"label\":\"1天\",\"labelEn\":\"1 day\",\"value\":\"86400000\"},{\"label\":\"2天\",\"labelEn\":\"2 days\",\"value\":\"172800000\"},{\"label\":\"7天\",\"labelEn\":\"7 days\",\"value\":\"604800000\"}],\"legend\":false,\"shared\":true,\"linkVisible\":true,\"metrics\":[{\"selectedNid\":38,\"endpointsKey\":\"endpoints\",\"endpoints\":[\"10.179.23.35\",\"10.86.76.13\",\"10.86.92.17\",\"172.25.79.3\"],\"selectedEndpoint\":[\"=all\"],\"selectedMetric\":\"mem.bytes.used.percent\",\"selectedTagkv\":[],\"counterListCount\":2}],\"chartTypeOptions\":{\"chartType\":\"table\",\"tableType\":\"current\",\"columnsKey\":[\"avg\",\"last\"],\"valueMap\":\"range\",\"mapConf\":[{}]},\"id\":16}", - "weight": 5 - } + { + "name": "日常巡检大盘", + "node_path": "", + "tags": [ + { + "name": "机器负载", + "weight": 0, + "charts": [ + { + "configs": 
"{\"title\":\"CPU使用率\",\"type\":\"chart\",\"now\":\"1611191937020\",\"start\":\"1611188337020\",\"end\":\"1611191937020\",\"comparisonOptions\":[{\"label\":\"1小时\",\"labelEn\":\"1 hour\",\"value\":\"3600000\"},{\"label\":\"2小时\",\"labelEn\":\"2 hours\",\"value\":\"7200000\"},{\"label\":\"1天\",\"labelEn\":\"1 day\",\"value\":\"86400000\"},{\"label\":\"2天\",\"labelEn\":\"2 days\",\"value\":\"172800000\"},{\"label\":\"7天\",\"labelEn\":\"7 days\",\"value\":\"604800000\"}],\"threshold\":75,\"legend\":false,\"shared\":true,\"linkVisible\":true,\"metrics\":[{\"selectedNid\":1,\"endpointsKey\":\"endpoints\",\"selectedEndpoint\":[\"=all\"],\"selectedMetric\":\"cpu.util\",\"selectedTagkv\":[],\"counterListCount\":6}],\"id\":152}", + "weight": 0 + }, + { + "configs": "{\"title\":\"进程总数\",\"type\":\"chart\",\"now\":\"1611192346064\",\"start\":\"1611188746064\",\"end\":\"1611192346064\",\"comparisonOptions\":[{\"label\":\"1小时\",\"labelEn\":\"1 hour\",\"value\":\"3600000\"},{\"label\":\"2小时\",\"labelEn\":\"2 hours\",\"value\":\"7200000\"},{\"label\":\"1天\",\"labelEn\":\"1 day\",\"value\":\"86400000\"},{\"label\":\"2天\",\"labelEn\":\"2 days\",\"value\":\"172800000\"},{\"label\":\"7天\",\"labelEn\":\"7 days\",\"value\":\"604800000\"}],\"legend\":false,\"shared\":true,\"linkVisible\":true,\"metrics\":[{\"selectedNid\":1,\"endpointsKey\":\"endpoints\",\"selectedEndpoint\":[\"=all\"],\"selectedMetric\":\"sys.ps.process.total\",\"selectedTagkv\":[],\"counterListCount\":6,\"aggrFunc\":\"avg\"}],\"chartTypeOptions\":{\"chartType\":\"singleValue\",\"targetValue\":\"avg\",\"subType\":\"normal\",\"valueMap\":\"range\",\"mapConf\":[{\"from\":0,\"to\":3000,\"color\":\"#07cf82\"},{\"from\":3000,\"to\":5000,\"color\":\"#ef900b\"},{\"from\":5000,\"to\":999999,\"color\":\"#f50505\"}],\"suffix\":\"个\"},\"id\":10}", + "weight": 1 + }, + { + "configs": 
"{\"title\":\"硬盘使用率\",\"type\":\"chart\",\"now\":\"1611211473892\",\"start\":\"1611207873892\",\"end\":\"1611211473892\",\"comparisonOptions\":[{\"label\":\"1小时\",\"labelEn\":\"1 hour\",\"value\":\"3600000\"},{\"label\":\"2小时\",\"labelEn\":\"2 hours\",\"value\":\"7200000\"},{\"label\":\"1天\",\"labelEn\":\"1 day\",\"value\":\"86400000\"},{\"label\":\"2天\",\"labelEn\":\"2 days\",\"value\":\"172800000\"},{\"label\":\"7天\",\"labelEn\":\"7 days\",\"value\":\"604800000\"}],\"legend\":false,\"shared\":true,\"linkVisible\":true,\"metrics\":[{\"selectedNid\":1,\"endpointsKey\":\"endpoints\",\"selectedEndpoint\":[\"=all\"],\"selectedMetric\":\"disk.bytes.used.percent\",\"selectedTagkv\":[{\"tagk\":\"endpoint\",\"tagv\":[\"=all\"]},{\"tagk\":\"mount\",\"tagv\":[\"=all\"]}],\"counterListCount\":11}],\"chartTypeOptions\":{\"chartType\":\"table\",\"tableType\":\"stats\",\"columnsKey\":[\"last\"],\"valueMap\":\"range\",\"mapConf\":[{}]},\"id\":13,\"sortOrder\":{\"columnKey\":\"last\",\"order\":\"descend\"}}", + "weight": 2 + }, + { + "configs": "{\"title\":\"内存使用率均值-纯演示扇形图\",\"type\":\"chart\",\"now\":\"1611231230645\",\"start\":\"1611227630645\",\"end\":\"1611231230645\",\"comparisonOptions\":[{\"label\":\"1小时\",\"labelEn\":\"1 hour\",\"value\":\"3600000\"},{\"label\":\"2小时\",\"labelEn\":\"2 hours\",\"value\":\"7200000\"},{\"label\":\"1天\",\"labelEn\":\"1 day\",\"value\":\"86400000\"},{\"label\":\"2天\",\"labelEn\":\"2 days\",\"value\":\"172800000\"},{\"label\":\"7天\",\"labelEn\":\"7 
days\",\"value\":\"604800000\"}],\"legend\":false,\"shared\":true,\"linkVisible\":true,\"metrics\":[{\"selectedNid\":1,\"endpointsKey\":\"endpoints\",\"selectedEndpoint\":[\"=all\"],\"selectedMetric\":\"mem.bytes.used.percent\",\"selectedTagkv\":[{\"tagk\":\"endpoint\",\"tagv\":[\"=all\"]}],\"counterListCount\":6,\"aggrFunc\":\"avg\"}],\"chartTypeOptions\":{\"chartType\":\"singleValue\",\"targetValue\":\"avg\",\"subType\":\"solidGauge\",\"valueMap\":\"range\",\"mapConf\":[{\"from\":0,\"to\":40,\"color\":\"#06c663\"},{\"from\":40,\"to\":70,\"color\":\"#da8e0b\"},{\"from\":70,\"to\":90,\"color\":\"#f48b71\"},{\"from\":90,\"to\":111,\"color\":\"#fa0505\"}]},\"id\":11}", + "weight": 3 + }, + { + "configs": "{\"title\":\"IO使用率-纯演示水位图\",\"type\":\"chart\",\"now\":\"1611231230645\",\"start\":\"1611227630645\",\"end\":\"1611231230645\",\"comparisonOptions\":[{\"label\":\"1小时\",\"labelEn\":\"1 hour\",\"value\":\"3600000\"},{\"label\":\"2小时\",\"labelEn\":\"2 hours\",\"value\":\"7200000\"},{\"label\":\"1天\",\"labelEn\":\"1 day\",\"value\":\"86400000\"},{\"label\":\"2天\",\"labelEn\":\"2 days\",\"value\":\"172800000\"},{\"label\":\"7天\",\"labelEn\":\"7 days\",\"value\":\"604800000\"}],\"legend\":false,\"shared\":true,\"linkVisible\":true,\"metrics\":[{\"selectedNid\":1,\"endpointsKey\":\"endpoints\",\"selectedEndpoint\":[\"=all\"],\"selectedMetric\":\"disk.io.util\",\"selectedTagkv\":[{\"tagk\":\"endpoint\",\"tagv\":[\"=all\"]},{\"tagk\":\"device\",\"tagv\":[\"=all\"]}],\"counterListCount\":24,\"aggrFunc\":\"avg\"}],\"chartTypeOptions\":{\"chartType\":\"singleValue\",\"targetValue\":\"avg\",\"subType\":\"liquidFillGauge\",\"valueMap\":\"range\",\"mapConf\":[{\"color\":\"#07b904\",\"from\":0,\"to\":50},{\"from\":50,\"to\":100,\"color\":\"#e01a1a\"}]},\"id\":12}", + "weight": 4 + }, + { + "configs": "{\"title\":\"进程数-纯演示饼图\",\"type\":\"chart\",\"now\":\"1611231230645\",\"start\":\"1611227630645\",\"end\":\"1611231230645\",\"comparisonOptions\":[{\"label\":\"1小时\",\"labelEn\":\"1 
hour\",\"value\":\"3600000\"},{\"label\":\"2小时\",\"labelEn\":\"2 hours\",\"value\":\"7200000\"},{\"label\":\"1天\",\"labelEn\":\"1 day\",\"value\":\"86400000\"},{\"label\":\"2天\",\"labelEn\":\"2 days\",\"value\":\"172800000\"},{\"label\":\"7天\",\"labelEn\":\"7 days\",\"value\":\"604800000\"}],\"legend\":false,\"shared\":true,\"linkVisible\":true,\"metrics\":[{\"selectedNid\":1,\"endpointsKey\":\"endpoints\",\"selectedEndpoint\":[\"=all\"],\"selectedMetric\":\"sys.ps.process.total\",\"selectedTagkv\":[{\"tagk\":\"endpoint\",\"tagv\":[\"=all\"]}],\"counterListCount\":6}],\"chartTypeOptions\":{\"chartType\":\"pie\",\"pieType\":\"donut\",\"targetValue\":\"avg\"},\"id\":14}", + "weight": 5 + } + ] + } ] - } - ] - } + } ] \ No newline at end of file diff --git a/src/common/report/report.go b/src/common/report/report.go index b9d2ed7c..81bbd843 100644 --- a/src/common/report/report.go +++ b/src/common/report/report.go @@ -21,6 +21,7 @@ type ReportSection struct { HTTPPort string `yaml:"http_port"` RPCPort string `yaml:"rpc_port"` Remark string `yaml:"remark"` + Region string `yaml:"region"` } var Config ReportSection @@ -55,6 +56,7 @@ func report(addrs []string) { "rpc_port": Config.RPCPort, "http_port": Config.HTTPPort, "remark": Config.Remark, + "region": Config.Region, } var body reportRes diff --git a/src/models/mon_hbs.go b/src/models/mon_hbs.go index c8b177a0..098f2dad 100644 --- a/src/models/mon_hbs.go +++ b/src/models/mon_hbs.go @@ -20,7 +20,7 @@ func (i *Instance) Add() error { } func (i *Instance) Update() error { - _, err := DB["hbs"].Where("id=?", i.Id).MustCols("ts", "http_port", "rpc_port").Update(i) + _, err := DB["hbs"].Where("id=?", i.Id).MustCols("ts", "http_port", "rpc_port", "region").Update(i) return err } diff --git a/src/modules/agent/cache/cache.go b/src/modules/agent/cache/cache.go index b52e0c79..fa74be6b 100644 --- a/src/modules/agent/cache/cache.go +++ b/src/modules/agent/cache/cache.go @@ -5,15 +5,12 @@ import ( "time" 
"github.com/didi/nightingale/src/common/dataobj" - process "github.com/shirou/gopsutil/process" ) var MetricHistory *History -var ProcsCache *ProcessCache func Init() { MetricHistory = NewHistory() - ProcsCache = NewProcsCache() } func NewHistory() *History { @@ -64,50 +61,3 @@ func (h *History) clean() { } } } - -type ProcessCache struct { - sync.RWMutex - Data map[int32]*process.Process -} - -func NewProcsCache() *ProcessCache { - pc := ProcessCache{ - Data: make(map[int32]*process.Process), - } - go pc.Clean() - return &pc -} - -func (pc *ProcessCache) Set(pid int32, p *process.Process) { - pc.Lock() - defer pc.Unlock() - pc.Data[pid] = p -} - -func (pc *ProcessCache) Get(pid int32) (*process.Process, bool) { - pc.RLock() - defer pc.RUnlock() - p, exists := pc.Data[pid] - return p, exists -} - -func (pc *ProcessCache) Clean() { - ticker := time.NewTicker(10 * time.Minute) - for { - select { - case <-ticker.C: - pc.clean() - } - } -} - -func (pc *ProcessCache) clean() { - pc.Lock() - defer pc.Unlock() - for pid, procs := range pc.Data { - running, _ := procs.IsRunning() - if !running { - delete(pc.Data, pid) - } - } -} diff --git a/src/modules/agent/sys/procs/scheduler.go b/src/modules/agent/sys/procs/scheduler.go index ec746778..85136834 100644 --- a/src/modules/agent/sys/procs/scheduler.go +++ b/src/modules/agent/sys/procs/scheduler.go @@ -4,14 +4,13 @@ import ( "strings" "time" - process "github.com/shirou/gopsutil/process" - "github.com/toolkits/pkg/logger" - "github.com/didi/nightingale/src/common/dataobj" "github.com/didi/nightingale/src/models" - "github.com/didi/nightingale/src/modules/agent/cache" "github.com/didi/nightingale/src/modules/agent/config" "github.com/didi/nightingale/src/modules/agent/core" + + "github.com/toolkits/pkg/logger" + "github.com/toolkits/pkg/nux" ) type ProcScheduler struct { @@ -45,52 +44,97 @@ func (p *ProcScheduler) Stop() { close(p.Quit) } +var ( + rBytes map[int]uint64 + wBytes map[int]uint64 + procJiffy map[int]uint64 + 
jiffy uint64 +) + func ProcCollect(p *models.ProcCollect) { - ps, err := process.Processes() + ps, err := AllProcs() if err != nil { logger.Error(err) return } - var memUsedTotal uint64 = 0 - var memUtilTotal = 0.0 - var cpuUtilTotal = 0.0 + + newRBytes := make(map[int]uint64) + newWBytes := make(map[int]uint64) + newProcJiffy := make(map[int]uint64) + newJiffy := readJiffy() + + for _, proc := range ps { + newRBytes[proc.Pid] = proc.RBytes + newWBytes[proc.Pid] = proc.WBytes + if pj, err := readProcJiffy(proc.Pid); err == nil { + newProcJiffy[proc.Pid] = pj + } + } + var items []*dataobj.MetricValue - cnt := 0 - for _, procs := range ps { - if isProc(procs, p.CollectMethod, p.Target) { + var cnt int + var fdNum int + var memory uint64 + var cpu float64 + var ioWrite, ioRead uint64 + var uptime uint64 + + for _, proc := range ps { + if isProc(proc, p.CollectMethod, p.Target) { cnt++ - procCache, exists := cache.ProcsCache.Get(procs.Pid) - if !exists { - cache.ProcsCache.Set(procs.Pid, procs) - procCache = procs + memory += proc.Mem + fdNum += proc.FdCount + rOld := rBytes[proc.Pid] + if rOld != 0 && rOld <= proc.RBytes { + ioRead += proc.RBytes - rOld } - mem, err := procCache.MemoryInfo() - if err != nil { - logger.Error(err) + + wOld := wBytes[proc.Pid] + if wOld != 0 && wOld <= proc.WBytes { + ioWrite += proc.WBytes - wOld + } + + uptime = readUptime(proc.Pid) + + // jiffy 为零,表示第一次采集信息,不做cpu计算 + if jiffy == 0 { continue } - memUsedTotal += mem.RSS - memUtil, err := procCache.MemoryPercent() - if err != nil { - logger.Error(err) - continue - } - memUtilTotal += float64(memUtil) - cpuUtil, err := procCache.Percent(0) - if err != nil { - logger.Error(err) - continue - } - cpuUtilTotal += cpuUtil + + cpu += float64(newProcJiffy[proc.Pid] - procJiffy[proc.Pid]) } } procNumItem := core.GaugeValue("proc.num", cnt, p.Tags) - memUsedItem := core.GaugeValue("proc.mem.used", memUsedTotal, p.Tags) - memUtilItem := core.GaugeValue("proc.mem.util", memUtilTotal, p.Tags) - 
cpuUtilItem := core.GaugeValue("proc.cpu.util", cpuUtilTotal, p.Tags) - items = []*dataobj.MetricValue{procNumItem, memUsedItem, memUtilItem, cpuUtilItem} + procFdItem := core.GaugeValue("proc.uptime", uptime, p.Tags) + procUptimeItem := core.GaugeValue("proc.fdnum", fdNum, p.Tags) + memUsedItem := core.GaugeValue("proc.mem.used", memory*1024, p.Tags) + ioReadItem := core.GaugeValue("proc.io.read.bytes", ioRead, p.Tags) + ioWriteItem := core.GaugeValue("proc.io.write.bytes", ioWrite, p.Tags) + items = []*dataobj.MetricValue{procNumItem, memUsedItem, procFdItem, procUptimeItem, ioReadItem, ioWriteItem} + + if jiffy != 0 { + cpuUtil := cpu / float64(newJiffy-jiffy) * 100 + if cpuUtil > 100 { + cpuUtil = 100 + } + + cpuUtilItem := core.GaugeValue("proc.cpu.util", cpuUtil, p.Tags) + items = append(items, cpuUtilItem) + } + + sysMem, err := nux.MemInfo() + if err != nil { + logger.Error(err) + } + + if sysMem != nil && sysMem.MemTotal != 0 { + memUsedUtil := float64(memory*1024) / float64(sysMem.MemTotal) * 100 + memUtilItem := core.GaugeValue("proc.mem.util", memUsedUtil, p.Tags) + items = append(items, memUtilItem) + } + now := time.Now().Unix() for _, item := range items { item.Step = int64(p.Step) @@ -99,18 +143,16 @@ func ProcCollect(p *models.ProcCollect) { } core.Push(items) + + rBytes = newRBytes + wBytes = newWBytes + procJiffy = newProcJiffy + jiffy = readJiffy() } -func isProc(p *process.Process, method, target string) bool { - name, err := p.Name() - if err != nil { - return false - } - cmdlines, err := p.Cmdline() - if err != nil { - return false - } - if method == "name" && target == name { +func isProc(p *Proc, method, target string) bool { + cmdlines := p.Cmdline + if method == "name" && target == p.Name { return true } else if (method == "cmdline" || method == "cmd") && strings.Contains(cmdlines, target) { return true diff --git a/src/modules/agent/sys/procs/sys.go b/src/modules/agent/sys/procs/sys.go new file mode 100644 index 00000000..a5349efe --- 
/dev/null
+++ b/src/modules/agent/sys/procs/sys.go
@@ -0,0 +1,240 @@
+package procs
+
+import (
+	"bufio"
+	"bytes"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/toolkits/pkg/file"
+	"github.com/toolkits/pkg/logger"
+)
+
+// Proc is a snapshot of one process, built from the files under /proc/[pid].
+type Proc struct {
+	Pid     int     // numeric directory name under /proc
+	Name    string  // "Name" field of /proc/[pid]/status
+	Exe     string  // not set by AllProcs
+	Cmdline string  // /proc/[pid]/cmdline with the NUL bytes removed
+	Mem     uint64  // "VmRSS" value of /proc/[pid]/status, the number in front of "kB"
+	Cpu     float64 // not set by AllProcs
+	jiffy   uint64  // not set by AllProcs
+
+	RBytes  uint64 // read_bytes counter of /proc/[pid]/io
+	WBytes  uint64 // write_bytes counter of /proc/[pid]/io
+	Uptime  uint64 // seconds since the mtime of /proc/[pid], see readUptime
+	FdCount int    // entries found under /proc/[pid]/fd
+}
+
+// String renders the main fields of the process on a single line.
+func (p *Proc) String() string {
+	return fmt.Sprintf("Proc{Pid:%d, Name:%s, Uptime:%d, Exe:%s, Mem:%d, Cpu:%.3f}",
+		p.Pid, p.Name, p.Uptime, p.Exe, p.Mem, p.Cpu)
+}
+
+// AllProcs walks the numeric directories under /proc and returns one Proc
+// for every process whose status and cmdline files are readable and whose
+// cmdline is not empty. The error is non-nil only when /proc itself cannot
+// be listed; processes that fail to parse are skipped.
+func AllProcs() (ps []*Proc, err error) {
+	var dirs []string
+	dirs, err = file.DirsUnder("/proc")
+	if err != nil {
+		return
+	}
+
+	for _, dir := range dirs {
+		pid, e := strconv.Atoi(dir)
+		if e != nil {
+			continue
+		}
+		statusFile := fmt.Sprintf("/proc/%d/status", pid)
+		cmdlineFile := fmt.Sprintf("/proc/%d/cmdline", pid)
+
+		if !file.IsExist(statusFile) || !file.IsExist(cmdlineFile) {
+			continue
+		}
+
+		name, memory, e := ReadNameAndMem(statusFile)
+		if e != nil {
+			logger.Error("read pid status file err:", e)
+			continue
+		}
+
+		cmdlineBytes, e := file.ToBytes(cmdlineFile)
+		if e != nil || len(cmdlineBytes) == 0 {
+			continue
+		}
+
+		// Drop every NUL byte, so the arguments end up concatenated.
+		cmdline := string(bytes.Replace(cmdlineBytes, []byte{0}, nil, -1))
+
+		p := &Proc{Pid: pid, Name: name, Cmdline: cmdline, Mem: memory}
+		p.RBytes, p.WBytes = readIO(pid)
+		p.FdCount = readProcFd(pid)
+		p.Uptime = readUptime(pid)
+		ps = append(ps, p)
+	}
+
+	return
+}
+
+// ReadNameAndMem reads a /proc/[pid]/status style file at path and returns
+// the value of its "Name" line and the number on its "VmRSS" line.
+//
+// Reading stops at the VmRSS line. When the file ends before a VmRSS line is
+// found, err is io.EOF and memory is 0; name is still set if a Name line was
+// seen. A VmRSS value that is not a number leaves memory at 0 with a nil err.
+func ReadNameAndMem(path string) (name string, memory uint64, err error) {
+	var content []byte
+	content, err = ioutil.ReadFile(path)
+	if err != nil {
+		return
+	}
+
+	reader := bufio.NewReader(bytes.NewBuffer(content))
+
+	for {
+		var bs []byte
+		bs, err = file.ReadLine(reader)
+		if err == io.EOF {
+			return
+		}
+
+		line := string(bs)
+
+		colonIndex := strings.Index(line, ":")
+		if colonIndex == -1 {
+			logger.Warning("line is illegal", path)
+			continue
+		}
+
+		switch strings.TrimSpace(line[:colonIndex]) {
+		case "Name":
+			name = strings.TrimSpace(line[colonIndex+1:])
+		case "VmRSS":
+			// The value looks like "1234 kB". Strip the unit instead of slicing
+			// up to its index, which panicked when "kB" was missing.
+			value := strings.TrimSuffix(strings.TrimSpace(line[colonIndex+1:]), "kB")
+			// Best effort: a value that is not a number is reported as 0.
+			memory, _ = strconv.ParseUint(strings.TrimSpace(value), 10, 64)
+			return
+		}
+	}
+}
+
+// readJiffy returns the sum of the numeric columns of the first line of
+// /proc/stat (the aggregate "cpu" line). It returns 0 when the file cannot
+// be read or does not start with that line.
+func readJiffy() uint64 {
+	f, err := os.Open("/proc/stat")
+	if err != nil {
+		return 0
+	}
+	defer f.Close()
+
+	scanner := bufio.NewScanner(f)
+	scanner.Scan()
+	s := scanner.Text()
+	if !strings.HasPrefix(s, "cpu ") {
+		return 0
+	}
+
+	var ret uint64
+	// Fields collapses the run of spaces after the label; [1:] skips "cpu".
+	for _, x := range strings.Fields(s)[1:] {
+		if v, e := strconv.ParseUint(x, 10, 64); e == nil {
+			ret += v
+		}
+	}
+	return ret
+}
+
+// readProcFd returns the number of entries file.FilesUnder reports for
+// /proc/[pid]/fd, or 0 when the directory cannot be listed.
+func readProcFd(pid int) int {
+	fds, err := file.FilesUnder(fmt.Sprintf("/proc/%d/fd", pid))
+	if err != nil {
+		return 0
+	}
+	return len(fds)
+}
+
+// readProcJiffy returns utime+stime, fields 14 and 15 of /proc/[pid]/stat,
+// for the given process.
+func readProcJiffy(pid int) (uint64, error) {
+	content, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/stat", pid))
+	if err != nil {
+		return 0, err
+	}
+	s := string(content)
+
+	// Field 2 (comm) is wrapped in parentheses and may contain spaces, so
+	// only the text after the last ')' is split. That text starts at field 3,
+	// which puts utime and stime at indexes 11 and 12.
+	end := strings.LastIndex(s, ")")
+	if end == -1 {
+		return 0, fmt.Errorf("/proc/%d/stat illegal:%q", pid, s)
+	}
+	ss := strings.Fields(s[end+1:])
+	if len(ss) < 13 {
+		return 0, fmt.Errorf("/proc/%d/stat illegal:%q", pid, s)
+	}
+
+	var ret uint64
+	for _, field := range ss[11:13] {
+		v, e := strconv.ParseUint(field, 10, 64)
+		if e != nil {
+			return 0, e
+		}
+		ret += v
+	}
+	return ret, nil
+}
+
+// readIO returns the read_bytes and write_bytes counters of /proc/[pid]/io.
+// A counter stays 0 when the file cannot be opened or the line is missing.
+func readIO(pid int) (r uint64, w uint64) {
+	f, err := os.Open(fmt.Sprintf("/proc/%d/io", pid))
+	if err != nil {
+		return
+	}
+	defer f.Close()
+
+	scanner := bufio.NewScanner(f)
+	for scanner.Scan() {
+		v := strings.Fields(scanner.Text())
+		if len(v) != 2 {
+			continue
+		}
+		// Parse errors are ignored; a non-numeric value is reported as 0.
+		switch v[0] {
+		case "read_bytes:":
+			r, _ = strconv.ParseUint(v[1], 10, 64)
+		case "write_bytes:":
+			w, _ = strconv.ParseUint(v[1], 10, 64)
+		}
+	}
+	return
+}
+
+// readUptime returns the number of seconds elapsed since the modification
+// time of /proc/[pid], which AllProcs stores as the process uptime. It
+// returns 0 when the directory cannot be stat'ed.
+func readUptime(pid int) uint64 {
+	fileInfo, err := os.Stat(fmt.Sprintf("/proc/%d", pid))
+	if err != nil {
+		return 0
+	}
+	elapsed := time.Since(fileInfo.ModTime())
+	if elapsed < 0 {
+		// An mtime in the future must not wrap around in the unsigned result.
+		return 0
+	}
+	return uint64(elapsed.Seconds())
+}
diff --git a/src/modules/monapi/collector/basecollector.go b/src/modules/monapi/collector/basecollector.go index 37018638..cbafe398 100644 --- a/src/modules/monapi/collector/basecollector.go +++ b/src/modules/monapi/collector/basecollector.go @@ -12,10 +12,10 @@ import ( type BaseCollector struct { name string category Category - newRule func() interface{} + newRule func() TelegrafPlugin } -func NewBaseCollector(name string, category Category, newRule func() interface{}) *BaseCollector { +func NewBaseCollector(name string, category Category, newRule func() TelegrafPlugin) *BaseCollector { return &BaseCollector{ name: name, category: category, @@ -23,7 +23,7 @@ func NewBaseCollector(name string, category Category, newRule func() interface{} } } -type telegrafPlugin interface { +type TelegrafPlugin interface { TelegrafInput() (telegraf.Input, error) } @@ -37,12 +37,7 @@ func (p BaseCollector) TelegrafInput(rule *models.CollectRule) (telegraf.Input, return nil, err } - plugin, ok := r2.(telegrafPlugin) - if !ok { - return nil, errUnsupported - } - - return plugin.TelegrafInput() + return r2.TelegrafInput() } func (p BaseCollector) Get(id int64) (interface{}, error) { diff --git a/src/modules/monapi/collector/collector.go b/src/modules/monapi/collector/collector.go index d1124cf5..376dd556 100644 --- a/src/modules/monapi/collector/collector.go +++ b/src/modules/monapi/collector/collector.go @@ -7,7 +7,6 @@ import ( "github.com/didi/nightingale/src/models" "github.com/didi/nightingale/src/toolkits/i18n" "github.com/influxdata/telegraf" - "github.com/toolkits/pkg/logger" ) var ( @@ -24,16 +23,27 @@ const ( LocalCategory
Category = "local" // used for agent ) +// Collector is an abstract, pluggable interface for monapi & prober. type Collector interface { + // Name returns the collector name Name() string + // Category returns the collector category, remote | local Category() Category + // Get returns a collectRule by collectRule.Id Get(id int64) (interface{}, error) + // Gets returns the collectRule list for the given node ids Gets(nids []int64) ([]interface{}, error) + // GetByNameAndNid returns a collectRule by collectRule.Name & collectRule.Nid GetByNameAndNid(name string, nid int64) (interface{}, error) + // Create a collectRule from data in []byte format, which can be unmarshaled into a collectRule struct Create(data []byte, username string) error + // Update a collectRule from data in []byte format, which can be unmarshaled into a collectRule struct Update(data []byte, username string) error + // Delete a collectRule by collectRule.Id with operator's name Delete(id int64, username string) error + // Template returns a template used for UI rendering Template() (interface{}, error) + // TelegrafInput returns a telegraf.Input interface; it is called by prober.manager every collectRule.Step TelegrafInput(*models.CollectRule) (telegraf.Input, error) } @@ -72,6 +82,5 @@ func GetLocalCollectors() []string { } func _s(format string, a ...interface{}) string { - logger.Debugf(` "%s": "%s",`, format, format) return i18n.Sprintf(format, a...) 
} diff --git a/src/modules/monapi/collector/template.go b/src/modules/monapi/collector/template.go index f8e8c314..d827338c 100644 --- a/src/modules/monapi/collector/template.go +++ b/src/modules/monapi/collector/template.go @@ -4,22 +4,21 @@ import ( "encoding/json" "fmt" "reflect" - "strconv" "strings" "sync" "unicode" + + "github.com/toolkits/pkg/logger" ) var fieldCache sync.Map // map[reflect.Type]structFields type Field struct { - skip bool `json:"-"` - def string `json:"-"` - // definitions map[string][]Field `json:"-"` - + skip bool `json:"-"` Name string `json:"name,omitempty"` Label string `json:"label,omitempty"` Default interface{} `json:"default,omitempty"` + Enum []interface{} `json:"enum,omitempty"` Example string `json:"example,omitempty"` Description string `json:"description,omitempty"` Required bool `json:"required,omitempty"` @@ -45,12 +44,7 @@ func cachedTypeContent(t reflect.Type) Field { func typeContent(t reflect.Type) Field { definitions := map[string][]Field{t.String(): nil} - - ret := Field{ - // definitions: map[string][]Field{ - // t.String(): nil, - // }, - } + ret := Field{} for i := 0; i < t.NumField(); i++ { sf := t.Field(i) @@ -139,9 +133,20 @@ func getTagOpt(sf reflect.StructField) (opt Field) { opt.Name = name opt.Label = _s(sf.Tag.Get("label")) - opt.def = sf.Tag.Get("default") opt.Example = sf.Tag.Get("example") opt.Description = _s(sf.Tag.Get("description")) + if s := sf.Tag.Get("enum"); s != "" { + if err := json.Unmarshal([]byte(s), &opt.Enum); err != nil { + logger.Warningf("%s.enum %s Unmarshal err %s", + sf.Name, s, err) + } + } + if s := sf.Tag.Get("default"); s != "" { + if err := json.Unmarshal([]byte(s), &opt.Default); err != nil { + logger.Warningf("%s.default %s Unmarshal err %s", + sf.Name, s, err) + } + } return } @@ -191,29 +196,15 @@ func fieldType(t reflect.Type, in *Field, definitions map[string][]Field) { t = t.Elem() } - var def interface{} - switch t.Kind() { case reflect.Int, reflect.Int32, reflect.Int64, 
reflect.Uint, reflect.Uint32, reflect.Uint64: in.Type = "integer" - if in.def != "" { - def, _ = strconv.ParseInt(in.def, 10, 64) - } case reflect.Float32, reflect.Float64: in.Type = "float" - if in.def != "" { - def, _ = strconv.ParseFloat(in.def, 64) - } case reflect.Bool: in.Type = "boolean" - if in.def != "" { - def = in.def == "true" - } case reflect.String: in.Type = "string" - if in.def != "" { - def = in.def - } case reflect.Struct: name := t.String() if _, ok := definitions[name]; !ok { @@ -238,17 +229,8 @@ func fieldType(t reflect.Type, in *Field, definitions map[string][]Field) { } else { panic(fmt.Sprintf("unspport type %s items %s", t.String(), t2.String())) } - if t2.Kind() == reflect.String && in.def != "" { - var s []string - json.Unmarshal([]byte(in.def), &s) - def = s - } default: panic(fmt.Sprintf("unspport type %s", t.String())) // in.Type = "string" } - - if def != nil { - in.Default = def - } } diff --git a/src/modules/monapi/plugins/all/all.go b/src/modules/monapi/plugins/all/all.go index 54e635ab..0bbea17d 100644 --- a/src/modules/monapi/plugins/all/all.go +++ b/src/modules/monapi/plugins/all/all.go @@ -4,10 +4,10 @@ import ( // remote // _ "github.com/didi/nightingale/src/modules/monapi/plugins/api" // telegraf style + _ "github.com/didi/nightingale/src/modules/monapi/plugins/github" _ "github.com/didi/nightingale/src/modules/monapi/plugins/mongodb" _ "github.com/didi/nightingale/src/modules/monapi/plugins/mysql" _ "github.com/didi/nightingale/src/modules/monapi/plugins/redis" - // _ "github.com/didi/nightingale/src/modules/monapi/plugins/github" // local _ "github.com/didi/nightingale/src/modules/monapi/plugins/log" diff --git a/src/modules/monapi/plugins/github/github.go b/src/modules/monapi/plugins/github/github.go index b60dcf17..85e73198 100644 --- a/src/modules/monapi/plugins/github/github.go +++ b/src/modules/monapi/plugins/github/github.go @@ -5,13 +5,31 @@ import ( "time" "github.com/didi/nightingale/src/modules/monapi/collector" + 
"github.com/didi/nightingale/src/modules/monapi/plugins/github/github" + "github.com/didi/nightingale/src/toolkits/i18n" "github.com/influxdata/telegraf" ) func init() { collector.CollectorRegister(NewGitHubCollector()) // for monapi + i18n.DictRegister(langDict) } +var ( + langDict = map[string]map[string]string{ + "zh": map[string]string{ + "Repositories": "代码仓库", + "List of repositories to monitor": "要监视的代码仓库存列表", + "Access token": "访问令牌", + "Github API access token. Unauthenticated requests are limited to 60 per hour": "Github 接口的访问令牌. 匿名状态下,每小时请求限制为60", + "Enterprise base url": "Github 企业版地址", + "Github API enterprise url. Github Enterprise accounts must specify their base url": "如果使用Github企业版,请配置企业版API地址", + "HTTP timeout": "请求超时时间", + "Timeout for HTTP requests": "http请求超时时间, 单位: 秒", + }, + } +) + type GitHubCollector struct { *collector.BaseCollector } @@ -20,15 +38,15 @@ func NewGitHubCollector() *GitHubCollector { return &GitHubCollector{BaseCollector: collector.NewBaseCollector( "github", collector.RemoteCategory, - func() interface{} { return &GitHubRule{} }, + func() collector.TelegrafPlugin { return &GitHubRule{} }, )} } type GitHubRule struct { - Repositories []string `label:"Repositories" json:"repositories" description:"List of repositories to monitor"` - AccessToken string `label:"Access token" json:"access_token" description:"Github API access token. Unauthenticated requests are limited to 60 per hour"` + Repositories []string `label:"Repositories" json:"repositories,required" example:"didi/nightingale" description:"List of repositories to monitor"` + AccessToken string `label:"Access token" json:"access_token" description:"Github API access token. Unauthenticated requests are limited to 60 per hour"` EnterpriseBaseURL string `label:"Enterprise base url" json:"enterprise_base_url" description:"Github API enterprise url. 
Github Enterprise accounts must specify their base url"` - HTTPTimeout int `label:"HTTP timeout" json:"http_timeout" description:"Timeout for HTTP requests"` + HTTPTimeout int `label:"HTTP timeout" json:"http_timeout" default:"5" description:"Timeout for HTTP requests"` } func (p *GitHubRule) Validate() error { @@ -46,7 +64,7 @@ func (p *GitHubRule) TelegrafInput() (telegraf.Input, error) { return nil, err } - return &GitHub{ + return &github.GitHub{ Repositories: p.Repositories, AccessToken: p.AccessToken, EnterpriseBaseURL: p.EnterpriseBaseURL, diff --git a/src/modules/monapi/plugins/github/README.md b/src/modules/monapi/plugins/github/github/README.md similarity index 100% rename from src/modules/monapi/plugins/github/README.md rename to src/modules/monapi/plugins/github/github/README.md diff --git a/src/modules/monapi/plugins/github/github.telegraf.go b/src/modules/monapi/plugins/github/github/github.go similarity index 98% rename from src/modules/monapi/plugins/github/github.telegraf.go rename to src/modules/monapi/plugins/github/github/github.go index 856e685b..b1788768 100644 --- a/src/modules/monapi/plugins/github/github.telegraf.go +++ b/src/modules/monapi/plugins/github/github/github.go @@ -193,7 +193,7 @@ func getFields(repositoryInfo *github.Repository) map[string]interface{} { func init() { inputs.Add("github", func() telegraf.Input { return &GitHub{ - HTTPTimeout: time.Second * 5, + HTTPTimeout: internal.Duration{Duration: time.Second * 5}, } }) } diff --git a/src/modules/monapi/plugins/github/github_test.go b/src/modules/monapi/plugins/github/github/github_test.go similarity index 100% rename from src/modules/monapi/plugins/github/github_test.go rename to src/modules/monapi/plugins/github/github/github_test.go diff --git a/src/modules/monapi/plugins/mongodb/mongodb.go b/src/modules/monapi/plugins/mongodb/mongodb.go index b23fa93d..0a931f1a 100644 --- a/src/modules/monapi/plugins/mongodb/mongodb.go +++ b/src/modules/monapi/plugins/mongodb/mongodb.go 
@@ -20,14 +20,14 @@ var ( "zh": map[string]string{ "Servers": "服务", "An array of URLs of the form": "服务地址", - "Cluster status": "采集集群", - "When true, collect cluster status.": "采集集群统计信息", - "Per DB stats": "采集单个数据库(db)统计信息", - "When true, collect per database stats": "采集一个数据库的统计信息", - "Col stats": "采集集合(Collection)统计信息", - "When true, collect per collection stats": "采集一个集合的统计信息", - "Col stats dbs": "采集集合的列表", - "List of db where collections stats are collected, If empty, all db are concerned": "如果设置为空,则采集数据库里所有集合的统计信息", + "Cluster status": "集群状态", + "When true, collect cluster status.": "开启时,采集集群状态", + "Per DB stats": "数据库信息", + "When true, collect per database stats": "开启时,采集数据库的统计信息", + "Col stats": "集合信息", + "When true, collect per collection stats": "开启时,采集集合的统计信息", + "Col stats dbs": "集合列表信息", + "List of db where collections stats are collected, If empty, all db are concerned": "如果未设置,则采集数据库里所有集合的统计信息, 开启`集合信息`时有效", }, } ) @@ -40,7 +40,7 @@ func NewMongodbCollector() *MongodbCollector { return &MongodbCollector{BaseCollector: collector.NewBaseCollector( "mongodb", collector.RemoteCategory, - func() interface{} { return &MongodbRule{} }, + func() collector.TelegrafPlugin { return &MongodbRule{} }, )} } diff --git a/src/modules/monapi/plugins/mysql/mysql.go b/src/modules/monapi/plugins/mysql/mysql.go index bdcc6231..6110ae09 100644 --- a/src/modules/monapi/plugins/mysql/mysql.go +++ b/src/modules/monapi/plugins/mysql/mysql.go @@ -23,7 +23,7 @@ func NewMysqlCollector() *MysqlCollector { return &MysqlCollector{BaseCollector: collector.NewBaseCollector( "mysql", collector.RemoteCategory, - func() interface{} { return &MysqlRule{} }, + func() collector.TelegrafPlugin { return &MysqlRule{} }, )} } @@ -34,17 +34,17 @@ var ( "Databases": "数据库", "if the list is empty, then metrics are gathered from all database tables": "如果列表为空,则收集所有数据库表", "Process List": "进程列表", - "gather thread state counts from INFORMATION_SCHEMA.PROCESSLIST": "从 INFORMATION_SCHEMA.PROCESSLIST 
收集线程状态信息", - "User Statistics": "User Statistics", - "gather user statistics from INFORMATION_SCHEMA.USER_STATISTICS": "从 INFORMATION_SCHEMA.USER_STATISTICS 收集用户状态信息", - "Auto Increment": "Auto Increment", - "gather auto_increment columns and max values from information schema": "采集 auto_increment 和 max values 信息", - "Innodb Metrics": "Innodb Metrics", - "gather metrics from INFORMATION_SCHEMA.INNODB_METRICS": "采集 INFORMATION_SCHEMA.INNODB_METRICS 信息", - "Slave Status": "Slave Status", - "gather metrics from SHOW SLAVE STATUS command output": "采集 SHOW SLAVE STATUS command output", + "gather thread state counts from INFORMATION_SCHEMA.PROCESSLIST": "采集 INFORMATION_SCHEMA.PROCESSLIST", + "User Statistics": "用户统计", + "gather user statistics from INFORMATION_SCHEMA.USER_STATISTICS": "采集 INFORMATION_SCHEMA.USER_STATISTICS", + "Auto Increment": "自动递增变量", + "gather auto_increment columns and max values from information schema": "采集 auto_increment 和 max values", + "Innodb Metrics": "Innodb统计", + "gather metrics from INFORMATION_SCHEMA.INNODB_METRICS": "采集 INFORMATION_SCHEMA.INNODB_METRICS", + "Slave Status": "Slave状态", + "gather metrics from SHOW SLAVE STATUS command output": "采集 SHOW SLAVE STATUS", "Binary Logs": "Binary Logs", - "gather metrics from SHOW BINARY LOGS command output": "采集 SHOW BINARY LOGS command output", + "gather metrics from SHOW BINARY LOGS command output": "采集 SHOW BINARY LOGS", "Table IO Waits": "Table IO Waits", "gather metrics from PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMARY_BY_TABLE": "采集 PERFORMANCE_SCHEMA.TABLE_IO_WAITS_SUMMARY_BY_TABLE", "Table Lock Waits": "Table Lock Waits", @@ -54,21 +54,31 @@ var ( "Event Waits": "Event Waits", "gather metrics from PERFORMANCE_SCHEMA.EVENT_WAITS": "采集 PERFORMANCE_SCHEMA.EVENT_WAITS", "Tables": "Tables", - "gather metrics from INFORMATION_SCHEMA.TABLES for databases provided above list": "采集 INFORMATION_SCHEMA.TABLES for databases provided above list", + "gather metrics from INFORMATION_SCHEMA.TABLES for 
databases provided above list": "采集 INFORMATION_SCHEMA.TABLES", "File Events Stats": "File Events Stats", - "gather metrics from PERFORMANCE_SCHEMA.FILE_SUMMARY_BY_EVENT_NAME": "采集 PERFORMANCE_SCHEMA.FILE_SUMMARY_BY_EVENT_NAME", - "Perf Events Statements": "Perf Events Statements", + "gather metrics from PERFORMANCE_SCHEMA.FILE_SUMMARY_BY_EVENT_NAME": "采集 PERFORMANCE_SCHEMA.FILE_SUMMARY_BY_EVENT_NAME", + "Perf Events Statements Digest Text Limit": "标准语句的最大长度", + "Perf Events Statements Limit": "根据响应时间限制语句的事件数量", + "Perf Events Statements Timelimit": "限制最后出现的事件", + "Perf Events Statements": "采集 PERFORMANCE_SCHEMA", "gather metrics from PERFORMANCE_SCHEMA.EVENTS_STATEMENTS_SUMMARY_BY_DIGEST": "采集 PERFORMANCE_SCHEMA.EVENTS_STATEMENTS_SUMMARY_BY_DIGEST", - "Interval Slow": "Interval Slow", + "specify servers via a url matching
[username[:password]@][protocol[(address)]]/[?tls=[true|false|skip-verify|custom]]
see https://github.com/go-sql-driver/mysql#dsn-data-source-name": "通过URL设置指定服务器
[username[:password]@][protocol[(address)]]/[?tls=[true|false|skip-verify|custom]]
参考 https://github.com/go-sql-driver/mysql#dsn-data-source-name", + "Interval Slow": "周期限制", + "Some queries we may want to run less often (such as SHOW GLOBAL VARIABLES)": "限制一些查询的最小间隔(比如 SHOW GLOBAL VARIABLES)", + "Global Vars": "全局变量", + "gather metrics from PERFORMANCE_SCHEMA.GLOBAL_VARIABLES": "采集 PERFORMANCE_SCHEMA.GLOBAL_VARIABLES", + "digest_text_limit for metrics form perf_events_statements": "查询performance_schema时, DIGEST_TEXT限制", + "limit for metrics form perf_events_statements": "查询performance_schema时, 数量限制", + "time_limit for metrics form perf_events_statements": "查询performance_schema时, last_seen 时间限制", }, } ) type MysqlRule struct { - Servers []string `label:"Servers" json:"servers,required" description:"specify servers via a url matching\n[username[:password]@][protocol[(address)]]/[?tls=[true|false|skip-verify|custom]]\nsee https://github.com/go-sql-driver/mysql#dsn-data-source-name" example:"user:passwd@tcp(127.0.0.1:3306)/?tls=false"` - PerfEventsStatementsDigestTextLimit int64 `label:"Perf Events Statements Digest Text Limit" json:"perf_events_statements_digest_text_limit" default:"120" description:"the limits for metrics form perf_events_statements"` - PerfEventsStatementsLimit int64 `label:"Perf Events Statements Limit" json:"perf_events_statements_limit" default:"250" description:"the limits for metrics form perf_events_statements"` - PerfEventsStatementsTimeLimit int64 `label:"Perf Events Statements Timelimit" json:"perf_events_statements_time_limit" default:"86400" description:"the limits for metrics form perf_events_statements"` + Servers []string `label:"Servers" json:"servers,required" description:"specify servers via a url matching
[username[:password]@][protocol[(address)]]/[?tls=[true|false|skip-verify|custom]]
see https://github.com/go-sql-driver/mysql#dsn-data-source-name" example:"user:passwd@tcp(127.0.0.1:3306)/?tls=false"` + PerfEventsStatementsDigestTextLimit int64 `label:"Perf Events Statements Digest Text Limit" json:"perf_events_statements_digest_text_limit" default:"120" description:"digest_text_limit for metrics form perf_events_statements"` + PerfEventsStatementsLimit int64 `label:"Perf Events Statements Limit" json:"perf_events_statements_limit" default:"250" description:"limit for metrics form perf_events_statements"` + PerfEventsStatementsTimeLimit int64 `label:"Perf Events Statements Timelimit" json:"perf_events_statements_time_limit" default:"86400" description:"time_limit for metrics form perf_events_statements"` TableSchemaDatabases []string `label:"Databases" json:"table_schema_databases" description:"if the list is empty, then metrics are gathered from all database tables"` GatherProcessList bool `label:"Process List" json:"gather_process_list" description:"gather thread state counts from INFORMATION_SCHEMA.PROCESSLIST"` GatherUserStatistics bool `label:"User Statistics" json:"gather_user_statistics" description:"gather user statistics from INFORMATION_SCHEMA.USER_STATISTICS"` @@ -84,7 +94,7 @@ type MysqlRule struct { GatherFileEventsStats bool `label:"File Events Stats" json:"gather_file_events_stats" description:"gather metrics from PERFORMANCE_SCHEMA.FILE_SUMMARY_BY_EVENT_NAME"` GatherPerfEventsStatements bool `label:"Perf Events Statements" json:"gather_perf_events_statements" description:"gather metrics from PERFORMANCE_SCHEMA.EVENTS_STATEMENTS_SUMMARY_BY_DIGEST"` GatherGlobalVars bool `label:"Global Vars" json:"gather_global_variables" description:"gather metrics from PERFORMANCE_SCHEMA.GLOBAL_VARIABLES" default:"true"` - IntervalSlow string `label:"Interval Slow" json:"interval_slow" desc:"Some queries we may want to run less often (such as SHOW GLOBAL VARIABLES)" example:"30m"` + IntervalSlow string `label:"Interval Slow" json:"interval_slow" 
description:"Some queries we may want to run less often (such as SHOW GLOBAL VARIABLES)" example:"30m"` MetricVersion int `label:"-" json:"-"` } @@ -129,7 +139,7 @@ func (p *MysqlRule) TelegrafInput() (telegraf.Input, error) { GatherFileEventsStats: p.GatherFileEventsStats, GatherPerfEventsStatements: p.GatherPerfEventsStatements, GatherGlobalVars: p.GatherGlobalVars, - IntervalSlow: "", + IntervalSlow: p.IntervalSlow, MetricVersion: 2, Log: plugins.GetLogger(), }, nil diff --git a/src/modules/monapi/plugins/redis/redis.go b/src/modules/monapi/plugins/redis/redis.go index b0982911..b9fc6fea 100644 --- a/src/modules/monapi/plugins/redis/redis.go +++ b/src/modules/monapi/plugins/redis/redis.go @@ -2,6 +2,7 @@ package redis import ( "fmt" + "strings" "github.com/didi/nightingale/src/modules/monapi/collector" "github.com/didi/nightingale/src/modules/monapi/plugins" @@ -23,7 +24,7 @@ func NewRedisCollector() *RedisCollector { return &RedisCollector{BaseCollector: collector.NewBaseCollector( "redis", collector.RemoteCategory, - func() interface{} { return &RedisRule{} }, + func() collector.TelegrafPlugin { return &RedisRule{} }, )} } @@ -34,17 +35,20 @@ var ( "Type": "类型", "Servers": "服务", "specify servers": "指定服务器地址", + "metric type": "数据类型", "Optional. 
Specify redis commands to retrieve values": "设置服务器命令,采集数据名称", "Password": "密码", "specify server password": "服务密码", + "redis-cli command": "redis-cli命令,如果参数中带有空格,请以数组方式设置参数", + "metric name": "变量名称,采集时会加上前缀 redis_commands_", }, } ) type RedisCommand struct { - Command []string `label:"Command" json:"command,required" description:"" ` - Field string `label:"Field" json:"field,required" description:"metric name"` - Type string `label:"Type" json:"type" description:"integer|string|float(default)"` + Command []string `label:"Command" json:"command,required" example:"get sample_key" description:"redis-cli command"` + Field string `label:"Field" json:"field,required" example:"sample_key" description:"metric name"` + Type string `label:"Type" json:"type" enum:"[\"float\", \"integer\"]" default:"float" description:"metric type"` } type RedisRule struct { @@ -61,6 +65,20 @@ func (p *RedisRule) Validate() error { if len(cmd.Command) == 0 { return fmt.Errorf("redis.rule.commands[%d].command must be set", i) } + + var command []string + for i, cmd := range cmd.Command { + if i == 0 { + for _, v := range strings.Fields(cmd) { + command = append(command, v) + } + continue + } + + command = append(command, cmd) + } + cmd.Command = command + if cmd.Field == "" { return fmt.Errorf("redis.rule.commands[%d].field must be set", i) } diff --git a/src/modules/prober/config/plugin.go b/src/modules/prober/config/plugin.go index 3aa2f79a..6ccc9768 100644 --- a/src/modules/prober/config/plugin.go +++ b/src/modules/prober/config/plugin.go @@ -20,7 +20,7 @@ var ( const ( PluginModeWhitelist = iota - PluginModeOverlay + PluginModeAll ) type Metric struct { @@ -48,8 +48,8 @@ func (p *pluginConfig) Validate() error { switch strings.ToLower(p.Mode) { case "whitelist": p.mode = PluginModeWhitelist - case "overlay": - p.mode = PluginModeOverlay + case "all": + p.mode = PluginModeAll default: p.mode = PluginModeWhitelist } @@ -63,8 +63,12 @@ func InitPluginsConfig(cf *ConfYaml) { config := 
newPluginConfig() pluginConfigs[plugin] = config - file := filepath.Join(cf.PluginsConfig, plugin+".yml") + file := filepath.Join(cf.PluginsConfig, plugin+".local.yml") b, err := ioutil.ReadFile(file) + if err != nil { + file = filepath.Join(cf.PluginsConfig, plugin+".yml") + b, err = ioutil.ReadFile(file) + } if err != nil { logger.Debugf("readfile %s err %s", plugin, err) continue