Merge pull request #447 from guardicore/441/bugfix/slow-report-generation-with-plenty-of-machines

[WIP] 441/bugfix/slow report generation with plenty of machines
This commit is contained in:
Shay Nehmad 2019-10-03 18:50:11 +03:00 committed by GitHub
commit 3b6714ef5b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 189 additions and 44 deletions

View File

@ -3,10 +3,14 @@ Define a Document Schema for the Monkey document.
"""
from mongoengine import Document, StringField, ListField, BooleanField, EmbeddedDocumentField, ReferenceField, \
DateTimeField, DynamicField, DoesNotExist
import ring
from monkey_island.cc.models.monkey_ttl import MonkeyTtl, create_monkey_ttl_document
from monkey_island.cc.consts import DEFAULT_MONKEY_TTL_EXPIRY_DURATION_IN_SECONDS
from monkey_island.cc.models.command_control_channel import CommandControlChannel
from monkey_island.cc.utils import local_ip_addresses
MAX_MONKEYS_AMOUNT_TO_CACHE = 100
class Monkey(Document):
@ -84,6 +88,35 @@ class Monkey(Document):
os = "windows"
return os
@staticmethod
@ring.lru()
def get_label_by_id(object_id):
    """
    Builds the display label of a monkey: "<hostname> : <first IP address>". The label is prefixed with
    "MonkeyIsland - " when at least one of the monkey's IP addresses is also returned by local_ip_addresses().
    :param object_id: the object ID of a Monkey in the database.
    :return: The label of that monkey.
    :note: The result is lru-cached per object ID.
    """
    monkey = Monkey.get_single_monkey_by_id(object_id)
    hostname = Monkey.get_hostname_by_id(object_id)
    monkey_ips = monkey.ip_addresses
    base_label = hostname + " : " + monkey_ips[0]
    # "Not disjoint" is the same condition as "the intersection is not empty".
    if not set(monkey_ips).isdisjoint(local_ip_addresses()):
        return "MonkeyIsland - " + base_label
    return base_label
@staticmethod
@ring.lru()
def get_hostname_by_id(object_id):
    """
    Looks up the hostname of a single monkey.
    :param object_id: the object ID of a Monkey in the database.
    :return: The hostname of that machine.
    :note: Prefer this over reading monkey.hostname directly - the result is lru-cached, which is faster.
    """
    monkey = Monkey.get_single_monkey_by_id(object_id)
    return monkey.hostname
def set_hostname(self, hostname):
    """
    Stores a new hostname for this machine and evicts the cached values that were derived from the old one.
    :param hostname: The new hostname for the machine.
    """
    self.hostname = hostname
    self.save()
    monkey_id = self.id
    # Both the cached hostname and the cached label contain the hostname, so both entries must be dropped.
    for cached_lookup in (Monkey.get_hostname_by_id, Monkey.get_label_by_id):
        cached_lookup.delete(monkey_id)
def get_network_info(self):
"""
Formats network info from monkey's model
@ -91,6 +124,17 @@ class Monkey(Document):
"""
return {'ips': self.ip_addresses, 'hostname': self.hostname}
@staticmethod
@ring.lru(
    expire=1  # data has TTL of 1 second. This is useful for rapid calls for report generation.
)
def is_monkey(object_id):
    """
    Checks whether the given object ID belongs to a Monkey in the database.
    :param object_id: the object ID to look up.
    :return: True if Monkey.get_single_monkey_by_id returns for that ID, False if the lookup raises.
    :note: The answer is cached for 1 second (see the expire argument above).
    """
    try:
        Monkey.get_single_monkey_by_id(object_id)
    except Exception:
        # Any failed lookup means "not a monkey". Unlike a bare except, this does not swallow
        # KeyboardInterrupt or SystemExit.
        return False
    return True
@staticmethod
def get_tunneled_monkeys():
return Monkey.objects(tunnel__exists=True)

View File

@ -112,3 +112,62 @@ class TestMonkey(IslandTestCase):
and linux_monkey not in tunneled_monkeys
and len(tunneled_monkeys) == 2)
self.assertTrue(test, "Tunneling test")
def test_get_label_by_id(self):
    """
    Checks that Monkey.get_label_by_id returns a label containing the hostname and IP, that a repeated query is
    served from the cache, and that set_hostname invalidates the cached label.
    """
    self.fail_if_not_testing_env()
    self.clean_monkey_db()

    hostname_example = "a_hostname"
    ip_example = "1.1.1.1"
    linux_monkey = Monkey(guid=str(uuid.uuid4()),
                          description="Linux shay-Virtual-Machine",
                          hostname=hostname_example,
                          ip_addresses=[ip_example])
    linux_monkey.save()

    cache_info_before_query = Monkey.get_label_by_id.storage.backend.cache_info()
    self.assertEqual(cache_info_before_query.hits, 0)

    # not cached
    label = Monkey.get_label_by_id(linux_monkey.id)

    self.assertIsNotNone(label)
    self.assertIn(hostname_example, label)
    self.assertIn(ip_example, label)

    # should be cached
    _ = Monkey.get_label_by_id(linux_monkey.id)
    cache_info_after_query = Monkey.get_label_by_id.storage.backend.cache_info()
    self.assertEqual(cache_info_after_query.hits, 1)

    new_hostname = "Another hostname"
    linux_monkey.set_hostname(new_hostname)

    # should be a miss
    label = Monkey.get_label_by_id(linux_monkey.id)
    cache_info_after_second_query = Monkey.get_label_by_id.storage.backend.cache_info()
    # still 1 hit only
    self.assertEqual(cache_info_after_second_query.hits, 1)
    self.assertEqual(cache_info_after_second_query.misses, 2)
    # The recomputed label must reflect the new hostname, otherwise the cache was not really invalidated.
    self.assertIn(new_hostname, label)
def test_is_monkey(self):
    """
    Checks that Monkey.is_monkey answers True for a saved monkey and False for an unknown ID, and that repeating
    both queries is served from the cache.
    """
    self.fail_if_not_testing_env()
    self.clean_monkey_db()

    a_monkey = Monkey(guid=str(uuid.uuid4()))
    a_monkey.save()

    cache_info_before_query = Monkey.is_monkey.storage.backend.cache_info()
    self.assertEqual(cache_info_before_query.hits, 0)

    # not cached
    self.assertTrue(Monkey.is_monkey(a_monkey.id))
    fake_id = "123456789012"
    self.assertFalse(Monkey.is_monkey(fake_id))

    # should be cached
    self.assertTrue(Monkey.is_monkey(a_monkey.id))
    self.assertFalse(Monkey.is_monkey(fake_id))

    cache_info_after_query = Monkey.is_monkey.storage.backend.cache_info()
    self.assertEqual(cache_info_after_query.hits, 2)

View File

@ -2,6 +2,7 @@ from bson import ObjectId
from monkey_island.cc.database import mongo
import monkey_island.cc.services.node
from monkey_island.cc.models import Monkey
__author__ = "itay.mizeretz"
@ -141,15 +142,18 @@ class EdgeService:
@staticmethod
def get_edge_label(edge):
NodeService = monkey_island.cc.services.node.NodeService
from_label = NodeService.get_monkey_label(NodeService.get_monkey_by_id(edge["from"]))
if edge["to"] == ObjectId("000000000000000000000000"):
from_id = edge["from"]
to_id = edge["to"]
from_label = Monkey.get_label_by_id(from_id)
if to_id == ObjectId("000000000000000000000000"):
to_label = 'MonkeyIsland'
else:
to_id = NodeService.get_monkey_by_id(edge["to"])
if to_id is None:
to_label = NodeService.get_node_label(NodeService.get_node_by_id(edge["to"]))
if Monkey.is_monkey(to_id):
to_label = Monkey.get_label_by_id(to_id)
else:
to_label = NodeService.get_monkey_label(to_id)
to_label = NodeService.get_node_label(NodeService.get_node_by_id(to_id))
RIGHT_ARROW = u"\u2192"
return "%s %s %s" % (from_label, RIGHT_ARROW, to_label)

View File

@ -22,10 +22,6 @@ class NodeService:
if ObjectId(node_id) == NodeService.get_monkey_island_pseudo_id():
return NodeService.get_monkey_island_node()
edges = EdgeService.get_displayed_edges_by_to(node_id, for_report)
accessible_from_nodes = []
exploits = []
new_node = {"id": node_id}
node = NodeService.get_node_by_id(node_id)
@ -46,16 +42,29 @@ class NodeService:
new_node["ip_addresses"] = node["ip_addresses"]
new_node["domain_name"] = node["domain_name"]
accessible_from_nodes = []
accessible_from_nodes_hostnames = []
exploits = []
edges = EdgeService.get_displayed_edges_by_to(node_id, for_report)
for edge in edges:
accessible_from_nodes.append(NodeService.get_monkey_label(NodeService.get_monkey_by_id(edge["from"])))
from_node_id = edge["from"]
from_node_label = Monkey.get_label_by_id(from_node_id)
from_node_hostname = Monkey.get_hostname_by_id(from_node_id)
accessible_from_nodes.append(from_node_label)
accessible_from_nodes_hostnames.append(from_node_hostname)
for exploit in edge["exploits"]:
exploit["origin"] = NodeService.get_monkey_label(NodeService.get_monkey_by_id(edge["from"]))
exploit["origin"] = from_node_label
exploits.append(exploit)
exploits.sort(cmp=NodeService._cmp_exploits_by_timestamp)
new_node["exploits"] = exploits
new_node["accessible_from_nodes"] = accessible_from_nodes
new_node["accessible_from_nodes_hostnames"] = accessible_from_nodes_hostnames
if len(edges) > 0:
new_node["services"] = edges[-1]["services"]
else:
@ -112,6 +121,7 @@ class NodeService:
@staticmethod
def get_monkey_label(monkey):
# todo
label = monkey["hostname"] + " : " + monkey["ip_addresses"][0]
ip_addresses = local_ip_addresses()
if len(set(monkey["ip_addresses"]).intersection(ip_addresses)) > 0:
@ -137,15 +147,18 @@ class NodeService:
@staticmethod
def monkey_to_net_node(monkey, for_report=False):
label = monkey['hostname'] if for_report else NodeService.get_monkey_label(monkey)
is_monkey_dead = Monkey.get_single_monkey_by_id(monkey["_id"]).is_dead()
monkey_id = monkey["_id"]
label = Monkey.get_hostname_by_id(monkey_id) if for_report else Monkey.get_label_by_id(monkey_id)
monkey_group = NodeService.get_monkey_group(monkey)
return \
{
"id": monkey["_id"],
"id": monkey_id,
"label": label,
"group": NodeService.get_monkey_group(monkey),
"group": monkey_group,
"os": NodeService.get_monkey_os(monkey),
"dead": is_monkey_dead,
# The monkey is running IFF the group contains "_running". Therefore it's dead IFF the group does NOT
# contain "_running". This is a small optimisation, to not call "is_dead" twice.
"dead": "_running" not in monkey_group,
"domain_name": "",
"pba_results": monkey["pba_results"] if "pba_results" in monkey else []
}

View File

@ -22,7 +22,6 @@ from monkey_island.cc.utils import local_ip_addresses, get_subnets
__author__ = "itay.mizeretz"
logger = logging.getLogger(__name__)
@ -118,22 +117,17 @@ class ReportService:
@staticmethod
def get_scanned():
formatted_nodes = []
nodes = \
[NodeService.get_displayed_node_by_id(node['_id'], True) for node in mongo.db.node.find({}, {'_id': 1})] \
+ [NodeService.get_displayed_node_by_id(monkey['_id'], True) for monkey in
mongo.db.monkey.find({}, {'_id': 1})]
nodes = ReportService.get_all_displayed_nodes()
for node in nodes:
nodes_that_can_access_current_node = node['accessible_from_nodes_hostnames']
formatted_nodes.append(
{
'label': node['label'],
'ip_addresses': node['ip_addresses'],
'accessible_from_nodes':
list((x['hostname'] for x in
(NodeService.get_displayed_node_by_id(edge['from'], True)
for edge in EdgeService.get_displayed_edges_by_to(node['id'], True)))),
'accessible_from_nodes': nodes_that_can_access_current_node,
'services': node['services'],
'domain_name': node['domain_name'],
'pba_results': node['pba_results'] if 'pba_results' in node else 'None'
@ -143,25 +137,37 @@ class ReportService:
return formatted_nodes
@staticmethod
def get_all_displayed_nodes():
    """
    Collects the displayed form of every document in the node collection, followed by every document in the
    monkey collection.
    :return: A list of the values returned by NodeService.get_displayed_node_by_id, nodes first and monkeys after.
    """
    displayed_nodes = []
    # Order matters: documents from the node collection come before documents from the monkey collection.
    for collection in (mongo.db.node, mongo.db.monkey):
        # Only the '_id' field is projected, since that is all the lookup below needs.
        for document in collection.find({}, {'_id': 1}):
            # NOTE(review): the second argument is presumably the for_report flag - confirm against NodeService.
            displayed_nodes.append(NodeService.get_displayed_node_by_id(document['_id'], True))
    return displayed_nodes
@staticmethod
def get_exploited():
exploited = \
exploited_with_monkeys = \
[NodeService.get_displayed_node_by_id(monkey['_id'], True) for monkey in
mongo.db.monkey.find({}, {'_id': 1})
if not NodeService.get_monkey_manual_run(NodeService.get_monkey_by_id(monkey['_id']))] \
+ [NodeService.get_displayed_node_by_id(node['_id'], True)
for node in mongo.db.node.find({'exploited': True}, {'_id': 1})]
mongo.db.monkey.find({}, {'_id': 1}) if
not NodeService.get_monkey_manual_run(NodeService.get_monkey_by_id(monkey['_id']))]
exploited_without_monkeys = [NodeService.get_displayed_node_by_id(node['_id'], True) for node in
mongo.db.node.find({'exploited': True}, {'_id': 1})]
exploited = exploited_with_monkeys + exploited_without_monkeys
exploited = [
{
'label': monkey['label'],
'ip_addresses': monkey['ip_addresses'],
'domain_name': monkey['domain_name'],
'label': exploited_node['label'],
'ip_addresses': exploited_node['ip_addresses'],
'domain_name': exploited_node['domain_name'],
'exploits': list(set(
[ReportService.EXPLOIT_DISPLAY_DICT[exploit['exploiter']] for exploit in monkey['exploits'] if
exploit['result']]))
[ReportService.EXPLOIT_DISPLAY_DICT[exploit['exploiter']] for exploit in exploited_node['exploits']
if exploit['result']]))
}
for monkey in exploited]
for exploited_node in exploited]
logger.info('Exploited nodes generated for reporting')
@ -208,8 +214,9 @@ class ReportService:
# Pick out all ssh keys not yet included in creds
ssh_keys = [{'username': key_pair['name'], 'type': 'Clear SSH private key',
'origin': origin} for key_pair in telem['data']['ssh_info']
if key_pair['private_key'] and {'username': key_pair['name'], 'type': 'Clear SSH private key',
'origin': origin} not in creds]
if
key_pair['private_key'] and {'username': key_pair['name'], 'type': 'Clear SSH private key',
'origin': origin} not in creds]
creds.extend(ssh_keys)
return creds
@ -698,6 +705,8 @@ class ReportService:
cross_segment_issues = ReportService.get_cross_segment_issues()
monkey_latest_modify_time = Monkey.get_latest_modifytime()
scanned_nodes = ReportService.get_scanned()
exploited_nodes = ReportService.get_exploited()
report = \
{
'overview':
@ -716,8 +725,8 @@ class ReportService:
},
'glance':
{
'scanned': ReportService.get_scanned(),
'exploited': ReportService.get_exploited(),
'scanned': scanned_nodes,
'exploited': exploited_nodes,
'stolen_creds': ReportService.get_stolen_creds(),
'azure_passwords': ReportService.get_azure_creds(),
'ssh_keys': ReportService.get_ssh_keys(),
@ -750,7 +759,6 @@ class ReportService:
report_as_json = json_util.dumps(report_dict).replace('.', ',,,')
return json_util.loads(report_as_json)
@staticmethod
def is_latest_report_exists():
"""

View File

@ -1,4 +1,5 @@
from monkey_island.cc.database import mongo
from monkey_island.cc.models import Monkey
from monkey_island.cc.services import mimikatz_utils
from monkey_island.cc.services.node import NodeService
from monkey_island.cc.services.config import ConfigService
@ -12,6 +13,7 @@ def process_system_info_telemetry(telemetry_json):
process_credential_info(telemetry_json)
process_mimikatz_and_wmi_info(telemetry_json)
process_aws_data(telemetry_json)
update_db_with_new_hostname(telemetry_json)
test_antivirus_existence(telemetry_json)
@ -97,3 +99,7 @@ def process_aws_data(telemetry_json):
monkey_id = NodeService.get_monkey_by_guid(telemetry_json['monkey_guid']).get('_id')
mongo.db.monkey.update_one({'_id': monkey_id},
{'$set': {'aws_instance_id': telemetry_json['data']['aws']['instance_id']}})
def update_db_with_new_hostname(telemetry_json):
    """
    Updates the hostname stored for the monkey that sent this system info telemetry.
    :param telemetry_json: The telemetry document. Reads its 'monkey_guid' and its ['data']['hostname'] values.
    """
    # The sender of a telemetry is identified by 'monkey_guid', which is how process_aws_data resolves it too.
    # NOTE(review): the telemetry's own '_id' is presumably the telemetry document's ID, not a Monkey ID - confirm.
    monkey_id = NodeService.get_monkey_by_guid(telemetry_json['monkey_guid']).get('_id')
    Monkey.get_single_monkey_by_id(monkey_id).set_hostname(telemetry_json['data']['hostname'])

View File

@ -6,6 +6,7 @@ import array
import struct
import ipaddress
from netifaces import interfaces, ifaddresses, AF_INET
from ring import lru
__author__ = 'Barak'
@ -46,9 +47,13 @@ else:
# name of interface is (namestr[i:i+16].split('\0', 1)[0]
finally:
return result
# End of local ips function
# The list of local IP addresses should not change often. Therefore, we can cache the result so that the body of this
# function runs only once. This stopgap measure is here because this function is called many times during report
# generation.
# This means that if the interfaces of the Island machine change, the Island process needs to be restarted.
@lru(maxsize=1)
def local_ip_addresses():
ip_list = []
for interface in interfaces():
@ -57,6 +62,11 @@ def local_ip_addresses():
return ip_list
# The list of subnets should not change often. Therefore, we can cache the result so that the body of this function
# runs only once. This stopgap measure is here because this function is called many times during report
# generation.
# This means that if the interfaces or subnets of the Island machine change, the Island process needs to be restarted.
@lru(maxsize=1)
def get_subnets():
subnets = []
for interface in interfaces():

View File

@ -26,3 +26,4 @@ mongoengine
mongomock
requests
dpath
ring