Merge pull request #1950 from guardicore/1940-dal-layer

Island: Define repository interfaces
This commit is contained in:
VakarisZ 2022-05-24 14:00:38 +03:00 committed by GitHub
commit 23c2b42d5c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
27 changed files with 327 additions and 16 deletions

View File

@ -1,6 +1,7 @@
from mongoengine import Document, FloatField
# TODO rename to Simulation, add other metadata
class AgentControls(Document):
# Timestamp of the last "kill all agents" command

View File

@ -0,0 +1,17 @@
from dataclasses import dataclass
from typing import Mapping, Sequence
# This is the most concise way to represent a graph:
# Machine id as key, Arc list as a value
# Not sure how compatible this will be with ORM objects though,
# might require more complex casting logic
@dataclass
class Arc:
    """An arc of the network map, pointing at a destination machine."""

    # Quoted forward reference: no ``Machine`` type is defined or imported in this
    # module yet, so an unquoted name would raise NameError when the module is imported.
    dst_machine: "Machine"  # noqa: F821
    status: str


# ``Arc`` is declared before ``NetworkMap`` because the ``nodes`` annotation below is
# evaluated when the class body runs and therefore needs ``Arc`` to already exist.
@dataclass
class NetworkMap:
    """Graph of the network: machine id as key, list of arcs as value."""

    nodes: Mapping[str, Sequence[Arc]]

View File

@ -32,6 +32,8 @@ def get_report() -> dict:
return _decode_dot_char_before_mongo_insert(decrypt_dict(sensitive_fields, report_dict))
# TODO remove this unnecessary encoding. I think these are legacy methods from back in the day
# when usernames were used as keys. If not, we shouldn't use unknown data as keys.
def _encode_dot_char_before_mongo_insert(report_dict):
"""
mongodb doesn't allow for '.' and '$' in a key's name, this function replaces the '.'

View File

@ -7,6 +7,7 @@ from mongoengine import Document, EmbeddedDocumentListField
from monkey_island.cc.models.zero_trust.event import Event
# TODO just move events to the finding and remove this collection
class MonkeyFindingDetails(Document):
# SCHEMA
events = EmbeddedDocumentListField(document_type=Event, required=False)

View File

@ -0,0 +1,15 @@
from abc import ABC
from typing import Optional, Sequence
from monkey_island.cc.models import Monkey
class IAgentRepository(ABC):
    """Repository interface for agents (currently represented by ``Monkey`` documents)."""

    # TODO rename Monkey document to Agent
    def save_agent(self, agent: Monkey):
        """Intended to persist ``agent``. Interface stub: does nothing and returns None."""
        return None

    def get_agents(
        self, id: Optional[str] = None, running: Optional[bool] = None
    ) -> Sequence[Monkey]:
        """
        Intended to fetch agents, optionally narrowed by ``id`` and/or ``running``.

        Interface stub: does nothing and returns None.
        """
        return None

View File

@ -0,0 +1,24 @@
from abc import ABC
from typing import Sequence
from monkey_island.cc.models.attack import Mitigation
class IAttackRepository(ABC):
    """Repository interface for ATT&CK mitigations and the ATT&CK report."""

    # ATT&CK: the report methods below simply raise NotImplementedError for now
    ######################################

    # This will likely stay the same as mitigations are external data
    def save_mitigations(self, mitigations: Sequence[Mitigation]):
        """Intended to store ``mitigations``. Interface stub: returns None."""
        return None

    # This will likely remain if we plan to keep the report actionable
    def get_mitigation_by_technique(self, technique_id: str) -> Mitigation:
        """Intended to look up the mitigation for ``technique_id``. Interface stub: returns None."""
        return None

    # This could go away, since attack report is not costly to generate and we'll refactor it
    def save_attack_report(self, attack_report: dict):
        """Not implemented: always raises NotImplementedError."""
        raise NotImplementedError()

    # This will probably go away once we use endpoints instead
    def get_attack_report(self):
        """Not implemented: always raises NotImplementedError."""
        raise NotImplementedError()

View File

@ -0,0 +1,30 @@
from abc import ABC
from typing import Any, Mapping, Sequence
class IConfigRepository(ABC):
    """Repository interface for reading and writing the configuration."""

    # Config
    ###############################################

    # This returns the current config
    # TODO investigate if encryption should be here or elsewhere
    # TODO potentially should be a DTO as well, but its structure is defined in schema already
    def get_config(self) -> Mapping:
        """Intended to return the current config. Interface stub: returns None."""
        return None

    def set_config(self, config: dict):
        """Intended to replace the stored config with ``config``. Interface stub: returns None."""
        return None

    # Used when only a subset of config is submitted, for example only PBAFiles
    # Used by passing keys, like ['monkey', 'post_breach_actions', 'linux_filename']
    # Using a list is less ambiguous IMO, than using . notation
    def set_config_field(self, key_list: Sequence[str], value: Any):
        """
        Intended to set the config field addressed by ``key_list`` to ``value``.

        Interface stub: returns None.
        """
        return None

    # Used when only a subset of config is needed, for example only PBAFiles
    # Used by passing keys, like ['monkey', 'post_breach_actions', 'linux_filename']
    # Using a list is less ambiguous IMO, than using . notation
    # TODO Still in doubt about encryption, this should probably be determined automatically
    def get_config_field(self, key_list: Sequence[str]) -> Any:
        """Intended to return the config field addressed by ``key_list``. Stub: returns None."""
        return None

View File

@ -0,0 +1,14 @@
from abc import ABC
from typing import Optional, Sequence
class ILogRepository(ABC):
    """Repository interface for agent logs."""

    # TODO Define log object
    # ``Log`` is quoted as a forward reference: it is not defined or imported here yet,
    # and an unquoted annotation would raise NameError when this class is created.
    def get_logs(self, agent_id: Optional[str] = None) -> Sequence["Log"]:  # noqa: F821
        """
        Intended to fetch logs, optionally narrowed to ``agent_id``.

        Interface stub: returns None.
        """
        pass

    def save_log(self, log: "Log"):  # noqa: F821
        """Intended to store ``log``. Interface stub: returns None."""
        pass

    def delete_log(self, agent_id: str):
        """Intended to delete the log belonging to ``agent_id``. Interface stub: returns None."""
        pass
pass

View File

@ -0,0 +1,20 @@
from abc import ABC
from typing import Optional, Sequence
class IMachineRepository(ABC):
    """Repository interface for machines."""

    # ``Machine`` and ``MachineState`` are quoted as forward references: neither is
    # defined or imported here yet, and unquoted annotations would raise NameError
    # when this class is created.

    # TODO define Machine object(ORM model)
    def save_machine(self, machine: "Machine"):  # noqa: F821
        """Intended to store ``machine``. Interface stub: returns None."""
        pass

    # TODO define Machine object(ORM model)
    # TODO define or re-use machine state.
    # TODO investigate where should the state be stored in edge or both edge and machine
    def get_machines(
        self,
        id: Optional[str] = None,
        ips: Optional[Sequence[str]] = None,
        state: Optional["MachineState"] = None,  # noqa: F821
        is_island: Optional[bool] = None,
    ) -> Sequence["Machine"]:  # noqa: F821
        """
        Intended to fetch machines, optionally narrowed by any of the given filters.

        Every filter defaults to None. Interface stub: returns None.
        """
        pass

View File

@ -0,0 +1,11 @@
from abc import ABC
class INetworkMapRepository(ABC):
    """Repository interface for the network map."""

    # TODO Define NetMap object
    # ``NetMap`` is quoted as a forward reference: it is not defined or imported here
    # yet, and an unquoted annotation would raise NameError when this class is created.
    def get_map(self) -> "NetMap":  # noqa: F821
        """Intended to return the stored network map. Interface stub: returns None."""
        pass

    def save_netmap(self, netmap: "NetMap"):  # noqa: F821
        """Intended to store ``netmap``. Interface stub: returns None."""
        pass

View File

@ -0,0 +1,14 @@
from abc import ABC
from monkey_island.cc.models import Report
class IReportRepository(ABC):
    """Repository interface for the report."""

    # Report (potentially should go away if split up into proper endpoints/services)
    #################################
    def save_report(self, report: Report):
        """Intended to store ``report``. Interface stub: returns None."""
        return None

    # Should return only one
    def get_report(self) -> Report:
        """Intended to return the single stored report. Interface stub: returns None."""
        return None

View File

@ -0,0 +1,11 @@
from abc import ABC
class ISimulationRepository(ABC):
    """Repository interface for simulation metadata."""

    # TODO define simulation object. It should contain metadata about simulation,
    # like start, end times, mode and last forced stop of all monkeys
    # ``Simulation`` is quoted as a forward reference: it is not defined or imported
    # here yet, and an unquoted annotation would raise NameError when this class is created.
    def save_simulation(self, simulation: "Simulation"):  # noqa: F821
        """Intended to store ``simulation``. Interface stub: returns None."""
        pass

    def get_simulation(self):
        """Intended to return the stored simulation. Interface stub: returns None."""
        pass

View File

@ -0,0 +1,13 @@
from abc import ABC
from typing import Sequence
from monkey_island.cc.models import StolenCredentials
# Consider removing this interface and just using the telemetry type
class IStolenCredentialsRepository(ABC):
    """Repository interface for stolen credentials."""

    def get_stolen_credentials(self) -> Sequence[StolenCredentials]:
        """Intended to return the stored stolen credentials. Interface stub: returns None."""
        return None

    def save_stolen_credentials(self, stolen_credentials: StolenCredentials):
        """Intended to store ``stolen_credentials``. Interface stub: returns None."""
        return None

View File

@ -0,0 +1,20 @@
from abc import ABC
from typing import Optional, Sequence
from monkey_island.cc.models.telemetries.telemetry import Telemetry
class ITelemetryRepository(ABC):
    """Repository interface for telemetries."""

    def save_telemetry(self, telemetry: Telemetry):
        """Intended to store ``telemetry``. Interface stub: returns None."""
        pass

    # TODO define all telemetry types
    # Potentially we'll need to define each telem type separately. As it stands there's no way to
    # get exploit telemetries by exploiter for example
    def get_telemetries(
        self,
        id: Optional[str] = None,
        # Quoted forward reference: ``TelemetryType`` is not defined or imported here yet,
        # and an unquoted annotation would raise NameError when this class is created.
        type: Optional["TelemetryType"] = None,  # noqa: F821
        monkey_id: Optional[str] = None,
    ) -> Sequence[Telemetry]:
        """
        Intended to fetch telemetries, optionally narrowed by ``id``, ``type``, ``monkey_id``.

        Every filter defaults to None. Interface stub: returns None.
        """
        pass

View File

@ -0,0 +1,12 @@
from abc import ABC
from typing import Optional
from monkey_island.cc.models.attack.attack_mitigations import AttackMitigations
class IMitigationsRepository(ABC):
    """Repository interface for ATT&CK mitigations."""

    def get_mitigations(self, technique_id: Optional[str] = None) -> AttackMitigations:
        """
        Intended to fetch mitigations, optionally narrowed to ``technique_id``.

        Interface stub: returns None.
        """
        return None

    def save_mitigations(self, mitigations: AttackMitigations):
        """Intended to store ``mitigations``. Interface stub: returns None."""
        return None

View File

@ -0,0 +1,11 @@
from abc import ABC
from typing import Optional, Sequence
from monkey_island.cc.models.zero_trust.event import Event
class IEventRepository(ABC):
    """Repository interface for reading zero trust events."""

    def get_events(self, finding_id: Optional[str] = None) -> Sequence[Event]:
        """
        Intended to fetch events, optionally narrowed to ``finding_id``.

        Interface stub: returns None.
        """
        return None

    # Events are saved in IFindingRepository, because a finding has many events

View File

@ -0,0 +1,13 @@
from abc import ABC
from typing import Optional
# Zero trust finding
from monkey_island.cc.models.zero_trust.finding import Finding
class IFindingRepository(ABC):
    """Repository interface for zero trust findings."""

    # NOTE(review): the name is plural but the return annotation is a single ``Finding``;
    # confirm whether a sequence of findings is intended.
    def get_findings(self, test: Optional[str] = None) -> Finding:
        """
        Intended to fetch findings, optionally narrowed to ``test``.

        Interface stub: returns None.
        """
        return None

    def save_finding(self, finding: Finding):
        """Intended to store ``finding``. Interface stub: returns None."""
        return None

View File

@ -15,7 +15,7 @@ MAX_SAME_CATEGORY_TELEMS = 10000
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# TODO this will break with the IRepository implementation. Remove it
class TestTelemStore:
TELEMS_EXPORTED = False
@ -60,6 +60,7 @@ class TestTelemStore:
TestTelemStore.TELEMS_EXPORTED = True
logger.info("Telemetries exported!")
# Should be private
@staticmethod
def get_unique_file_path_for_export_telem(target_dir: str, test_telem: ExportedTelem):
telem_filename = TestTelemStore._get_filename_by_export_telem(test_telem)

View File

@ -70,6 +70,9 @@ class ConfigService:
:param is_island: If True, will include island specific configuration parameters.
:return: The entire global config.
"""
# is_initial_config and should_decrypt are only there to check whether we are still on the
# default configuration or whether the user has already modified it
config = (
mongo.db.config.find_one({"name": "initial" if is_initial_config else "newconfig"})
or {}
@ -95,9 +98,12 @@ class ConfigService:
:return: The value of the requested config key.
"""
config_key = functools.reduce(lambda x, y: x + "." + y, config_key_as_arr)
# This should just call get_config from repository. If None, then call get_default prob
config = mongo.db.config.find_one(
{"name": "initial" if is_initial_config else "newconfig"}, {config_key: 1}
)
for config_key_part in config_key_as_arr:
config = config[config_key_part]
if should_decrypt:
@ -141,6 +147,7 @@ class ConfigService:
def get_config_schema():
return SCHEMA
# Not added to interface because it's doable by get_config_field + set_config_field
@staticmethod
def add_item_to_config_set_if_dont_exist(item_path_array, item_value, should_encrypt):
item_key = ".".join(item_path_array)

View File

@ -65,6 +65,9 @@ class EdgeService(Edge):
except DoesNotExist:
return []
# TODO it's not entirely clear why the tunnel is unset in
# monkey/monkey_island/cc/services/telemetry/processing/tunnel.py:15
# Either way this can be done by fetching, modifying and saving
def disable_tunnel(self):
self.tunnel = False
self.save()

View File

@ -15,6 +15,7 @@ logger = logging.getLogger(__name__)
def set_stop_all(time: float):
# This will use Agent and Simulation repositories
for monkey in Monkey.objects():
monkey.config.should_stop = True
monkey.save()

View File

@ -21,6 +21,8 @@ class NetEdgeService:
def _get_standard_net_edges():
return [DisplayedEdgeService.edge_to_net_edge(x) for x in EdgeService.get_all_edges()]
# If we save the island machine as a standard machine, we won't need these
# methods
@staticmethod
def _get_uninfected_island_net_edges():
edges = []

View File

@ -80,6 +80,11 @@ class NodeService:
domain_name = " (" + node["domain_name"] + ")"
return node["os"]["version"] + " : " + node["ip_addresses"][0] + domain_name
# A lot of methods like these duplicate between monkey and node.
# That's a result of poor entity model, because both nodes and monkeys
# store the same information. It's best to extract the machine specific data
# to "Machine" entity (like IP's and os) and agent specific data to "Agent" (like alive,
# parent, etc)
@staticmethod
def get_monkey_os(monkey):
os = "unknown"
@ -183,15 +188,15 @@ class NodeService:
tunnel_host_id = NodeService.get_monkey_by_ip(tunnel_host_ip)["_id"]
NodeService.unset_all_monkey_tunnels(monkey_id)
mongo.db.monkey.update(
{"_id": monkey_id}, {"$set": {"tunnel": tunnel_host_id}}, upsert=False
{"_id": monkey_id}, {"$set": {"tunnel": tunnel_host_id}}, upsert=False
)
monkey_label = NodeService.get_label_for_endpoint(monkey_id)
tunnel_host_label = NodeService.get_label_for_endpoint(tunnel_host_id)
tunnel_edge = EdgeService.get_or_create_edge(
src_node_id=monkey_id,
dst_node_id=tunnel_host_id,
src_label=monkey_label,
dst_label=tunnel_host_label,
src_node_id=monkey_id,
dst_node_id=tunnel_host_id,
src_label=monkey_label,
dst_label=tunnel_host_label,
)
tunnel_edge.tunnel = True
tunnel_edge.ip_address = tunnel_host_ip
@ -200,13 +205,13 @@ class NodeService:
@staticmethod
def insert_node(ip_address, domain_name=""):
new_node_insert_result = mongo.db.node.insert_one(
{
"ip_addresses": [ip_address],
"domain_name": domain_name,
"exploited": False,
"propagated": False,
"os": {"type": "unknown", "version": "unknown"},
}
{
"ip_addresses": [ip_address],
"domain_name": domain_name,
"exploited": False,
"propagated": False,
"os": {"type": "unknown", "version": "unknown"},
}
)
return mongo.db.node.find_one({"_id": new_node_insert_result.inserted_id})
@ -221,6 +226,11 @@ class NodeService:
def get_monkey_by_id(monkey_id):
return mongo.db.monkey.find_one({"_id": ObjectId(monkey_id)})
# GUID is generated from uuid.getnode() and represents the machine it was run on
# All monkeys that ran on the same machine will have the same GUID, but
# we can just store the monkeys on the same machine document/have one to many relationship
# GUID could be stored on machine to uniquely identify the same machine even after the
# ip, domain name or other changes. Not entirely sure it's necessary
@staticmethod
def get_monkey_by_guid(monkey_guid):
return mongo.db.monkey.find_one({"guid": monkey_guid})
@ -237,10 +247,12 @@ class NodeService:
def get_node_by_id(node_id):
return mongo.db.node.find_one({"_id": ObjectId(node_id)})
# This is only used to determine if report is the latest or if we need to
# generate a new one. This info should end up in Simulation entity instead.
@staticmethod
def update_monkey_modify_time(monkey_id):
mongo.db.monkey.update(
{"_id": monkey_id}, {"$set": {"modifytime": datetime.now()}}, upsert=False
{"_id": monkey_id}, {"$set": {"modifytime": datetime.now()}}, upsert=False
)
@staticmethod
@ -256,9 +268,11 @@ class NodeService:
@staticmethod
def add_communication_info(monkey, info):
mongo.db.monkey.update(
{"guid": monkey["guid"]}, {"$set": {"command_control_channel": info}}, upsert=False
{"guid": monkey["guid"]}, {"$set": {"command_control_channel": info}}, upsert=False
)
# TODO this returns a mock island agent
# It's better to just initialize the island machine on reset I think
@staticmethod
def get_monkey_island_monkey():
ip_addresses = local_ip_addresses()
@ -329,7 +343,7 @@ class NodeService:
@staticmethod
def get_hostname_by_id(node_id):
return NodeService.get_node_hostname(
mongo.db.monkey.find_one({"_id": node_id}, {"hostname": 1})
mongo.db.monkey.find_one({"_id": node_id}, {"hostname": 1})
)
@staticmethod

View File

@ -57,6 +57,7 @@ class ReportService:
def initialize(cls, aws_service: AWSService):
cls._aws_service = aws_service
# This should pull from Simulation entity
@staticmethod
def get_first_monkey_time():
return (
@ -88,6 +89,7 @@ class ReportService:
return st
# This should be replaced by a query to edges and get tunnel edges?
@staticmethod
def get_tunnels():
return [
@ -103,6 +105,7 @@ class ReportService:
for tunnel in mongo.db.monkey.find({"tunnel": {"$exists": True}}, {"tunnel": 1})
]
# This should be replaced by machine query for "scanned" status
@staticmethod
def get_scanned():
formatted_nodes = []
@ -110,6 +113,8 @@ class ReportService:
nodes = ReportService.get_all_displayed_nodes()
for node in nodes:
# This information should be evident from the map, not sure a table/list is a good way
# to display it anyways
nodes_that_can_access_current_node = node["accessible_from_nodes_hostnames"]
formatted_nodes.append(
{

View File

@ -6,6 +6,20 @@ Vulture doesn't mark these as dead again.
from infection_monkey.exploit.log4shell_utils.ldap_server import LDAPServerFactory
from monkey_island.cc import app
from monkey_island.cc.models import Report
from monkey_island.cc.models.networkmap import Arc, NetworkMap
from monkey_island.cc.repository.attack.IMitigationsRepository import IMitigationsRepository
from monkey_island.cc.repository.IAgentRepository import IAgentRepository
from monkey_island.cc.repository.IAttackRepository import IAttackRepository
from monkey_island.cc.repository.IConfigRepository import IConfigRepository
from monkey_island.cc.repository.ILogRepository import ILogRepository
from monkey_island.cc.repository.IMachineRepository import IMachineRepository
from monkey_island.cc.repository.INetworkMapRepository import INetworkMapRepository
from monkey_island.cc.repository.IReportRepository import IReportRepository
from monkey_island.cc.repository.ISimulationRepository import ISimulationRepository
from monkey_island.cc.repository.IStolenCredentials import IStolenCredentialsRepository
from monkey_island.cc.repository.ITelemetryRepository import ITelemetryRepository
from monkey_island.cc.repository.zero_trust.IEventRepository import IEventRepository
from monkey_island.cc.repository.zero_trust.IFindingRepository import IFindingRepository
fake_monkey_dir_path # unused variable (monkey/tests/infection_monkey/post_breach/actions/test_users_custom_pba.py:37)
set_os_linux # unused variable (monkey/tests/infection_monkey/post_breach/actions/test_users_custom_pba.py:37)
@ -169,3 +183,38 @@ GCPHandler # unused function (envs/monkey_zoo/blackbox/test_blackbox.py:57)
architecture # unused variable (monkey/infection_monkey/exploit/caching_agent_repository.py:25)
response_code # unused variable (monkey/monkey_island/cc/services/aws/aws_command_runner.py:26)
# TODO DELETE AFTER RESOURCE REFACTORING
NetworkMap
Arc.dst_machine
IMitigationsRepository.get_mitigations
IMitigationsRepository.save_mitigations
IAgentRepository.save_agent
IAgentRepository.get_agents
agent
IAttackRepository.get_attack_report
IAttackRepository.save_attack_report
IAttackRepository.get_mitigation_by_technique
IAttackRepository.save_mitigations
IConfigRepository.set_config
IConfigRepository.set_config_field
IConfigRepository.get_config_field
ILogRepository.get_logs
ILogRepository.save_log
ILogRepository.delete_log
IMachineRepository.save_machine
IMachineRepository.get_machines
INetworkMapRepository.get_map
INetworkMapRepository.save_netmap
IReportRepository
ISimulationRepository.save_simulation
ISimulationRepository.get_simulation
IStolenCredentialsRepository.get_stolen_credentials
IStolenCredentialsRepository.save_stolen_credentials
ITelemetryRepository.get_telemetries
IEventRepository.get_events
IFindingRepository.get_findings
key_list
simulation
stolen_credentials
netmap