Merge pull request #1950 from guardicore/1940-dal-layer

Island: Define repository interfaces
2022-05-24 14:00:38 +03:00 · 2022-05-24 14:00:38 +03:00 · 23c2b42d5c
parent ccc29e448c 1077a84623
commit 23c2b42d5c
27 changed files with 327 additions and 16 deletions
--- a/monkey/monkey_island/cc/models/agent_controls/agent_controls.py
+++ b/monkey/monkey_island/cc/models/agent_controls/agent_controls.py
@ -1,6 +1,7 @@
 from mongoengine import Document, FloatField


+# TODO rename to Simulation, add other metadata
 class AgentControls(Document):

    # Timestamp of the last "kill all agents" command
--- a/monkey/monkey_island/cc/models/networkmap.py
+++ b/monkey/monkey_island/cc/models/networkmap.py
@ -0,0 +1,17 @@
+from dataclasses import dataclass
+from typing import Mapping, Sequence
+
+
+# This is the most concise way to represent a graph:
+# Machine id as key, Arc list as a value
+# Not sure how compatible this will be with ORM objects though,
+# might require more complex casting logic
+@dataclass
+class NetworkMap:
+    nodes: Mapping[str, Sequence["Arc"]]  # quoted: Arc is declared further down
+
+
+@dataclass
+class Arc:
+    dst_machine: "Machine"  # quoted: no Machine model is defined or imported yet
+    status: str
--- a/monkey/monkey_island/cc/models/report/report_dal.py
+++ b/monkey/monkey_island/cc/models/report/report_dal.py
@ -32,6 +32,8 @@ def get_report() -> dict:
    return _decode_dot_char_before_mongo_insert(decrypt_dict(sensitive_fields, report_dict))


+# TODO remove this unnecessary encoding. I think these are legacy methods from back in the day
+# when usernames were used as keys. If not, we shouldn't use unknown data as keys.
 def _encode_dot_char_before_mongo_insert(report_dict):
    """
    mongodb doesn't allow for '.' and '$' in a key's name, this function replaces the '.'
--- a/monkey/monkey_island/cc/models/zero_trust/monkey_finding_details.py
+++ b/monkey/monkey_island/cc/models/zero_trust/monkey_finding_details.py
@ -7,6 +7,7 @@ from mongoengine import Document, EmbeddedDocumentListField
 from monkey_island.cc.models.zero_trust.event import Event


+# TODO just move events to the finding and remove this collection
 class MonkeyFindingDetails(Document):
    # SCHEMA
    events = EmbeddedDocumentListField(document_type=Event, required=False)
--- a/monkey/monkey_island/cc/repository/IAgentRepository.py
+++ b/monkey/monkey_island/cc/repository/IAgentRepository.py
@ -0,0 +1,15 @@
+from abc import ABC
+from typing import Optional, Sequence
+
+from monkey_island.cc.models import Monkey
+
+
+class IAgentRepository(ABC):
+    # TODO rename Monkey document to Agent
+    def save_agent(self, agent: Monkey):
+        pass
+
+    def get_agents(
+        self, id: Optional[str] = None, running: Optional[bool] = None
+    ) -> Sequence[Monkey]:
+        pass
--- a/monkey/monkey_island/cc/repository/IAttackRepository.py
+++ b/monkey/monkey_island/cc/repository/IAttackRepository.py
@ -0,0 +1,24 @@
+from abc import ABC
+from typing import Sequence
+
+from monkey_island.cc.models.attack import Mitigation
+
+
+class IAttackRepository(ABC):
+    # Att&ck just add not implemented raises
+    ######################################
+    # This will likely stay the same as mitigations are external data
+    def save_mitigations(self, mitigations: Sequence[Mitigation]):
+        pass
+
+    # This will likely remain if we plan to keep the report actionable
+    def get_mitigation_by_technique(self, technique_id: str) -> Mitigation:
+        pass
+
+    # This could go away, since attack report is not costly to generate and we'll refactor it
+    def save_attack_report(self, attack_report: dict):
+        raise NotImplementedError
+
+    # This will probably go away once we use endpoints instead
+    def get_attack_report(self):
+        raise NotImplementedError
--- a/monkey/monkey_island/cc/repository/IConfigRepository.py
+++ b/monkey/monkey_island/cc/repository/IConfigRepository.py
@ -0,0 +1,30 @@
+from abc import ABC
+from typing import Any, Mapping, Sequence
+
+
+class IConfigRepository(ABC):
+
+    # Config
+    ###############################################
+
+    # This returns the current config
+    # TODO investigate if encryption should be here or where
+    # TODO potentially should be a DTO as well, but its structure is defined in schema already
+    def get_config(self) -> Mapping:
+        pass
+
+    def set_config(self, config: dict):
+        pass
+
+    # Used when only a subset of config is submitted, for example only PBAFiles
+    # Used by passing keys, like ['monkey', 'post_breach_actions', 'linux_filename']
+    # Using a list is less ambiguous IMO, than using . notation
+    def set_config_field(self, key_list: Sequence[str], value: Any):
+        pass
+
+    # Used when only a subset of config is needed, for example only PBAFiles
+    # Used by passing keys, like ['monkey', 'post_breach_actions', 'linux_filename']
+    # Using a list is less ambiguous IMO, than using . notation
+    # TODO Still in doubt about encryption, this should probably be determined automatically
+    def get_config_field(self, key_list: Sequence[str]) -> Any:
+        pass
--- a/monkey/monkey_island/cc/repository/ILogRepository.py
+++ b/monkey/monkey_island/cc/repository/ILogRepository.py
@ -0,0 +1,14 @@
+from abc import ABC
+from typing import Optional, Sequence
+
+
+class ILogRepository(ABC):
+    # TODO define the Log object; until then it is only referenced as a forward reference
+    def get_logs(self, agent_id: Optional[str] = None) -> Sequence["Log"]:
+        pass
+
+    def save_log(self, log: "Log"):
+        pass
+
+    def delete_log(self, agent_id: str):
+        pass
--- a/monkey/monkey_island/cc/repository/IMachineRepository.py
+++ b/monkey/monkey_island/cc/repository/IMachineRepository.py
@ -0,0 +1,20 @@
+from abc import ABC
+from typing import Optional, Sequence
+
+
+class IMachineRepository(ABC):
+    # TODO define Machine object(ORM model); quoted in annotations until it exists
+    def save_machine(self, machine: "Machine"):
+        pass
+
+    # TODO define Machine object(ORM model)
+    # TODO define or re-use machine state (MachineState is quoted until it exists).
+    # TODO investigate where should the state be stored in edge or both edge and machine
+    def get_machines(
+        self,
+        id: Optional[str] = None,
+        ips: Optional[Sequence[str]] = None,
+        state: Optional["MachineState"] = None,
+        is_island: Optional[bool] = None,
+    ) -> Sequence["Machine"]:
+        pass
--- a/monkey/monkey_island/cc/repository/INetworkMapRepository.py
+++ b/monkey/monkey_island/cc/repository/INetworkMapRepository.py
@ -0,0 +1,11 @@
+from abc import ABC
+
+
+class INetworkMapRepository(ABC):
+
+    # TODO Define NetMap object (possibly models.networkmap.NetworkMap); quoted until it exists
+    def get_map(self) -> "NetMap":
+        pass
+
+    def save_netmap(self, netmap: "NetMap"):
+        pass
--- a/monkey/monkey_island/cc/repository/IReportRepository.py
+++ b/monkey/monkey_island/cc/repository/IReportRepository.py
@ -0,0 +1,14 @@
+from abc import ABC
+
+from monkey_island.cc.models import Report
+
+
+class IReportRepository(ABC):
+    # Report (potentially should go away if split up into proper endpoints/services)
+    #################################
+    def save_report(self, report: Report):
+        pass
+
+    # Should return only one
+    def get_report(self) -> Report:
+        pass
--- a/monkey/monkey_island/cc/repository/ISimulationRepository.py
+++ b/monkey/monkey_island/cc/repository/ISimulationRepository.py
@ -0,0 +1,11 @@
+from abc import ABC
+
+
+class ISimulationRepository(ABC):
+    # TODO define simulation object. It should contain metadata about simulation,
+    # like start, end times, mode and last forced stop of all monkeys
+    def save_simulation(self, simulation: "Simulation"):  # quoted: Simulation is not defined yet
+        pass
+
+    def get_simulation(self):
+        pass
--- a/monkey/monkey_island/cc/repository/IStolenCredentials.py
+++ b/monkey/monkey_island/cc/repository/IStolenCredentials.py
@ -0,0 +1,13 @@
+from abc import ABC
+from typing import Sequence
+
+from monkey_island.cc.models import StolenCredentials
+
+
+# Consider removing this interface and just using the telemetry type
+class IStolenCredentialsRepository(ABC):
+    def get_stolen_credentials(self) -> Sequence[StolenCredentials]:
+        pass
+
+    def save_stolen_credentials(self, stolen_credentials: StolenCredentials):
+        pass
--- a/monkey/monkey_island/cc/repository/ITelemetryRepository.py
+++ b/monkey/monkey_island/cc/repository/ITelemetryRepository.py
@ -0,0 +1,20 @@
+from abc import ABC
+from typing import Optional, Sequence
+
+from monkey_island.cc.models.telemetries.telemetry import Telemetry
+
+
+class ITelemetryRepository(ABC):
+    def save_telemetry(self, telemetry: Telemetry):
+        pass
+
+    # TODO define all telemetry types (TelemetryType is quoted below until it exists)
+    # Potentially we'll need to define each telem type separately. As it stands there's no way to
+    # get exploit telemetries by exploiter for example
+    def get_telemetries(
+        self,
+        id: Optional[str] = None,
+        type: Optional["TelemetryType"] = None,
+        monkey_id: Optional[str] = None,
+    ) -> Sequence[Telemetry]:
+        pass
--- a/monkey/monkey_island/cc/repository/__init__.py
+++ b/monkey/monkey_island/cc/repository/__init__.py
--- a/monkey/monkey_island/cc/repository/attack/IMitigationsRepository.py
+++ b/monkey/monkey_island/cc/repository/attack/IMitigationsRepository.py
@ -0,0 +1,12 @@
+from abc import ABC
+from typing import Optional
+
+from monkey_island.cc.models.attack.attack_mitigations import AttackMitigations
+
+
+class IMitigationsRepository(ABC):
+    def get_mitigations(self, technique_id: Optional[str] = None) -> AttackMitigations:
+        pass
+
+    def save_mitigations(self, mitigations: AttackMitigations):
+        pass
--- a/monkey/monkey_island/cc/repository/zero_trust/IEventRepository.py
+++ b/monkey/monkey_island/cc/repository/zero_trust/IEventRepository.py
@ -0,0 +1,11 @@
+from abc import ABC
+from typing import Optional, Sequence
+
+from monkey_island.cc.models.zero_trust.event import Event
+
+
+class IEventRepository(ABC):
+    def get_events(self, finding_id: Optional[str] = None) -> Sequence[Event]:
+        pass
+
+    # Events are saved in IFindingRepository, because a finding has many events
--- a/monkey/monkey_island/cc/repository/zero_trust/IFindingRepository.py
+++ b/monkey/monkey_island/cc/repository/zero_trust/IFindingRepository.py
@ -0,0 +1,13 @@
+from abc import ABC
+from typing import Optional
+
+# Zero trust finding
+from monkey_island.cc.models.zero_trust.finding import Finding
+
+
+class IFindingRepository(ABC):
+    def get_findings(self, test: Optional[str] = None) -> Finding:
+        pass
+
+    def save_finding(self, finding: Finding):
+        pass
--- a/monkey/monkey_island/cc/repository/zero_trust/__init__.py
+++ b/monkey/monkey_island/cc/repository/zero_trust/__init__.py
--- a/monkey/monkey_island/cc/resources/blackbox/utils/telem_store.py
+++ b/monkey/monkey_island/cc/resources/blackbox/utils/telem_store.py
@ -15,7 +15,7 @@ MAX_SAME_CATEGORY_TELEMS = 10000
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)

-
+# TODO this will break with the IRepository implementation. Remove it
 class TestTelemStore:
    TELEMS_EXPORTED = False

@ -60,6 +60,7 @@ class TestTelemStore:
        TestTelemStore.TELEMS_EXPORTED = True
        logger.info("Telemetries exported!")

+    # Should be private
    @staticmethod
    def get_unique_file_path_for_export_telem(target_dir: str, test_telem: ExportedTelem):
        telem_filename = TestTelemStore._get_filename_by_export_telem(test_telem)
--- a/monkey/monkey_island/cc/services/config.py
+++ b/monkey/monkey_island/cc/services/config.py
@ -70,6 +70,9 @@ class ConfigService:
        :param is_island: If True, will include island specific configuration parameters.
        :return: The entire global config.
        """
+
+        # is_initial_config and should_decrypt are only there to check whether we are on the
+        # default configuration or whether the user has already modified it
        config = (
            mongo.db.config.find_one({"name": "initial" if is_initial_config else "newconfig"})
            or {}
@ -95,9 +98,12 @@ class ConfigService:
        :return: The value of the requested config key.
        """
        config_key = functools.reduce(lambda x, y: x + "." + y, config_key_as_arr)
+
+        # This should just call get_config from repository. If None, then call get_default prob
        config = mongo.db.config.find_one(
            {"name": "initial" if is_initial_config else "newconfig"}, {config_key: 1}
        )
+
        for config_key_part in config_key_as_arr:
            config = config[config_key_part]
        if should_decrypt:
@ -141,6 +147,7 @@ class ConfigService:
    def get_config_schema():
        return SCHEMA

+    # Not added to interface because it's doable by get_config_field + set_config_field
    @staticmethod
    def add_item_to_config_set_if_dont_exist(item_path_array, item_value, should_encrypt):
        item_key = ".".join(item_path_array)
--- a/monkey/monkey_island/cc/services/edge/edge.py
+++ b/monkey/monkey_island/cc/services/edge/edge.py
@ -65,6 +65,9 @@ class EdgeService(Edge):
        except DoesNotExist:
            return []

+    # TODO it's not entirely clear why the tunnel is unset in
+    #  monkey/monkey_island/cc/services/telemetry/processing/tunnel.py:15
+    # Either way this can be done by fetching, modifying and saving
    def disable_tunnel(self):
        self.tunnel = False
        self.save()
--- a/monkey/monkey_island/cc/services/infection_lifecycle.py
+++ b/monkey/monkey_island/cc/services/infection_lifecycle.py
@ -15,6 +15,7 @@ logger = logging.getLogger(__name__)


 def set_stop_all(time: float):
+    # This will use Agent and Simulation repositories
    for monkey in Monkey.objects():
        monkey.config.should_stop = True
        monkey.save()
--- a/monkey/monkey_island/cc/services/netmap/net_edge.py
+++ b/monkey/monkey_island/cc/services/netmap/net_edge.py
@ -21,6 +21,8 @@ class NetEdgeService:
    def _get_standard_net_edges():
        return [DisplayedEdgeService.edge_to_net_edge(x) for x in EdgeService.get_all_edges()]

+    # If we save the island machine as a standard machine, we won't need these
+    # methods
    @staticmethod
    def _get_uninfected_island_net_edges():
        edges = []
--- a/monkey/monkey_island/cc/services/node.py
+++ b/monkey/monkey_island/cc/services/node.py
@ -80,6 +80,11 @@ class NodeService:
            domain_name = " (" + node["domain_name"] + ")"
        return node["os"]["version"] + " : " + node["ip_addresses"][0] + domain_name

+    # A lot of methods like these duplicate between monkey and node.
+    # That's a result of poor entity model, because both nodes and monkeys
+    # store the same information. It's best to extract the machine specific data
+    # to "Machine" entity (like IP's and os) and agent specific data to "Agent" (like alive,
+    # parent, etc)
    @staticmethod
    def get_monkey_os(monkey):
        os = "unknown"
@ -183,15 +188,15 @@ class NodeService:
        tunnel_host_id = NodeService.get_monkey_by_ip(tunnel_host_ip)["_id"]
        NodeService.unset_all_monkey_tunnels(monkey_id)
        mongo.db.monkey.update(
-            {"_id": monkey_id}, {"$set": {"tunnel": tunnel_host_id}}, upsert=False
+                {"_id": monkey_id}, {"$set": {"tunnel": tunnel_host_id}}, upsert=False
        )
        monkey_label = NodeService.get_label_for_endpoint(monkey_id)
        tunnel_host_label = NodeService.get_label_for_endpoint(tunnel_host_id)
        tunnel_edge = EdgeService.get_or_create_edge(
-            src_node_id=monkey_id,
-            dst_node_id=tunnel_host_id,
-            src_label=monkey_label,
-            dst_label=tunnel_host_label,
+                src_node_id=monkey_id,
+                dst_node_id=tunnel_host_id,
+                src_label=monkey_label,
+                dst_label=tunnel_host_label,
        )
        tunnel_edge.tunnel = True
        tunnel_edge.ip_address = tunnel_host_ip
@ -200,13 +205,13 @@ class NodeService:
    @staticmethod
    def insert_node(ip_address, domain_name=""):
        new_node_insert_result = mongo.db.node.insert_one(
-            {
-                "ip_addresses": [ip_address],
-                "domain_name": domain_name,
-                "exploited": False,
-                "propagated": False,
-                "os": {"type": "unknown", "version": "unknown"},
-            }
+                {
+                    "ip_addresses": [ip_address],
+                    "domain_name": domain_name,
+                    "exploited": False,
+                    "propagated": False,
+                    "os": {"type": "unknown", "version": "unknown"},
+                }
        )
        return mongo.db.node.find_one({"_id": new_node_insert_result.inserted_id})

@ -221,6 +226,11 @@ class NodeService:
    def get_monkey_by_id(monkey_id):
        return mongo.db.monkey.find_one({"_id": ObjectId(monkey_id)})

+    # GUID is generated from uuid.getnode() and represents the machine it was run on
+    # All monkeys that ran on the same machine will have the same GUID, but
+    # we can just store the monkeys on the same machine document/have one to many relationship
+    # GUID could be stored on machine to uniquely identify the same machine even after the
+    # ip, domain name or other changes. Not entirely sure it's necessary
    @staticmethod
    def get_monkey_by_guid(monkey_guid):
        return mongo.db.monkey.find_one({"guid": monkey_guid})
@ -237,10 +247,12 @@ class NodeService:
    def get_node_by_id(node_id):
        return mongo.db.node.find_one({"_id": ObjectId(node_id)})

+    # This is only used to determine if report is the latest or if we need to
+    # generate a new one. This info should end up in Simulation entity instead.
    @staticmethod
    def update_monkey_modify_time(monkey_id):
        mongo.db.monkey.update(
-            {"_id": monkey_id}, {"$set": {"modifytime": datetime.now()}}, upsert=False
+                {"_id": monkey_id}, {"$set": {"modifytime": datetime.now()}}, upsert=False
        )

    @staticmethod
@ -256,9 +268,11 @@ class NodeService:
    @staticmethod
    def add_communication_info(monkey, info):
        mongo.db.monkey.update(
-            {"guid": monkey["guid"]}, {"$set": {"command_control_channel": info}}, upsert=False
+                {"guid": monkey["guid"]}, {"$set": {"command_control_channel": info}}, upsert=False
        )

+    # TODO this returns a mock island agent
+    # It's better to just initialize the island machine on reset I think
    @staticmethod
    def get_monkey_island_monkey():
        ip_addresses = local_ip_addresses()
@ -329,7 +343,7 @@ class NodeService:
    @staticmethod
    def get_hostname_by_id(node_id):
        return NodeService.get_node_hostname(
-            mongo.db.monkey.find_one({"_id": node_id}, {"hostname": 1})
+                mongo.db.monkey.find_one({"_id": node_id}, {"hostname": 1})
        )

    @staticmethod
--- a/monkey/monkey_island/cc/services/reporting/report.py
+++ b/monkey/monkey_island/cc/services/reporting/report.py
@ -57,6 +57,7 @@ class ReportService:
    def initialize(cls, aws_service: AWSService):
        cls._aws_service = aws_service

+    # This should pull from Simulation entity
    @staticmethod
    def get_first_monkey_time():
        return (
@ -88,6 +89,7 @@ class ReportService:

        return st

+    # This should be replaced by a query to edges and get tunnel edges?
    @staticmethod
    def get_tunnels():
        return [
@ -103,6 +105,7 @@ class ReportService:
            for tunnel in mongo.db.monkey.find({"tunnel": {"$exists": True}}, {"tunnel": 1})
        ]

+    # This should be replaced by machine query for "scanned" status
    @staticmethod
    def get_scanned():
        formatted_nodes = []
@ -110,6 +113,8 @@ class ReportService:
        nodes = ReportService.get_all_displayed_nodes()

        for node in nodes:
+            # This information should be evident from the map, not sure a table/list is a good way
+            # to display it anyways
            nodes_that_can_access_current_node = node["accessible_from_nodes_hostnames"]
            formatted_nodes.append(
                {
--- a/vulture_allowlist.py
+++ b/vulture_allowlist.py
@ -6,6 +6,20 @@ Vulture doesn't mark these as dead again.
 from infection_monkey.exploit.log4shell_utils.ldap_server import LDAPServerFactory
 from monkey_island.cc import app
 from monkey_island.cc.models import Report
+from monkey_island.cc.models.networkmap import Arc, NetworkMap
+from monkey_island.cc.repository.attack.IMitigationsRepository import IMitigationsRepository
+from monkey_island.cc.repository.IAgentRepository import IAgentRepository
+from monkey_island.cc.repository.IAttackRepository import IAttackRepository
+from monkey_island.cc.repository.IConfigRepository import IConfigRepository
+from monkey_island.cc.repository.ILogRepository import ILogRepository
+from monkey_island.cc.repository.IMachineRepository import IMachineRepository
+from monkey_island.cc.repository.INetworkMapRepository import INetworkMapRepository
+from monkey_island.cc.repository.IReportRepository import IReportRepository
+from monkey_island.cc.repository.ISimulationRepository import ISimulationRepository
+from monkey_island.cc.repository.IStolenCredentials import IStolenCredentialsRepository
+from monkey_island.cc.repository.ITelemetryRepository import ITelemetryRepository
+from monkey_island.cc.repository.zero_trust.IEventRepository import IEventRepository
+from monkey_island.cc.repository.zero_trust.IFindingRepository import IFindingRepository

 fake_monkey_dir_path  # unused variable (monkey/tests/infection_monkey/post_breach/actions/test_users_custom_pba.py:37)
 set_os_linux  # unused variable (monkey/tests/infection_monkey/post_breach/actions/test_users_custom_pba.py:37)
@ -169,3 +183,38 @@ GCPHandler  # unused function (envs/monkey_zoo/blackbox/test_blackbox.py:57)
 architecture  # unused variable (monkey/infection_monkey/exploit/caching_agent_repository.py:25)

 response_code  # unused variable (monkey/monkey_island/cc/services/aws/aws_command_runner.py:26)
+
+# TODO DELETE AFTER RESOURCE REFACTORING
+NetworkMap
+Arc.dst_machine
+IMitigationsRepository.get_mitigations
+IMitigationsRepository.save_mitigations
+IAgentRepository.save_agent
+IAgentRepository.get_agents
+agent
+IAttackRepository.get_attack_report
+IAttackRepository.save_attack_report
+IAttackRepository.get_mitigation_by_technique
+IAttackRepository.save_mitigations
+IConfigRepository.set_config
+IConfigRepository.set_config_field
+IConfigRepository.get_config_field
+ILogRepository.get_logs
+ILogRepository.save_log
+ILogRepository.delete_log
+IMachineRepository.save_machine
+IMachineRepository.get_machines
+INetworkMapRepository.get_map
+INetworkMapRepository.save_netmap
+IReportRepository
+ISimulationRepository.save_simulation
+ISimulationRepository.get_simulation
+IStolenCredentialsRepository.get_stolen_credentials
+IStolenCredentialsRepository.save_stolen_credentials
+ITelemetryRepository.get_telemetries
+IEventRepository.get_events
+IFindingRepository.get_findings
+key_list
+simulation
+stolen_credentials
+netmap