diff --git a/envs/monkey_zoo/blackbox/island_client/monkey_island_client.py b/envs/monkey_zoo/blackbox/island_client/monkey_island_client.py index 001a8d17a..bc8115db7 100644 --- a/envs/monkey_zoo/blackbox/island_client/monkey_island_client.py +++ b/envs/monkey_zoo/blackbox/island_client/monkey_island_client.py @@ -1,18 +1,22 @@ import json import logging import time -from typing import List, Sequence, Union +from typing import List, Mapping, Sequence, Union from bson import json_util from common.credentials import Credentials +from common.types import AgentID, MachineID from envs.monkey_zoo.blackbox.island_client.monkey_island_requests import MonkeyIslandRequests from envs.monkey_zoo.blackbox.test_configurations.test_configuration import TestConfiguration +from monkey_island.cc.models import Agent, Machine SLEEP_BETWEEN_REQUESTS_SECONDS = 0.5 +GET_AGENTS_ENDPOINT = "api/agents" +GET_LOG_ENDPOINT = "api/agent-logs" +GET_MACHINES_ENDPOINT = "api/machines" MONKEY_TEST_ENDPOINT = "api/test/monkey" TELEMETRY_TEST_ENDPOINT = "api/test/telemetry" -LOG_TEST_ENDPOINT = "api/test/log" LOGGER = logging.getLogger(__name__) @@ -157,11 +161,21 @@ class MonkeyIslandClient(object): ) return MonkeyIslandClient.get_test_query_results(response) - def find_log_in_db(self, query): - response = self.requests.get( - LOG_TEST_ENDPOINT, MonkeyIslandClient.form_find_query_for_request(query) - ) - return MonkeyIslandClient.get_test_query_results(response) + def get_agents(self) -> Sequence[Agent]: + response = self.requests.get(GET_AGENTS_ENDPOINT) + + return [Agent(**a) for a in response.json()] + + def get_machines(self) -> Mapping[MachineID, Machine]: + response = self.requests.get(GET_MACHINES_ENDPOINT) + machines = (Machine(**m) for m in response.json()) + + return {m.id: m for m in machines} + + def get_agent_log(self, agent_id: AgentID) -> str: + response = self.requests.get(f"{GET_LOG_ENDPOINT}/{agent_id}") + + return response.json() @staticmethod def form_find_query_for_request(query: Union[dict, None]) -> dict: diff --git a/envs/monkey_zoo/blackbox/log_handlers/monkey_log.py b/envs/monkey_zoo/blackbox/log_handlers/monkey_log.py deleted file mode 100644 index 2086accbe..000000000 --- a/envs/monkey_zoo/blackbox/log_handlers/monkey_log.py +++ /dev/null @@ -1,38 +0,0 @@ -import logging -import os - -from bson import ObjectId - -LOGGER = logging.getLogger(__name__) - - -class MonkeyLog(object): - def __init__(self, monkey, log_dir_path): - self.monkey = monkey - self.log_dir_path = log_dir_path - - def download_log(self, island_client): - log = island_client.find_log_in_db({"monkey_id": ObjectId(self.monkey["_id"])}) - if not log: - LOGGER.error("Log for monkey {} not found".format(self.monkey["ip_addresses"][0])) - return False - else: - self.write_log_to_file(log) - return True - - def write_log_to_file(self, log): - with open(self.get_log_path_for_monkey(self.monkey), "w") as log_file: - log_file.write(MonkeyLog.parse_log(log)) - - @staticmethod - def parse_log(log): - log = log.strip('"') - log = log.replace("\\n", "\n ") - return log - - @staticmethod - def get_filename_for_monkey_log(monkey): - return "{}.txt".format(monkey["ip_addresses"][0]) - - def get_log_path_for_monkey(self, monkey): - return os.path.join(self.log_dir_path, MonkeyLog.get_filename_for_monkey_log(monkey)) diff --git a/envs/monkey_zoo/blackbox/log_handlers/monkey_logs_downloader.py b/envs/monkey_zoo/blackbox/log_handlers/monkey_logs_downloader.py index 302da8fc7..31f87c309 100644 --- a/envs/monkey_zoo/blackbox/log_handlers/monkey_logs_downloader.py +++ b/envs/monkey_zoo/blackbox/log_handlers/monkey_logs_downloader.py @@ -1,25 +1,65 @@ import logging +from pathlib import Path +from threading import Thread +from typing import List, Mapping -from envs.monkey_zoo.blackbox.log_handlers.monkey_log import MonkeyLog +from common.types import MachineID +from envs.monkey_zoo.blackbox.island_client.monkey_island_client import MonkeyIslandClient +from monkey_island.cc.models import Agent, Machine LOGGER = logging.getLogger(__name__) class MonkeyLogsDownloader(object): - def __init__(self, island_client, log_dir_path): + def __init__(self, island_client: MonkeyIslandClient, log_dir_path: str): self.island_client = island_client - self.log_dir_path = log_dir_path - self.monkey_log_paths = [] + self.log_dir_path = Path(log_dir_path) + self.monkey_log_paths: List[Path] = [] def download_monkey_logs(self): - LOGGER.info("Downloading each monkey log.") - all_monkeys = self.island_client.get_all_monkeys_from_db() - for monkey in all_monkeys: - downloaded_log_path = self._download_monkey_log(monkey) - if downloaded_log_path: - self.monkey_log_paths.append(downloaded_log_path) + try: + LOGGER.info("Downloading each monkey log.") - def _download_monkey_log(self, monkey): - log_handler = MonkeyLog(monkey, self.log_dir_path) - download_successful = log_handler.download_log(self.island_client) - return log_handler.get_log_path_for_monkey(monkey) if download_successful else None + agents = self.island_client.get_agents() + machines = self.island_client.get_machines() + + download_threads: List[Thread] = [] + + # TODO: Does downloading logs concurrently still improve performance after resolving + # https://github.com/guardicore/monkey/issues/2383? + for agent in agents: + t = Thread(target=self._download_log, args=(agent, machines), daemon=True) + t.start() + download_threads.append(t) + + for thread in download_threads: + thread.join() + + except Exception as err: + LOGGER.exception(err) + + def _download_log(self, agent: Agent, machines: Mapping[MachineID, Machine]): + log_file_path = self._get_log_file_path(agent, machines) + log_contents = self.island_client.get_agent_log(agent.id) + + MonkeyLogsDownloader._write_log_to_file(log_file_path, log_contents) + + self.monkey_log_paths.append(log_file_path) + + def _get_log_file_path(self, agent: Agent, machines: Mapping[MachineID, Machine]) -> Path: + try: + machine_ip = machines[agent.machine_id].network_interfaces[0].ip + except IndexError: + LOGGER.error(f"Machine with ID {agent.machine_id} has no network interfaces") + machine_ip = "UNKNOWN" + + start_time = agent.start_time.strftime("%Y-%m-%d_%H-%M-%S") + + return self.log_dir_path / f"agent_{start_time}_{machine_ip}.log" + + @staticmethod + def _write_log_to_file(log_file_path: Path, log_contents: str): + LOGGER.debug(f"Writing {len(log_contents)} bytes to {log_file_path}") + + with open(log_file_path, "w") as f: + f.write(log_contents) diff --git a/monkey/monkey_island/cc/resources/agent_logs.py b/monkey/monkey_island/cc/resources/agent_logs.py index 40152c0fb..53da43590 100644 --- a/monkey/monkey_island/cc/resources/agent_logs.py +++ b/monkey/monkey_island/cc/resources/agent_logs.py @@ -22,8 +22,8 @@ class AgentLogs(AbstractResource): try: log_contents = self._agent_log_repository.get_agent_log(agent_id) except UnknownRecordError as err: - logger.error(f"Error occurred while getting agent log: {err}") - return {}, HTTPStatus.NOT_FOUND + logger.exception(f"Error occurred while getting agent log: {err}") + return "", HTTPStatus.NOT_FOUND return log_contents, HTTPStatus.OK diff --git a/monkey/tests/unit_tests/monkey_island/cc/resources/test_agent_logs.py b/monkey/tests/unit_tests/monkey_island/cc/resources/test_agent_logs.py index 0f7e86b53..a6a29a7e9 100644 --- a/monkey/tests/unit_tests/monkey_island/cc/resources/test_agent_logs.py +++ b/monkey/tests/unit_tests/monkey_island/cc/resources/test_agent_logs.py @@ -26,7 +26,7 @@ def flask_client(build_flask_client): def test_agent_logs_endpoint__get_empty(flask_client): resp = flask_client.get(AGENT_LOGS_URL_1, follow_redirects=True) assert resp.status_code == HTTPStatus.NOT_FOUND - assert resp.json == {} + assert resp.json == "" @pytest.mark.parametrize(