From d977d19d9f5f24dfef0b8ac3f26e34043c9ba26c Mon Sep 17 00:00:00 2001 From: Mike Salvatore Date: Thu, 5 May 2022 15:01:37 -0400 Subject: [PATCH] Common: Lazy-load AWSInstance --- monkey/common/aws/aws_instance.py | 116 +++++------------- monkey/common/aws/aws_metadata.py | 74 +++++++++++ .../monkey_island/cc/services/aws_service.py | 28 +---- .../test_aws_metadata.py} | 60 +++------ 4 files changed, 124 insertions(+), 154 deletions(-) create mode 100644 monkey/common/aws/aws_metadata.py rename monkey/tests/unit_tests/common/{cloud/aws/test_aws_instance.py => aws/test_aws_metadata.py} (72%) diff --git a/monkey/common/aws/aws_instance.py b/monkey/common/aws/aws_instance.py index cd5d42921..fd2eb696d 100644 --- a/monkey/common/aws/aws_instance.py +++ b/monkey/common/aws/aws_instance.py @@ -1,25 +1,12 @@ -import json -import logging -import re -from dataclasses import dataclass -from typing import Optional, Tuple +import threading -import requests +from common.aws.aws_metadata import fetch_aws_instance_metadata -AWS_INSTANCE_METADATA_LOCAL_IP_ADDRESS = "169.254.169.254" -AWS_LATEST_METADATA_URI_PREFIX = "http://{0}/latest/".format(AWS_INSTANCE_METADATA_LOCAL_IP_ADDRESS) -ACCOUNT_ID_KEY = "accountId" - -logger = logging.getLogger(__name__) - -AWS_TIMEOUT = 2 +AWS_FETCH_METADATA_TIMEOUT = 10.0 # Seconds -@dataclass -class AWSInstanceInfo: - instance_id: Optional[str] = None - region: Optional[str] = None - account_id: Optional[str] = None +class AWSTimeoutError(Exception): + """Raised when communications with AWS timeout""" class AWSInstance: @@ -28,83 +15,42 @@ class AWSInstance: """ def __init__(self): - self._is_instance, self._instance_info = AWSInstance._fetch_instance_info() + self._instance_id = None + self._region = None + self._account_id = None + self._initialization_complete = threading.Event() + + fetch_thread = threading.Thread(target=self._fetch_aws_instance_metadata, daemon=True) + fetch_thread.start() + + def _fetch_aws_instance_metadata(self): + metadata = fetch_aws_instance_metadata() + self._instance_id = metadata[0] + self._region = metadata[1] + self._account_id = metadata[2] + + self._initialization_complete.set() @property def is_instance(self) -> bool: - return self._is_instance + self._wait_for_initialization_to_complete() + return self._instance_id is not None @property def instance_id(self) -> str: - return self._instance_info.instance_id + self._wait_for_initialization_to_complete() + return self._instance_id @property def region(self) -> str: - return self._instance_info.region + self._wait_for_initialization_to_complete() + return self._region @property def account_id(self) -> str: - return self._instance_info.account_id + self._wait_for_initialization_to_complete() + return self._account_id - @staticmethod - def _fetch_instance_info() -> Tuple[bool, AWSInstanceInfo]: - try: - response = requests.get( - AWS_LATEST_METADATA_URI_PREFIX + "meta-data/instance-id", - timeout=AWS_TIMEOUT, - ) - if not response: - return False, AWSInstanceInfo() - - info = AWSInstanceInfo() - info.instance_id = response.text if response else False - info.region = AWSInstance._parse_region( - requests.get( - AWS_LATEST_METADATA_URI_PREFIX + "meta-data/placement/availability-zone", - timeout=AWS_TIMEOUT, - ).text - ) - except (requests.RequestException, IOError) as e: - logger.debug("Failed init of AWSInstance while getting metadata: {}".format(e)) - return False, AWSInstanceInfo() - - try: - info.account_id = AWSInstance._extract_account_id( - requests.get( - AWS_LATEST_METADATA_URI_PREFIX + "dynamic/instance-identity/document", - timeout=AWS_TIMEOUT, - ).text - ) - except (requests.RequestException, json.decoder.JSONDecodeError, IOError) as e: - logger.debug( - "Failed init of AWSInstance while getting dynamic instance data: {}".format(e) - ) - return False, AWSInstanceInfo() - - return True, info - - @staticmethod - def _parse_region(region_url_response): - # For a list of regions, see: - # https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts - # .RegionsAndAvailabilityZones.html - # This regex will find any AWS region format string in the response. - re_phrase = r"((?:us|eu|ap|ca|cn|sa)-[a-z]*-[0-9])" - finding = re.findall(re_phrase, region_url_response, re.IGNORECASE) - if finding: - return finding[0] - else: - return None - - @staticmethod - def _extract_account_id(instance_identity_document_response): - """ - Extracts the account id from the dynamic/instance-identity/document metadata path. - Based on https://forums.aws.amazon.com/message.jspa?messageID=409028 which has a few more - solutions, - in case Amazon break this mechanism. - :param instance_identity_document_response: json returned via the web page - ../dynamic/instance-identity/document - :return: The account id - """ - return json.loads(instance_identity_document_response)[ACCOUNT_ID_KEY] + def _wait_for_initialization_to_complete(self): + if not self._initialization_complete.wait(AWS_FETCH_METADATA_TIMEOUT): + raise AWSTimeoutError("Timed out while attempting to retrieve metadata from AWS") diff --git a/monkey/common/aws/aws_metadata.py b/monkey/common/aws/aws_metadata.py new file mode 100644 index 000000000..cc2463ac2 --- /dev/null +++ b/monkey/common/aws/aws_metadata.py @@ -0,0 +1,74 @@ +import json +import logging +import re + +import requests + +AWS_INSTANCE_METADATA_LOCAL_IP_ADDRESS = "169.254.169.254" +AWS_LATEST_METADATA_URI_PREFIX = f"http://{AWS_INSTANCE_METADATA_LOCAL_IP_ADDRESS}/latest/" +ACCOUNT_ID_KEY = "accountId" + +logger = logging.getLogger(__name__) + +AWS_TIMEOUT = 2 + + +def fetch_aws_instance_metadata(): + instance_id = None + region = None + account_id = None + + try: + response = requests.get( + AWS_LATEST_METADATA_URI_PREFIX + "meta-data/instance-id", + timeout=AWS_TIMEOUT, + ) + if not response: + return (None, None, None) + + instance_id = response.text + + region = _parse_region( + requests.get( + AWS_LATEST_METADATA_URI_PREFIX + "meta-data/placement/availability-zone", + timeout=AWS_TIMEOUT, + ).text + ) + + account_id = _extract_account_id( + requests.get( + AWS_LATEST_METADATA_URI_PREFIX + "dynamic/instance-identity/document", + timeout=AWS_TIMEOUT, + ).text + ) + except (requests.RequestException, IOError, json.decoder.JSONDecodeError) as err: + logger.debug(f"Failed init of AWSInstance while getting metadata: {err}") + return (None, None, None) + + return (instance_id, region, account_id) + + +def _parse_region(region_url_response): + # For a list of regions, see: + # https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts + # .RegionsAndAvailabilityZones.html + # This regex will find any AWS region format string in the response. + re_phrase = r"((?:us|eu|ap|ca|cn|sa)-[a-z]*-[0-9])" + finding = re.findall(re_phrase, region_url_response, re.IGNORECASE) + if finding: + return finding[0] + else: + return None + + +def _extract_account_id(instance_identity_document_response): + """ + Extracts the account id from the dynamic/instance-identity/document metadata path. + Based on https://forums.aws.amazon.com/message.jspa?messageID=409028 which has a few more + solutions, + in case Amazon break this mechanism. + :param instance_identity_document_response: json returned via the web page + ../dynamic/instance-identity/document + :return: The account id + """ + return json.loads(instance_identity_document_response)[ACCOUNT_ID_KEY] diff --git a/monkey/monkey_island/cc/services/aws_service.py b/monkey/monkey_island/cc/services/aws_service.py index 9d2ab75b1..12432b8b7 100644 --- a/monkey/monkey_island/cc/services/aws_service.py +++ b/monkey/monkey_island/cc/services/aws_service.py @@ -1,7 +1,5 @@ import logging -from functools import wraps -from threading import Event -from typing import Callable, Optional +from typing import Optional import boto3 import botocore @@ -30,53 +28,29 @@ def filter_instance_data_from_aws_response(response): aws_instance: Optional[AWSInstance] = None -AWS_INFO_FETCH_TIMEOUT = 10.0 # Seconds -init_done = Event() def initialize(): global aws_instance aws_instance = AWSInstance() - init_done.set() -def wait_init_done(fnc: Callable): - @wraps(fnc) - def inner(): - awaited = init_done.wait(AWS_INFO_FETCH_TIMEOUT) - if not awaited: - logger.error( - f"AWS service couldn't initialize in time! " - f"Current timeout is {AWS_INFO_FETCH_TIMEOUT}, " - f"but AWS info took longer to fetch from metadata server." - ) - return - fnc() - - return inner - - -@wait_init_done def is_on_aws(): return aws_instance.is_instance -@wait_init_done def get_region(): return aws_instance.region -@wait_init_done def get_account_id(): return aws_instance.account_id -@wait_init_done def get_client(client_type): return boto3.client(client_type, region_name=aws_instance.region) -@wait_init_done def get_instances(): """ Get the information for all instances with the relevant roles. diff --git a/monkey/tests/unit_tests/common/cloud/aws/test_aws_instance.py b/monkey/tests/unit_tests/common/aws/test_aws_metadata.py similarity index 72% rename from monkey/tests/unit_tests/common/cloud/aws/test_aws_instance.py rename to monkey/tests/unit_tests/common/aws/test_aws_metadata.py index 2253ffd54..1aac906a4 100644 --- a/monkey/tests/unit_tests/common/cloud/aws/test_aws_instance.py +++ b/monkey/tests/unit_tests/common/aws/test_aws_metadata.py @@ -4,7 +4,7 @@ import pytest import requests import requests_mock -from common.aws.aws_instance import AWS_LATEST_METADATA_URI_PREFIX, AWSInstance +from common.aws.aws_metadata import AWS_LATEST_METADATA_URI_PREFIX, fetch_aws_instance_metadata INSTANCE_ID_RESPONSE = "i-1234567890abcdef0" @@ -59,8 +59,7 @@ def get_test_aws_instance( url, exc=exception["account_id"] ) - test_aws_instance_object = AWSInstance() - return test_aws_instance_object + return fetch_aws_instance_metadata() # all good data @@ -77,20 +76,16 @@ def good_data_mock_instance(): del instance -def test_is_instance_good_data(good_data_mock_instance): - assert good_data_mock_instance.is_instance - - def test_instance_id_good_data(good_data_mock_instance): - assert good_data_mock_instance.instance_id == EXPECTED_INSTANCE_ID + assert good_data_mock_instance[0] == EXPECTED_INSTANCE_ID def test_region_good_data(good_data_mock_instance): - assert good_data_mock_instance.region == EXPECTED_REGION + assert good_data_mock_instance[1] == EXPECTED_REGION def test_account_id_good_data(good_data_mock_instance): - assert good_data_mock_instance.account_id == EXPECTED_ACCOUNT_ID + assert good_data_mock_instance[2] == EXPECTED_ACCOUNT_ID # 'region' bad data @@ -107,20 +102,16 @@ def bad_region_data_mock_instance(): del instance -def test_is_instance_bad_region_data(bad_region_data_mock_instance): - assert bad_region_data_mock_instance.is_instance - - def test_instance_id_bad_region_data(bad_region_data_mock_instance): - assert bad_region_data_mock_instance.instance_id == EXPECTED_INSTANCE_ID + assert bad_region_data_mock_instance[0] == EXPECTED_INSTANCE_ID def test_region_bad_region_data(bad_region_data_mock_instance): - assert bad_region_data_mock_instance.region is None + assert bad_region_data_mock_instance[1] is None def test_account_id_bad_region_data(bad_region_data_mock_instance): - assert bad_region_data_mock_instance.account_id == EXPECTED_ACCOUNT_ID + assert bad_region_data_mock_instance[2] == EXPECTED_ACCOUNT_ID # 'account_id' bad data @@ -137,20 +128,16 @@ def bad_account_id_data_mock_instance(): del instance -def test_is_instance_bad_account_id_data(bad_account_id_data_mock_instance): - assert not bad_account_id_data_mock_instance.is_instance - - def test_instance_id_bad_account_id_data(bad_account_id_data_mock_instance): - assert bad_account_id_data_mock_instance.instance_id is None + assert bad_account_id_data_mock_instance[0] is None def test_region_bad_account_id_data(bad_account_id_data_mock_instance): - assert bad_account_id_data_mock_instance.region is None + assert bad_account_id_data_mock_instance[1] is None def test_account_id_data_bad_account_id_data(bad_account_id_data_mock_instance): - assert bad_account_id_data_mock_instance.account_id is None + assert bad_account_id_data_mock_instance[2] is None # 'region' bad requests @@ -168,24 +155,19 @@ def bad_region_request_mock_instance(region_exception): del instance -@pytest.mark.parametrize("region_exception", [requests.RequestException, IOError]) -def test_is_instance_bad_region_request(bad_region_request_mock_instance): - assert not bad_region_request_mock_instance.is_instance - - @pytest.mark.parametrize("region_exception", [requests.RequestException, IOError]) def test_instance_id_bad_region_request(bad_region_request_mock_instance): - assert bad_region_request_mock_instance.instance_id is None + assert bad_region_request_mock_instance[0] is None @pytest.mark.parametrize("region_exception", [requests.RequestException, IOError]) def test_region_bad_region_request(bad_region_request_mock_instance): - assert bad_region_request_mock_instance.region is None + assert bad_region_request_mock_instance[1] is None @pytest.mark.parametrize("region_exception", [requests.RequestException, IOError]) def test_account_id_bad_region_request(bad_region_request_mock_instance): - assert bad_region_request_mock_instance.account_id is None + assert bad_region_request_mock_instance[2] is None # not found request @@ -204,22 +186,16 @@ def not_found_request_mock_instance(): url = f"{AWS_LATEST_METADATA_URI_PREFIX}dynamic/instance-identity/document" m.get(url) - not_found_aws_instance_object = AWSInstance() - yield not_found_aws_instance_object - del not_found_aws_instance_object - - -def test_is_instance_not_found_request(not_found_request_mock_instance): - assert not_found_request_mock_instance.is_instance is False + return fetch_aws_instance_metadata() def test_instance_id_not_found_request(not_found_request_mock_instance): - assert not_found_request_mock_instance.instance_id is None + assert not_found_request_mock_instance[0] is None def test_region_not_found_request(not_found_request_mock_instance): - assert not_found_request_mock_instance.region is None + assert not_found_request_mock_instance[1] is None def test_account_id_not_found_request(not_found_request_mock_instance): - assert not_found_request_mock_instance.account_id is None + assert not_found_request_mock_instance[2] is None