diff --git a/monkey/common/utils/file_utils.py b/monkey/common/utils/file_utils.py index c9cb78139..a0f09d9ad 100644 --- a/monkey/common/utils/file_utils.py +++ b/monkey/common/utils/file_utils.py @@ -1,7 +1,9 @@ import hashlib import os from pathlib import Path -from typing import Iterable +from typing import BinaryIO, Iterable + +MAX_BLOCK_SIZE = 65536 class InvalidPath(Exception): @@ -15,11 +17,27 @@ def expand_path(path: str) -> Path: return Path(os.path.expandvars(os.path.expanduser(path))) -def get_file_sha256_hash(filepath: Path): - sha256 = hashlib.sha256() +def get_file_sha256_hash(filepath: Path) -> str: + """ + Calculates sha256 hash from a file path + + :param filepath: A Path object which defines a file on the system + :return: sha256 hash of the file + """ with open(filepath, "rb") as f: - for block in iter(lambda: f.read(65536), b""): - sha256.update(block) + return get_binary_io_sha256_hash(f) + + +def get_binary_io_sha256_hash(binary: BinaryIO) -> str: + """ + Calculates sha256 hash from a file-like object + + :param binary: file-like object from which we calculate the hash + :return: sha256 hash from the file-like object + """ + sha256 = hashlib.sha256() + for block in iter(lambda: binary.read(MAX_BLOCK_SIZE), b""): + sha256.update(block) return sha256.hexdigest() diff --git a/monkey/tests/unit_tests/common/utils/test_common_file_utils.py b/monkey/tests/unit_tests/common/utils/test_common_file_utils.py index aac13839e..5b449b01c 100644 --- a/monkey/tests/unit_tests/common/utils/test_common_file_utils.py +++ b/monkey/tests/unit_tests/common/utils/test_common_file_utils.py @@ -1,4 +1,5 @@ import os +from io import BytesIO import pytest from tests.utils import add_files_to_dir, add_subdirs_to_dir @@ -7,6 +8,7 @@ from common.utils.file_utils import ( InvalidPath, expand_path, get_all_regular_files_in_directory, + get_binary_io_sha256_hash, get_file_sha256_hash, ) @@ -34,6 +36,11 @@ def test_get_file_sha256_hash(stable_file, stable_file_sha256_hash): 
assert get_file_sha256_hash(stable_file) == stable_file_sha256_hash +def test_get_binary_sha256_hash(): + expected_hash = "a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e" + assert get_binary_io_sha256_hash(BytesIO(b"Hello World")) == expected_hash + + SUBDIRS = ["subdir1", "subdir2"] FILES = ["file.jpg.zip", "file.xyz", "1.tar", "2.tgz", "2.png", "2.mpg"]