Add ignore_hash option in settings.ini (#684)

* Settings: add an option to skip checksum verification on Updates.xml

This allows aqt to work even when the server download.qt.io is
unreachable.

Signed-off-by: Alberto Mardegan <mardy@users.sourceforge.net>

* Update settings.ini

* Correct option to skip checksum verification

* Rename ignore_hash option to INSECURE_NOT_FOR_PRODUCTION_ignore_hash

* Update doc about INSECURE_NOT_FOR_PRODUCTION_ignore_hash

* Add hash_algorithm option

* Unofficial mirrors might not provide any hash for Updates.xml

* docs: update configuration.rst

express default value of hash_algorithm

Signed-off-by: Hiroshi Miura <miurahr@linux.com>

* fix: hashlib constructor with usedforsecurity=False

- Python 3.9 and later introduce a keyword argument ``usedforsecurity``
- Set it to False because we use the hash to check file integrity, not for password hashing.

Signed-off-by: Hiroshi Miura <miurahr@linux.com>

* Attempt to fix parameter type inconsistency in downloadBinaryFile

---------

Signed-off-by: Alberto Mardegan <mardy@users.sourceforge.net>
Signed-off-by: Hiroshi Miura <miurahr@linux.com>
Co-authored-by: Alberto Mardegan <mardy@users.sourceforge.net>
Co-authored-by: Hiroshi Miura <miurahr@linux.com>
This commit is contained in:
lebarsfa
2023-11-05 23:29:03 +01:00
committed by GitHub
parent d37e940a85
commit 612dc7ba91
6 changed files with 53 additions and 9 deletions

View File

@@ -27,7 +27,7 @@ from xml.etree.ElementTree import Element  # noqa
 from defusedxml import ElementTree
-from aqt.exceptions import ArchiveDownloadError, ArchiveListError, NoPackageFound
+from aqt.exceptions import ArchiveDownloadError, ArchiveListError, ChecksumDownloadFailure, NoPackageFound
 from aqt.helper import Settings, get_hash, getUrl, ssplit
 from aqt.metadata import QtRepoProperty, Version
@@ -390,7 +390,16 @@ class QtArchives:
     def _download_update_xml(self, update_xml_path):
         """Hook for unit test."""
-        xml_hash = get_hash(update_xml_path, "sha256", self.timeout)
+        if not Settings.ignore_hash:
+            try:
+                xml_hash = get_hash(update_xml_path, Settings.hash_algorithm, self.timeout)
+            except ChecksumDownloadFailure:
+                self.logger.warning(
+                    "Failed to download checksum for the file 'Updates.xml'. This may happen on unofficial mirrors."
+                )
+                xml_hash = None
+        else:
+            xml_hash = None
         return getUrl(posixpath.join(self.base, update_xml_path), self.timeout, xml_hash)

     def _parse_update_xml(self, os_target_folder, update_xml_text, target_packages: Optional[ModuleToPackage]):

View File

@@ -94,7 +94,15 @@ def getUrl(url: str, timeout: Tuple[float, float], expected_hash: Optional[bytes
         raise ArchiveDownloadError(msg)
     result: str = r.text
     filename = url.split("/")[-1]
-    actual_hash = hashlib.sha256(bytes(result, "utf-8")).digest()
+    _kwargs = {"usedforsecurity": False} if sys.version_info >= (3, 9) else {}
+    if Settings.hash_algorithm == "sha256":
+        actual_hash = hashlib.sha256(bytes(result, "utf-8"), **_kwargs).digest()
+    elif Settings.hash_algorithm == "sha1":
+        actual_hash = hashlib.sha1(bytes(result, "utf-8"), **_kwargs).digest()
+    elif Settings.hash_algorithm == "md5":
+        actual_hash = hashlib.md5(bytes(result, "utf-8"), **_kwargs).digest()
+    else:
+        raise ArchiveChecksumError(f"Unknown hash algorithm: {Settings.hash_algorithm}.\nPlease check settings.ini")
     if expected_hash is not None and expected_hash != actual_hash:
         raise ArchiveChecksumError(
             f"Downloaded file (unknown) is corrupted! Detect checksum error.\n"
@@ -104,7 +112,7 @@ def getUrl(url: str, timeout: Tuple[float, float], expected_hash: Optional[bytes
     return result

-def downloadBinaryFile(url: str, out: Path, hash_algo: str, exp: bytes, timeout: Tuple[float, float]) -> None:
+def downloadBinaryFile(url: str, out: Path, hash_algo: str, exp: Optional[bytes], timeout: Tuple[float, float]) -> None:
     logger = getLogger("aqt.helper")
     filename = Path(url).name
     with requests.sessions.Session() as session:
@@ -126,7 +134,10 @@ def downloadBinaryFile(url: str, out: Path, hash_algo: str, exp: bytes, timeout:
         except requests.exceptions.Timeout as e:
             raise ArchiveConnectionError(f"Connection timeout: {e.args}") from e
         else:
-            hash = hashlib.new(hash_algo)
+            if sys.version_info >= (3, 9):
+                hash = hashlib.new(hash_algo, usedforsecurity=False)
+            else:
+                hash = hashlib.new(hash_algo)
             try:
                 with open(out, "wb") as fd:
                     for chunk in r.iter_content(chunk_size=8196):
@@ -456,6 +467,14 @@ class SettingsClass:
     def max_retries_to_retrieve_hash(self):
         return self.config.getint("requests", "max_retries_to_retrieve_hash", fallback=int(self.max_retries))

+    @property
+    def hash_algorithm(self):
+        return self.config.get("requests", "hash_algorithm", fallback="sha256")
+
+    @property
+    def ignore_hash(self):
+        return self.config.getboolean("requests", "INSECURE_NOT_FOR_PRODUCTION_ignore_hash", fallback=False)
+
     @property
     def backoff_factor(self):
         return self.config.getfloat("requests", "retry_backoff", fallback=0.1)

View File

@@ -1217,12 +1217,12 @@ def installer(
         logger.addHandler(qh)
     #
     timeout = (Settings.connection_timeout, Settings.response_timeout)
-    hash = get_hash(qt_package.archive_path, algorithm="sha256", timeout=timeout)
+    hash = get_hash(qt_package.archive_path, Settings.hash_algorithm, timeout) if not Settings.ignore_hash else None

     def download_bin(_base_url):
         url = posixpath.join(_base_url, qt_package.archive_path)
         logger.debug("Download URL: {}".format(url))
-        return downloadBinaryFile(url, archive, "sha256", hash, timeout)
+        return downloadBinaryFile(url, archive, Settings.hash_algorithm, hash, timeout)

     retry_on_errors(
         action=lambda: retry_on_bad_connection(download_bin, base_url),

View File

@@ -707,7 +707,7 @@ class MetadataFactory:
     def fetch_http(self, rest_of_url: str, is_check_hash: bool = True) -> str:
         timeout = (Settings.connection_timeout, Settings.response_timeout)
-        expected_hash = get_hash(rest_of_url, "sha256", timeout) if is_check_hash else None
+        expected_hash = get_hash(rest_of_url, Settings.hash_algorithm, timeout) if is_check_hash else None
         base_urls = self.base_url, random.choice(Settings.fallbacks)

         err: BaseException = AssertionError("unraisable")
@@ -792,7 +792,7 @@ class MetadataFactory:
     def _fetch_module_metadata(self, folder: str, predicate: Optional[Callable[[Element], bool]] = None):
         rest_of_url = posixpath.join(self.archive_id.to_url(), folder, "Updates.xml")
-        xml = self.fetch_http(rest_of_url)
+        xml = self.fetch_http(rest_of_url) if not Settings.ignore_hash else self.fetch_http(rest_of_url, False)
         return xml_to_modules(
             xml,
             predicate=predicate if predicate else MetadataFactory._has_nonempty_downloads,

View File

@@ -16,6 +16,8 @@ max_retries_on_connection_error: 5
 retry_backoff: 0.1
 max_retries_on_checksum_error: 5
 max_retries_to_retrieve_hash: 5
+hash_algorithm: sha256
+INSECURE_NOT_FOR_PRODUCTION_ignore_hash: False

 [mirrors]
 trusted_mirrors:

View File

@@ -31,6 +31,8 @@ A file is like as follows:
     retry_backoff: 0.1
     max_retries_on_checksum_error: 5
     max_retries_to_retrieve_hash: 5
+    hash_algorithm: sha256
+    INSECURE_NOT_FOR_PRODUCTION_ignore_hash: False

     [mirrors]
     trusted_mirrors:
@@ -130,6 +132,18 @@ max_retries_on_checksum_error:
     This setting controls how many times ``aqt`` will attempt to download a file,
     in the case of a checksum error.

+hash_algorithm:
+    This is either ``sha256``, ``sha1`` or ``md5``. ``sha256`` is the only safe
+    value to use here. Default is ``sha256`` if not set.
+    See also the ``trusted_mirrors`` setting.
+
+INSECURE_NOT_FOR_PRODUCTION_ignore_hash:
+    This is either ``True`` or ``False``.
+    The ``True`` setting disables hash checking when downloading files. Although
+    this is not recommended, it may help when hashes are not available.
+    The ``False`` setting enforces hash checking. This is highly recommended
+    to avoid corrupted files.
+
 The ``[mirrors]`` section is a configuration for mirror handling.