Use defusedxml to parse Updates.xml files

I don't think we can really call the `Updates.xml` files that we are
parsing 'untrusted XML', because we are checking that they match their
SHA-256 checksum. However, I don't think there's any good reason not to
use a more secure XML parser.
This commit is contained in:
David Dalcino
2022-03-10 17:21:03 -08:00
parent ce9f75409f
commit 5ad7c1a058
4 changed files with 12 additions and 9 deletions

View File

@@ -21,11 +21,12 @@
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import binascii
import posixpath
import xml.etree.ElementTree as ElementTree
from dataclasses import dataclass, field
from logging import getLogger
from typing import Dict, Iterable, List, Optional, Tuple
from defusedxml import ElementTree
from aqt.exceptions import ArchiveDownloadError, ArchiveListError, ChecksumDownloadFailure, NoPackageFound
from aqt.helper import Settings, get_hash, getUrl, ssplit
from aqt.metadata import QtRepoProperty, Version

View File

@@ -27,15 +27,16 @@ import os
import posixpath
import secrets
import sys
import xml.etree.ElementTree as ElementTree
from logging import getLogger
from logging.handlers import QueueListener
from pathlib import Path
from typing import Callable, Dict, Generator, List, Optional, Tuple
from urllib.parse import urlparse
from xml.etree.ElementTree import Element
import requests
import requests.adapters
from defusedxml import ElementTree
from aqt.exceptions import (
ArchiveChecksumError,
@@ -257,7 +258,7 @@ def ssplit(data: str):
def xml_to_modules(
xml_text: str,
predicate: Callable[[ElementTree.Element], bool],
predicate: Callable[[Element], bool],
) -> Dict[str, Dict[str, str]]:
"""Converts an XML document to a dict of `PackageUpdate` dicts, indexed by `Name` attribute.
Only report elements that satisfy `predicate(element)`.

View File

@@ -27,7 +27,7 @@ import re
import shutil
from logging import getLogger
from typing import Callable, Dict, Generator, Iterable, Iterator, List, Optional, Tuple, Union
from xml.etree import ElementTree as ElementTree
from xml.etree.ElementTree import Element
import bs4
from semantic_version import SimpleSpec as SemanticSimpleSpec
@@ -622,7 +622,7 @@ class MetadataFactory:
)
@staticmethod
def _has_nonempty_downloads(element: ElementTree.Element) -> bool:
def _has_nonempty_downloads(element: Element) -> bool:
"""Returns True if the element has a nonempty '<DownloadableArchives/>' tag"""
downloads = element.find("DownloadableArchives")
update_file = element.find("UpdateFile")
@@ -641,7 +641,7 @@ class MetadataFactory:
)
return f"{version.major}{version.minor}{patch}"
def _fetch_module_metadata(self, folder: str, predicate: Optional[Callable[[ElementTree.Element], bool]] = None):
def _fetch_module_metadata(self, folder: str, predicate: Optional[Callable[[Element], bool]] = None):
rest_of_url = posixpath.join(self.archive_id.to_url(), folder, "Updates.xml")
xml = self.fetch_http(rest_of_url)
return xml_to_modules(
@@ -705,15 +705,15 @@ class MetadataFactory:
qt_version_str = self._get_qt_version_str(version)
nonempty = MetadataFactory._has_nonempty_downloads
def all_modules(element: ElementTree.Element) -> bool:
def all_modules(element: Element) -> bool:
_module, _arch = element.find("Name").text.split(".")[-2:]
return _arch == arch and _module != qt_version_str and nonempty(element)
def specify_modules(element: ElementTree.Element) -> bool:
def specify_modules(element: Element) -> bool:
_module, _arch = element.find("Name").text.split(".")[-2:]
return _arch == arch and _module in modules and nonempty(element)
def no_modules(element: ElementTree.Element) -> bool:
def no_modules(element: Element) -> bool:
name: Optional[str] = element.find("Name").text
return name and name.endswith(f".{qt_version_str}.{arch}") and nonempty(element)

View File

@@ -46,6 +46,7 @@ install_requires =
texttable
bs4
dataclasses;python_version<"3.7"
defusedxml
setup_requires =
setuptools-scm[toml]>=6.0.1
setuptools>=45.0