mirror of
https://github.com/miurahr/aqtinstall.git
synced 2025-12-16 20:27:05 +03:00
Allow MetadataFactory to recover from bad HTML files
This change allows MetadataFactory.iterate_folders to recognize when MetadataFactory has retrieved a malformed HTML file, or an HTML file that does not contain the table and rows that it expects to see. If the URL is correct, it should retrieve the right page, but this is not guaranteed, so MetadataFactory should be able to recover and generate an appropriate error message. One situation where this kind of error can occur is when the user is connected to a public Wi-Fi network but is not logged in: the router redirects aqt to the network login page, and aqt has no way of knowing that it did not receive the correct page. Aqt should be resilient to this kind of error.
This commit is contained in:
@@ -40,6 +40,12 @@ class AqtException(Exception):
|
||||
["* " + suggestion for suggestion in self.suggested_action]
|
||||
)
|
||||
|
||||
def append_suggested_follow_up(self, suggestions: Iterable[str]):
    """Append follow-up suggestions to this exception's suggested actions.

    Entries already present in ``self.suggested_action`` are kept, and the
    new ``suggestions`` are added after them in order. The result is always
    stored as a new list, so a one-shot iterable (for example a generator)
    is consumed exactly once here and the stored suggestions can be read
    repeatedly afterwards. The caller's own list is never aliased.

    :param suggestions: suggestion strings to add, in order.
    """
    # A falsy ``suggested_action`` (None or empty) means "no suggestions yet".
    existing = self.suggested_action or []
    self.suggested_action = [*existing, *suggestions]
|
||||
|
||||
|
||||
class ArchiveDownloadError(AqtException):
|
||||
pass
|
||||
|
||||
@@ -450,7 +450,7 @@ class MetadataFactory:
|
||||
return arches
|
||||
|
||||
def fetch_extensions(self, version: Version) -> List[str]:
|
||||
versions_extensions = MetadataFactory.get_versions_extensions(
|
||||
versions_extensions = self.get_versions_extensions(
|
||||
self.fetch_http(self.archive_id.to_url(), False), self.archive_id.category
|
||||
)
|
||||
filtered = filter(
|
||||
@@ -467,7 +467,7 @@ class MetadataFactory:
|
||||
def get_version(ver_ext: Tuple[Version, str]):
|
||||
return ver_ext[0]
|
||||
|
||||
versions_extensions = MetadataFactory.get_versions_extensions(
|
||||
versions_extensions = self.get_versions_extensions(
|
||||
self.fetch_http(self.archive_id.to_url(), False), self.archive_id.category
|
||||
)
|
||||
versions = sorted(filter(None, map(get_version, filter(filter_by, versions_extensions))))
|
||||
@@ -479,7 +479,7 @@ class MetadataFactory:
|
||||
|
||||
def fetch_tools(self) -> List[str]:
|
||||
html_doc = self.fetch_http(self.archive_id.to_url(), False)
|
||||
return list(MetadataFactory.iterate_folders(html_doc, "tools"))
|
||||
return list(self.iterate_folders(html_doc, "tools"))
|
||||
|
||||
def fetch_tool_modules(self, tool_name: str) -> List[str]:
|
||||
tool_data = self._fetch_module_metadata(tool_name)
|
||||
@@ -588,24 +588,32 @@ class MetadataFactory:
|
||||
f"Connection to '{base_url}' failed. Retrying with fallback '{base_urls[i + 1]}'."
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def iterate_folders(html_doc: str, filter_category: str = "") -> Generator[str, None, None]:
|
||||
def iterate_folders(self, html_doc: str, filter_category: str = "") -> Generator[str, None, None]:
|
||||
def table_row_to_folder(tr: bs4.element.Tag) -> str:
|
||||
try:
|
||||
return tr.find_all("td")[1].a.contents[0].rstrip("/")
|
||||
except (AttributeError, IndexError):
|
||||
return ""
|
||||
|
||||
soup: bs4.BeautifulSoup = bs4.BeautifulSoup(html_doc, "html.parser")
|
||||
for row in soup.body.table.find_all("tr"):
|
||||
content: str = table_row_to_folder(row)
|
||||
if not content or content == "Parent Directory":
|
||||
continue
|
||||
if content.startswith(filter_category):
|
||||
yield content
|
||||
try:
|
||||
soup: bs4.BeautifulSoup = bs4.BeautifulSoup(html_doc, "html.parser")
|
||||
for row in soup.body.table.find_all("tr"):
|
||||
content: str = table_row_to_folder(row)
|
||||
if not content or content == "Parent Directory":
|
||||
continue
|
||||
if content.startswith(filter_category):
|
||||
yield content
|
||||
except Exception as e:
|
||||
url = posixpath.join(Settings.baseurl, self.archive_id.to_url())
|
||||
raise ArchiveConnectionError(
|
||||
f"Failed to retrieve the expected HTML page at {url}",
|
||||
suggested_action=[
|
||||
"Check your network connection.",
|
||||
f"Make sure that you can access {url} in your web browser.",
|
||||
],
|
||||
) from e
|
||||
|
||||
@staticmethod
|
||||
def get_versions_extensions(html_doc: str, category: str) -> Iterator[Tuple[Optional[Version], str]]:
|
||||
def get_versions_extensions(self, html_doc: str, category: str) -> Iterator[Tuple[Optional[Version], str]]:
|
||||
def folder_to_version_extension(folder: str) -> Tuple[Optional[Version], str]:
|
||||
components = folder.split("_", maxsplit=2)
|
||||
ext = "" if len(components) < 3 else components[2]
|
||||
@@ -617,7 +625,7 @@ class MetadataFactory:
|
||||
|
||||
return map(
|
||||
folder_to_version_extension,
|
||||
MetadataFactory.iterate_folders(html_doc, category),
|
||||
self.iterate_folders(html_doc, category),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
@@ -792,5 +800,5 @@ def show_list(meta: MetadataFactory):
|
||||
else:
|
||||
print(*output, sep=" ")
|
||||
except (ArchiveDownloadError, ArchiveConnectionError) as e:
|
||||
e.suggested_action = suggested_follow_up(meta)
|
||||
e.append_suggested_follow_up(suggested_follow_up(meta))
|
||||
raise e from e
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import posixpath
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
@@ -178,6 +179,31 @@ def test_list_versions_tools(monkeypatch, spec_regex, os_name, target, in_file,
|
||||
assert f"{all_ver_for_spec}" == row
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "html_doc",
    (
        "<html><body>Login to my public WIFI network:<form>...</form></body></html>",
        "<html>malformed-html/",
    ),
)
def test_list_bad_html(monkeypatch, html_doc: str):
    """Check that an unexpected or malformed HTML page yields ArchiveConnectionError.

    ``MetadataFactory.fetch_http`` is stubbed to return ``html_doc``, then
    ``fetch_versions`` is called. The raised exception must be exactly an
    ``ArchiveConnectionError`` whose formatted text matches the expected
    message and suggested actions for the archive URL.
    """

    def serve_bad_page(*args, **kwargs):
        # Stand-in for the real HTTP fetch: always hands back the bad page.
        return html_doc

    monkeypatch.setattr(MetadataFactory, "fetch_http", serve_bad_page)

    archive_id = ArchiveId("qt", "linux", "desktop")
    url = posixpath.join(Settings.baseurl, archive_id.to_url())
    wanted = ArchiveConnectionError(
        f"Failed to retrieve the expected HTML page at {url}",
        suggested_action=[
            "Check your network connection.",
            f"Make sure that you can access {url} in your web browser.",
        ],
    )

    with pytest.raises(ArchiveConnectionError) as exc_info:
        MetadataFactory(archive_id).fetch_versions()

    assert exc_info.type == ArchiveConnectionError
    assert format(exc_info.value) == format(wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"version,extension,in_file,expect_out_file",
|
||||
[
|
||||
|
||||
Reference in New Issue
Block a user