Refactoring altlink helper (#109)

* Refactoring altlink helper

Also fix a bug where the content-type header was not accepted
when it carried a charset parameter, such as
application/metalink4+xml; charset=utf-8

* Add test and accept text/plain mime type

Signed-off-by: Hiroshi Miura <miurahr@linux.com>
This commit is contained in:
Hiroshi Miura
2020-03-05 23:00:31 +09:00
committed by GitHub
parent 0635720d52
commit 3b06814601
3 changed files with 86 additions and 36 deletions

View File

@@ -1,51 +1,53 @@
import logging
import sys
import xml.etree.ElementTree as ElementTree import xml.etree.ElementTree as ElementTree
from typing import List, Optional
import requests import requests
from aqt.settings import Settings from aqt.settings import Settings
def _get_meta(url: str):
    """Fetch the metalink description that sits next to *url*.

    The request target is *url* with the ``.meta4`` suffix appended. The
    response object from ``requests.get`` is returned as-is; any exception
    raised by ``requests`` propagates to the caller.
    """
    meta_url = url + '.meta4'
    return requests.get(meta_url)
for site in settings.blacklist: def _check_content_type(ct: str) -> bool:
if alt.startswith(site): candidate = ['application/metalink4+xml', 'text/plain']
black = True return any(ct.startswith(t) for t in candidate)
if not black:
def altlink(url: str, alt: str, logger=None):
    '''Pick a download location that avoids blacklisted mirrors.

    ``alt`` is the redirected location. When no blacklist is configured, or
    ``alt`` does not start with any entry of ``Settings().blacklist``, it is
    returned unchanged. Otherwise ``url + '.meta4'`` (a metalink version 4 XML
    document) is fetched and parsed, and the first mirror in ascending
    priority order that is not blacklisted is returned.

    :param url: the originally requested URL.
    :param alt: the location the server redirected to.
    :param logger: logger for error reports; defaults to this module's logger.
    :return: the chosen mirror URL, or ``alt`` when the metalink cannot be
        fetched (connection error), has an unexpected content type, cannot be
        parsed, or offers no acceptable mirror.
    '''
    if logger is None:
        logger = logging.getLogger(__name__)
    blacklist = Settings().blacklist  # type: Optional[List[str]]
    if blacklist is None or not any(alt.startswith(b) for b in blacklist):
        return alt

    try:
        m = _get_meta(url)
    except requests.exceptions.ConnectionError:
        logger.error("Got connection error. Fall back to recovery plan...")
        return alt

    # Expected response->'application/metalink4+xml; charset=utf-8'
    # A missing header is treated like an unexpected one instead of raising.
    content_type = m.headers.get('content-type', '')
    if not _check_content_type(content_type):
        logger.error("Unexpected meta4 response;content-type: {}".format(content_type))
        return alt

    ns = "{urn:ietf:params:xml:ns:metalink}"
    candidates = []
    try:
        mirror_xml = ElementTree.fromstring(m.text)
        for f in mirror_xml.iter(ns + "file"):
            for u in f.iter(ns + "url"):
                location = (u.text or '').strip()
                if not location:
                    # An empty <url/> element carries nothing to download from.
                    continue
                # Collect (priority, url) pairs rather than a dict keyed by
                # priority, so mirrors sharing a priority are all kept.
                # NOTE(review): a url without a priority attribute is ranked
                # last (999999), following RFC 5854 - confirm.
                candidates.append((int(u.attrib.get('priority', 999999)), location))
    except Exception as e:
        # Deliberately broad: any failure while reading the metalink must
        # fall back to the redirected location instead of aborting.
        logger.error("Unexpected meta4 file; parse error: {}".format(e))
        return alt

    # list.sort is stable, so equal priorities keep their document order.
    candidates.sort(key=lambda item: item[0])
    # Return first priority item which is not blacklisted,
    # if not found then return alt by default.
    for _, mirror in candidates:
        if not any(mirror.startswith(b) for b in blacklist):
            return mirror
    return alt

View File

@@ -62,7 +62,7 @@ class QtInstaller:
try: try:
r = requests.get(url, allow_redirects=False, stream=True) r = requests.get(url, allow_redirects=False, stream=True)
if r.status_code == 302: if r.status_code == 302:
newurl = altlink(r.url, r.headers['Location']) newurl = altlink(r.url, r.headers['Location'], logger=self.logger)
self.logger.info('Redirected to new URL: {}'.format(newurl)) self.logger.info('Redirected to new URL: {}'.format(newurl))
r = requests.get(newurl, stream=True) r = requests.get(newurl, stream=True)
except requests.exceptions.ConnectionError as e: except requests.exceptions.ConnectionError as e:

48
tests/test_helper.py Normal file
View File

@@ -0,0 +1,48 @@
import pytest
from aqt import helper
@pytest.mark.unit
def test_helper_altlink(monkeypatch):
    """altlink() should move from the redirected mirror to the priority-2 one.

    ``helper._get_meta`` is replaced with a stub that serves a canned metalink
    document as ``text/plain``, so no network access happens.
    NOTE(review): the expected result presumably relies on
    mirrors.geekpie.club being in the configured blacklist - confirm.
    """
    metalink_body = '''<?xml version="1.0" encoding="UTF-8"?>
<metalink xmlns="urn:ietf:params:xml:ns:metalink">
<generator>MirrorBrain/2.17.0</generator>
<origin dynamic="true">http://download.example.io/boo.7z.meta4</origin>
<published>2020-03-04T01:11:48Z</published>
<publisher>
<name>Example Project</name>
<url>https://download.example.io</url>
</publisher>
<file name="boo.7z">
<size>651</size>
<hash type="md5">d49eba3937fb063caa48769e8f28377c</hash>
<hash type="sha-1">25d3a33d00c1e5880679a17fd4b8b831134cfa6f</hash>
<hash type="sha-256">37e50248cf061109e2cb92105cd2c36a6e271701d6d4a72c4e73c6d82aad790a</hash>
<pieces length="262144" type="sha-1">
<hash>bec628a149ed24a3a9b83747776ecca5a1fad11c</hash>
<hash>98b1dee3f741de51167a9428b0560cd2d1f4d945</hash>
<hash>8717a0cb3d14c1958de5981635c9b90b146da165</hash>
<hash>78cd2ae3ae37ca7c080a56a2b34eb33ec44a9ef1</hash>
</pieces>
<url location="cn" priority="1">http://mirrors.geekpie.club/boo.7z</url>
<url location="jp" priority="2">http://ftp.jaist.ac.jp/pub/boo.7z</url>
<url location="jp" priority="3">http://ftp.yz.yamagata-u.ac.jp/pub/boo.7z</url>
</file>
</metalink>
'''

    class FakeResponse:
        # Only the attributes altlink() reads from the response are provided.
        headers = {
            'content-type': 'text/plain',
            'length': 300,
        }
        text = metalink_body

    def fake_get_meta(url):
        return FakeResponse()

    monkeypatch.setattr(helper, '_get_meta', fake_get_meta)

    requested = 'http://foo.baz/qtproject/boo.7z'
    redirected = 'http://mirrors.geekpie.club/boo.7z'
    chosen = helper.altlink(requested, redirected)
    assert chosen.startswith('http://ftp.jaist.ac.jp/')