Refactoring altlink helper (#109)

* Refactoring altlink helper

Also fix a bug where a Content-Type header was rejected when it
carried a charset parameter, such as
application/metalink4+xml; charset=utf-8

* Add test and accept text/plain mime type

Signed-off-by: Hiroshi Miura <miurahr@linux.com>
This commit is contained in:
Hiroshi Miura
2020-03-05 23:00:31 +09:00
committed by GitHub
parent 0635720d52
commit 3b06814601
3 changed files with 86 additions and 36 deletions

View File

@@ -1,51 +1,53 @@
import logging
import sys
import xml.etree.ElementTree as ElementTree
from typing import List, Optional
import requests
from aqt.settings import Settings
def altlink(url, alt, priority=None):
'''Download .meta4 metalink version4 xml file and parse it.'''
settings = Settings()
black = False
for site in settings.blacklist:
if alt.startswith(site):
black = True
if not black:
def _get_meta(url: str):
    '''Fetch the metalink description published next to *url*.

    Performs an HTTP GET on ``url`` with the ``.meta4`` suffix appended and
    hands back whatever ``requests.get`` returns. Any exception raised by
    ``requests`` propagates to the caller.
    '''
    meta_url = url + '.meta4'
    response = requests.get(meta_url)
    return response
def _check_content_type(ct: str) -> bool:
candidate = ['application/metalink4+xml', 'text/plain']
return any(ct.startswith(t) for t in candidate)
def altlink(url: str, alt: str, logger=None):
'''Return a download URL, swapping a blacklisted redirect target for a mirror.

When ``Settings().blacklist`` is None, or ``alt`` does not start with any
prefix listed in it, ``alt`` is returned unchanged. Otherwise ``url + '.meta4'``
is fetched through ``_get_meta`` and parsed as a metalink version 4 XML
document; the mirror URLs are ordered by ascending integer ``priority`` and
the first one that does not start with a blacklisted prefix is returned.
``alt`` is the fallback on a connection error, an unexpected content type,
a parse error, or when every mirror is blacklisted.

If ``logger`` is None, ``logging.getLogger(__name__)`` is used.'''
# NOTE(review): this span appears to be a rendered diff with its +/- markers
# and indentation stripped, so pre-change and post-change lines are
# interleaved below. Confirm against the real file before editing any code.
if logger is None:
logger = logging.getLogger(__name__)
blacklist = Settings().blacklist # type: Optional[List[str]]
# Fast path: nothing is blacklisted, or the redirect target is acceptable.
if blacklist is None or not any(alt.startswith(b) for b in blacklist):
return alt
# specified mirror is black, try select another.
mirrors = {}
try:
# NOTE(review): the next line looks like the pre-change form of the one after it.
m = requests.get(url + '.meta4')
m = _get_meta(url)
except requests.exceptions.ConnectionError:
# NOTE(review): the bare `return` below is presumably the removed pre-change line.
return
logger.error("Got connection error. Fall back to recovery plan...")
return alt
else:
# NOTE(review): the exact-match check below is presumably the removed pre-change line.
if m.headers['content-type'] == 'application/metalink4+xml':
# Expected response->'application/metalink4+xml; charset=utf-8'
if not _check_content_type(m.headers['content-type']):
logger.error("Unexpected meta4 response;content-type: {}".format(m.headers['content-type']))
return alt
try:
mirror_xml = ElementTree.fromstring(m.text)
# Maps the priority attribute (a string) to the mirror URL text.
meta_urls = {}
for f in mirror_xml.iter("{urn:ietf:params:xml:ns:metalink}file"):
for u in f.iter("{urn:ietf:params:xml:ns:metalink}url"):
pri = u.attrib['priority']
mirrors[pri] = u.text
if len(mirrors) == 0:
# there is no valuable data.
meta_urls[u.attrib['priority']] = u.text
# Order mirror URLs by numeric priority, lowest value first.
mirrors = [meta_urls[i] for i in sorted(meta_urls.keys(), key=lambda x: int(x))]
except Exception:
exc_info = sys.exc_info()
logger.error("Unexpected meta4 file; parse error: {}".format(exc_info[1]))
return alt
# NOTE(review): `priority` and `settings` are not defined in this signature;
# the lines from here to `return mirrors[str(priority)]` presumably belong to
# the pre-change implementation.
if priority is None:
if settings.blacklist is not None:
for ind in range(len(mirrors)):
mirror = mirrors[str(ind + 1)]
black = False
for b in settings.blacklist:
if mirror.startswith(b):
black = True
continue
if black:
continue
return mirror
else:
for ind in range(len(mirrors)):
mirror = mirrors[str(ind + 1)]
return mirror
else:
return mirrors[str(priority)]
# Return first priority item which is not blacklist in mirrors list,
# if not found then return alt in default
return next(filter(lambda mirror: not any(mirror.startswith(b) for b in blacklist), mirrors), alt)

View File

@@ -62,7 +62,7 @@ class QtInstaller:
try:
r = requests.get(url, allow_redirects=False, stream=True)
if r.status_code == 302:
newurl = altlink(r.url, r.headers['Location'])
newurl = altlink(r.url, r.headers['Location'], logger=self.logger)
self.logger.info('Redirected to new URL: {}'.format(newurl))
r = requests.get(newurl, stream=True)
except requests.exceptions.ConnectionError as e:

48
tests/test_helper.py Normal file
View File

@@ -0,0 +1,48 @@
import pytest
from aqt import helper
@pytest.mark.unit
def test_helper_altlink(monkeypatch):
    '''altlink should move past the redirect target to the next listed mirror.

    ``helper._get_meta`` is replaced by a stub that serves a canned metalink
    document as ``text/plain``, so no network access happens. The redirect
    target equals the priority-1 mirror, and the test expects the priority-2
    mirror to be returned.
    '''
    # NOTE(review): assumes the configured blacklist contains the
    # mirrors.geekpie.club prefix -- confirm against the project settings.
    metalink_body = '''<?xml version="1.0" encoding="UTF-8"?>
<metalink xmlns="urn:ietf:params:xml:ns:metalink">
<generator>MirrorBrain/2.17.0</generator>
<origin dynamic="true">http://download.example.io/boo.7z.meta4</origin>
<published>2020-03-04T01:11:48Z</published>
<publisher>
<name>Example Project</name>
<url>https://download.example.io</url>
</publisher>
<file name="boo.7z">
<size>651</size>
<hash type="md5">d49eba3937fb063caa48769e8f28377c</hash>
<hash type="sha-1">25d3a33d00c1e5880679a17fd4b8b831134cfa6f</hash>
<hash type="sha-256">37e50248cf061109e2cb92105cd2c36a6e271701d6d4a72c4e73c6d82aad790a</hash>
<pieces length="262144" type="sha-1">
<hash>bec628a149ed24a3a9b83747776ecca5a1fad11c</hash>
<hash>98b1dee3f741de51167a9428b0560cd2d1f4d945</hash>
<hash>8717a0cb3d14c1958de5981635c9b90b146da165</hash>
<hash>78cd2ae3ae37ca7c080a56a2b34eb33ec44a9ef1</hash>
</pieces>
<url location="cn" priority="1">http://mirrors.geekpie.club/boo.7z</url>
<url location="jp" priority="2">http://ftp.jaist.ac.jp/pub/boo.7z</url>
<url location="jp" priority="3">http://ftp.yz.yamagata-u.ac.jp/pub/boo.7z</url>
</file>
</metalink>
'''
    response_headers = {'content-type': 'text/plain', 'length': 300}

    class FakeResponse:
        # Only the attributes that altlink reads from the response.
        headers = response_headers
        text = metalink_body

    def fake_get_meta(url):
        return FakeResponse()

    monkeypatch.setattr(helper, '_get_meta', fake_get_meta)

    original_url = 'http://foo.baz/qtproject/boo.7z'
    redirected = 'http://mirrors.geekpie.club/boo.7z'
    chosen = helper.altlink(original_url, redirected)
    assert chosen.startswith('http://ftp.jaist.ac.jp/')