summary refs log tree commit diff
path: root/pkgs/servers/dict/wiktionary/latest_version.py
blob: 2833a1e05b03ef54933f96ab69a9d1402ab6da2e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import subprocess

from html.parser import HTMLParser
from os.path import abspath, dirname
from urllib.request import urlopen

class WiktionaryLatestVersionParser(HTMLParser):
    """Track the greatest version string linked from an HTML index page.

    Each ``<a href="...">`` start tag is inspected.  The link target, with a
    single trailing slash removed, is considered a candidate version only if
    it consists solely of digits; everything else (``latest/``, ``../``,
    absolute URLs, anchors without an ``href``) is ignored.

    Candidates are compared with ``latest_version`` as plain strings and the
    greater one is kept, so the result is never smaller than the version the
    parser was constructed with.
    NOTE(review): string comparison orders versions correctly only when they
    all have the same length (presumably date stamps) -- confirm against the
    index page being parsed.
    """

    def __init__(self, current_version, *args, **kwargs):
        """Create a parser seeded with ``current_version``.

        Remaining positional and keyword arguments are forwarded unchanged to
        ``HTMLParser.__init__``.
        """
        super().__init__(*args, **kwargs)
        # Greatest version seen so far; starts at the caller-supplied value.
        self.latest_version = current_version

    def handle_starttag(self, tag, attrs):
        """Update ``latest_version`` from an ``<a>`` tag's ``href``, if valid."""
        if tag != 'a':
            return

        # ``href`` may be absent (``<a name=...>``) or valueless (``<a href>``,
        # reported as ``None``); neither can name a version directory.
        href = dict(attrs).get('href')
        if not href:
            return

        # Drop the directory slash only when it is really there, instead of
        # blindly chopping the last character.
        candidate = href[:-1] if href.endswith('/') else href

        # Only all-digit names are versions; this also filters out 'latest',
        # '..' and unrelated links that would otherwise win the comparison.
        if not candidate.isdigit():
            return

        self.latest_version = max(self.latest_version, candidate)


def nix_prefetch_url(url, algo='sha256'):
    """Run ``nix-prefetch-url`` for *url* and return what it printed.

    The invocation is echoed to stdout first.  *algo* is passed as the value
    of the ``--type`` option.  The command's stdout is decoded as UTF-8 and
    returned with trailing whitespace removed.  A non-zero exit status
    surfaces as ``subprocess.CalledProcessError``.
    """
    print(f'nix-prefetch-url {url}')
    command = ['nix-prefetch-url', '--type', algo, url]
    raw_output = subprocess.check_output(command)
    return str(raw_output, 'utf-8').rstrip()


# Ask Nix for the value of `dictdDBs.wiktionary.version`, evaluated from the
# directory four levels above the one containing this script.
_nix_file = dirname(abspath(__file__)) + '/../../../..'
_eval_command = ['nix', 'eval', '--raw', '-f', _nix_file, 'dictdDBs.wiktionary.version']
current_version = subprocess.check_output(_eval_command).decode('utf-8')

# Seed the parser with that version so the printed result is never lower.
parser = WiktionaryLatestVersionParser(current_version)

# Download the index page, then let the parser scan its links.
with urlopen('https://dumps.wikimedia.org/enwiktionary/') as resp:
    _index_html = resp.read().decode('utf-8')
parser.feed(_index_html)

print(parser.latest_version)