i2p.newsxml/generate_news.py

#!./env/bin/python
# -*- coding: utf-8 -*-
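"""Generate the I2P news Atom feeds.

Reads news entries (data/entries.html, plus optional per-platform and
translated variants), release metadata (releases.json), certificate
revocations (data/crls/*.crl) and an optional blocklist (data/blocklist.xml),
and writes news.atom.xml / news_<lang>.atom.xml files under
build/<I2P_OS>/<I2P_BRANCH>/.
"""
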
import collections
from datetime import datetime
import glob
import json
import os.path
import re

from feedgen.feed import FeedGenerator
from lxml import etree

I2P_OS = os.getenv("I2P_OS", "")
I2P_BRANCH = os.getenv("I2P_BRANCH", "")
DATA_DIR = os.path.join('data')
RELEASE_DIR = os.path.join(DATA_DIR, I2P_OS, I2P_BRANCH)
ENTRIES_FILE = os.path.join(DATA_DIR, 'entries.html')
PLATFORM_ENTRIES_FILE = os.path.join(DATA_DIR, I2P_OS, I2P_BRANCH, 'entries.html')
TRANSLATED_ENTRIES_FILES = os.path.join(DATA_DIR, 'translations/entries.*.html')
TRANSLATED_PLATFORM_ENTRIES_FILES = os.path.join(DATA_DIR, I2P_OS, I2P_BRANCH, 'translations/entries.*.html')
RELEASES_FILE = os.path.join(RELEASE_DIR, 'releases.json')
CRL_FILES = os.path.join(DATA_DIR, 'crls/*.crl')
BLOCKLIST_FILE = os.path.join(DATA_DIR, 'blocklist.xml')
BUILD_DIR = os.path.join('build', I2P_OS, I2P_BRANCH)
NEWS_FILE = os.path.join(BUILD_DIR, 'news.atom.xml')
TRANSLATED_NEWS_FILE = os.path.join(BUILD_DIR, 'news_%s.atom.xml')
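
# The output is parameterized via the environment; for example (the values
# here are purely illustrative, not a list of supported platforms/branches):
#
#   I2P_OS=win I2P_BRANCH=beta ./generate_news.py
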
def load_feed_metadata(fg):
    """Set the feed's static id and links."""
    fg.id('urn:uuid:60a76c80-d399-11d9-b91C-543213999af6')
    fg.link(href='http://i2p-projekt.i2p/')
    fg.link(href='http://tc73n4kivdroccekirco7rhgxdg5f3cjvbaapabupeyzrqwv5guq.b32.i2p/news.atom.xml', rel='self')
    fg.link(href='http://dn3tvalnjz432qkqsvpfdqrwpqkw3ye4n4i2uyfr4jexvo3sp5ka.b32.i2p/news/news.atom.xml', rel='alternate')

def load_entries(fg, entries_file, platform_entries_file=None):
    """Parse the entries HTML files and add each entry to the feed in
    chronological order."""
    metadatas = {}
    finalentries = {}
    print('Loading entries from %s' % entries_file)
    entries = prepare_entries_file(fg, entries_file)
    # split() creates a junk final element with trailing </div>
    for entry_str in entries[:-1]:
        entry_parts = entry_str.split('</details>', 1)
        md = extract_entry_metadata(entry_parts[0])
        metadatas[md['published']] = md
        finalentries[md['id']] = entry_parts[1]
    # Merge in platform-specific entries, if any. Check for None before
    # touching the filesystem, and skip the default entries file so the
    # same entries are not loaded twice.
    if (platform_entries_file is not None
            and platform_entries_file != entries_file
            and platform_entries_file != ENTRIES_FILE
            and os.path.exists(platform_entries_file)):
        print('Loading platform entries from %s' % platform_entries_file)
        entries = prepare_entries_file(fg, platform_entries_file)
        for entry_str in entries[:-1]:
            entry_parts = entry_str.split('</details>', 1)
            md = extract_entry_metadata(entry_parts[0])
            # Platform entries are keyed by their updated date
            metadatas[md['updated']] = md
            finalentries[md['id']] = entry_parts[1]
    sorted_metadata = collections.OrderedDict(sorted(metadatas.items()))
    for metadata in sorted_metadata.values():
        fe = fg.add_entry()
        fe.id(metadata['id'])
        fe.title(metadata['title'])
        fe.summary(metadata['summary'])
        fe.link(href=metadata['href'])
        fe.author(name=metadata['author'])
        fe.published(metadata['published'])
        fe.updated(metadata['updated'])
        fe.content(finalentries[metadata['id']], type='xhtml')

def prepare_entries_file(fg, entries_file=None):
    """Read an entries file, set the feed title and subtitle from its
    header, and return the raw per-entry strings."""
    with open(entries_file) as f:
        entries_data = f.read().strip('\n')
    # Replace HTML non-breaking space with unicode
    entries_data = entries_data.replace('&nbsp;', '\u00a0')
    # Strip the leading <div> from translations
    if entries_data.startswith('<div>'):
        entries_data = entries_data[5:]
    entries_parts = entries_data.split('</header>')
    fg.title(re.findall(r'title="(.*?)"', entries_parts[0])[0])
    fg.subtitle(entries_parts[0].split('>')[1])
    entries = entries_parts[1].split('</article>')
    return entries

def extract_entry_metadata(s):
    """Pull key="value" attributes and the <summary> text out of everything
    preceding an entry's </details> tag."""
    m = {k: v.strip('"') for k, v in re.findall(r'(\S+)=(".*?"|\S+)', s)}
    summary = re.findall(r'<summary>(.*)</summary>', s, re.DOTALL)
    if len(summary) > 0:
        m['summary'] = summary[0]
    return m

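# For reference, the parsers above expect each entries file to look roughly
# like the sketch below. This is inferred from the regexes and split() calls,
# not copied from a real entries.html, and all field values are hypothetical:
#
#   <header title="Feed title">Feed subtitle</header>
#   <article id="urn:uuid:..." title="Entry title" href="http://example.i2p/"
#            author="someone" published="2021-01-01T12:00:00+00:00"
#            updated="2021-01-01T12:00:00+00:00">
#     <details>
#       <summary>One-line summary of the entry.</summary>
#     </details>
#     <p>Entry body as XHTML.</p>
#   </article>
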
def load_releases(fg):
    """Add the releases described in releases.json to the feed's i2p
    extension."""
    with open(RELEASES_FILE) as json_data:
        d = json.load(json_data)
    for release in d:
        r = fg.i2p.add_release()
        r.date(release['date'])
        r.version(release['version'])
        if 'minVersion' in release:
            r.min_version(release['minVersion'])
        if 'minJavaVersion' in release:
            r.min_java_version(release['minJavaVersion'])
        for update_type, update in release['updates'].items():
            u = r.add_update(update_type)
            if 'clearnet' in update:
                for url in update['clearnet']:
                    u.clearnet(url)
            if 'clearnetssl' in update:
                for url in update['clearnetssl']:
                    u.clearnetssl(url)
            if 'torrent' in update:
                u.torrent(update['torrent'])
            if 'url' in update:
                for url in update['url']:
                    u.url(url)

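# For reference, load_releases() expects releases.json to be a list of
# objects shaped roughly as below. Only the keys are inferred from the code;
# every value shown is hypothetical:
#
#   [{
#     "date": "2021-01-01",
#     "version": "1.9.0",
#     "minVersion": "0.9.9",
#     "minJavaVersion": "1.8",
#     "updates": {
#       "su3": {
#         "clearnet": ["http://example.com/i2pupdate.su3"],
#         "clearnetssl": ["https://example.com/i2pupdate.su3"],
#         "torrent": "magnet:?xt=urn:btih:...",
#         "url": ["http://example.i2p/i2pupdate.su3"]
#       }
#     }
#   }]
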
def load_revocations(fg):
    """Add any certificate revocation lists found on disk."""
    # Only add a revocations element if there are CRLs
    r = None
    for crl in glob.glob(CRL_FILES):
        if r is None:
            r = fg.i2p.add_revocations()
        crl_id = os.path.splitext(os.path.basename(crl))[0]
        c = r.add_crl(crl_id)
        c.updated(datetime.fromtimestamp(os.path.getmtime(crl)))
        # Text-mode open() already decodes to str, so no .decode() is needed
        with open(crl, encoding='utf-8') as f:
            crl_content = f.read().strip()
        c.content('\n%s\n' % crl_content)

def load_blocklist(fg):
    """Add a blocklist element if blocklist.xml is present."""
    # Only add a blocklist element if there is content
    if os.path.isfile(BLOCKLIST_FILE):
        with open(BLOCKLIST_FILE) as f:
            content = '<xml xmlns:i2p="http://geti2p.net/en/docs/spec/updates">%s</xml>' % f.read()
        root = etree.fromstring(content)
        b = fg.i2p.add_blocklist()
        b.from_xml(root[0])

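# For reference, blocklist.xml is expected to hold a single element in the
# i2p updates namespace, which the wrapper above parses and grafts into the
# feed. A rough, illustrative sketch (element contents are hypothetical):
#
#   <i2p:blocklist>
#     <i2p:block>entry-to-block</i2p:block>
#   </i2p:blocklist>
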
def generate_feed(entries_file=None, platform_entries_file=None):
    """Generate one Atom feed: the default English feed, or a translated
    feed whose language code is taken from the entries filename."""
    # Translated entries files are named entries.<lang>.html
    language = entries_file.split('.')[1] if entries_file else 'en'
    fg = FeedGenerator()
    fg.load_extension('i2p')
    fg.language(language)
    load_feed_metadata(fg)
    load_entries(fg,
                 entries_file or ENTRIES_FILE,
                 platform_entries_file or PLATFORM_ENTRIES_FILE)
    load_releases(fg)
    load_revocations(fg)
    load_blocklist(fg)
    if not os.path.exists(BUILD_DIR):
        os.makedirs(BUILD_DIR)
    fg.atom_file(TRANSLATED_NEWS_FILE % language if entries_file else NEWS_FILE,
                 pretty=True)

if __name__ == '__main__':
    # Standard feed
    generate_feed()
    # Translated feeds
    for entries_file in glob.glob(TRANSLATED_ENTRIES_FILES):
        generate_feed(entries_file)