mirror of
https://github.com/go-i2p/www.git
synced 2025-06-09 07:16:35 -04:00
basics
This commit is contained in:
97
clean_markdown.py
Normal file
97
clean_markdown.py
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# filepath: clean_markdown.py
|
||||||
|
|
||||||
|
import re
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
def clean_markdown_file(file_path, dry_run=False):
    """Strip leftover template syntax from a markdown file, in place.

    Removes ``{{ ... }}`` expressions and ``{% ... %}`` tags, their
    URL-encoded ``%7B%7B...%7D%7D`` forms, double-backslash escapes in front
    of markdown punctuation, and runs of spaces left between words.

    Args:
        file_path: Path of the markdown file to clean.
        dry_run: When true, only report what would change; nothing is written.

    Returns:
        True if the file was (or, in a dry run, would be) modified. False if
        nothing changed or the file could not be read or written.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            original_content = f.read()
    except (OSError, UnicodeError) as e:
        print(f"Error processing {file_path}: {e}", file=sys.stderr)
        return False

    # Replace {{...}} expressions with empty strings
    content = re.sub(r'\{\{\s*([^}]*?)\s*\}\}', '', original_content)

    # Replace {%...%} template tags with empty strings
    content = re.sub(r'\{%[^%]*?%\}', '', content)

    # Links whose target was a URL-encoded template variable,
    # e.g. [network database](%7B%7B%20netdb%20%7D%7D) -> [network database]()
    content = re.sub(r'\]\(%7B%7B[^)]*?%7D%7D\)', ']()', content)

    # Any other URL-encoded template variable. The body normally contains
    # further %XX escapes (%20, %28, ...), so it must be allowed to hold '%'.
    content = re.sub(r'%7B%7B.*?%7D%7D', '', content)

    # Drop a doubled backslash in front of markdown punctuation. The '-' is
    # escaped so that "+-." is three literals rather than a character range.
    content = re.sub(r'\\\\([`*_{}\[\]()#+\-.!])', r'\1', content)

    # Collapse runs of spaces between words only; leading indentation and
    # trailing "two-space" hard line breaks are meaningful in markdown.
    content = re.sub(r'(?<=\S) {2,}(?=\S)', ' ', content)

    if content == original_content:
        print(f"No changes needed: {file_path}")
        return False

    if dry_run:
        print(f"Would clean: {file_path} (dry run)")
        return True

    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)
    except OSError as e:
        print(f"Error processing {file_path}: {e}", file=sys.stderr)
        return False

    print(f"Cleaned: {file_path}")
    return True
|
||||||
|
|
||||||
|
# File suffixes (lower-case) that are treated as markdown.
_MARKDOWN_SUFFIXES = ('.md', '.markdown')


def _iter_markdown_files(directory, recursive):
    """Yield markdown files in *directory*, descending into it when *recursive*.

    Suffixes are compared case-insensitively, matching the check applied to
    paths given directly on the command line. Non-file entries are skipped.
    """
    candidates = directory.rglob('*') if recursive else directory.iterdir()
    for candidate in sorted(candidates):
        if candidate.is_file() and candidate.suffix.lower() in _MARKDOWN_SUFFIXES:
            yield candidate


def main():
    """Parse the command line and clean every markdown file it names.

    Each positional path may be a markdown file or a directory. Directories
    are scanned one level deep, or fully with ``--recursive``. A summary of
    processed and changed files is printed at the end.
    """
    parser = argparse.ArgumentParser(description="Clean markdown files by removing template variables and expressions.")
    parser.add_argument("paths", nargs='+', help="Markdown files or directories to process")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be changed without making changes")
    parser.add_argument("--recursive", "-r", action="store_true", help="Process directories recursively")
    args = parser.parse_args()

    files_processed = 0
    files_changed = 0

    for path in args.paths:
        path_obj = Path(path)
        if path_obj.is_file() and path_obj.suffix.lower() in _MARKDOWN_SUFFIXES:
            targets = [path_obj]
        elif path_obj.is_dir():
            targets = _iter_markdown_files(path_obj, args.recursive)
        else:
            print(f"Skipping {path}: Not a markdown file or directory")
            continue

        for md_file in targets:
            files_processed += 1
            if clean_markdown_file(md_file, args.dry_run):
                files_changed += 1

    print(f"\nSummary: Processed {files_processed} files, changed {files_changed} files")


if __name__ == "__main__":
    main()
|
12
cmd.sh
12
cmd.sh
@ -1,9 +1,11 @@
|
|||||||
#!/usr/bin/env sh
# Convert one source file ($1) to cleaned Markdown.
# Reads from the environment:
#   main      path to main.py
#   LANGUAGE  language code used for the output and translation directories
#   posource  name of the .po file under translations/$LANGUAGE/LC_MESSAGES/

# $1 is the input file
input=$1

# outdir is processed/lang/path to output file
# (every expansion is quoted so paths containing spaces survive intact)
outdir="processed/$LANGUAGE/$(dirname "$input")"
output="$(basename "$input").md"
mkdir -p "$outdir"

echo python3 "$main" --to-markdown --assets-dir static/ -o "$outdir/$output" "$input" "translations/$LANGUAGE/LC_MESSAGES/$posource"
python3 "$main" --to-markdown --assets-dir static/ -o "$outdir/$output" "$input" "translations/$LANGUAGE/LC_MESSAGES/$posource" 2>> "err.$LANGUAGE.log" 1>> "log.$LANGUAGE.log"
python3 clean_markdown.py "$outdir/$output"
|
452
main.py
452
main.py
@ -1,8 +1,8 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
RST Translation Processor
|
Translation Processor
|
||||||
|
|
||||||
A script to process reStructuredText files by replacing translation tags
|
A script to process reStructuredText and HTML files by replacing translation tags
|
||||||
with content from .po files, handling image references, and optionally
|
with content from .po files, handling image references, and optionally
|
||||||
converting to markdown.
|
converting to markdown.
|
||||||
"""
|
"""
|
||||||
@ -12,7 +12,8 @@ import os
|
|||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
from typing import Dict, Optional, Tuple
|
import urllib.parse
|
||||||
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
|
||||||
import polib
|
import polib
|
||||||
|
|
||||||
@ -52,29 +53,122 @@ def load_translations(po_file_path: str) -> Dict[str, str]:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def replace_template_vars(content: str) -> str:
|
||||||
|
"""
|
||||||
|
Replace template variables like {{spec_url()}} with their values.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
content: Content with template variables
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Content with template variables replaced
|
||||||
|
"""
|
||||||
|
# Define base URLs for different types of links
|
||||||
|
base_urls = {
|
||||||
|
'spec_url': 'https://i2p.net/spec/',
|
||||||
|
'proposal_url': 'https://i2p.net/spec/proposals/proposal',
|
||||||
|
'i2p_url': 'https://i2p.net/',
|
||||||
|
'site_url': 'https://i2p.net/',
|
||||||
|
'get_url': 'https://i2p.net/'
|
||||||
|
}
|
||||||
|
|
||||||
|
# Handle {{spec_url("name")}} pattern
|
||||||
|
def replace_spec_url(match):
|
||||||
|
func_name = match.group(1)
|
||||||
|
arg = match.group(2).strip('"\'') if match.group(2) else ""
|
||||||
|
|
||||||
|
if func_name in base_urls:
|
||||||
|
if func_name == 'proposal_url':
|
||||||
|
return f"{base_urls[func_name]}{arg}.html"
|
||||||
|
else:
|
||||||
|
return f"{base_urls[func_name]}{arg}"
|
||||||
|
|
||||||
|
# Handle special cases for other template functions
|
||||||
|
if func_name == 'url_for':
|
||||||
|
# Extract the filename from patterns like url_for('static', filename='images/...')
|
||||||
|
filename_match = re.search(r'filename=[\'"](.*?)[\'"]', arg)
|
||||||
|
if filename_match:
|
||||||
|
return f"/_static/{filename_match.group(1)}"
|
||||||
|
elif func_name == 'i2pconv':
|
||||||
|
# For i2p domain conversions, return as is
|
||||||
|
return arg
|
||||||
|
|
||||||
|
return match.group(0) # Return unchanged if not recognized
|
||||||
|
|
||||||
|
# This pattern matches template functions like {{spec_url("ntcp2")}}
|
||||||
|
template_pattern = r'{{([a-zA-Z_]+)\(([^}]*?)\)}}'
|
||||||
|
processed_content = re.sub(template_pattern, replace_spec_url, content)
|
||||||
|
|
||||||
|
# Handle other simple variable substitutions like {{ _('text') }}
|
||||||
|
def replace_simple_var(match):
|
||||||
|
var_name = match.group(1).strip()
|
||||||
|
# For translation function calls like _('text'), return just the text
|
||||||
|
if var_name.startswith("_('") and var_name.endswith("')"):
|
||||||
|
return var_name[3:-2] # Extract the text between quotes
|
||||||
|
return match.group(0) # Return unchanged if not recognized
|
||||||
|
|
||||||
|
simple_var_pattern = r'{{([^}]+?)}}'
|
||||||
|
return re.sub(simple_var_pattern, replace_simple_var, processed_content)
|
||||||
|
|
||||||
|
|
||||||
def replace_translations(content: str, translations: Dict[str, str]) -> str:
    """
    Replace {% trans %}...{% endtrans %} blocks with translated text.

    Blocks may carry whitespace-control dashes ({%- ... -%}) and quoted
    parameters ({% trans name="value" %}). The block text is looked up in
    ``translations`` first verbatim and then in gettext form, where each
    ``{{ name }}`` is written ``%(name)s`` and a literal ``%`` is doubled.
    Quoted parameter values are substituted only after the lookup, so the
    lookup key still contains the placeholders. Untranslated blocks are
    replaced by their own (stripped) text.

    Args:
        content: Content with translation tags
        translations: Dictionary of translations

    Returns:
        Content with translations applied
    """
    block_re = re.compile(
        r'\{%-?\s*trans\b(.*?)-?%\}(.*?)\{%-?\s*endtrans\s*-?%\}',
        re.DOTALL,
    )
    param_re = re.compile(r'(\w+)=(["\'])(.*?)\2')
    template_var_re = re.compile(r'\{\{\s*(\w+)\s*\}\}')
    gettext_var_re = re.compile(r'%\((\w+)\)s')

    def translate(text):
        """Return the translation of *text*, or *text* itself if there is none."""
        if text in translations:
            return translations[text]
        msgid = template_var_re.sub(r'%(\1)s', text.replace('%', '%%'))
        if msgid != text and msgid in translations:
            # Convert the gettext placeholders back to template syntax so the
            # result looks like an untranslated block would.
            restored = gettext_var_re.sub(r'{{ \1 }}', translations[msgid])
            return restored.replace('%%', '%')
        return text

    def replace_block(match):
        params = {key: value for key, _, value in param_re.findall(match.group(1))}
        translated = translate(match.group(2).strip())

        # Fill in quoted parameter values wherever the text refers to them
        for key, value in params.items():
            reference = re.compile(r'\{\{\s*' + re.escape(key) + r'\s*\}\}')
            translated = reference.sub(lambda _m, value=value: value, translated)
            translated = translated.replace(f"%({key})s", value)
            translated = translated.replace(f"{{{key}}}", value)

        return translated

    return block_re.sub(replace_block, content)
|
||||||
|
|
||||||
|
|
||||||
def process_images(content: str, base_dir: str, assets_dir: str) -> str:
|
def process_rst_images(content: str, base_dir: str, assets_dir: str) -> str:
|
||||||
"""
|
"""
|
||||||
Process image references in the content.
|
Process image references in RST content.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
content: RST content with image references
|
content: RST content with image references
|
||||||
@ -123,41 +217,192 @@ def process_images(content: str, base_dir: str, assets_dir: str) -> str:
|
|||||||
return re.sub(pattern, process_match, content)
|
return re.sub(pattern, process_match, content)
|
||||||
|
|
||||||
|
|
||||||
def convert_to_markdown(rst_content: str) -> str:
|
def process_html_images(content: str, base_dir: str, assets_dir: str) -> str:
    """
    Copy locally referenced <img> files into the assets directory and
    rewrite each tag's src to "images/<filename>".

    Remote (http://, https://, //) and data: sources are left alone, as is
    any tag whose image cannot be resolved or copied. A src holding a
    template call with filename='...' is resolved to that filename.

    Args:
        content: HTML content with image references
        base_dir: Base directory of the input file
        assets_dir: Directory to store images

    Returns:
        Content with updated image references
    """
    # Create assets directory if it doesn't exist
    images_dir = os.path.join(assets_dir, "images")
    os.makedirs(images_dir, exist_ok=True)

    # The closing quote must be the same character as the opening one, so a
    # double-quoted src may contain single quotes (url_for('static', ...)).
    # The look-behind keeps attributes such as data-src from matching.
    pattern = r'<img\s+[^>]*?(?<![\w-])src=(["\'])(.*?)\1[^>]*>'

    def process_match(match):
        img_tag = match.group(0)
        path = match.group(2)

        # Skip anything that is not a local file reference
        if path.lower().startswith(('http://', 'https://', '//', 'data:')):
            return img_tag

        # Handle templated paths such as
        # {{ url_for('static', filename='images/file.png') }}
        if "{{" in path:
            template_match = re.search(r'filename=[\'"](.*?)[\'"]', path)
            if not template_match:
                return img_tag  # Can't process this template
            path = f"/_static/{template_match.group(1)}"

        # Remove leading /_static/ if present
        if path.startswith('/_static/'):
            path = path[len('/_static/'):]

        if os.path.isabs(path):
            full_old_path = path
        else:
            full_old_path = os.path.join(base_dir, path)
            if not os.path.exists(full_old_path):
                # Try looking in static directory
                full_old_path = os.path.join(base_dir, 'static', path)

        filename = os.path.basename(path)
        new_full_path = os.path.join(images_dir, filename)

        if not os.path.exists(full_old_path):
            print(f"Warning: Image file not found: {full_old_path}", file=sys.stderr)
            return img_tag  # Return original if not found

        try:
            shutil.copy2(full_old_path, new_full_path)
            print(f"Copied image: {full_old_path} -> {new_full_path}")
        except OSError as e:
            print(f"Error copying image {full_old_path}: {e}", file=sys.stderr)
            return img_tag  # Return original if error

        # Rewrite only the src value itself; a plain str.replace would also
        # touch other attributes that happen to contain the same text.
        src_start = match.start(2) - match.start()
        src_end = match.end(2) - match.start()
        return f"{img_tag[:src_start]}images/{filename}{img_tag[src_end:]}"

    return re.sub(pattern, process_match, content)
|
||||||
|
|
||||||
|
|
||||||
|
def clean_markdown_output(markdown: str) -> str:
    """
    Clean up markdown output by handling URL-encoded template variables.

    Each ``%7B%7B...%7D%7D`` sequence (a percent-encoded ``{{ ... }}``,
    matched in either hex case) is replaced by the ``filename='...'`` value
    when it encodes a ``url_for`` call, and removed otherwise. Image links
    that still have an empty or template-looking target are then dropped.

    Args:
        markdown: Markdown content with possible URL-encoded templates

    Returns:
        Cleaned markdown content
    """
    def replace_encoded_templates(match):
        # Decode back to "{{ ... }}" and look at what is between the braces
        decoded_text = urllib.parse.unquote(match.group(0))
        template_content = decoded_text.strip('{}').strip()

        # url_for('static', filename='images/file.png') -> images/file.png
        if 'url_for' in template_content:
            filename_match = re.search(r'filename=[\'"](.*?)[\'"]', template_content)
            if filename_match:
                return filename_match.group(1)

        # Any other template expression is removed
        return ''

    # Percent-escapes are valid in either hex case (%7B or %7b)
    markdown = re.sub(
        r'%7B%7B.*?%7D%7D', replace_encoded_templates, markdown,
        flags=re.IGNORECASE,
    )

    def fix_image_links(match):
        alt_text = match.group(1)
        link = match.group(2)

        broken = not link or link.lower().startswith('%7b') or link.startswith('{{')
        if broken and alt_text:
            # NOTE(review): the code as received returns an empty string here,
            # which removes the image altogether. The surrounding comments
            # suggest a link derived from the alt text was intended; confirm
            # the expected replacement before changing this.
            return ''

        return match.group(0)  # Return unchanged

    return re.sub(r'!\[(.*?)\]\((.*?)\)', fix_image_links, markdown)
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_markdown(content: str, is_rst: bool = True) -> str:
    """
    Convert content to Markdown.

    RST input is first rendered to HTML with docutils; the HTML (or HTML
    input as given) is then converted with pypandoc and passed through
    clean_markdown_output. If a required package is unavailable, or the
    conversion raises, a message is written to stderr and the input is
    returned unchanged.

    Args:
        content: Content to convert
        is_rst: Whether the content is RST (True) or HTML (False)

    Returns:
        Markdown content
    """
    if not PANDOC_AVAILABLE:
        print("Warning: pypandoc not available. Conversion to Markdown skipped.", file=sys.stderr)
        return content

    try:
        html_source = content
        if is_rst:
            if not DOCUTILS_AVAILABLE:
                print("Warning: docutils not available. RST to HTML conversion skipped.", file=sys.stderr)
                return content

            # RST -> HTML
            html_source = docutils.core.publish_string(
                source=content,
                writer_name='html',
                settings_overrides={'output_encoding': 'unicode'}
            )

        # HTML -> Markdown, then tidy URL-encoded template leftovers
        converted = pypandoc.convert_text(html_source, 'md', format='html')
        return clean_markdown_output(converted)
    except Exception as e:
        print(f"Error converting to markdown: {e}", file=sys.stderr)
        return content
|
||||||
|
|
||||||
|
|
||||||
def process_rst_file(
|
def is_draft(file_path: str) -> bool:
    """
    Check if a file is a draft based on its name.

    Only the final path component is examined, so a directory whose name
    happens to contain ".draft." does not mark every file inside it as a
    draft. The comparison is case-insensitive.

    Args:
        file_path: Path to the file

    Returns:
        True if the file is a draft, False otherwise
    """
    return '.draft.' in os.path.basename(file_path).lower()
|
||||||
|
|
||||||
|
|
||||||
|
def process_file(
|
||||||
input_path: str,
|
input_path: str,
|
||||||
po_file_path: str,
|
po_file_path: str,
|
||||||
output_path: Optional[str] = None,
|
output_path: Optional[str] = None,
|
||||||
@ -165,10 +410,10 @@ def process_rst_file(
|
|||||||
assets_dir: str = "./assets"
|
assets_dir: str = "./assets"
|
||||||
) -> Tuple[bool, str]:
|
) -> Tuple[bool, str]:
|
||||||
"""
|
"""
|
||||||
Process an RST file by replacing translations and handling images.
|
Process a file by replacing translations and handling images.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
input_path: Path to input RST file
|
input_path: Path to input file (RST or HTML)
|
||||||
po_file_path: Path to .po file with translations
|
po_file_path: Path to .po file with translations
|
||||||
output_path: Path to write output (default: add .translated suffix)
|
output_path: Path to write output (default: add .translated suffix)
|
||||||
to_markdown: Whether to convert to markdown
|
to_markdown: Whether to convert to markdown
|
||||||
@ -186,10 +431,29 @@ def process_rst_file(
|
|||||||
print(f"Error: PO file does not exist: {po_file_path}", file=sys.stderr)
|
print(f"Error: PO file does not exist: {po_file_path}", file=sys.stderr)
|
||||||
return False, ""
|
return False, ""
|
||||||
|
|
||||||
|
# Determine file type
|
||||||
|
is_rst = input_path.lower().endswith('.rst')
|
||||||
|
is_html = input_path.lower().endswith(('.html', '.htm'))
|
||||||
|
|
||||||
|
if not (is_rst or is_html):
|
||||||
|
print(f"Error: Unsupported file type: {input_path}. Only .rst, .html, and .htm files are supported.",
|
||||||
|
file=sys.stderr)
|
||||||
|
return False, ""
|
||||||
|
|
||||||
# Determine output path if not specified
|
# Determine output path if not specified
|
||||||
if not output_path:
|
if not output_path:
|
||||||
base, ext = os.path.splitext(input_path)
|
base, ext = os.path.splitext(input_path)
|
||||||
output_path = f"{base}.translated{'.md' if to_markdown else ext}"
|
# Handle .draft.rst/.draft.html case
|
||||||
|
if '.draft.' in base.lower():
|
||||||
|
base = base.replace('.draft', '')
|
||||||
|
|
||||||
|
# Set extension based on conversion type
|
||||||
|
if to_markdown:
|
||||||
|
out_ext = '.md'
|
||||||
|
else:
|
||||||
|
out_ext = ext
|
||||||
|
|
||||||
|
output_path = f"{base}.translated{out_ext}"
|
||||||
|
|
||||||
# Create assets directory
|
# Create assets directory
|
||||||
os.makedirs(assets_dir, exist_ok=True)
|
os.makedirs(assets_dir, exist_ok=True)
|
||||||
@ -204,16 +468,25 @@ def process_rst_file(
|
|||||||
with open(input_path, 'r', encoding='utf-8') as f:
|
with open(input_path, 'r', encoding='utf-8') as f:
|
||||||
content = f.read()
|
content = f.read()
|
||||||
|
|
||||||
|
# Replace template variables
|
||||||
|
content = replace_template_vars(content)
|
||||||
|
|
||||||
# Replace translations
|
# Replace translations
|
||||||
content = replace_translations(content, translations)
|
content = replace_translations(content, translations)
|
||||||
|
|
||||||
# Process images
|
# Process images based on file type
|
||||||
base_dir = os.path.dirname(os.path.abspath(input_path))
|
base_dir = os.path.dirname(os.path.abspath(input_path))
|
||||||
content = process_images(content, base_dir, assets_dir)
|
if is_rst:
|
||||||
|
content = process_rst_images(content, base_dir, assets_dir)
|
||||||
|
elif is_html:
|
||||||
|
content = process_html_images(content, base_dir, assets_dir)
|
||||||
|
|
||||||
# Convert to markdown if requested
|
# Convert to markdown if requested
|
||||||
if to_markdown:
|
if to_markdown:
|
||||||
content = convert_to_markdown(content)
|
content = convert_to_markdown(content, is_rst=is_rst)
|
||||||
|
|
||||||
|
# Create directory for output file if it doesn't exist
|
||||||
|
os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
|
||||||
|
|
||||||
# Write output file
|
# Write output file
|
||||||
with open(output_path, 'w', encoding='utf-8') as f:
|
with open(output_path, 'w', encoding='utf-8') as f:
|
||||||
@ -227,24 +500,133 @@ def process_rst_file(
|
|||||||
return False, ""
|
return False, ""
|
||||||
|
|
||||||
|
|
||||||
|
def find_files(directory: str, include_drafts: bool = False) -> List[str]:
    """
    Find all RST and HTML files in a directory recursively.

    Files are matched by a case-insensitive .rst/.html/.htm suffix.
    Directories and file names are visited in sorted order so the result is
    the same on every run.

    Args:
        directory: Directory to search
        include_drafts: Whether to include draft files (".draft." in the name)

    Returns:
        List of file paths
    """
    result_files = []
    for root, dirs, files in os.walk(directory):
        # Sorting in place makes os.walk descend in a stable order
        dirs.sort()
        for name in sorted(files):
            lowered = name.lower()
            if not lowered.endswith(('.rst', '.html', '.htm')):
                continue
            # Skip draft files if not included
            if not include_drafts and '.draft.' in lowered:
                continue
            result_files.append(os.path.join(root, name))
    return result_files
|
||||||
|
|
||||||
|
|
||||||
|
def process_directory(
    input_dir: str,
    po_file_path: str,
    output_dir: Optional[str] = None,
    to_markdown: bool = False,
    assets_dir: str = "./assets",
    include_drafts: bool = False
) -> bool:
    """
    Process all RST and HTML files in a directory.

    When output_dir is given, each file is written to the same relative
    location beneath it, with ".draft" removed from the file name and the
    extension changed to ".md" if converting to markdown. Otherwise the
    output path is left for process_file to choose.

    Args:
        input_dir: Directory containing files
        po_file_path: Path to .po file with translations
        output_dir: Directory to write output files (default: add .translated suffix)
        to_markdown: Whether to convert to markdown
        assets_dir: Directory to store assets
        include_drafts: Whether to include draft files

    Returns:
        True if all files were processed successfully, False otherwise
    """
    if not os.path.isdir(input_dir):
        print(f"Error: Input directory does not exist: {input_dir}", file=sys.stderr)
        return False

    # Find all RST and HTML files in the directory
    input_files = find_files(input_dir, include_drafts)
    if not input_files:
        print(f"No RST or HTML files found in {input_dir}", file=sys.stderr)
        return False

    success = True
    for input_file in input_files:
        out_path = None
        if output_dir:
            rel_dir, name = os.path.split(os.path.relpath(input_file, input_dir))
            # Drop the ".draft" marker from the file name only (never from a
            # directory component), case-insensitively like the draft filter.
            name = re.sub(r'\.draft(?=\.)', '', name, flags=re.IGNORECASE)
            if to_markdown:
                name = os.path.splitext(name)[0] + '.md'
            out_path = os.path.join(output_dir, rel_dir, name)

        file_success, _ = process_file(
            input_file,
            po_file_path,
            out_path,
            to_markdown,
            assets_dir
        )
        if not file_success:
            success = False

    return success
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Parse arguments and run the script."""
|
"""Parse arguments and run the script."""
|
||||||
|
# Fix for locale/gettext issues by forcing English locale
|
||||||
|
import locale
|
||||||
|
import os
|
||||||
|
# Force 'C' locale to avoid gettext issues
|
||||||
|
os.environ['LC_ALL'] = 'C'
|
||||||
|
locale.setlocale(locale.LC_ALL, 'C')
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description='Process RST files by replacing translations and handling images'
|
description='Process RST and HTML files by replacing translations and handling images'
|
||||||
)
|
)
|
||||||
parser.add_argument('input_path', help='Path to input RST file')
|
parser.add_argument('input_path', help='Path to input file or directory')
|
||||||
parser.add_argument('po_file_path', help='Path to .po file with translations')
|
parser.add_argument('po_file_path', help='Path to .po file with translations')
|
||||||
parser.add_argument('-o', '--output-path', help='Path to write output file')
|
parser.add_argument('-o', '--output-path', help='Path to write output file or directory')
|
||||||
parser.add_argument('--to-markdown', action='store_true', help='Convert output to markdown')
|
parser.add_argument('--to-markdown', action='store_true', help='Convert output to markdown')
|
||||||
parser.add_argument('--assets-dir', default='./assets', help='Directory to store assets (default: ./assets)')
|
parser.add_argument('--assets-dir', default='./assets', help='Directory to store assets (default: ./assets)')
|
||||||
|
parser.add_argument('--include-drafts', action='store_true', help='Process draft files (ending in .draft.rst or .draft.html)')
|
||||||
|
parser.add_argument('--recursive', action='store_true', help='Process directories recursively')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.to_markdown and not (DOCUTILS_AVAILABLE and PANDOC_AVAILABLE):
|
if args.to_markdown and not PANDOC_AVAILABLE:
|
||||||
print("Warning: Markdown conversion requires docutils and pypandoc packages.", file=sys.stderr)
|
print("Warning: Markdown conversion requires pypandoc package.", file=sys.stderr)
|
||||||
print("Install them with: pip install docutils pypandoc", file=sys.stderr)
|
print("Install it with: pip install pypandoc", file=sys.stderr)
|
||||||
|
|
||||||
success, output_path = process_rst_file(
|
# Check if input path is a directory
|
||||||
|
if os.path.isdir(args.input_path):
|
||||||
|
if args.recursive:
|
||||||
|
success = process_directory(
|
||||||
|
args.input_path,
|
||||||
|
args.po_file_path,
|
||||||
|
args.output_path,
|
||||||
|
args.to_markdown,
|
||||||
|
args.assets_dir,
|
||||||
|
args.include_drafts
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
print("Error: Input path is a directory. Use --recursive to process it.", file=sys.stderr)
|
||||||
|
success = False
|
||||||
|
else:
|
||||||
|
# Process a single file
|
||||||
|
success, _ = process_file(
|
||||||
args.input_path,
|
args.input_path,
|
||||||
args.po_file_path,
|
args.po_file_path,
|
||||||
args.output_path,
|
args.output_path,
|
||||||
|
23
run.sh
23
run.sh
@ -2,5 +2,24 @@
|
|||||||
|
|
||||||
export main=$(pwd)/main.py
|
export main=$(pwd)/main.py
|
||||||
export cmd=$(pwd)/cmd.sh
|
export cmd=$(pwd)/cmd.sh
|
||||||
export LANGUAGE=ru
|
|
||||||
find blog -name '*.rst' -exec "$cmd" {} \;
|
|
||||||
|
export LANGUAGES="ar az ca cs da de el es es_AR et_EE fa fi fr gl he hu id it ja ko mg nb nl pl pt pt_BR ro ru sk sl sq sv tr uk zh zh_TW"
|
||||||
|
# produce translated files for all languages and all directories
|
||||||
|
for lang in $LANGUAGES; do
|
||||||
|
export LANGUAGE=$lang
|
||||||
|
export dir=pages
|
||||||
|
export posource=docs.po
|
||||||
|
export ext=html
|
||||||
|
echo "Processing $lang in $dir for $ext"
|
||||||
|
find "$dir" -name "*.$ext" -exec "$cmd" {} \;
|
||||||
|
done
|
||||||
|
|
||||||
|
for lang in $LANGUAGES; do
|
||||||
|
export LANGUAGE=$lang
|
||||||
|
export dir=pages
|
||||||
|
export posource=blog.po
|
||||||
|
export ext=rst
|
||||||
|
echo "Processing $lang in $dir for $ext"
|
||||||
|
find "$dir" -name "*.$ext" -exec "$cmd" {} \;
|
||||||
|
done
|
||||||
|
Reference in New Issue
Block a user