mirror of
https://github.com/go-i2p/www.git
synced 2025-06-09 11:04:09 -04:00
97 lines
3.9 KiB
Python
97 lines
3.9 KiB
Python
#!/usr/bin/env python3
|
|
# filepath: clean_markdown.py
|
|
|
|
import re
|
|
import os
|
|
import sys
|
|
import argparse
|
|
from pathlib import Path
|
|
|
|
# Ordered (pattern, replacement) steps applied to a markdown document.
# The order matters: raw template syntax is removed first, then its
# URL-encoded forms, then escape and whitespace clean-up.
_CLEANUP_STEPS = (
    # {{ expression }} -> removed. The body may not contain '}'.
    (re.compile(r'\{\{\s*([^}]*?)\s*\}\}'), ''),
    # {% tag %} -> removed. The body may not contain '%'.
    (re.compile(r'\{%[^%]*?%\}'), ''),
    # Link whose target is a URL-encoded {{ ... }}, e.g.
    # [network database](%7B%7B%20netdb%20%7D%7D) -> [network database]()
    (re.compile(r'\]\(%7B%7B[^)]*?%7D%7D\)'), ']()'),
    # Any other URL-encoded {{ ... }} on a single line. A '%' is allowed in
    # the body (so %20 works) as long as it does not start the closing
    # %7D%7D marker.
    (re.compile(r'%7B%7B(?:[^%\n]|%(?!7D%7D))*%7D%7D'), ''),
    # Doubled backslash in front of markdown punctuation -> the bare
    # character. '-' is escaped so that '+-.' is not read as a range (which
    # would also match ',').
    (re.compile(r'\\\\([`*_{}\[\]()#+\-.!])'), r'\1'),
    # Collapse runs of spaces *between* non-space characters only, so that
    # leading indentation (code blocks, nested lists) and trailing
    # double-space hard line breaks are left intact.
    (re.compile(r'(?<=\S) {2,}(?=\S)'), ' '),
)


def _strip_template_syntax(content):
    """Return *content* with every step of ``_CLEANUP_STEPS`` applied in order."""
    for pattern, replacement in _CLEANUP_STEPS:
        content = pattern.sub(replacement, content)
    return content


def clean_markdown_file(file_path, dry_run=False):
    """Remove curly-braced template elements from one markdown file.

    The file is read as UTF-8, cleaned, and rewritten in place only when the
    cleaned text differs from what was read. A one-line status message is
    printed in every case.

    Args:
        file_path: Path of the markdown file (anything ``open`` accepts).
        dry_run: When true, report that the file would change but do not
            write to it.

    Returns:
        True if cleaning changes the content (written, or would be written
        in dry-run mode); False if nothing changes or the file could not be
        read or written.
    """
    try:
        # newline='' disables newline translation, so a file with CRLF line
        # endings is written back with CRLF instead of the platform default.
        with open(file_path, 'r', encoding='utf-8', newline='') as f:
            original_content = f.read()

        content = _strip_template_syntax(original_content)

        if content == original_content:
            print(f"No changes needed: {file_path}")
            return False

        if dry_run:
            print(f"Would clean: {file_path} (dry run)")
            return True

        with open(file_path, 'w', encoding='utf-8', newline='') as f:
            f.write(content)
        print(f"Cleaned: {file_path}")
        return True

    except (OSError, ValueError) as e:
        # OSError: missing/unreadable/unwritable path or a directory.
        # ValueError: undecodable bytes (UnicodeDecodeError) or a malformed
        # path. Report and continue so one bad file does not stop a batch.
        print(f"Error processing {file_path}: {e}")
        return False
|
|
|
|
# File extensions (lower-case) that are treated as markdown.
_MARKDOWN_SUFFIXES = ('.md', '.markdown')


def _iter_markdown_files(directory, recursive):
    """Yield the markdown files in *directory*, in sorted path order.

    The suffix is compared case-insensitively, matching the check used for
    paths given directly on the command line, and entries that are not
    regular files (e.g. a directory named ``notes.md``) are skipped.
    """
    pattern = '**/*' if recursive else '*'
    # sorted() materialises the listing before any file is rewritten and
    # gives a stable processing order.
    for candidate in sorted(directory.glob(pattern)):
        if candidate.suffix.lower() in _MARKDOWN_SUFFIXES and candidate.is_file():
            yield candidate


def main():
    """Command-line entry point: clean the given markdown files/directories.

    Each positional path is either a markdown file, which is cleaned
    directly, or a directory, whose markdown files are cleaned (including
    sub-directories when ``--recursive``/``-r`` is given). Any other path is
    reported and skipped. With ``--dry-run`` no file is modified. A summary
    of processed and changed file counts is printed at the end.
    """
    parser = argparse.ArgumentParser(description="Clean markdown files by removing template variables and expressions.")
    parser.add_argument("paths", nargs='+', help="Markdown files or directories to process")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be changed without making changes")
    parser.add_argument("--recursive", "-r", action="store_true", help="Process directories recursively")
    args = parser.parse_args()

    files_processed = 0
    files_changed = 0

    for path in args.paths:
        path_obj = Path(path)
        if path_obj.is_file() and path_obj.suffix.lower() in _MARKDOWN_SUFFIXES:
            targets = [path_obj]
        elif path_obj.is_dir():
            targets = _iter_markdown_files(path_obj, args.recursive)
        else:
            print(f"Skipping {path}: Not a markdown file or directory")
            continue

        for md_file in targets:
            files_processed += 1
            if clean_markdown_file(md_file, args.dry_run):
                files_changed += 1

    print(f"\nSummary: Processed {files_processed} files, changed {files_changed} files")
|
|
|
|
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()