Files
go-gh-page/pkg/git/git.go
2025-05-05 21:55:21 -04:00

351 lines
9.4 KiB
Go

package git
import (
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"time"

	"github.com/go-git/go-git/v5"
	"github.com/go-git/go-git/v5/plumbing"
	"github.com/go-git/go-git/v5/plumbing/object"
)
// RepositoryData holds everything collected about a single repository:
// its identity, documentation content, commit statistics, license and the
// image assets found in the working tree. GetRepositoryData fills it in.
type RepositoryData struct {
// Owner and Name are the values passed to GetRepositoryData.
Owner string
Name string
// Description is read from the repository's git config; when that is empty
// it falls back to a short excerpt extracted from the README.
Description string
// URL is built as https://github.com/<Owner>/<Name>.
URL string
// Content
// ReadmeContent and ReadmePath describe the README selected while walking
// the repository; ReadmePath is relative to the scanned repository path.
ReadmeContent string
ReadmePath string
MarkdownFiles map[string]string // path relative to the repository path -> file content
// Stats from git
// Contributors is ordered by commit count (descending) and capped at five entries.
Contributors []Contributor
// CommitCount is the number of commits visited in the log starting at HEAD.
CommitCount int
// LastCommitDate is the most recent author timestamp among those commits.
LastCommitDate time.Time
// License is a label derived from the first license file that could be
// read, or the generic "License" when its type is not recognised. It stays
// empty when no license file was read.
License string
// Set of image paths in the repository (to copy to output)
ImageFiles map[string]string // path relative to the repository path -> path on disk
}
// Contributor represents a repository contributor, identified by the author
// email found on commits.
type Contributor struct {
// Name is the author name taken from the first commit seen for this email.
Name string
// Email is the commit author email; contributors are grouped by this value.
Email string
// Commits is the number of visited commits authored with this email.
Commits int
// AvatarURL is the avatar image to display. GetRepositoryData currently
// assigns the same fixed placeholder URL to every contributor.
AvatarURL string
}
// CloneRepository returns a repository rooted at destination, cloning url
// into it when no usable clone is already present.
//
// If destination exists and opens as a Git repository it is reused as-is;
// url and branch are not checked against the existing clone. If it exists
// but cannot be opened as a repository, it is removed and cloned afresh.
//
// branch names the remote branch to clone. An empty branch, "main" or
// "master" sets no explicit reference, leaving the choice to the clone's
// default behaviour.
//
// It returns the opened or freshly cloned repository, or an error when the
// unusable destination cannot be removed or the clone itself fails.
func CloneRepository(url, destination, branch string) (*git.Repository, error) {
	if _, err := os.Stat(destination); err == nil {
		// Directory exists, try to reuse it.
		repo, err := git.PlainOpen(destination)
		if err == nil {
			fmt.Println("Using existing repository clone")
			return repo, nil
		}
		// Not a usable repository: clear it so the clone starts from scratch.
		// A failure here is reported instead of letting the clone fail later
		// with a less helpful message.
		if err := os.RemoveAll(destination); err != nil {
			return nil, fmt.Errorf("failed to remove existing directory %s: %w", destination, err)
		}
	}

	options := &git.CloneOptions{URL: url}

	// Only pin a reference for an explicitly requested, non-default branch.
	// An empty name would otherwise produce the invalid reference "refs/heads/".
	switch branch {
	case "", "main", "master":
		// Use the default reference.
	default:
		options.ReferenceName = plumbing.NewBranchReferenceName(branch)
	}

	return git.PlainClone(destination, false, options)
}
// GetRepositoryData collects metadata, commit statistics and documentation
// assets for a cloned repository.
//
// repo is the opened repository. owner and name identify it and are used to
// build its GitHub URL. repoPath is the directory on disk that is walked to
// find markdown, image and license files; ".git", "node_modules", "vendor"
// and ".github" sub-directories are not descended into.
//
// The result lists at most the five contributors with the most commits.
// An error is returned when HEAD or the commit log cannot be read, when the
// directory walk fails, or when a markdown file cannot be read.
func GetRepositoryData(repo *git.Repository, owner, name, repoPath string) (*RepositoryData, error) {
	repoData := &RepositoryData{
		Owner:         owner,
		Name:          name,
		URL:           fmt.Sprintf("https://github.com/%s/%s", owner, name),
		MarkdownFiles: make(map[string]string),
		ImageFiles:    make(map[string]string),
	}

	// Best effort: an unreadable config simply leaves the description empty
	// (it may still be filled from the README further down).
	config, err := repo.Config()
	if err == nil && config != nil {
		repoData.Description = config.Raw.Section("").Option("description")
	}

	// Get HEAD reference
	ref, err := repo.Head()
	if err != nil {
		return nil, fmt.Errorf("failed to get HEAD reference: %w", err)
	}

	// Get commit history
	cIter, err := repo.Log(&git.LogOptions{From: ref.Hash()})
	if err != nil {
		return nil, fmt.Errorf("failed to get commit history: %w", err)
	}

	// Contributors are appended in the order the log first yields them, with
	// an email -> slice index map for lookups. Collecting them this way (and
	// not by ranging over a map) keeps the order of equally ranked
	// contributors, and therefore the top-five cut, reproducible.
	contributorIndex := make(map[string]int)
	err = cIter.ForEach(func(c *object.Commit) error {
		repoData.CommitCount++

		// Keep the most recent author timestamp.
		if repoData.LastCommitDate.IsZero() || c.Author.When.After(repoData.LastCommitDate) {
			repoData.LastCommitDate = c.Author.When
		}

		email := c.Author.Email
		idx, seen := contributorIndex[email]
		if !seen {
			idx = len(repoData.Contributors)
			contributorIndex[email] = idx
			repoData.Contributors = append(repoData.Contributors, Contributor{
				Name:  c.Author.Name,
				Email: email,
				// Placeholder avatar shared by every contributor; no
				// per-user avatar lookup is performed.
				AvatarURL: "https://avatars.githubusercontent.com/u/0?v=4",
			})
		}
		repoData.Contributors[idx].Commits++
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("failed to process commits: %w", err)
	}

	// Order by commit count and keep only the top five.
	sortContributorsByCommits(repoData.Contributors)
	if len(repoData.Contributors) > 5 {
		repoData.Contributors = repoData.Contributors[:5]
	}

	// Walk the repository to find markdown, image and license files.
	err = filepath.WalkDir(repoPath, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}

		if d.IsDir() {
			// Never skip the root itself: a checkout directory that happens
			// to be called ".github" or "vendor" must still be scanned.
			if path != repoPath {
				switch d.Name() {
				case ".git", "node_modules", "vendor", ".github":
					return filepath.SkipDir
				}
			}
			return nil
		}

		relativePath, err := filepath.Rel(repoPath, path)
		if err != nil {
			return err
		}

		// Handle markdown files
		if isMarkdownFile(d.Name()) {
			content, err := os.ReadFile(path)
			if err != nil {
				return fmt.Errorf("failed to read file %s: %w", path, err)
			}
			text := string(content)
			repoData.MarkdownFiles[relativePath] = text

			if isReadmeFile(d.Name()) {
				// The first README found wins, except that a README at the
				// repository root replaces one picked up from a
				// sub-directory, and an exact "README.md" always wins.
				atRoot := filepath.Dir(relativePath) == "."
				haveRoot := repoData.ReadmePath != "" && filepath.Dir(repoData.ReadmePath) == "."
				if repoData.ReadmePath == "" || relativePath == "README.md" || (atRoot && !haveRoot) {
					repoData.ReadmePath = relativePath
					repoData.ReadmeContent = text
				}
			}
			fmt.Printf("Found markdown file: %s\n", relativePath)
		}

		// Handle image files
		if isImageFile(d.Name()) {
			repoData.ImageFiles[relativePath] = path
			fmt.Printf("Found image file: %s\n", relativePath)
		}

		// Only the first readable license file is considered. Read errors
		// are deliberately ignored: the license label is optional.
		if isLicenseFile(d.Name()) && repoData.License == "" {
			content, err := os.ReadFile(path)
			if err == nil {
				if licenseType := detectLicenseType(string(content)); licenseType != "" {
					repoData.License = licenseType
				} else {
					repoData.License = "License"
				}
			}
		}
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("failed to walk repository: %w", err)
	}

	// If we didn't find a description, try to extract one from the README.
	if repoData.Description == "" && repoData.ReadmeContent != "" {
		repoData.Description = extractDescriptionFromReadme(repoData.ReadmeContent)
	}
	return repoData, nil
}
// isMarkdownFile reports whether filename ends in one of the recognised
// markdown extensions (.md, .markdown, .mdown, .mkdn), ignoring case.
func isMarkdownFile(filename string) bool {
	name := strings.ToLower(filename)
	switch {
	case strings.HasSuffix(name, ".md"),
		strings.HasSuffix(name, ".markdown"),
		strings.HasSuffix(name, ".mdown"),
		strings.HasSuffix(name, ".mkdn"):
		return true
	default:
		return false
	}
}
// isReadmeFile reports whether filename is a markdown file whose name
// starts with "readme." (case-insensitive).
func isReadmeFile(filename string) bool {
	if !isMarkdownFile(filename) {
		return false
	}
	return strings.HasPrefix(strings.ToLower(filename), "readme.")
}
// isImageFile reports whether filename ends in one of the recognised image
// extensions (.jpg, .jpeg, .png, .gif, .svg, .webp), ignoring case.
func isImageFile(filename string) bool {
	name := strings.ToLower(filename)
	switch {
	case strings.HasSuffix(name, ".jpg"),
		strings.HasSuffix(name, ".jpeg"),
		strings.HasSuffix(name, ".png"),
		strings.HasSuffix(name, ".gif"),
		strings.HasSuffix(name, ".svg"),
		strings.HasSuffix(name, ".webp"):
		return true
	default:
		return false
	}
}
// isLicenseFile reports whether filename, compared case-insensitively, is
// one of the conventional license file names: "license", "license.md",
// "license.txt" or "copying".
func isLicenseFile(filename string) bool {
	switch strings.ToLower(filename) {
	case "license", "license.md", "license.txt", "copying":
		return true
	default:
		return false
	}
}
// detectLicenseType guesses the license family from the text of a license
// file using case-insensitive keyword matching.
//
// It returns one of "MIT License", "Apache License", "Mozilla Public
// License", "GPL License" or "BSD License", or the empty string when no
// keyword matches. The first matching case wins, so the order below matters.
func detectLicenseType(content string) string {
	text := strings.ToLower(content)

	switch {
	case strings.Contains(text, "mit license"):
		return "MIT License"
	case strings.Contains(text, "apache license"):
		return "Apache License"
	// Mozilla must be tested before GPL: the MPL 2.0 text itself names the
	// GNU General Public License in its "Secondary License" definition and
	// would otherwise be reported as GPL.
	case strings.Contains(text, "mozilla public license"):
		return "Mozilla Public License"
	case strings.Contains(text, "gnu general public license"),
		strings.Contains(text, "gpl"):
		return "GPL License"
	case strings.Contains(text, "bsd"):
		return "BSD License"
	default:
		return ""
	}
}
// readmeLeadRe matches a level-one markdown heading followed by the next
// non-empty line, which is captured as the candidate description. It is
// compiled once here so each call does not recompile it.
var readmeLeadRe = regexp.MustCompile(`(?m)^#\s+.+\n+(.+)`)

// extractDescriptionFromReadme derives a short, single-line description
// from README markdown.
//
// It prefers the first non-empty line following a level-one heading. When
// that line is blank after trimming or is itself a heading, or when there is
// no level-one heading, it falls back to the first non-empty line of the
// document that is not a heading. The result is trimmed of surrounding
// whitespace and limited to 150 characters; longer text is cut to 147
// characters plus "...". Characters are counted as runes so multi-byte text
// is never split mid-character. It returns "" when nothing suitable exists.
func extractDescriptionFromReadme(content string) string {
	const maxChars = 150

	// shorten caps s at maxChars runes, marking the cut with an ellipsis.
	shorten := func(s string) string {
		runes := []rune(s)
		if len(runes) <= maxChars {
			return s
		}
		return string(runes[:maxChars-3]) + "..."
	}

	if m := readmeLeadRe.FindStringSubmatch(content); len(m) > 1 {
		// Trimming also drops the stray "\r" captured from CRLF files.
		if lead := strings.TrimSpace(m[1]); lead != "" && !strings.HasPrefix(lead, "#") {
			return shorten(lead)
		}
	}

	for _, line := range strings.Split(content, "\n") {
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		return shorten(line)
	}
	return ""
}
// sortContributorsByCommits sorts contributors in place by commit count,
// highest first. Contributors with equal counts are ordered by name and
// then by email, so the result does not depend on the order of the input.
func sortContributorsByCommits(contributors []Contributor) {
	sort.SliceStable(contributors, func(i, j int) bool {
		a, b := contributors[i], contributors[j]
		if a.Commits != b.Commits {
			return a.Commits > b.Commits
		}
		if a.Name != b.Name {
			return a.Name < b.Name
		}
		return a.Email < b.Email
	})
}
// GetCommitStats returns the number of commits visited when walking the
// log from the repository's HEAD. It fails when HEAD cannot be resolved,
// when the log cannot be opened, or when iterating the log reports an error.
func GetCommitStats(repo *git.Repository) (int, error) {
	head, err := repo.Head()
	if err != nil {
		return 0, fmt.Errorf("failed to get HEAD reference: %w", err)
	}

	history, err := repo.Log(&git.LogOptions{From: head.Hash()})
	if err != nil {
		return 0, fmt.Errorf("failed to get commit history: %w", err)
	}

	// Tally every commit the iterator yields; the commit itself is not inspected.
	var total int
	walkErr := history.ForEach(func(*object.Commit) error {
		total++
		return nil
	})
	if walkErr != nil {
		return 0, fmt.Errorf("failed to process commits: %w", walkErr)
	}
	return total, nil
}