Attempt to resolve rate-limiting issues on large namespaces

This commit is contained in:
eyedeekay
2025-05-24 13:42:13 -04:00
parent 2e65217503
commit 7b81314e57
3 changed files with 357 additions and 382 deletions

140
README.md
View File

@@ -1,140 +0,0 @@
# Go GitHub Dashboard
A pure Go command-line application that generates a static GitHub dashboard by aggregating repository data from GitHub API and RSS feeds.
## Features
- Generate a comprehensive dashboard for any GitHub user or organization
- View open pull requests, issues, and recent discussions for each repository
- Clean, responsive UI with collapsible sections (no JavaScript required)
- Hybrid approach using both GitHub API and RSS feeds for efficient data collection
- Intelligent caching to reduce API calls and handle rate limits
- Fully static output that can be deployed to any static hosting service
## Installation
### Using Go
```bash
go install github.com/yourusername/go-github-dashboard/cmd/go-github-dashboard@latest
```
### From Source
```bash
git clone https://github.com/yourusername/go-github-dashboard.git
cd go-github-dashboard
go build ./cmd/go-github-dashboard
```
## Usage
```bash
# Generate dashboard for a user
go-github-dashboard generate --user octocat --output ./dashboard
# Generate dashboard for an organization
go-github-dashboard generate --org kubernetes --output ./k8s-dashboard
# Use with authentication token (recommended for large organizations)
go-github-dashboard generate --user developername --token $GITHUB_TOKEN --output ./my-dashboard
# Specify cache duration
go-github-dashboard generate --user octocat --cache-ttl 2h --output ./dashboard
# Show version information
go-github-dashboard version
```
### Command-line Options
- `--user` or `-u`: GitHub username to generate dashboard for
- `--org` or `-o`: GitHub organization to generate dashboard for
- `--output` or `-d`: Output directory for the dashboard (default: `./dashboard`)
- `--token` or `-t`: GitHub API token (optional, increases rate limits)
- `--cache-dir`: Directory for caching API responses (default: `./.cache`)
- `--cache-ttl`: Cache time-to-live duration (default: `1h`)
- `--verbose` or `-v`: Enable verbose output
### Environment Variables
You can also set configuration using environment variables:
- `GITHUB_DASHBOARD_USER`: GitHub username
- `GITHUB_DASHBOARD_ORG`: GitHub organization
- `GITHUB_DASHBOARD_OUTPUT`: Output directory
- `GITHUB_DASHBOARD_TOKEN` or `GITHUB_TOKEN`: GitHub API token
- `GITHUB_DASHBOARD_CACHE_DIR`: Cache directory
- `GITHUB_DASHBOARD_CACHE_TTL`: Cache TTL duration
- `GITHUB_DASHBOARD_VERBOSE`: Enable verbose output (set to `true`)
## Output Structure
The generated dashboard follows this structure:
```
output/
├── index.html # Main HTML dashboard
├── style.css # CSS styling
├── README.md # Dashboard information
├── repositories/ # Directory containing markdown files
│ ├── repo1.md # Markdown version of repository data
│ ├── repo1.html # HTML version of repository data
│ ├── repo2.md
│ ├── repo2.html
│ └── ...
```
## Development
The project is structured as follows:
```
go-github-dashboard/
├── cmd/
│ └── go-github-dashboard/
│ └── main.go # Application entry point
├── pkg/
│ ├── api/ # API clients for GitHub and RSS
│ │ ├── github.go # GitHub API client
│ │ ├── rss.go # RSS feed parser
│ │ └── cache.go # Caching implementation
│ ├── cmd/ # Cobra commands
│ │ ├── root.go # Root command and flag definition
│ │ ├── generate.go # Generate dashboard command
│ │ └── version.go # Version information command
│ ├── config/ # Configuration handling with Viper
│ │ └── config.go # Config validation and processing
│ ├── generator/ # HTML and markdown generators
│ │ ├── markdown.go # Markdown file generation
│ │ └── html.go # HTML dashboard generation
│ └── types/ # Type definitions
│ └── types.go # Core data structures
└── README.md
```
## Key Features
- **Concurrent Repository Processing**: Uses Go's concurrency features to fetch and process multiple repositories in parallel
- **Intelligent Data Sourcing**: Prioritizes RSS feeds to avoid API rate limits, falling back to the GitHub API when needed
- **Resilient API Handling**: Implements retries, timeouts, and graceful error handling for network requests
- **Efficient Caching**: Stores API responses to reduce duplicate requests and speed up subsequent runs
- **Pure Go Implementation**: Uses only Go standard library and well-maintained third-party packages
- **No JavaScript Requirement**: Dashboard uses CSS-only techniques for interactivity (collapsible sections)
- **Clean Architecture**: Separation of concerns with distinct packages for different responsibilities
## Example Dashboard
Once generated, the dashboard presents:
- An overview of all repositories with key metrics
- Expandable sections for each repository showing:
- Open pull requests with author and label information
- Open issues organized by priority and label
- Recent discussions categorized by topic
- Responsive design that works on both desktop and mobile browsers
- Both HTML and Markdown versions of all content
## License
[MIT License](LICENSE)

View File

@@ -2,217 +2,11 @@ package api
import ( import (
"context" "context"
"fmt"
"log"
"net/http"
"github.com/go-i2p/go-github-dashboard/pkg/types" "github.com/go-i2p/go-github-dashboard/pkg/types"
"github.com/google/go-github/v58/github" "github.com/google/go-github/v58/github"
"github.com/hashicorp/go-retryablehttp"
"golang.org/x/oauth2"
) )
// GitHubClient wraps the GitHub API client with additional functionality
type GitHubClient struct {
client *github.Client
cache *Cache
rateLimited bool
config *types.Config
}
// NewGitHubClient creates a new GitHub API client
func NewGitHubClient(config *types.Config, cache *Cache) *GitHubClient {
var httpClient *http.Client
// Create a retry client
retryClient := retryablehttp.NewClient()
retryClient.RetryMax = 3
retryClient.Logger = nil // Disable logging from the retry client
if config.GithubToken != "" {
// If token is provided, use it for authentication
ts := oauth2.StaticTokenSource(
&oauth2.Token{AccessToken: config.GithubToken},
)
httpClient = oauth2.NewClient(context.Background(), ts)
retryClient.HTTPClient = httpClient
}
client := github.NewClient(retryClient.StandardClient())
return &GitHubClient{
client: client,
cache: cache,
rateLimited: false,
config: config,
}
}
// GetRepositories fetches repositories for a user or organization
func (g *GitHubClient) GetRepositories(ctx context.Context) ([]types.Repository, error) {
var allRepos []types.Repository
cacheKey := "repos_"
if g.config.User != "" {
cacheKey += g.config.User
} else {
cacheKey += g.config.Organization
}
// Try to get from cache first
if cachedRepos, found := g.cache.Get(cacheKey); found {
if g.config.Verbose {
log.Println("Using cached repositories")
}
return cachedRepos.([]types.Repository), nil
}
if g.config.Verbose {
log.Println("Fetching repositories from GitHub API")
}
for {
if g.config.User != "" {
opts := &github.RepositoryListOptions{
ListOptions: github.ListOptions{PerPage: 100},
Sort: "updated",
}
repos, resp, err := g.client.Repositories.List(ctx, g.config.User, opts)
if err != nil {
return nil, fmt.Errorf("error fetching repositories: %w", err)
}
for _, repo := range repos {
allRepos = append(allRepos, convertRepository(repo))
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
} else {
opts := &github.RepositoryListByOrgOptions{
ListOptions: github.ListOptions{PerPage: 100},
Sort: "updated",
}
repos, resp, err := g.client.Repositories.ListByOrg(ctx, g.config.Organization, opts)
if err != nil {
return nil, fmt.Errorf("error fetching repositories: %w", err)
}
for _, repo := range repos {
allRepos = append(allRepos, convertRepository(repo))
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
}
}
// Cache the results
g.cache.Set(cacheKey, allRepos)
return allRepos, nil
}
// GetPullRequests fetches open pull requests for a repository
func (g *GitHubClient) GetPullRequests(ctx context.Context, owner, repo string) ([]types.PullRequest, error) {
var allPRs []types.PullRequest
cacheKey := fmt.Sprintf("prs_%s_%s", owner, repo)
// Try to get from cache first
if cachedPRs, found := g.cache.Get(cacheKey); found {
if g.config.Verbose {
log.Printf("Using cached pull requests for %s/%s", owner, repo)
}
return cachedPRs.([]types.PullRequest), nil
}
if g.config.Verbose {
log.Printf("Fetching pull requests for %s/%s", owner, repo)
}
opts := &github.PullRequestListOptions{
State: "open",
Sort: "updated",
Direction: "desc",
ListOptions: github.ListOptions{PerPage: 100},
}
for {
prs, resp, err := g.client.PullRequests.List(ctx, owner, repo, opts)
if err != nil {
return nil, fmt.Errorf("error fetching pull requests: %w", err)
}
for _, pr := range prs {
allPRs = append(allPRs, convertPullRequest(pr))
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
}
// Cache the results
g.cache.Set(cacheKey, allPRs)
return allPRs, nil
}
// GetIssues fetches open issues for a repository
func (g *GitHubClient) GetIssues(ctx context.Context, owner, repo string) ([]types.Issue, error) {
var allIssues []types.Issue
cacheKey := fmt.Sprintf("issues_%s_%s", owner, repo)
// Try to get from cache first
if cachedIssues, found := g.cache.Get(cacheKey); found {
if g.config.Verbose {
log.Printf("Using cached issues for %s/%s", owner, repo)
}
return cachedIssues.([]types.Issue), nil
}
if g.config.Verbose {
log.Printf("Fetching issues for %s/%s", owner, repo)
}
opts := &github.IssueListByRepoOptions{
State: "open",
Sort: "updated",
Direction: "desc",
ListOptions: github.ListOptions{PerPage: 100},
}
for {
issues, resp, err := g.client.Issues.ListByRepo(ctx, owner, repo, opts)
if err != nil {
return nil, fmt.Errorf("error fetching issues: %w", err)
}
for _, issue := range issues {
// Skip pull requests (they appear in the issues API)
if issue.PullRequestLinks != nil {
continue
}
allIssues = append(allIssues, convertIssue(issue))
}
if resp.NextPage == 0 {
break
}
opts.Page = resp.NextPage
}
// Cache the results
g.cache.Set(cacheKey, allIssues)
return allIssues, nil
}
// GetDiscussions fetches recent discussions for a repository // GetDiscussions fetches recent discussions for a repository
func (g *GitHubClient) GetDiscussions(ctx context.Context, owner, repo string) ([]types.Discussion, error) { func (g *GitHubClient) GetDiscussions(ctx context.Context, owner, repo string) ([]types.Discussion, error) {
// Note: The GitHub API v3 doesn't have a direct endpoint for discussions // Note: The GitHub API v3 doesn't have a direct endpoint for discussions
@@ -221,42 +15,6 @@ func (g *GitHubClient) GetDiscussions(ctx context.Context, owner, repo string) (
return []types.Discussion{}, nil return []types.Discussion{}, nil
} }
// GetWorkflowRuns fetches recent workflow runs for a repository
func (g *GitHubClient) GetWorkflowRuns(ctx context.Context, owner, repo string) ([]types.WorkflowRun, error) {
var allRuns []types.WorkflowRun
cacheKey := fmt.Sprintf("workflow_runs_%s_%s", owner, repo)
// Try to get from cache first
if cachedRuns, found := g.cache.Get(cacheKey); found {
if g.config.Verbose {
log.Printf("Using cached workflow runs for %s/%s", owner, repo)
}
return cachedRuns.([]types.WorkflowRun), nil
}
if g.config.Verbose {
log.Printf("Fetching workflow runs for %s/%s", owner, repo)
}
opts := &github.ListWorkflowRunsOptions{
ListOptions: github.ListOptions{PerPage: 10}, // Limit to 10 most recent runs
}
runs, _, err := g.client.Actions.ListRepositoryWorkflowRuns(ctx, owner, repo, opts)
if err != nil {
return nil, fmt.Errorf("error fetching workflow runs: %w", err)
}
for _, run := range runs.WorkflowRuns {
allRuns = append(allRuns, convertWorkflowRun(run))
}
// Cache the results
g.cache.Set(cacheKey, allRuns)
return allRuns, nil
}
// Helper functions to convert GitHub API types to our domain types // Helper functions to convert GitHub API types to our domain types
func convertRepository(repo *github.Repository) types.Repository { func convertRepository(repo *github.Repository) types.Repository {
r := types.Repository{ r := types.Repository{

357
pkg/api/github_client.go Normal file
View File

@@ -0,0 +1,357 @@
package api
import (
"context"
"fmt"
"log"
"net/http"
"strconv"
"time"
"github.com/go-i2p/go-github-dashboard/pkg/types"
"github.com/google/go-github/v58/github"
"github.com/hashicorp/go-retryablehttp"
"golang.org/x/oauth2"
)
// GitHubClient wraps the GitHub API client with additional functionality:
// response caching and client-side rate-limit tracking.
type GitHubClient struct {
	client      *github.Client // underlying go-github REST client
	cache       *Cache         // cache of previously fetched responses
	config      *types.Config  // user/org, token, and verbosity settings
	rateLimiter *RateLimiter   // last-seen rate-limit state from response headers
}
// RateLimiter manages GitHub API rate limiting. Values are populated from
// the X-RateLimit-* headers of API responses (see checkRateLimit).
type RateLimiter struct {
	remaining   int       // requests remaining in the current window (X-RateLimit-Remaining)
	resetTime   time.Time // when the window resets (X-RateLimit-Reset, Unix seconds)
	lastChecked time.Time // when these values were last refreshed
}
// NewGitHubClient creates a new GitHub API client backed by a retrying HTTP
// client. When a token is configured, requests are authenticated via OAuth2
// (which raises the API rate limit). The retry policy transparently waits
// out rate-limit (403 + X-RateLimit-Remaining: 0) responses.
func NewGitHubClient(config *types.Config, cache *Cache) *GitHubClient {
	var httpClient *http.Client
	// Create a retry client with custom retry policy.
	retryClient := retryablehttp.NewClient()
	retryClient.RetryMax = 5
	retryClient.RetryWaitMin = 1 * time.Second
	retryClient.RetryWaitMax = 30 * time.Second
	retryClient.Logger = nil
	// Custom retry policy for rate limiting: on a 403 caused by an exhausted
	// rate limit, wait until the advertised reset time, then retry.
	retryClient.CheckRetry = func(ctx context.Context, resp *http.Response, err error) (bool, error) {
		if resp != nil && resp.StatusCode == 403 {
			// Check if it's a rate limit error.
			if resp.Header.Get("X-RateLimit-Remaining") == "0" {
				if resetTime := resp.Header.Get("X-RateLimit-Reset"); resetTime != "" {
					if resetTimestamp, parseErr := strconv.ParseInt(resetTime, 10, 64); parseErr == nil {
						waitTime := time.Until(time.Unix(resetTimestamp, 0))
						// Only wait for sane durations; a clock skew or bogus
						// header should not stall us for hours.
						if waitTime > 0 && waitTime < 1*time.Hour {
							log.Printf("Rate limit exceeded. Waiting %v until reset...", waitTime)
							// Honor context cancellation instead of an
							// unconditional time.Sleep, so callers can abort.
							select {
							case <-time.After(waitTime + 5*time.Second): // 5s buffer past reset
								return true, nil
							case <-ctx.Done():
								return false, ctx.Err()
							}
						}
					}
				}
			}
		}
		return retryablehttp.DefaultRetryPolicy(ctx, resp, err)
	}
	if config.GithubToken != "" {
		// Authenticated transport: wrap requests with the static token.
		ts := oauth2.StaticTokenSource(
			&oauth2.Token{AccessToken: config.GithubToken},
		)
		httpClient = oauth2.NewClient(context.Background(), ts)
		retryClient.HTTPClient = httpClient
	}
	client := github.NewClient(retryClient.StandardClient())
	return &GitHubClient{
		client:      client,
		cache:       cache,
		config:      config,
		rateLimiter: &RateLimiter{},
	}
}
// checkRateLimit records the rate-limit state advertised by a GitHub API
// response's X-RateLimit-Remaining / X-RateLimit-Reset headers so that
// subsequent calls can throttle themselves via waitForRateLimit.
// NOTE(review): rateLimiter fields are written without synchronization;
// confirm this client is not shared across goroutines, or add a mutex.
func (g *GitHubClient) checkRateLimit(resp *github.Response) {
	if resp == nil || resp.Response == nil {
		return
	}
	if v := resp.Header.Get("X-RateLimit-Remaining"); v != "" {
		if n, err := strconv.Atoi(v); err == nil {
			g.rateLimiter.remaining = n
		}
	}
	if v := resp.Header.Get("X-RateLimit-Reset"); v != "" {
		if ts, err := strconv.ParseInt(v, 10, 64); err == nil {
			g.rateLimiter.resetTime = time.Unix(ts, 0)
		}
	}
	g.rateLimiter.lastChecked = time.Now()
	if g.config.Verbose {
		log.Printf("Rate limit remaining: %d, resets at: %v",
			g.rateLimiter.remaining, g.rateLimiter.resetTime)
	}
}
// waitForRateLimit blocks until the GitHub rate limit resets when fewer than
// 100 requests remain in the current window, returning early with ctx.Err()
// if the context is cancelled while waiting. It is a no-op when plenty of
// budget remains, when no reset time is known, or when the computed wait is
// non-positive or implausibly long (>= 1h).
func (g *GitHubClient) waitForRateLimit(ctx context.Context) error {
	if g.rateLimiter.remaining >= 100 || g.rateLimiter.resetTime.IsZero() {
		return nil
	}
	waitTime := time.Until(g.rateLimiter.resetTime)
	if waitTime <= 0 || waitTime >= 1*time.Hour {
		return nil
	}
	log.Printf("Approaching rate limit (%d remaining). Waiting %v for reset...",
		g.rateLimiter.remaining, waitTime)
	select {
	case <-time.After(waitTime + 5*time.Second): // small buffer past reset
		log.Println("Rate limit reset. Continuing...")
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}
// GetRepositories fetches all repositories for the configured user or
// organization, paginating through the API (100 per page, sorted by update
// time) and caching the converted result under a user/org-derived key.
// It throttles via waitForRateLimit before each page and inserts a short
// courtesy delay between pages. Returns an error if any page fails or the
// context is cancelled.
func (g *GitHubClient) GetRepositories(ctx context.Context) ([]types.Repository, error) {
	var allRepos []types.Repository
	cacheKey := "repos_"
	if g.config.User != "" {
		cacheKey += g.config.User
	} else {
		cacheKey += g.config.Organization
	}
	// Serve from cache when possible. A type-mismatched cache entry is
	// treated as a miss instead of panicking on the type assertion.
	if cached, found := g.cache.Get(cacheKey); found {
		if repos, ok := cached.([]types.Repository); ok {
			if g.config.Verbose {
				log.Println("Using cached repositories")
			}
			return repos, nil
		}
	}
	if g.config.Verbose {
		log.Println("Fetching repositories from GitHub API")
	}
	page := 1
	for {
		// Check rate limit before making the request.
		if err := g.waitForRateLimit(ctx); err != nil {
			return nil, err
		}
		var repos []*github.Repository
		var resp *github.Response
		var err error
		if g.config.User != "" {
			opts := &github.RepositoryListOptions{
				ListOptions: github.ListOptions{PerPage: 100, Page: page},
				Sort:        "updated",
			}
			repos, resp, err = g.client.Repositories.List(ctx, g.config.User, opts)
		} else {
			opts := &github.RepositoryListByOrgOptions{
				ListOptions: github.ListOptions{PerPage: 100, Page: page},
				Sort:        "updated",
			}
			repos, resp, err = g.client.Repositories.ListByOrg(ctx, g.config.Organization, opts)
		}
		if err != nil {
			return nil, fmt.Errorf("error fetching repositories (page %d): %w", page, err)
		}
		g.checkRateLimit(resp)
		for _, repo := range repos {
			allRepos = append(allRepos, convertRepository(repo))
		}
		if g.config.Verbose {
			log.Printf("Fetched page %d with %d repositories", page, len(repos))
		}
		if resp.NextPage == 0 {
			break
		}
		page = resp.NextPage
		// Add a small delay between requests to be respectful.
		select {
		case <-time.After(100 * time.Millisecond):
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}
	g.cache.Set(cacheKey, allRepos)
	return allRepos, nil
}
// GetPullRequests fetches all open pull requests for owner/repo, paginating
// 100 at a time sorted by most recent update, with rate-limit throttling and
// a short delay between pages. Results are cached per repository.
// Returns an error if any page fails or the context is cancelled.
func (g *GitHubClient) GetPullRequests(ctx context.Context, owner, repo string) ([]types.PullRequest, error) {
	var allPRs []types.PullRequest
	cacheKey := fmt.Sprintf("prs_%s_%s", owner, repo)
	// Serve from cache when possible; a mismatched entry type falls through
	// to a refetch instead of panicking on the type assertion.
	if cached, found := g.cache.Get(cacheKey); found {
		if prs, ok := cached.([]types.PullRequest); ok {
			if g.config.Verbose {
				log.Printf("Using cached pull requests for %s/%s", owner, repo)
			}
			return prs, nil
		}
	}
	if g.config.Verbose {
		log.Printf("Fetching pull requests for %s/%s", owner, repo)
	}
	page := 1
	for {
		if err := g.waitForRateLimit(ctx); err != nil {
			return nil, err
		}
		opts := &github.PullRequestListOptions{
			State:       "open",
			Sort:        "updated",
			Direction:   "desc",
			ListOptions: github.ListOptions{PerPage: 100, Page: page},
		}
		prs, resp, err := g.client.PullRequests.List(ctx, owner, repo, opts)
		if err != nil {
			return nil, fmt.Errorf("error fetching pull requests (page %d): %w", page, err)
		}
		g.checkRateLimit(resp)
		for _, pr := range prs {
			allPRs = append(allPRs, convertPullRequest(pr))
		}
		if resp.NextPage == 0 {
			break
		}
		page = resp.NextPage
		// Small courtesy delay between pages.
		select {
		case <-time.After(50 * time.Millisecond):
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}
	g.cache.Set(cacheKey, allPRs)
	return allPRs, nil
}
// GetIssues fetches all open issues for owner/repo, paginating 100 at a time
// sorted by most recent update. Pull requests (which the issues API also
// returns) are filtered out. Results are cached per repository. Returns an
// error if any page fails or the context is cancelled.
func (g *GitHubClient) GetIssues(ctx context.Context, owner, repo string) ([]types.Issue, error) {
	var allIssues []types.Issue
	cacheKey := fmt.Sprintf("issues_%s_%s", owner, repo)
	// Serve from cache when possible; a mismatched entry type falls through
	// to a refetch instead of panicking on the type assertion.
	if cached, found := g.cache.Get(cacheKey); found {
		if issues, ok := cached.([]types.Issue); ok {
			if g.config.Verbose {
				log.Printf("Using cached issues for %s/%s", owner, repo)
			}
			return issues, nil
		}
	}
	if g.config.Verbose {
		log.Printf("Fetching issues for %s/%s", owner, repo)
	}
	page := 1
	for {
		if err := g.waitForRateLimit(ctx); err != nil {
			return nil, err
		}
		opts := &github.IssueListByRepoOptions{
			State:       "open",
			Sort:        "updated",
			Direction:   "desc",
			ListOptions: github.ListOptions{PerPage: 100, Page: page},
		}
		issues, resp, err := g.client.Issues.ListByRepo(ctx, owner, repo, opts)
		if err != nil {
			return nil, fmt.Errorf("error fetching issues (page %d): %w", page, err)
		}
		g.checkRateLimit(resp)
		for _, issue := range issues {
			// Skip pull requests (they appear in the issues API).
			if issue.PullRequestLinks != nil {
				continue
			}
			allIssues = append(allIssues, convertIssue(issue))
		}
		if resp.NextPage == 0 {
			break
		}
		page = resp.NextPage
		// Small courtesy delay between pages.
		select {
		case <-time.After(50 * time.Millisecond):
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}
	g.cache.Set(cacheKey, allIssues)
	return allIssues, nil
}
// GetWorkflowRuns fetches the 10 most recent workflow runs for owner/repo
// (single page, deliberately unpaginated to limit API usage), throttling via
// waitForRateLimit and caching the converted result. Returns an error if the
// request fails or the context is cancelled while waiting.
func (g *GitHubClient) GetWorkflowRuns(ctx context.Context, owner, repo string) ([]types.WorkflowRun, error) {
	var allRuns []types.WorkflowRun
	cacheKey := fmt.Sprintf("workflow_runs_%s_%s", owner, repo)
	// Serve from cache when possible; a mismatched entry type falls through
	// to a refetch instead of panicking on the type assertion.
	if cached, found := g.cache.Get(cacheKey); found {
		if runs, ok := cached.([]types.WorkflowRun); ok {
			if g.config.Verbose {
				log.Printf("Using cached workflow runs for %s/%s", owner, repo)
			}
			return runs, nil
		}
	}
	if g.config.Verbose {
		log.Printf("Fetching workflow runs for %s/%s", owner, repo)
	}
	if err := g.waitForRateLimit(ctx); err != nil {
		return nil, err
	}
	opts := &github.ListWorkflowRunsOptions{
		ListOptions: github.ListOptions{PerPage: 10},
	}
	runs, resp, err := g.client.Actions.ListRepositoryWorkflowRuns(ctx, owner, repo, opts)
	if err != nil {
		return nil, fmt.Errorf("error fetching workflow runs: %w", err)
	}
	g.checkRateLimit(resp)
	for _, run := range runs.WorkflowRuns {
		allRuns = append(allRuns, convertWorkflowRun(run))
	}
	g.cache.Set(cacheKey, allRuns)
	return allRuns, nil
}