perf: optimize episode fetching and metadata scraping

This commit is contained in:
2026-05-02 23:57:24 +02:00
parent 56f0951d5e
commit a83ab2e33f
7 changed files with 341 additions and 71 deletions

View File

@@ -12,6 +12,7 @@ import (
"strconv"
"strings"
"sync"
"time"
"mal/integrations/jikan"
database "mal/internal/db"
@@ -57,32 +58,36 @@ func (h *Handler) HandleWatchPage(w http.ResponseWriter, r *http.Request) {
return
}
// Try to get video episodes first (for thumbnails)
episodes, err := h.jikanClient.GetVideoEpisodes(r.Context(), id, 1)
if err != nil || len(episodes.Data) == 0 {
// Fallback to standard episodes if no video episodes
episodes, err = h.jikanClient.GetEpisodes(r.Context(), id, 1)
if err != nil {
log.Printf("watch error: %v", err)
// Get essential episodes (first and last pages)
allEpisodes, err := h.jikanClient.GetAllEpisodes(r.Context(), id)
if err != nil {
log.Printf("watch error fetching episodes: %v", err)
}
// Fetch any metadata overlays (thumbnails)
videoEpisodes, _ := h.jikanClient.GetVideoEpisodes(r.Context(), id, 1)
videoMeta := make(map[int]jikan.Episode)
for _, ve := range videoEpisodes.Data {
videoMeta[ve.MalID] = ve
}
for i, ep := range allEpisodes {
if ve, ok := videoMeta[ep.MalID]; ok {
if ve.Images != nil && ve.Images.Jpg.ImageURL != "" {
allEpisodes[i].Images = ve.Images
}
}
}
var wg sync.WaitGroup
for i := range episodes.Data {
if episodes.Data[i].Images == nil {
episodes.Data[i].Images = &jikan.EpisodeImages{}
// Deduplicate and prep the list
seen := make(map[int]bool)
unique := make([]jikan.Episode, 0)
for _, ep := range allEpisodes {
if !seen[ep.MalID] {
seen[ep.MalID] = true
unique = append(unique, ep)
}
wg.Add(1)
go func(idx int) {
defer wg.Done()
episodes.Data[idx].Images.Jpg.ImageURL = episodes.Data[idx].GetFallbackImage(id)
}(i)
}
wg.Wait()
sort.Slice(episodes.Data, func(i, j int) bool {
return episodes.Data[i].MalID < episodes.Data[j].MalID
})
user := middleware.GetUser(r.Context())
@@ -131,52 +136,75 @@ func (h *Handler) HandleWatchPage(w http.ResponseWriter, r *http.Request) {
}
}
if maxCount > len(episodes.Data) {
// Fetch metadata for the missing episodes
start := len(episodes.Data) + 1
for i := start; i <= maxCount; i++ {
epStr := strconv.Itoa(i)
meta, err := h.svc.GetEpisodeMetadata(r.Context(), id, epStr)
epMap := make(map[int]jikan.Episode)
for _, ep := range unique {
epMap[ep.MalID] = ep
}
title := fmt.Sprintf("Episode %d", i)
imgURL := ""
if err == nil && meta != nil {
if info, ok := meta["episodeInfo"].(map[string]any); ok {
if thumbs, ok := info["thumbnails"].([]any); ok && len(thumbs) > 0 {
if firstThumb, ok := thumbs[0].(string); ok {
imgURL = firstThumb
}
}
}
if notes, ok := meta["notes"].(string); ok && notes != "" {
title = notes
}
if maxCount > 0 {
var fullList []jikan.Episode
for i := 1; i <= maxCount; i++ {
if ep, ok := epMap[i]; ok {
fullList = append(fullList, ep)
} else {
fullList = append(fullList, jikan.Episode{
MalID: i,
Episode: fmt.Sprintf("Episode %d", i),
Title: fmt.Sprintf("Episode %d", i),
})
}
if imgURL == "" {
// Last resort fallback
tmpEp := jikan.Episode{MalID: i}
imgURL = tmpEp.GetFallbackImage(id)
}
episodes.Data = append(episodes.Data, jikan.Episode{
MalID: i,
Episode: fmt.Sprintf("Episode %d", i),
Title: title,
Images: &jikan.EpisodeImages{
Jpg: struct {
ImageURL string `json:"image_url"`
}{ImageURL: imgURL},
},
})
}
unique = fullList
}
}
// Update episodes list if fallback has more
if watchData.FallbackEpisodes != nil {
maxCount := 0
for _, count := range watchData.FallbackEpisodes {
if count > maxCount {
maxCount = count
}
}
// Ensure we don't have duplicates or missing episodes in the sequence
epMap := make(map[int]jikan.Episode)
for _, ep := range unique {
epMap[ep.MalID] = ep
}
if maxCount > 0 {
var newEpisodes []jikan.Episode
// We build the list from 1 to maxCount to ensure order and completeness
// If we have data from Jikan, we use it. Otherwise we generate a placeholder.
for i := 1; i <= maxCount; i++ {
if ep, ok := epMap[i]; ok {
newEpisodes = append(newEpisodes, ep)
} else {
title := fmt.Sprintf("Episode %d", i)
newEpisodes = append(newEpisodes, jikan.Episode{
MalID: i,
Episode: fmt.Sprintf("Episode %d", i),
Title: title,
Images: &jikan.EpisodeImages{
Jpg: struct {
ImageURL string `json:"image_url"`
}{ImageURL: ""},
},
})
}
}
unique = newEpisodes
}
}
sort.Slice(unique, func(i, j int) bool {
return unique[i].MalID < unique[j].MalID
})
if err := templates.GetRenderer().ExecuteTemplate(r.Context(), w, "watch.gohtml", map[string]any{
"Anime": anime,
"Episodes": episodes.Data,
"Episodes": unique,
"WatchData": watchData,
"User": user,
"CurrentPath": r.URL.Path,
@@ -375,3 +403,119 @@ func (h *Handler) HandleEpisodeData(w http.ResponseWriter, r *http.Request) {
"episode_title": "", // Find episode title if possible
})
}
// HandleEpisodeThumbnails serves /api/watch/thumbnails/{animeId} and responds
// with a JSON array containing one {mal_id, url, title} object per episode,
// ordered by episode number.
//
// The episode list is assembled from three sources:
//   - GetAllEpisodes supplies the base list; a failure here yields a 500.
//   - GetVideoEpisodes (page 1) is best-effort: when it succeeds, any non-empty
//     image it carries overrides the image of the matching base episode.
//   - GetAnimeByID is best-effort: when it reports a positive episode count, the
//     list is rebuilt as exactly 1..count, inserting "Episode N" placeholders for
//     numbers the base list lacks (entries numbered above count are dropped).
//
// A thumbnail URL is then resolved for every entry via Episode.GetFallbackImage.
// Those lookups are throttled and are abandoned as soon as the request context
// is cancelled, in which case no response body is written.
func (h *Handler) HandleEpisodeThumbnails(w http.ResponseWriter, r *http.Request) {
	const (
		// maxConcurrentLookups bounds how many thumbnail lookups run at once.
		maxConcurrentLookups = 2
		// baseDelayMs and delaySpreadMs define the per-lookup pause in
		// milliseconds: baseDelayMs + (index mod delaySpreadMs).
		baseDelayMs   = 200
		delaySpreadMs = 300
	)

	ctx := r.Context()

	// Splitting "/api/watch/thumbnails/{animeId}" on "/" yields a leading empty
	// element, so the ID sits at index 4.
	parts := strings.Split(r.URL.Path, "/")
	if len(parts) < 5 {
		http.Error(w, "invalid path", http.StatusBadRequest)
		return
	}
	id, err := strconv.Atoi(parts[4])
	if err != nil || id <= 0 {
		http.Error(w, "invalid animeId", http.StatusBadRequest)
		return
	}

	allEpisodes, err := h.jikanClient.GetAllEpisodes(ctx, id)
	if err != nil {
		log.Printf("thumbnails: fetching episodes for anime %d: %v", id, err)
		http.Error(w, "failed to get episodes", http.StatusInternalServerError)
		return
	}

	// Video episodes only enrich the list, so a failure is logged and skipped.
	// The result is touched only when err is nil, which stays safe whether the
	// client hands back a pointer or a value.
	videoEpisodes, err := h.jikanClient.GetVideoEpisodes(ctx, id, 1)
	if err != nil {
		log.Printf("thumbnails: fetching video episodes for anime %d: %v", id, err)
	} else {
		videoMeta := make(map[int]jikan.Episode, len(videoEpisodes.Data))
		for _, ve := range videoEpisodes.Data {
			videoMeta[ve.MalID] = ve
		}
		for i := range allEpisodes {
			ve, ok := videoMeta[allEpisodes[i].MalID]
			if ok && ve.Images != nil && ve.Images.Jpg.ImageURL != "" {
				allEpisodes[i].Images = ve.Images
			}
		}
	}

	// Deduplicate by MalID, keeping the first occurrence; byID doubles as the
	// lookup table used to fill gaps below.
	byID := make(map[int]jikan.Episode, len(allEpisodes))
	unique := make([]jikan.Episode, 0, len(allEpisodes))
	for _, ep := range allEpisodes {
		if _, dup := byID[ep.MalID]; dup {
			continue
		}
		byID[ep.MalID] = ep
		unique = append(unique, ep)
	}

	// The anime record is only consulted for its total episode count; without
	// it the deduplicated list is served as-is.
	maxCount := 0
	anime, err := h.jikanClient.GetAnimeByID(ctx, id)
	if err != nil {
		log.Printf("thumbnails: fetching anime %d: %v", id, err)
	} else {
		maxCount = anime.Episodes
	}

	if maxCount > 0 {
		// Rebuild as a gap-free 1..maxCount sequence; it is sorted by construction.
		fullList := make([]jikan.Episode, 0, maxCount)
		for n := 1; n <= maxCount; n++ {
			ep, ok := byID[n]
			if !ok {
				label := fmt.Sprintf("Episode %d", n)
				ep = jikan.Episode{MalID: n, Episode: label, Title: label}
			}
			fullList = append(fullList, ep)
		}
		unique = fullList
	} else {
		sort.Slice(unique, func(i, j int) bool {
			return unique[i].MalID < unique[j].MalID
		})
	}

	type ThumbResult struct {
		MalID int    `json:"mal_id"`
		URL   string `json:"url"`
		Title string `json:"title,omitempty"`
	}

	// Each goroutine writes only its own index of results, so no lock is needed.
	results := make([]ThumbResult, len(unique))

	// The semaphore limits concurrent lookups to avoid upstream bans
	// (presumably GetFallbackImage performs a remote request — confirm).
	sem := make(chan struct{}, maxConcurrentLookups)
	var wg sync.WaitGroup
	for i := range unique {
		wg.Add(1)
		go func(idx int) {
			defer wg.Done()

			// Acquire a slot, giving up immediately if the request is gone.
			select {
			case sem <- struct{}{}:
			case <-ctx.Done():
				return
			}
			defer func() { <-sem }()

			// Pause between lookups to avoid 405/429 responses, but wake up
			// early on cancellation instead of sleeping it out.
			pause := time.NewTimer(time.Duration(baseDelayMs+idx%delaySpreadMs) * time.Millisecond)
			defer pause.Stop()
			select {
			case <-pause.C:
			case <-ctx.Done():
				return
			}

			ep := unique[idx]
			results[idx] = ThumbResult{
				MalID: ep.MalID,
				URL:   ep.GetFallbackImage(id),
				Title: ep.Title,
			}
		}(i)
	}
	wg.Wait()

	// A cancelled request leaves results partially filled; do not send them.
	if err := ctx.Err(); err != nil {
		log.Printf("thumbnails: request for anime %d aborted: %v", id, err)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(results); err != nil {
		log.Printf("thumbnails: encoding response for anime %d: %v", id, err)
	}
}