perf: optimize episode fetching and metadata scraping

This commit is contained in:
2026-05-02 23:57:24 +02:00
parent 56f0951d5e
commit a83ab2e33f
7 changed files with 341 additions and 71 deletions

View File

@@ -12,6 +12,7 @@ import (
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
"time"
"mal/integrations/jikan" "mal/integrations/jikan"
database "mal/internal/db" database "mal/internal/db"
@@ -57,32 +58,36 @@ func (h *Handler) HandleWatchPage(w http.ResponseWriter, r *http.Request) {
return return
} }
// Try to get video episodes first (for thumbnails) // Get essential episodes (first and last pages)
episodes, err := h.jikanClient.GetVideoEpisodes(r.Context(), id, 1) allEpisodes, err := h.jikanClient.GetAllEpisodes(r.Context(), id)
if err != nil || len(episodes.Data) == 0 { if err != nil {
// Fallback to standard episodes if no video episodes log.Printf("watch error fetching episodes: %v", err)
episodes, err = h.jikanClient.GetEpisodes(r.Context(), id, 1) }
if err != nil {
log.Printf("watch error: %v", err) // Fetch any metadata overlays (thumbnails)
videoEpisodes, _ := h.jikanClient.GetVideoEpisodes(r.Context(), id, 1)
videoMeta := make(map[int]jikan.Episode)
for _, ve := range videoEpisodes.Data {
videoMeta[ve.MalID] = ve
}
for i, ep := range allEpisodes {
if ve, ok := videoMeta[ep.MalID]; ok {
if ve.Images != nil && ve.Images.Jpg.ImageURL != "" {
allEpisodes[i].Images = ve.Images
}
} }
} }
var wg sync.WaitGroup // Deduplicate and prep the list
for i := range episodes.Data { seen := make(map[int]bool)
if episodes.Data[i].Images == nil { unique := make([]jikan.Episode, 0)
episodes.Data[i].Images = &jikan.EpisodeImages{} for _, ep := range allEpisodes {
if !seen[ep.MalID] {
seen[ep.MalID] = true
unique = append(unique, ep)
} }
wg.Add(1)
go func(idx int) {
defer wg.Done()
episodes.Data[idx].Images.Jpg.ImageURL = episodes.Data[idx].GetFallbackImage(id)
}(i)
} }
wg.Wait()
sort.Slice(episodes.Data, func(i, j int) bool {
return episodes.Data[i].MalID < episodes.Data[j].MalID
})
user := middleware.GetUser(r.Context()) user := middleware.GetUser(r.Context())
@@ -131,52 +136,75 @@ func (h *Handler) HandleWatchPage(w http.ResponseWriter, r *http.Request) {
} }
} }
if maxCount > len(episodes.Data) { epMap := make(map[int]jikan.Episode)
// Fetch metadata for the missing episodes for _, ep := range unique {
start := len(episodes.Data) + 1 epMap[ep.MalID] = ep
for i := start; i <= maxCount; i++ { }
epStr := strconv.Itoa(i)
meta, err := h.svc.GetEpisodeMetadata(r.Context(), id, epStr)
title := fmt.Sprintf("Episode %d", i) if maxCount > 0 {
imgURL := "" var fullList []jikan.Episode
for i := 1; i <= maxCount; i++ {
if err == nil && meta != nil { if ep, ok := epMap[i]; ok {
if info, ok := meta["episodeInfo"].(map[string]any); ok { fullList = append(fullList, ep)
if thumbs, ok := info["thumbnails"].([]any); ok && len(thumbs) > 0 { } else {
if firstThumb, ok := thumbs[0].(string); ok { fullList = append(fullList, jikan.Episode{
imgURL = firstThumb MalID: i,
} Episode: fmt.Sprintf("Episode %d", i),
} Title: fmt.Sprintf("Episode %d", i),
} })
if notes, ok := meta["notes"].(string); ok && notes != "" {
title = notes
}
} }
if imgURL == "" {
// Last resort fallback
tmpEp := jikan.Episode{MalID: i}
imgURL = tmpEp.GetFallbackImage(id)
}
episodes.Data = append(episodes.Data, jikan.Episode{
MalID: i,
Episode: fmt.Sprintf("Episode %d", i),
Title: title,
Images: &jikan.EpisodeImages{
Jpg: struct {
ImageURL string `json:"image_url"`
}{ImageURL: imgURL},
},
})
} }
unique = fullList
} }
} }
// Update episodes list if fallback has more
if watchData.FallbackEpisodes != nil {
maxCount := 0
for _, count := range watchData.FallbackEpisodes {
if count > maxCount {
maxCount = count
}
}
// Ensure we don't have duplicates or missing episodes in the sequence
epMap := make(map[int]jikan.Episode)
for _, ep := range unique {
epMap[ep.MalID] = ep
}
if maxCount > 0 {
var newEpisodes []jikan.Episode
// We build the list from 1 to maxCount to ensure order and completeness
// If we have data from Jikan, we use it. Otherwise we generate a placeholder.
for i := 1; i <= maxCount; i++ {
if ep, ok := epMap[i]; ok {
newEpisodes = append(newEpisodes, ep)
} else {
title := fmt.Sprintf("Episode %d", i)
newEpisodes = append(newEpisodes, jikan.Episode{
MalID: i,
Episode: fmt.Sprintf("Episode %d", i),
Title: title,
Images: &jikan.EpisodeImages{
Jpg: struct {
ImageURL string `json:"image_url"`
}{ImageURL: ""},
},
})
}
}
unique = newEpisodes
}
}
sort.Slice(unique, func(i, j int) bool {
return unique[i].MalID < unique[j].MalID
})
if err := templates.GetRenderer().ExecuteTemplate(r.Context(), w, "watch.gohtml", map[string]any{ if err := templates.GetRenderer().ExecuteTemplate(r.Context(), w, "watch.gohtml", map[string]any{
"Anime": anime, "Anime": anime,
"Episodes": episodes.Data, "Episodes": unique,
"WatchData": watchData, "WatchData": watchData,
"User": user, "User": user,
"CurrentPath": r.URL.Path, "CurrentPath": r.URL.Path,
@@ -375,3 +403,119 @@ func (h *Handler) HandleEpisodeData(w http.ResponseWriter, r *http.Request) {
"episode_title": "", // Find episode title if possible "episode_title": "", // Find episode title if possible
}) })
} }
// HandleEpisodeThumbnails serves /api/watch/thumbnails/{animeId}.
//
// It responds with a JSON array holding one {mal_id, url, title} object per
// episode, ordered by episode number. Episode data comes from
// GetAllEpisodes, is overlaid with images from the first page of video
// episodes, and is padded with "Episode N" placeholders up to the episode
// count reported for the anime. The thumbnail URL of every entry is resolved
// through Episode.GetFallbackImage.
//
// Responses:
//   - 400 when the path has no numeric anime ID segment.
//   - 500 when the episode list cannot be fetched.
//   - nothing is written when the request is cancelled while thumbnails are
//     still being resolved.
func (h *Handler) HandleEpisodeThumbnails(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	// Expected path: /api/watch/thumbnails/{animeId}
	parts := strings.Split(r.URL.Path, "/")
	if len(parts) < 5 {
		http.Error(w, "invalid path", http.StatusBadRequest)
		return
	}
	id, err := strconv.Atoi(parts[4])
	if err != nil {
		http.Error(w, "invalid animeId", http.StatusBadRequest)
		return
	}

	allEpisodes, err := h.jikanClient.GetAllEpisodes(ctx, id)
	if err != nil {
		http.Error(w, "failed to get episodes", http.StatusInternalServerError)
		return
	}

	// Video episodes only provide an image overlay, so a failure here is
	// logged and the handler carries on without them.
	if videoEpisodes, verr := h.jikanClient.GetVideoEpisodes(ctx, id, 1); verr != nil {
		log.Printf("thumbnails: fetching video episodes for anime %d: %v", id, verr)
	} else {
		videoMeta := make(map[int]jikan.Episode, len(videoEpisodes.Data))
		for _, ve := range videoEpisodes.Data {
			videoMeta[ve.MalID] = ve
		}
		for i := range allEpisodes {
			ve, ok := videoMeta[allEpisodes[i].MalID]
			if ok && ve.Images != nil && ve.Images.Jpg.ImageURL != "" {
				allEpisodes[i].Images = ve.Images
			}
		}
	}

	// Deduplicate by MAL ID, keeping the first occurrence of each episode.
	byID := make(map[int]jikan.Episode, len(allEpisodes))
	unique := make([]jikan.Episode, 0, len(allEpisodes))
	for _, ep := range allEpisodes {
		if _, dup := byID[ep.MalID]; dup {
			continue
		}
		byID[ep.MalID] = ep
		unique = append(unique, ep)
	}

	// The anime's episode count decides how far the list is padded. When the
	// lookup fails the count is treated as unknown and no padding happens.
	maxCount := 0
	if anime, aerr := h.jikanClient.GetAnimeByID(ctx, id); aerr != nil {
		log.Printf("thumbnails: fetching anime %d: %v", id, aerr)
	} else {
		maxCount = anime.Episodes
	}

	if maxCount > 0 {
		// Rebuild the list as 1..maxCount so it has no gaps; numbers without
		// fetched data get a generated placeholder.
		fullList := make([]jikan.Episode, 0, maxCount)
		for i := 1; i <= maxCount; i++ {
			if ep, ok := byID[i]; ok {
				fullList = append(fullList, ep)
				continue
			}
			fullList = append(fullList, jikan.Episode{
				MalID:   i,
				Episode: fmt.Sprintf("Episode %d", i),
				Title:   fmt.Sprintf("Episode %d", i),
			})
		}
		unique = fullList
	}

	sort.Slice(unique, func(i, j int) bool {
		return unique[i].MalID < unique[j].MalID
	})

	type ThumbResult struct {
		MalID int    `json:"mal_id"`
		URL   string `json:"url"`
		Title string `json:"title,omitempty"`
	}

	results := make([]ThumbResult, len(unique))

	// At most two lookups run at a time to avoid MAL bans. The slot is taken
	// before the goroutine starts, so no more than two goroutines exist at
	// once, and a cancelled request stops scheduling further work.
	sem := make(chan struct{}, 2)
	var wg sync.WaitGroup

scrape:
	for i := range unique {
		select {
		case sem <- struct{}{}:
		case <-ctx.Done():
			break scrape
		}

		wg.Add(1)
		go func(idx int) {
			defer wg.Done()
			defer func() { <-sem }()

			// Index-derived delay (200-499ms) that spaces out requests to
			// avoid 405/429 responses; it is abandoned on cancellation.
			delay := time.NewTimer(time.Duration(200+idx%300) * time.Millisecond)
			defer delay.Stop()
			select {
			case <-delay.C:
			case <-ctx.Done():
				return
			}

			ep := unique[idx]
			// Each goroutine writes only its own slot, so no lock is needed.
			results[idx] = ThumbResult{
				MalID: ep.MalID,
				URL:   ep.GetFallbackImage(id),
				Title: ep.Title,
			}
		}(i)
	}
	wg.Wait()

	// The client is gone; the partially filled result is not worth sending.
	if ctx.Err() != nil {
		return
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(results); err != nil {
		log.Printf("thumbnails: encoding response for anime %d: %v", id, err)
	}
}

View File

@@ -4,6 +4,7 @@ import (
"context" "context"
"fmt" "fmt"
"io" "io"
"log"
"net/http" "net/http"
"regexp" "regexp"
"strconv" "strconv"
@@ -40,8 +41,9 @@ func (e *Episode) GetFallbackImage(animeID int) string {
// Always trigger scraping if we encounter the banned icon OR the generic placeholder // Always trigger scraping if we encounter the banned icon OR the generic placeholder
// OR if there is no image URL at all // OR if there is no image URL at all
if imageUrl == bannedImageURL || imageUrl == placeholderImageURL || imageUrl == "" { if imageUrl == bannedImageURL || imageUrl == placeholderImageURL || imageUrl == "" {
// MAL URLs usually follow this format, and it redirects to the slug version // MAL URLs follow this format: https://myanimelist.net/anime/20/Naruto/episode/1
episodeURL := fmt.Sprintf("https://myanimelist.net/anime/%d/_/episode/%d", animeID, episodeNum) // The previous format used /_/ which is sometimes rejected with 405
episodeURL := fmt.Sprintf("https://myanimelist.net/anime/%d/slug/episode/%d", animeID, episodeNum)
fallbackURL := scrapeAnimeImageFromEpisodePage(episodeURL, episodeNum) fallbackURL := scrapeAnimeImageFromEpisodePage(episodeURL, episodeNum)
if fallbackURL != "" { if fallbackURL != "" {
@@ -68,7 +70,7 @@ func scrapeAnimeImageFromEpisodePage(episodeURL string, episodeNum int) string {
// Log the status code for debugging // Log the status code for debugging
if resp.StatusCode != 200 { if resp.StatusCode != 200 {
// fmt.Printf("[DEBUG] Failed to fetch %s: Status %d\n", episodeURL, resp.StatusCode) log.Printf("[DEBUG] Scraper failed to fetch %s: Status %d", episodeURL, resp.StatusCode)
return "" return ""
} }
@@ -80,8 +82,9 @@ func scrapeAnimeImageFromEpisodePage(episodeURL string, episodeNum int) string {
html := string(body) html := string(body)
// MAL sometimes redirects to a URL with a slug. // MAL sometimes redirects to a URL with a slug.
// The JSON object is very likely to be present in the full page. // We look for the "thumbnail" field in the page source.
// We extract the object {} containing "episode_number":X
// Pattern 1: Look for the specific episode object in the JSON data
episodeStr := strconv.Itoa(episodeNum) episodeStr := strconv.Itoa(episodeNum)
objPattern := regexp.MustCompile(`\{[^{}]*"episode_number":\s*` + episodeStr + `[^{}]*\}`) objPattern := regexp.MustCompile(`\{[^{}]*"episode_number":\s*` + episodeStr + `[^{}]*\}`)
match := objPattern.FindString(html) match := objPattern.FindString(html)
@@ -95,10 +98,19 @@ func scrapeAnimeImageFromEpisodePage(episodeURL string, episodeNum int) string {
thumbRe := regexp.MustCompile(`"thumbnail":\s*"([^"]+)"`) thumbRe := regexp.MustCompile(`"thumbnail":\s*"([^"]+)"`)
thumbMatch := thumbRe.FindStringSubmatch(match) thumbMatch := thumbRe.FindStringSubmatch(match)
if len(thumbMatch) > 1 { if len(thumbMatch) > 1 {
// Unescape backslashes in URL
return strings.ReplaceAll(thumbMatch[1], `\/`, `/`) return strings.ReplaceAll(thumbMatch[1], `\/`, `/`)
} }
} }
// Pattern 2: Fallback to og:image if it's the specific episode page
ogRe := regexp.MustCompile(`<meta\s+property="og:image"\s+content="([^"]+)"`)
ogMatch := ogRe.FindStringSubmatch(html)
if len(ogMatch) > 1 {
// Only use if it looks like an episode thumbnail (contains /episodes/)
if strings.Contains(ogMatch[1], "/episodes/") {
return ogMatch[1]
}
}
return "" return ""
} }
@@ -137,3 +149,62 @@ func (c *Client) GetEpisode(ctx context.Context, animeID int, episode int) (Epis
err := c.getWithCache(ctx, cacheKey, 24*time.Hour, reqURL, &result) err := c.getWithCache(ctx, cacheKey, 24*time.Hour, reqURL, &result)
return result, err return result, err
} }
// GetAllEpisodes returns the episodes found on the first and last pages of
// the episode listing for animeID. Intermediate pages are not returned; they
// are only requested in the background.
//
// The total episode count is read from GetAnimeByID. When it is unknown
// (<= 0) only page 1 is fetched. Otherwise the page count is derived assuming
// 100 episodes per page, and the result holds the last page's episodes
// followed by the first page's, so callers that need ascending order must
// sort.
//
// An error is returned when the anime lookup fails or when none of the
// requested pages could be fetched. If only one of the two pages fails, the
// failure is logged and the episodes from the other page are returned.
func (c *Client) GetAllEpisodes(ctx context.Context, animeID int) ([]Episode, error) {
	anime, err := c.GetAnimeByID(ctx, animeID)
	if err != nil {
		return nil, err
	}

	totalEpisodes := anime.Episodes
	if totalEpisodes <= 0 {
		// Count unknown: fall back to a plain page 1 fetch.
		resp, err := c.GetEpisodes(ctx, animeID, 1)
		if err != nil {
			return nil, err
		}
		return resp.Data, nil
	}

	// Jikan /episodes returns 100 per page.
	lastPage := (totalEpisodes + 99) / 100

	var allEpisodes []Episode

	lastResp, lastErr := c.GetEpisodes(ctx, animeID, lastPage)
	if lastErr == nil {
		allEpisodes = append(allEpisodes, lastResp.Data...)
	}

	var firstErr error
	if lastPage > 1 {
		firstResp, err := c.GetEpisodes(ctx, animeID, 1)
		if err == nil {
			allEpisodes = append(allEpisodes, firstResp.Data...)
		}
		firstErr = err
	}

	switch {
	case lastErr != nil && (lastPage == 1 || firstErr != nil):
		// Every requested page failed; report it like the unknown-count path
		// does instead of returning an empty list with a nil error.
		return nil, fmt.Errorf("fetching episodes for anime %d: %w", animeID, lastErr)
	case lastErr != nil:
		log.Printf("jikan: fetching episodes page %d for anime %d: %v", lastPage, animeID, lastErr)
	case firstErr != nil:
		log.Printf("jikan: fetching episodes page 1 for anime %d: %v", animeID, firstErr)
	}

	// Request the intermediate pages in the background. The results are
	// discarded here; presumably GetEpisodes caches them for later calls
	// (TODO confirm, GetEpisodes is not visible from this function).
	// NOTE(review): every call for a long series starts its own goroutine;
	// consider de-duplicating concurrent warm-ups per anime.
	if lastPage > 2 {
		go func() {
			// Detached from the request context so the work outlives the
			// caller, but bounded by its own timeout.
			bgCtx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
			defer cancel()

			for p := 2; p < lastPage; p++ {
				if _, err := c.GetEpisodes(bgCtx, animeID, p); err != nil {
					log.Printf("jikan: prefetching episodes page %d for anime %d: %v", p, animeID, err)
				}

				select {
				case <-bgCtx.Done():
					return
				case <-time.After(500 * time.Millisecond): // Rate limit buffer
				}
			}
		}()
	}

	return allEpisodes, nil
}

View File

@@ -76,6 +76,10 @@ type Anime struct {
URL string `json:"url"` URL string `json:"url"`
} `json:"streaming"` } `json:"streaming"`
Relations []JikanRelationGroup `json:"relations"` Relations []JikanRelationGroup `json:"relations"`
External []struct {
Name string `json:"name"`
URL string `json:"url"`
} `json:"external"`
} }
func (a Anime) ImageURL() string { func (a Anime) ImageURL() string {

View File

@@ -87,6 +87,7 @@ func NewRouter(cfg Config) http.Handler {
mux.HandleFunc("/api/watch-progress", playbackHandler.HandleSaveProgress) mux.HandleFunc("/api/watch-progress", playbackHandler.HandleSaveProgress)
mux.HandleFunc("/api/watch-complete", playbackHandler.HandleCompleteAnime) mux.HandleFunc("/api/watch-complete", playbackHandler.HandleCompleteAnime)
mux.HandleFunc("/api/watch/episode/", playbackHandler.HandleEpisodeData) mux.HandleFunc("/api/watch/episode/", playbackHandler.HandleEpisodeData)
mux.HandleFunc("/api/watch/thumbnails/", playbackHandler.HandleEpisodeThumbnails)
// Auth Endpoints // Auth Endpoints
mux.HandleFunc("/login", func(w http.ResponseWriter, r *http.Request) { mux.HandleFunc("/login", func(w http.ResponseWriter, r *http.Request) {

View File

@@ -1113,6 +1113,42 @@ const initPlayer = (): void => {
showControls() showControls()
playerInitialized = true playerInitialized = true
// Fetch thumbnails and metadata in the background
fetch(`/api/watch/thumbnails/${malID}`)
.then((res) => res.json())
.then((data: Array<{ mal_id: number, url: string, title?: string }>) => {
const episodeList = document.querySelector('[data-episode-list]')
if (!episodeList) return
data.forEach((item) => {
const epCard = episodeList.querySelector(`[data-episode-id="${item.mal_id}"]`)
if (!epCard) return
if (item.url) {
const imgContainer = epCard.querySelector('.relative.aspect-video')
if (imgContainer) {
let img = imgContainer.querySelector('img')
if (!img) {
img = document.createElement('img')
img.className = 'h-full w-full object-cover transition-transform group-hover:scale-105'
img.loading = 'lazy'
const placeholder = imgContainer.querySelector('.flex.h-full.w-full.items-center.justify-center')
if (placeholder) placeholder.remove()
imgContainer.prepend(img)
}
img.src = item.url
img.alt = item.title || `Episode ${item.mal_id}`
}
}
if (item.title) {
const titleSpan = epCard.querySelector('[data-episode-title]')
if (titleSpan) titleSpan.textContent = item.title
}
})
})
.catch((err) => console.error('Failed to fetch thumbnails:', err))
} }
document.addEventListener('DOMContentLoaded', initPlayer) document.addEventListener('DOMContentLoaded', initPlayer)

View File

@@ -28,7 +28,15 @@
{{template "watchlist_actions" dict "Anime" $anime "User" .User "Status" .Status}} {{template "watchlist_actions" dict "Anime" $anime "User" .User "Status" .Status}}
<div class="space-y-4"> <div class="mt-6 flex flex-wrap gap-3">
{{range $anime.External}}
<a href="{{.URL}}" target="_blank" rel="noopener" class="bg-white/5 hover:bg-white/10 border-white/10 rounded-md border px-3 py-1.5 text-xs font-medium text-neutral-300 transition-colors">
{{.Name}}
</a>
{{end}}
</div>
<div class="mt-8 space-y-4">
<div> <div>
<h2 class="mb-2 text-lg font-medium text-neutral-300">Synopsis</h2> <h2 class="mb-2 text-lg font-medium text-neutral-300">Synopsis</h2>
<p id="synopsis-container" class="text-foreground-muted max-w-4xl text-base leading-relaxed whitespace-pre-line line-clamp-5 md:line-clamp-none"> <p id="synopsis-container" class="text-foreground-muted max-w-4xl text-base leading-relaxed whitespace-pre-line line-clamp-5 md:line-clamp-none">

View File

@@ -21,13 +21,19 @@
{{$totalEps = $fallbackSub}} {{$totalEps = $fallbackSub}}
{{end}} {{end}}
<div class="grid grid-cols-1 gap-4 sm:grid-cols-2 md:grid-cols-3 lg:grid-cols-4 xl:grid-cols-5"> <div class="grid grid-cols-1 gap-4 sm:grid-cols-2 md:grid-cols-3 lg:grid-cols-4 xl:grid-cols-5" data-episode-list>
{{range $episodes}} {{range $episodes}}
{{$isCurrent := eq (printf "%v" .MalID) $currentEpID}} {{$isCurrent := eq (printf "%v" .MalID) $currentEpID}}
<a href="/anime/{{$anime.MalID}}/watch?ep={{.MalID}}" class="group flex flex-col overflow-hidden bg-white/5 transition-colors hover:bg-white/10 {{if $isCurrent}}ring-accent ring-2{{end}}"> <a href="/anime/{{$anime.MalID}}/watch?ep={{.MalID}}" class="group flex flex-col overflow-hidden bg-white/5 transition-colors hover:bg-white/10 {{if $isCurrent}}ring-accent ring-2{{end}}" data-episode-id="{{.MalID}}">
<div class="relative aspect-video w-full overflow-hidden bg-black/50"> <div class="relative aspect-video w-full overflow-hidden bg-black/50">
{{if .Images.Jpg.ImageURL}} {{if .Images}}
<img src="{{.Images.Jpg.ImageURL}}" alt="{{.Title}}" class="h-full w-full object-cover transition-transform group-hover:scale-105" loading="lazy" /> {{if .Images.Jpg.ImageURL}}
<img src="{{.Images.Jpg.ImageURL}}" alt="{{.Title}}" class="h-full w-full object-cover transition-transform group-hover:scale-105" loading="lazy" />
{{else}}
<div class="flex h-full w-full items-center justify-center text-neutral-600">
<svg class="h-8 w-8" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path stroke-linecap="round" stroke-linejoin="round" d="M4 16l4.586-4.586a2 2 0 012.828 0L16 16m-2-2l1.586-1.586a2 2 0 012.828 0L20 14m-6-6h.01M6 20h12a2 2 0 002-2V6a2 2 0 00-2-2H6a2 2 0 00-2 2v12a2 2 0 002 2z"/></svg>
</div>
{{end}}
{{else}} {{else}}
<div class="flex h-full w-full items-center justify-center text-neutral-600"> <div class="flex h-full w-full items-center justify-center text-neutral-600">
<svg class="h-8 w-8" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path stroke-linecap="round" stroke-linejoin="round" d="M4 16l4.586-4.586a2 2 0 012.828 0L16 16m-2-2l1.586-1.586a2 2 0 012.828 0L20 14m-6-6h.01M6 20h12a2 2 0 002-2V6a2 2 0 00-2-2H6a2 2 0 00-2 2v12a2 2 0 002 2z"/></svg> <svg class="h-8 w-8" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path stroke-linecap="round" stroke-linejoin="round" d="M4 16l4.586-4.586a2 2 0 012.828 0L16 16m-2-2l1.586-1.586a2 2 0 012.828 0L20 14m-6-6h.01M6 20h12a2 2 0 002-2V6a2 2 0 00-2-2H6a2 2 0 00-2 2v12a2 2 0 002 2z"/></svg>
@@ -41,7 +47,7 @@
</div> </div>
<div class="flex flex-col gap-1 p-3"> <div class="flex flex-col gap-1 p-3">
<span class="text-accent text-xs font-semibold">Episode {{.MalID}}</span> <span class="text-accent text-xs font-semibold">Episode {{.MalID}}</span>
<span class="line-clamp-2 text-sm font-medium text-neutral-200">{{.Title}}</span> <span class="line-clamp-2 text-sm font-medium text-neutral-200" data-episode-title>{{.Title}}</span>
</div> </div>
</a> </a>
{{end}} {{end}}