feat: parallelize episode image fallback scraping

This commit is contained in:
2026-05-02 17:15:21 +02:00
committed by Mikkel Elvers
parent 2850c56143
commit 8fb7b1b72f
3 changed files with 43 additions and 22 deletions

View File

@@ -8,10 +8,11 @@ import (
"sort"
"strconv"
"strings"
"sync"
"mal/integrations/jikan"
ctxpkg "mal/internal/context"
"mal/internal/db"
database "mal/internal/db"
"mal/templates"
)
@@ -63,9 +64,18 @@ func (h *Handler) HandleWatchPage(w http.ResponseWriter, r *http.Request) {
}
}
var wg sync.WaitGroup
for i := range episodes.Data {
episodes.Data[i].Images.Jpg.ImageURL = episodes.Data[i].GetFallbackImage(id)
if episodes.Data[i].Images == nil {
episodes.Data[i].Images = &jikan.EpisodeImages{}
}
wg.Add(1)
go func(idx int) {
defer wg.Done()
episodes.Data[idx].Images.Jpg.ImageURL = episodes.Data[idx].GetFallbackImage(id)
}(i)
}
wg.Wait()
sort.Slice(episodes.Data, func(i, j int) bool {
return episodes.Data[i].MalID < episodes.Data[j].MalID

View File

@@ -38,16 +38,16 @@ func (e *Episode) GetFallbackImage(animeID int) string {
}
// Always trigger scraping if we encounter the banned icon OR the generic placeholder
if imageUrl != bannedImageURL && imageUrl != placeholderImageURL && imageUrl != "" {
return imageUrl
}
episodeURL := fmt.Sprintf("https://myanimelist.net/anime/%d/episode/%d", animeID, episodeNum)
// OR if there is no image URL at all
if imageUrl == bannedImageURL || imageUrl == placeholderImageURL || imageUrl == "" {
// MAL URLs usually follow this format, and it redirects to the slug version
episodeURL := fmt.Sprintf("https://myanimelist.net/anime/%d/_/episode/%d", animeID, episodeNum)
fallbackURL := scrapeAnimeImageFromEpisodePage(episodeURL, episodeNum)
if fallbackURL != "" {
return fallbackURL
}
}
return imageUrl
}
@@ -66,7 +66,9 @@ func scrapeAnimeImageFromEpisodePage(episodeURL string, episodeNum int) string {
}
defer resp.Body.Close()
// Log the status code for debugging
if resp.StatusCode != 200 {
// fmt.Printf("[DEBUG] Failed to fetch %s: Status %d\n", episodeURL, resp.StatusCode)
return ""
}
@@ -77,11 +79,18 @@ func scrapeAnimeImageFromEpisodePage(episodeURL string, episodeNum int) string {
html := string(body)
// Look for the JSON data in MAL.episodeVideo.aroundVideos
// MAL sometimes redirects to a URL with a slug.
// The JSON object is very likely to be present in the full page.
// We extract the object {} containing "episode_number":X
episodeStr := strconv.Itoa(episodeNum)
objPattern := regexp.MustCompile(`\{[^{}]*"episode_number":\s*` + episodeStr + `[^{}]*\}`)
match := objPattern.FindString(html)
if match == "" {
// Try a broader search if the strict one fails
objPattern = regexp.MustCompile(`\{[^}]*"episode_number":\s*` + episodeStr + `[^}]*\}`)
match = objPattern.FindString(html)
}
if match != "" {
thumbRe := regexp.MustCompile(`"thumbnail":\s*"([^"]+)"`)
thumbMatch := thumbRe.FindStringSubmatch(match)

View File

@@ -166,17 +166,19 @@ type TopAnimeResponse struct {
Pagination Pagination `json:"pagination"`
}
// EpisodeImages holds the image URLs attached to an episode.
// It decodes from a JSON object whose "jpg" key contains an "image_url"
// string, which ends up in Jpg.ImageURL.
// Being a named type, an empty value can be allocated directly
// (for example &EpisodeImages{}) when no images object is present.
type EpisodeImages struct {
// Jpg carries the JPEG variant; ImageURL is its "image_url" value.
Jpg struct {
ImageURL string `json:"image_url"`
} `json:"jpg"`
}
// Episode is a single episode record. The struct tags map its fields to the
// JSON keys "mal_id", "title", "episode", "filler", "recap" and "images".
//
// NOTE(review): this view shows two declarations of Images (an anonymous
// struct pointer and *EpisodeImages). That looks like the before/after lines
// of a diff rendered without +/- markers; compilable Go can keep only one of
// them — confirm against the actual file.
type Episode struct {
MalID int `json:"mal_id"`
Title string `json:"title"`
Episode string `json:"episode"`
Filler bool `json:"filler"`
Recap bool `json:"recap"`
// Images is a pointer with omitempty, so it may be nil;
// check for nil before reading Images.Jpg.ImageURL.
Images *struct {
Jpg struct {
ImageURL string `json:"image_url"`
} `json:"jpg"`
} `json:"images,omitempty"`
Images *EpisodeImages `json:"images,omitempty"`
}
type EpisodesResponse struct {