feat: parallelize episode image fallback scraping
This commit is contained in:
@@ -8,10 +8,11 @@ import (
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"mal/integrations/jikan"
|
||||
ctxpkg "mal/internal/context"
|
||||
"mal/internal/db"
|
||||
database "mal/internal/db"
|
||||
"mal/templates"
|
||||
)
|
||||
|
||||
@@ -63,9 +64,18 @@ func (h *Handler) HandleWatchPage(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for i := range episodes.Data {
|
||||
episodes.Data[i].Images.Jpg.ImageURL = episodes.Data[i].GetFallbackImage(id)
|
||||
if episodes.Data[i].Images == nil {
|
||||
episodes.Data[i].Images = &jikan.EpisodeImages{}
|
||||
}
|
||||
wg.Add(1)
|
||||
go func(idx int) {
|
||||
defer wg.Done()
|
||||
episodes.Data[idx].Images.Jpg.ImageURL = episodes.Data[idx].GetFallbackImage(id)
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
sort.Slice(episodes.Data, func(i, j int) bool {
|
||||
return episodes.Data[i].MalID < episodes.Data[j].MalID
|
||||
|
||||
@@ -38,15 +38,15 @@ func (e *Episode) GetFallbackImage(animeID int) string {
|
||||
}
|
||||
|
||||
// Always trigger scraping if we encounter the banned icon OR the generic placeholder
|
||||
if imageUrl != bannedImageURL && imageUrl != placeholderImageURL && imageUrl != "" {
|
||||
return imageUrl
|
||||
}
|
||||
|
||||
episodeURL := fmt.Sprintf("https://myanimelist.net/anime/%d/episode/%d", animeID, episodeNum)
|
||||
fallbackURL := scrapeAnimeImageFromEpisodePage(episodeURL, episodeNum)
|
||||
|
||||
if fallbackURL != "" {
|
||||
return fallbackURL
|
||||
// OR if there is no image URL at all
|
||||
if imageUrl == bannedImageURL || imageUrl == placeholderImageURL || imageUrl == "" {
|
||||
// MAL URLs usually follow this format, and it redirects to the slug version
|
||||
episodeURL := fmt.Sprintf("https://myanimelist.net/anime/%d/_/episode/%d", animeID, episodeNum)
|
||||
fallbackURL := scrapeAnimeImageFromEpisodePage(episodeURL, episodeNum)
|
||||
|
||||
if fallbackURL != "" {
|
||||
return fallbackURL
|
||||
}
|
||||
}
|
||||
|
||||
return imageUrl
|
||||
@@ -66,7 +66,9 @@ func scrapeAnimeImageFromEpisodePage(episodeURL string, episodeNum int) string {
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Log the status code for debugging
|
||||
if resp.StatusCode != 200 {
|
||||
// fmt.Printf("[DEBUG] Failed to fetch %s: Status %d\n", episodeURL, resp.StatusCode)
|
||||
return ""
|
||||
}
|
||||
|
||||
@@ -77,11 +79,18 @@ func scrapeAnimeImageFromEpisodePage(episodeURL string, episodeNum int) string {
|
||||
|
||||
html := string(body)
|
||||
|
||||
// Look for the JSON data in MAL.episodeVideo.aroundVideos
|
||||
// MAL sometimes redirects to a URL with a slug.
|
||||
// The JSON object is very likely to be present in the full page.
|
||||
// We extract the object {} containing "episode_number":X
|
||||
episodeStr := strconv.Itoa(episodeNum)
|
||||
objPattern := regexp.MustCompile(`\{[^{}]*"episode_number":\s*` + episodeStr + `[^{}]*\}`)
|
||||
match := objPattern.FindString(html)
|
||||
if match == "" {
|
||||
// Try a broader search if the strict one fails
|
||||
objPattern = regexp.MustCompile(`\{[^}]*"episode_number":\s*` + episodeStr + `[^}]*\}`)
|
||||
match = objPattern.FindString(html)
|
||||
}
|
||||
|
||||
if match != "" {
|
||||
thumbRe := regexp.MustCompile(`"thumbnail":\s*"([^"]+)"`)
|
||||
thumbMatch := thumbRe.FindStringSubmatch(match)
|
||||
|
||||
@@ -166,17 +166,19 @@ type TopAnimeResponse struct {
|
||||
Pagination Pagination `json:"pagination"`
|
||||
}
|
||||
|
||||
type EpisodeImages struct {
|
||||
Jpg struct {
|
||||
ImageURL string `json:"image_url"`
|
||||
} `json:"jpg"`
|
||||
}
|
||||
|
||||
type Episode struct {
|
||||
MalID int `json:"mal_id"`
|
||||
Title string `json:"title"`
|
||||
Episode string `json:"episode"`
|
||||
Filler bool `json:"filler"`
|
||||
Recap bool `json:"recap"`
|
||||
Images *struct {
|
||||
Jpg struct {
|
||||
ImageURL string `json:"image_url"`
|
||||
} `json:"jpg"`
|
||||
} `json:"images,omitempty"`
|
||||
MalID int `json:"mal_id"`
|
||||
Title string `json:"title"`
|
||||
Episode string `json:"episode"`
|
||||
Filler bool `json:"filler"`
|
||||
Recap bool `json:"recap"`
|
||||
Images *EpisodeImages `json:"images,omitempty"`
|
||||
}
|
||||
|
||||
type EpisodesResponse struct {
|
||||
|
||||
Reference in New Issue
Block a user