feat: add animeschedule integration
This commit is contained in:
479
integrations/animeschedule/animeschedule.go
Normal file
479
integrations/animeschedule/animeschedule.go
Normal file
@@ -0,0 +1,479 @@
|
||||
package animeschedule
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"mal/internal/observability"
|
||||
"mal/pkg/net/limits"
|
||||
"mal/pkg/net/useragent"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// AirType labels the release variant of a scheduled episode using the
// three-letter codes that appear in the timetable.
type AirType string

// Air type codes accepted by the parsers in this package. Any other value
// causes the entry to be dropped.
const (
	AirTypeJPN = AirType("JPN")
	AirTypeSUB = AirType("SUB")
	AirTypeDUB = AirType("DUB")
)
|
||||
|
||||
type Entry struct {
|
||||
Title string
|
||||
AnimeURL string
|
||||
ImageURL string
|
||||
EpisodeText string
|
||||
AirType AirType
|
||||
LocalTime string
|
||||
DateLabel string
|
||||
Weekday time.Weekday
|
||||
}
|
||||
|
||||
type WeekSchedule struct {
|
||||
Year int
|
||||
Week int
|
||||
Days map[time.Weekday][]Entry
|
||||
}
|
||||
|
||||
// HTTPStatusError is returned when animeschedule.net answers with a status
// other than 200 OK.
type HTTPStatusError struct {
	// StatusCode is the HTTP status code of the response.
	StatusCode int
	// URL is the URL that was requested.
	URL string
	// ContentType is the trimmed Content-Type header of the response.
	ContentType string
	// BodyPreview is a size-limited, whitespace-collapsed prefix of the
	// response body, kept for diagnostics.
	BodyPreview string
}

// Error implements the error interface. Only the status code and URL are
// included; ContentType and BodyPreview stay available on the struct.
func (e *HTTPStatusError) Error() string {
	const layout = "unexpected status %d for %s"
	status, target := e.StatusCode, e.URL
	return fmt.Sprintf(layout, status, target)
}
|
||||
|
||||
// Patterns that pull the numeric `week` and `year` query parameters out of a
// URL string; matching is case-insensitive.
var (
	reWeek = regexp.MustCompile(`(?i)[?&]week=(\d+)`)
	reYear = regexp.MustCompile(`(?i)[?&]year=(\d+)`)
)
|
||||
|
||||
// scheduleLocation returns the time zone in which air times are presented.
// It is the host's local zone (for example CEST), so that the schedule lines
// up with the environment the program runs in.
func scheduleLocation() *time.Location {
	hostZone := time.Local
	return hostZone
}
|
||||
|
||||
func FetchWeek(ctx context.Context, httpClient *http.Client, year int, week int) (WeekSchedule, error) {
|
||||
debugScrape := os.Getenv("ANIMESCHEDULE_DEBUG_SCRAPE") == "1"
|
||||
apiToken := strings.TrimSpace(os.Getenv("ANIMESCHEDULE_API_TOKEN"))
|
||||
|
||||
if apiToken != "" {
|
||||
return fetchWeekAPI(ctx, httpClient, apiToken, year, week)
|
||||
}
|
||||
|
||||
u, _ := url.Parse("https://animeschedule.net/")
|
||||
q := u.Query()
|
||||
if year > 0 {
|
||||
q.Set("year", strconv.Itoa(year))
|
||||
}
|
||||
if week > 0 {
|
||||
q.Set("week", strconv.Itoa(week))
|
||||
}
|
||||
u.RawQuery = q.Encode()
|
||||
|
||||
doc, finalURL, err := fetchDocument(ctx, httpClient, u.String())
|
||||
if err != nil {
|
||||
return WeekSchedule{}, err
|
||||
}
|
||||
|
||||
resolvedYear := year
|
||||
resolvedWeek := week
|
||||
if resolvedWeek == 0 {
|
||||
if match := reWeek.FindStringSubmatch(finalURL); len(match) == 2 {
|
||||
if v, err := strconv.Atoi(match[1]); err == nil {
|
||||
resolvedWeek = v
|
||||
}
|
||||
}
|
||||
}
|
||||
if resolvedYear == 0 {
|
||||
if match := reYear.FindStringSubmatch(finalURL); len(match) == 2 {
|
||||
if v, err := strconv.Atoi(match[1]); err == nil {
|
||||
resolvedYear = v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out := WeekSchedule{
|
||||
Year: resolvedYear,
|
||||
Week: resolvedWeek,
|
||||
Days: map[time.Weekday][]Entry{},
|
||||
}
|
||||
|
||||
doc.Find(".timetable-column").Each(func(_ int, column *goquery.Selection) {
|
||||
h1 := column.Find("h1.timetable-column-date").First()
|
||||
rawHeader := strings.Join(strings.Fields(strings.TrimSpace(h1.Text())), " ")
|
||||
weekday := parseWeekdayFromHeader(rawHeader)
|
||||
if weekday == nil {
|
||||
return
|
||||
}
|
||||
|
||||
dayEntries := make([]Entry, 0, 16)
|
||||
|
||||
column.Find(".timetable-column-show").Each(func(_ int, show *goquery.Selection) {
|
||||
if selectionHasClass(show, "filtered-out") {
|
||||
return
|
||||
}
|
||||
|
||||
a := show.Find("a.show-link").First()
|
||||
title := strings.TrimSpace(a.Find("h2").First().Text())
|
||||
if title == "" {
|
||||
title = strings.TrimSpace(a.Text())
|
||||
}
|
||||
href, _ := a.Attr("href")
|
||||
animeURL := absolutizeURL("https://animeschedule.net", href)
|
||||
|
||||
imageURL := ""
|
||||
if img := a.Find("img").First(); img != nil && img.Length() == 1 {
|
||||
if src, ok := img.Attr("data-src"); ok {
|
||||
imageURL = strings.TrimSpace(src)
|
||||
}
|
||||
if imageURL == "" {
|
||||
if src, ok := img.Attr("src"); ok && !strings.HasPrefix(src, "data:") {
|
||||
imageURL = strings.TrimSpace(src)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
meta := show.Find("h3.time-bar").First()
|
||||
metaText := strings.Join(strings.Fields(strings.TrimSpace(meta.Text())), " ")
|
||||
|
||||
epText, _, airType := parseMeta(metaText)
|
||||
localTime, rawDatetime, rawRenderedTime := parseLocalTime(meta)
|
||||
if title == "" || animeURL == "" || localTime == "" || airType == "" {
|
||||
return
|
||||
}
|
||||
|
||||
if debugScrape {
|
||||
observability.LogJSON(
|
||||
observability.LogLevelInfo,
|
||||
"animeschedule_scrape_time",
|
||||
"integrations/animeschedule",
|
||||
"scraped time info for entry",
|
||||
map[string]any{
|
||||
"title": title,
|
||||
"anime_url": animeURL,
|
||||
"meta_text": metaText,
|
||||
"raw_datetime": rawDatetime,
|
||||
"raw_renderedTime": rawRenderedTime,
|
||||
"local_time": localTime,
|
||||
"week": resolvedWeek,
|
||||
"year": resolvedYear,
|
||||
},
|
||||
nil,
|
||||
)
|
||||
}
|
||||
|
||||
dayEntries = append(dayEntries, Entry{
|
||||
Title: title,
|
||||
AnimeURL: animeURL,
|
||||
ImageURL: imageURL,
|
||||
EpisodeText: epText,
|
||||
AirType: airType,
|
||||
LocalTime: localTime,
|
||||
DateLabel: rawHeader,
|
||||
Weekday: *weekday,
|
||||
})
|
||||
})
|
||||
|
||||
if len(dayEntries) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
out.Days[*weekday] = append(out.Days[*weekday], dayEntries...)
|
||||
})
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func selectionHasClass(selection *goquery.Selection, className string) bool {
|
||||
raw, ok := selection.Attr("class")
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
for _, class := range strings.Fields(raw) {
|
||||
if class == className {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// parseWeekdayFromHeader finds an English weekday name inside header,
// ignoring case, and returns a pointer to the matching time.Weekday. Names
// are tried from Monday through Sunday and the first hit wins. It returns
// nil when header contains no weekday name.
func parseWeekdayFromHeader(header string) *time.Weekday {
	lowered := strings.ToLower(header)
	mentions := func(name string) bool {
		return strings.Contains(lowered, name)
	}

	var day time.Weekday
	switch {
	case mentions("monday"):
		day = time.Monday
	case mentions("tuesday"):
		day = time.Tuesday
	case mentions("wednesday"):
		day = time.Wednesday
	case mentions("thursday"):
		day = time.Thursday
	case mentions("friday"):
		day = time.Friday
	case mentions("saturday"):
		day = time.Saturday
	case mentions("sunday"):
		day = time.Sunday
	default:
		return nil
	}
	return &day
}
|
||||
|
||||
func parseMeta(meta string) (episodeText string, localTime string, airType AirType) {
|
||||
// Example: "Ep 8 04:00 PM SUB"
|
||||
parts := strings.Fields(meta)
|
||||
if len(parts) < 4 {
|
||||
return "", "", ""
|
||||
}
|
||||
|
||||
// Find the time token(s)
|
||||
var timeIdx int = -1
|
||||
for i := 0; i < len(parts); i++ {
|
||||
if strings.Contains(parts[i], ":") && len(parts[i]) >= 4 {
|
||||
timeIdx = i
|
||||
break
|
||||
}
|
||||
}
|
||||
if timeIdx == -1 || timeIdx+2 >= len(parts) {
|
||||
return "", "", ""
|
||||
}
|
||||
|
||||
localTime = strings.TrimSpace(parts[timeIdx] + " " + parts[timeIdx+1])
|
||||
typeRaw := strings.TrimSpace(parts[timeIdx+2])
|
||||
switch strings.ToUpper(typeRaw) {
|
||||
case "JPN":
|
||||
airType = AirTypeJPN
|
||||
case "SUB":
|
||||
airType = AirTypeSUB
|
||||
case "DUB":
|
||||
airType = AirTypeDUB
|
||||
default:
|
||||
return "", "", ""
|
||||
}
|
||||
|
||||
episodeText = strings.TrimSpace(strings.Join(parts[:timeIdx], " "))
|
||||
return episodeText, localTime, airType
|
||||
}
|
||||
|
||||
func parseLocalTime(meta *goquery.Selection) (localTime string, rawDatetime string, rawRenderedTime string) {
|
||||
// AnimeSchedule updates rendered time client-side based on the viewer's timezone.
|
||||
// The server-rendered HTML can show a different time string, so we prefer the `datetime`
|
||||
// attribute when available.
|
||||
t := meta.Find("time").First()
|
||||
if t.Length() == 1 {
|
||||
rawRenderedTime = strings.Join(strings.Fields(strings.TrimSpace(t.Text())), " ")
|
||||
if raw, ok := t.Attr("datetime"); ok {
|
||||
rawDatetime = raw
|
||||
if parsed, err := time.Parse(time.RFC3339, rawDatetime); err == nil {
|
||||
localTime = parsed.In(scheduleLocation()).Format("03:04 PM")
|
||||
return localTime, rawDatetime, rawRenderedTime
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fallback := strings.Join(strings.Fields(strings.TrimSpace(meta.Text())), " ")
|
||||
_, parsedTime, _ := parseMeta(fallback)
|
||||
return parsedTime, "", ""
|
||||
}
|
||||
|
||||
// absolutizeURL turns href into an absolute URL relative to base.
//
//   - A blank href yields "".
//   - An href that already starts with "http://" or "https://" is returned
//     unchanged (after trimming surrounding whitespace).
//   - A protocol-relative href ("//host/path") keeps its own host and
//     inherits the scheme of base, defaulting to https when base has none.
//   - Anything else is appended to base with exactly one "/" between them.
func absolutizeURL(base string, href string) string {
	href = strings.TrimSpace(href)
	switch {
	case href == "":
		return ""
	case strings.HasPrefix(href, "http://"), strings.HasPrefix(href, "https://"):
		return href
	case strings.HasPrefix(href, "//"):
		// Network-path reference: joining it onto base would wrongly turn the
		// foreign host name into a path segment of base.
		scheme := "https"
		if s, _, ok := strings.Cut(base, "://"); ok && s != "" {
			scheme = s
		}
		return scheme + ":" + href
	}
	return strings.TrimRight(base, "/") + "/" + strings.TrimLeft(href, "/")
}
|
||||
|
||||
func addCommonHeaders(request *http.Request) {
|
||||
request.Header.Set("User-Agent", useragent.Chrome135)
|
||||
request.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8")
|
||||
request.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
||||
request.Header.Set("Referer", "https://animeschedule.net/")
|
||||
request.Header.Set("Cache-Control", "no-cache")
|
||||
}
|
||||
|
||||
func fetchDocument(ctx context.Context, httpClient *http.Client, url string) (*goquery.Document, string, error) {
|
||||
client := httpClient
|
||||
if client == nil {
|
||||
client = http.DefaultClient
|
||||
}
|
||||
|
||||
request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return nil, url, fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
addCommonHeaders(request)
|
||||
|
||||
response, err := client.Do(request)
|
||||
if err != nil {
|
||||
return nil, url, fmt.Errorf("request failed: %w", err)
|
||||
}
|
||||
defer func() { _ = response.Body.Close() }()
|
||||
|
||||
if response.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(io.LimitReader(response.Body, limits.Bytes512))
|
||||
return nil, url, &HTTPStatusError{
|
||||
StatusCode: response.StatusCode,
|
||||
URL: url,
|
||||
ContentType: strings.TrimSpace(response.Header.Get("Content-Type")),
|
||||
BodyPreview: strings.Join(strings.Fields(strings.TrimSpace(string(body))), " "),
|
||||
}
|
||||
}
|
||||
|
||||
document, err := goquery.NewDocumentFromReader(response.Body)
|
||||
if err != nil {
|
||||
return nil, url, fmt.Errorf("failed to parse html: %w", err)
|
||||
}
|
||||
|
||||
return document, response.Request.URL.String(), nil
|
||||
}
|
||||
|
||||
// timetableAnimeAPI is the subset of one timetable item of the AnimeSchedule
// v3 API response that this package decodes.
type timetableAnimeAPI struct {
	// Title is the show title; items with a blank title are skipped.
	Title string `json:"title"`
	// Route is the path segment appended to https://animeschedule.net/anime/.
	Route string `json:"route"`
	// EpisodeDate is the instant the episode airs.
	EpisodeDate time.Time `json:"episodeDate"`
	// EpisodeNumber is the number of the airing episode.
	EpisodeNumber int `json:"episodeNumber"`
	// SubtractedEpisodeNumber, when positive and lower than EpisodeNumber,
	// makes the episode label a range ("Ep a-b").
	SubtractedEpisodeNumber int `json:"subtractedEpisodeNumber"`
	// AirType is the release variant; it is upper-cased before comparison.
	AirType string `json:"airType"`
	// ImageVersionRoute is the image path appended to the image host prefix.
	ImageVersionRoute string `json:"imageVersionRoute"`
}
|
||||
|
||||
func fetchWeekAPI(ctx context.Context, httpClient *http.Client, token string, year int, week int) (WeekSchedule, error) {
|
||||
client := httpClient
|
||||
if client == nil {
|
||||
client = http.DefaultClient
|
||||
}
|
||||
|
||||
u, _ := url.Parse("https://animeschedule.net/api/v3/timetables/sub")
|
||||
q := u.Query()
|
||||
if year > 0 && week > 0 {
|
||||
q.Set("year", strconv.Itoa(year))
|
||||
q.Set("week", strconv.Itoa(week))
|
||||
}
|
||||
tz := strings.TrimSpace(os.Getenv("ANIMESCHEDULE_TZ"))
|
||||
if tz == "" {
|
||||
tz = "Europe/Copenhagen"
|
||||
}
|
||||
q.Set("tz", tz)
|
||||
u.RawQuery = q.Encode()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
|
||||
if err != nil {
|
||||
return WeekSchedule{}, fmt.Errorf("create api request: %w", err)
|
||||
}
|
||||
req.Header.Set("Authorization", "Bearer "+token)
|
||||
req.Header.Set("Accept", "application/json")
|
||||
req.Header.Set("User-Agent", useragent.Chrome135)
|
||||
|
||||
res, err := client.Do(req)
|
||||
if err != nil {
|
||||
return WeekSchedule{}, fmt.Errorf("api request failed: %w", err)
|
||||
}
|
||||
defer func() { _ = res.Body.Close() }()
|
||||
|
||||
if res.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(io.LimitReader(res.Body, limits.Bytes512))
|
||||
return WeekSchedule{}, &HTTPStatusError{
|
||||
StatusCode: res.StatusCode,
|
||||
URL: u.String(),
|
||||
ContentType: strings.TrimSpace(res.Header.Get("Content-Type")),
|
||||
BodyPreview: strings.Join(strings.Fields(strings.TrimSpace(string(body))), " "),
|
||||
}
|
||||
}
|
||||
|
||||
var payload []timetableAnimeAPI
|
||||
if err := json.NewDecoder(res.Body).Decode(&payload); err != nil {
|
||||
return WeekSchedule{}, fmt.Errorf("decode timetables api: %w", err)
|
||||
}
|
||||
|
||||
resolvedYear := year
|
||||
resolvedWeek := week
|
||||
if resolvedYear == 0 || resolvedWeek == 0 {
|
||||
resolvedYear, resolvedWeek = time.Now().In(time.Local).ISOWeek()
|
||||
}
|
||||
|
||||
out := WeekSchedule{
|
||||
Year: resolvedYear,
|
||||
Week: resolvedWeek,
|
||||
Days: map[time.Weekday][]Entry{},
|
||||
}
|
||||
|
||||
for _, item := range payload {
|
||||
title := strings.TrimSpace(item.Title)
|
||||
if title == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
episodeNumber := item.EpisodeNumber
|
||||
subtracted := item.SubtractedEpisodeNumber
|
||||
episodeText := ""
|
||||
switch {
|
||||
case subtracted > 0 && subtracted < episodeNumber:
|
||||
episodeText = fmt.Sprintf("Ep %d-%d", subtracted, episodeNumber)
|
||||
case episodeNumber > 0:
|
||||
episodeText = fmt.Sprintf("Ep %d", episodeNumber)
|
||||
default:
|
||||
episodeText = "Ep ?"
|
||||
}
|
||||
|
||||
airType := AirType(strings.ToUpper(strings.TrimSpace(item.AirType)))
|
||||
if airType != AirTypeJPN && airType != AirTypeSUB && airType != AirTypeDUB {
|
||||
continue
|
||||
}
|
||||
|
||||
episodeTime := item.EpisodeDate.In(time.Local)
|
||||
weekday := episodeTime.Weekday()
|
||||
localTime := episodeTime.Format("03:04 PM")
|
||||
|
||||
imageURL := ""
|
||||
if strings.TrimSpace(item.ImageVersionRoute) != "" {
|
||||
imageURL = "https://img.animeschedule.net/production/assets/public/img/" + strings.TrimLeft(strings.TrimSpace(item.ImageVersionRoute), "/")
|
||||
}
|
||||
|
||||
animeURL := ""
|
||||
if strings.TrimSpace(item.Route) != "" {
|
||||
animeURL = "https://animeschedule.net/anime/" + strings.TrimLeft(strings.TrimSpace(item.Route), "/")
|
||||
}
|
||||
|
||||
out.Days[weekday] = append(out.Days[weekday], Entry{
|
||||
Title: title,
|
||||
AnimeURL: animeURL,
|
||||
ImageURL: imageURL,
|
||||
EpisodeText: episodeText,
|
||||
AirType: airType,
|
||||
LocalTime: localTime,
|
||||
Weekday: weekday,
|
||||
})
|
||||
}
|
||||
|
||||
observability.LogJSON(
|
||||
observability.LogLevelInfo,
|
||||
"animeschedule_api_timetables_loaded",
|
||||
"integrations/animeschedule",
|
||||
"loaded timetable entries via api",
|
||||
map[string]any{
|
||||
"count": len(payload),
|
||||
"year": out.Year,
|
||||
"week": out.Week,
|
||||
"tz": tz,
|
||||
},
|
||||
nil,
|
||||
)
|
||||
|
||||
return out, nil
|
||||
}
|
||||
Reference in New Issue
Block a user