diff --git a/Dockerfile b/Dockerfile index 4b27a04..24041b6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,7 +32,6 @@ RUN mkdir -p /app/data COPY --from=builder /app/main_server . COPY --from=builder /app/static ./static COPY --from=builder /app/migrations ./migrations -COPY --from=builder /app/data ./data # Expose the application port EXPOSE 3000 diff --git a/cmd/server/main.go b/cmd/server/main.go index 7bdad88..3c96946 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -16,7 +16,6 @@ import ( "mal/internal/features/auth" "mal/internal/jikan" "mal/internal/server" - "mal/internal/watchorder" "mal/internal/worker" ) @@ -40,22 +39,7 @@ func main() { queries := database.New(db) authService := auth.NewService(queries) - - watchOrderFile := os.Getenv("WATCH_ORDER_FILE") - if watchOrderFile == "" { - watchOrderFile = "./data/watch_order.json" - } - - watchOrderStore := watchorder.EmptyStore() - loadedStore, err := watchorder.LoadFromFile(watchOrderFile) - if err != nil { - log.Printf("watch-order: failed to load %s: %v", watchOrderFile, err) - } else { - watchOrderStore = loadedStore - log.Printf("watch-order: loaded %d entries from %s", watchOrderStore.Len(), watchOrderFile) - } - - jikanClient := jikan.NewClient(queries, watchOrderStore) + jikanClient := jikan.NewClient(queries) // Start background workers relationsWorker := worker.New(queries, jikanClient) diff --git a/cmd/watchorder/main.go b/cmd/watchorder/main.go deleted file mode 100644 index e266c60..0000000 --- a/cmd/watchorder/main.go +++ /dev/null @@ -1,243 +0,0 @@ -package main - -import ( - "context" - "encoding/json" - "flag" - "fmt" - "net/http" - "os" - "path/filepath" - "regexp" - "sort" - "strconv" - "strings" - "time" - - "github.com/PuerkitoBio/goquery" - - "mal/internal/watchorder" -) - -const defaultUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36" - -var idPattern = regexp.MustCompile(`/id/(\d+)`) - -type seedPayload struct { - IDs []int `json:"ids"` -} - -type outputPayload struct { - Data map[string][]watchorder.WatchOrderEntry `json:"data"` -} - -func parseRootID(url string) (int, error) { - match := idPattern.FindStringSubmatch(url) - if len(match) != 2 { - return 0, fmt.Errorf("invalid watch-order url: %s", url) - } - - id, err := strconv.Atoi(match[1]) - if err != nil { - return 0, fmt.Errorf("invalid watch-order id in url %s: %w", url, err) - } - - return id, nil -} - -func fetchDocument(ctx context.Context, client *http.Client, url string) (*goquery.Document, error) { - req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) - if err != nil { - return nil, err - } - - req.Header.Set("User-Agent", defaultUserAgent) - req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8") - req.Header.Set("Accept-Language", "en-US,en;q=0.9") - req.Header.Set("Referer", "https://chiaki.site/") - req.Header.Set("Cache-Control", "no-cache") - - resp, err := client.Do(req) - if err != nil { - return nil, err - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("status %d", resp.StatusCode) - } - - return goquery.NewDocumentFromReader(resp.Body) -} - -func parseRows(doc *goquery.Document) []watchorder.WatchOrderEntry { - entries := make([]watchorder.WatchOrderEntry, 0) - - doc.Find("tr[data-id]").Each(func(_ int, selection *goquery.Selection) { - rawID, ok := selection.Attr("data-id") - if !ok { - return - } - - id, err := strconv.Atoi(strings.TrimSpace(rawID)) - if err != nil { - return - } - - typeLabel := "" - rawTypeID, hasType := selection.Attr("data-type") - if hasType { - typeID := strings.TrimSpace(rawTypeID) - typeLabel = mapTypeByID(doc, typeID) - } - - title := strings.TrimSpace(selection.Find(".wo_title").First().Text()) - titleAlt := strings.TrimSpace(selection.Find(".uk-text-small").First().Text()) - - entries = append(entries, watchorder.WatchOrderEntry{ - ID: id, - Type: typeLabel, - Title: title, - TitleAlt: titleAlt, - }) - }) - - return entries -} - -func mapTypeByID(doc *goquery.Document, typeID string) string { - label := "" - doc.Find("#wo_type_filter label").EachWithBreak(func(_ int, selection *goquery.Selection) bool { - input := selection.Find("input[type='checkbox']") - value, ok := input.Attr("value") - if ok && strings.TrimSpace(value) == typeID { - label = strings.TrimSpace(selection.Text()) - return false - } - return true - }) - - return label -} - -func parseIDList(value string) ([]int, error) { - if strings.TrimSpace(value) == "" { - return []int{}, nil - } - - parts := strings.Split(value, ",") - ids := make([]int, 0, len(parts)) - for _, part := range parts { - trimmed := strings.TrimSpace(part) - if trimmed == "" { - continue - } - - id, err := strconv.Atoi(trimmed) - if err != nil { - return nil, fmt.Errorf("invalid id %q: %w", trimmed, err) - } - ids = append(ids, id) - } - - return ids, nil -} - -func loadSeedIDs(path string) ([]int, error) { - if strings.TrimSpace(path) == "" { - return []int{}, nil - } - - content, err := os.ReadFile(path) - if err != nil { - return nil, err - } - - payload := seedPayload{} - if err := json.Unmarshal(content, &payload); err != nil { - return nil, err - } - - return payload.IDs, nil -} - -func sortAndUnique(ids []int) []int { - seen := make(map[int]bool) - unique := make([]int, 0, len(ids)) - for _, id := range ids { - if id <= 0 || seen[id] { - continue - } - seen[id] = true - unique = append(unique, id) - } - - sort.Ints(unique) - return unique -} - -func main() { - outputPath := flag.String("out", "data/watch_order.json", "output json file path") - seedPath := flag.String("seed", "tmp/watch_order_seed_ids.json", "seed json file path with {\"ids\": [...]} ") - idList := flag.String("ids", "", "comma-separated MAL ids") - flag.Parse() - - idsFromFlag, err := parseIDList(*idList) - if err != nil { - fmt.Fprintf(os.Stderr, "error: %v\n", err) - os.Exit(1) - } - - idsFromSeed, err := loadSeedIDs(*seedPath) - if err != nil { - fmt.Fprintf(os.Stderr, "error: failed to load seed ids: %v\n", err) - os.Exit(1) - } - - allIDs := sortAndUnique(append(idsFromSeed, idsFromFlag...)) - if len(allIDs) == 0 { - fmt.Fprintln(os.Stderr, "error: no ids provided (use -seed and/or -ids)") - os.Exit(1) - } - - httpClient := &http.Client{Timeout: 12 * time.Second} - ctx := context.Background() - - data := make(map[string][]watchorder.WatchOrderEntry, len(allIDs)) - for _, id := range allIDs { - url := fmt.Sprintf("https://chiaki.site/?/tools/watch_order/id/%d", id) - if _, err := parseRootID(url); err != nil { - continue - } - - doc, err := fetchDocument(ctx, httpClient, url) - if err != nil { - continue - } - - if doc.Find("#wo_list").Length() == 0 { - continue - } - - data[strconv.Itoa(id)] = parseRows(doc) - } - - encoded, err := json.Marshal(outputPayload{Data: data}) - if err != nil { - fmt.Fprintf(os.Stderr, "error: failed to encode output: %v\n", err) - os.Exit(1) - } - - outputDirectory := filepath.Dir(*outputPath) - if err := os.MkdirAll(outputDirectory, 0o755); err != nil { - fmt.Fprintf(os.Stderr, "error: failed to create data directory: %v\n", err) - os.Exit(1) - } - - if err := os.WriteFile(*outputPath, encoded, 0o644); err != nil { - fmt.Fprintf(os.Stderr, "error: failed to write output %q: %v\n", *outputPath, err) - os.Exit(1) - } - - fmt.Printf("wrote watch-order dataset for %d ids to %s\n", len(data), *outputPath) -} diff --git a/data/watch_order.json b/data/watch_order.json deleted file mode 100644 index 171b747..0000000 --- a/data/watch_order.json +++ /dev/null @@ -1 +0,0 @@ -{"data":{"34572":[{"id":33950,"type":"OVA","title":"Black Clover: Jump Festa 2016 Special"},{"id":34572,"type":"TV","title":"Black Clover"},{"id":39864,"type":"Special","title":"Petit Clover Advance"},{"id":38768,"type":"Special","title":"Black Clover: Jump Festa 2018 Special","title_alt":"Black Clover: The All Magic Knights Thanksgiving Festa"},{"id":40031,"type":"ONA","title":"Mugyutto! Black Clover","title_alt":"Squishy! Black Clover"},{"id":48585,"type":"Movie","title":"Black Clover: Mahou Tei no Ken","title_alt":"Black Clover: Sword of the Wizard King"},{"id":61967,"type":"TV","title":"Black Clover 2nd Season","title_alt":"Black Clover Season 2"}],"37430":[{"id":37430,"type":"TV","title":"Tensei shitara Slime Datta Ken","title_alt":"That Time I Got Reincarnated as a Slime"},{"id":39607,"type":"TV Special","title":"Tensei shitara Slime Datta Ken: Kanwa - Veldora Nikki","title_alt":"That Time I Got Reincarnated as a Slime: Tales - Veldora's Journal"},{"id":38793,"type":"OVA","title":"Tensei shitara Slime Datta Ken OVA","title_alt":"That Time I Got Reincarnated as a Slime OAD"},{"id":45753,"type":"TV Special","title":"Tensei shitara Slime Datta Ken: Kanwa - Hinata Sakaguchi","title_alt":"That Time I Got Reincarnated as a Slime Season 2: Digression - Hinata Sakaguchi"},{"id":39551,"type":"TV","title":"Tensei shitara Slime Datta Ken 2nd Season","title_alt":"That Time I Got Reincarnated as a Slime Season 2"},{"id":41488,"type":"TV","title":"Tensura Nikki: Tensei shitara Slime Datta Ken","title_alt":"The Slime Diaries"},{"id":49318,"type":"TV Special","title":"Tensei shitara Slime Datta Ken 2nd Season: Kanwa - Veldora Nikki 2","title_alt":"That Time I Got Reincarnated as a Slime Season 2: Tales - Veldora's Journal 2"},{"id":41487,"type":"TV","title":"Tensei shitara Slime Datta Ken 2nd Season Part 2","title_alt":"That Time I Got Reincarnated as a Slime Season 2 Part 2"},{"id":51309,"type":"ONA","title":"Sukuwareru Ramiris"},{"id":54050,"type":"CM","title":"He Wei Dao x Guan Yu Wo Zhuan Sheng Bian Cheng Shi Lai Mu Zhe Dang Shi"},{"id":49877,"type":"Movie","title":"Tensei shitara Slime Datta Ken Movie: Guren no Kizuna-hen","title_alt":"That Time I Got Reincarnated as a Slime: The Movie - Scarlet Bond"},{"id":55720,"type":"ONA","title":"Genjitsu no Rimuru: Sunshine in the Slime"},{"id":57434,"type":"Special","title":"Tensei shitara Slime Datta Ken Movie: Guren no Kizuna-hen Specials"},{"id":54565,"type":"ONA","title":"Tensei shitara Slime Datta Ken: Coleus no Yume","title_alt":"That Time I Got Reincarnated as a Slime: Visions of Coleus"},{"id":58592,"type":"TV Special","title":"Tensei shitara Slime Datta Ken 3rd Season: Kanwa - Diablo Nikki","title_alt":"That Time I Got Reincarnated as a Slime Season 3: Digression - Diablo's Journal"},{"id":53580,"type":"TV","title":"Tensei shitara Slime Datta Ken 3rd Season","title_alt":"That Time I Got Reincarnated as a Slime Season 3"},{"id":59493,"type":"TV Special","title":"Tensei shitara Slime Datta Ken: Kanwa - Luminous Memories","title_alt":"That Time I Got Reincarnated as a Slime Season 3: Digression - Luminous Memories"},{"id":59971,"type":"Movie","title":"Tensei shitara Slime Datta Ken Movie 2: Soukai no Namida-hen","title_alt":"That Time I Got Reincarnated as a Slime the Movie: Tears of the Azure Sea"},{"id":59970,"type":"TV","title":"Tensei shitara Slime Datta Ken 4th Season","title_alt":"That Time I Got Reincarnated as a Slime Season 4"},{"id":63129,"type":"TV","title":"Tensei shitara Slime Datta Ken 4th Season Part 2","title_alt":"That Time I Got Reincarnated as a Slime Season 4 Part 2"}],"39551":[{"id":37430,"type":"TV","title":"Tensei shitara Slime Datta Ken","title_alt":"That Time I Got Reincarnated as a Slime"},{"id":39607,"type":"TV Special","title":"Tensei shitara Slime Datta Ken: Kanwa - Veldora Nikki","title_alt":"That Time I Got Reincarnated as a Slime: Tales - Veldora's Journal"},{"id":38793,"type":"OVA","title":"Tensei shitara Slime Datta Ken OVA","title_alt":"That Time I Got Reincarnated as a Slime OAD"},{"id":45753,"type":"TV Special","title":"Tensei shitara Slime Datta Ken: Kanwa - Hinata Sakaguchi","title_alt":"That Time I Got Reincarnated as a Slime Season 2: Digression - Hinata Sakaguchi"},{"id":39551,"type":"TV","title":"Tensei shitara Slime Datta Ken 2nd Season","title_alt":"That Time I Got Reincarnated as a Slime Season 2"},{"id":41488,"type":"TV","title":"Tensura Nikki: Tensei shitara Slime Datta Ken","title_alt":"The Slime Diaries"},{"id":49318,"type":"TV Special","title":"Tensei shitara Slime Datta Ken 2nd Season: Kanwa - Veldora Nikki 2","title_alt":"That Time I Got Reincarnated as a Slime Season 2: Tales - Veldora's Journal 2"},{"id":41487,"type":"TV","title":"Tensei shitara Slime Datta Ken 2nd Season Part 2","title_alt":"That Time I Got Reincarnated as a Slime Season 2 Part 2"},{"id":51309,"type":"ONA","title":"Sukuwareru Ramiris"},{"id":54050,"type":"CM","title":"He Wei Dao x Guan Yu Wo Zhuan Sheng Bian Cheng Shi Lai Mu Zhe Dang Shi"},{"id":49877,"type":"Movie","title":"Tensei shitara Slime Datta Ken Movie: Guren no Kizuna-hen","title_alt":"That Time I Got Reincarnated as a Slime: The Movie - Scarlet Bond"},{"id":55720,"type":"ONA","title":"Genjitsu no Rimuru: Sunshine in the Slime"},{"id":57434,"type":"Special","title":"Tensei shitara Slime Datta Ken Movie: Guren no Kizuna-hen Specials"},{"id":54565,"type":"ONA","title":"Tensei shitara Slime Datta Ken: Coleus no Yume","title_alt":"That Time I Got Reincarnated as a Slime: Visions of Coleus"},{"id":58592,"type":"TV Special","title":"Tensei shitara Slime Datta Ken 3rd Season: Kanwa - Diablo Nikki","title_alt":"That Time I Got Reincarnated as a Slime Season 3: Digression - Diablo's Journal"},{"id":53580,"type":"TV","title":"Tensei shitara Slime Datta Ken 3rd Season","title_alt":"That Time I Got Reincarnated as a Slime Season 3"},{"id":59493,"type":"TV Special","title":"Tensei shitara Slime Datta Ken: Kanwa - Luminous Memories","title_alt":"That Time I Got Reincarnated as a Slime Season 3: Digression - Luminous Memories"},{"id":59971,"type":"Movie","title":"Tensei shitara Slime Datta Ken Movie 2: Soukai no Namida-hen","title_alt":"That Time I Got Reincarnated as a Slime the Movie: Tears of the Azure Sea"},{"id":59970,"type":"TV","title":"Tensei shitara Slime Datta Ken 4th Season","title_alt":"That Time I Got Reincarnated as a Slime Season 4"},{"id":63129,"type":"TV","title":"Tensei shitara Slime Datta Ken 4th Season Part 2","title_alt":"That Time I Got Reincarnated as a Slime Season 4 Part 2"}],"40748":[{"id":38777,"type":"PV","title":"Jujutsu Kaisen Official PVs"},{"id":40748,"type":"TV","title":"Jujutsu Kaisen"},{"id":52558,"type":"Music","title":"Vivid Vice"},{"id":48561,"type":"Movie","title":"Jujutsu Kaisen 0 Movie","title_alt":"Jujutsu Kaisen 0"},{"id":51009,"type":"TV","title":"Jujutsu Kaisen 2nd Season","title_alt":"Jujutsu Kaisen Season 2"},{"id":56243,"type":"TV Special","title":"Jujutsu Kaisen 2nd Season Recaps","title_alt":"Jujutsu Kaisen Season 2 Recaps"},{"id":59654,"type":"Movie","title":"Jujutsu Kaisen: Kaigyoku/Gyokusetsu","title_alt":"Jujutsu Kaisen: Hidden Inventory/Premature Death"},{"id":62392,"type":"Movie","title":"Jujutsu Kaisen Movie: Shibuya Jihen Tokubetsu Henshuu-ban x Shimetsu Kaiyuu Senkou Jouei","title_alt":"Jujutsu Kaisen: Execution"},{"id":57658,"type":"TV","title":"Jujutsu Kaisen: Shimetsu Kaiyuu - Zenpen","title_alt":"Jujutsu Kaisen: The Culling Game Part 1"}],"41487":[{"id":37430,"type":"TV","title":"Tensei shitara Slime Datta Ken","title_alt":"That Time I Got Reincarnated as a Slime"},{"id":39607,"type":"TV Special","title":"Tensei shitara Slime Datta Ken: Kanwa - Veldora Nikki","title_alt":"That Time I Got Reincarnated as a Slime: Tales - Veldora's Journal"},{"id":38793,"type":"OVA","title":"Tensei shitara Slime Datta Ken OVA","title_alt":"That Time I Got Reincarnated as a Slime OAD"},{"id":45753,"type":"TV Special","title":"Tensei shitara Slime Datta Ken: Kanwa - Hinata Sakaguchi","title_alt":"That Time I Got Reincarnated as a Slime Season 2: Digression - Hinata Sakaguchi"},{"id":39551,"type":"TV","title":"Tensei shitara Slime Datta Ken 2nd Season","title_alt":"That Time I Got Reincarnated as a Slime Season 2"},{"id":41488,"type":"TV","title":"Tensura Nikki: Tensei shitara Slime Datta Ken","title_alt":"The Slime Diaries"},{"id":49318,"type":"TV Special","title":"Tensei shitara Slime Datta Ken 2nd Season: Kanwa - Veldora Nikki 2","title_alt":"That Time I Got Reincarnated as a Slime Season 2: Tales - Veldora's Journal 2"},{"id":41487,"type":"TV","title":"Tensei shitara Slime Datta Ken 2nd Season Part 2","title_alt":"That Time I Got Reincarnated as a Slime Season 2 Part 2"},{"id":51309,"type":"ONA","title":"Sukuwareru Ramiris"},{"id":54050,"type":"CM","title":"He Wei Dao x Guan Yu Wo Zhuan Sheng Bian Cheng Shi Lai Mu Zhe Dang Shi"},{"id":49877,"type":"Movie","title":"Tensei shitara Slime Datta Ken Movie: Guren no Kizuna-hen","title_alt":"That Time I Got Reincarnated as a Slime: The Movie - Scarlet Bond"},{"id":55720,"type":"ONA","title":"Genjitsu no Rimuru: Sunshine in the Slime"},{"id":57434,"type":"Special","title":"Tensei shitara Slime Datta Ken Movie: Guren no Kizuna-hen Specials"},{"id":54565,"type":"ONA","title":"Tensei shitara Slime Datta Ken: Coleus no Yume","title_alt":"That Time I Got Reincarnated as a Slime: Visions of Coleus"},{"id":58592,"type":"TV Special","title":"Tensei shitara Slime Datta Ken 3rd Season: Kanwa - Diablo Nikki","title_alt":"That Time I Got Reincarnated as a Slime Season 3: Digression - Diablo's Journal"},{"id":53580,"type":"TV","title":"Tensei shitara Slime Datta Ken 3rd Season","title_alt":"That Time I Got Reincarnated as a Slime Season 3"},{"id":59493,"type":"TV Special","title":"Tensei shitara Slime Datta Ken: Kanwa - Luminous Memories","title_alt":"That Time I Got Reincarnated as a Slime Season 3: Digression - Luminous Memories"},{"id":59971,"type":"Movie","title":"Tensei shitara Slime Datta Ken Movie 2: Soukai no Namida-hen","title_alt":"That Time I Got Reincarnated as a Slime the Movie: Tears of the Azure Sea"},{"id":59970,"type":"TV","title":"Tensei shitara Slime Datta Ken 4th Season","title_alt":"That Time I Got Reincarnated as a Slime Season 4"},{"id":63129,"type":"TV","title":"Tensei shitara Slime Datta Ken 4th Season Part 2","title_alt":"That Time I Got Reincarnated as a Slime Season 4 Part 2"}],"53580":[{"id":37430,"type":"TV","title":"Tensei shitara Slime Datta Ken","title_alt":"That Time I Got Reincarnated as a Slime"},{"id":39607,"type":"TV Special","title":"Tensei shitara Slime Datta Ken: Kanwa - Veldora Nikki","title_alt":"That Time I Got Reincarnated as a Slime: Tales - Veldora's Journal"},{"id":38793,"type":"OVA","title":"Tensei shitara Slime Datta Ken OVA","title_alt":"That Time I Got Reincarnated as a Slime OAD"},{"id":45753,"type":"TV Special","title":"Tensei shitara Slime Datta Ken: Kanwa - Hinata Sakaguchi","title_alt":"That Time I Got Reincarnated as a Slime Season 2: Digression - Hinata Sakaguchi"},{"id":39551,"type":"TV","title":"Tensei shitara Slime Datta Ken 2nd Season","title_alt":"That Time I Got Reincarnated as a Slime Season 2"},{"id":41488,"type":"TV","title":"Tensura Nikki: Tensei shitara Slime Datta Ken","title_alt":"The Slime Diaries"},{"id":49318,"type":"TV Special","title":"Tensei shitara Slime Datta Ken 2nd Season: Kanwa - Veldora Nikki 2","title_alt":"That Time I Got Reincarnated as a Slime Season 2: Tales - Veldora's Journal 2"},{"id":41487,"type":"TV","title":"Tensei shitara Slime Datta Ken 2nd Season Part 2","title_alt":"That Time I Got Reincarnated as a Slime Season 2 Part 2"},{"id":51309,"type":"ONA","title":"Sukuwareru Ramiris"},{"id":54050,"type":"CM","title":"He Wei Dao x Guan Yu Wo Zhuan Sheng Bian Cheng Shi Lai Mu Zhe Dang Shi"},{"id":49877,"type":"Movie","title":"Tensei shitara Slime Datta Ken Movie: Guren no Kizuna-hen","title_alt":"That Time I Got Reincarnated as a Slime: The Movie - Scarlet Bond"},{"id":55720,"type":"ONA","title":"Genjitsu no Rimuru: Sunshine in the Slime"},{"id":57434,"type":"Special","title":"Tensei shitara Slime Datta Ken Movie: Guren no Kizuna-hen Specials"},{"id":54565,"type":"ONA","title":"Tensei shitara Slime Datta Ken: Coleus no Yume","title_alt":"That Time I Got Reincarnated as a Slime: Visions of Coleus"},{"id":58592,"type":"TV Special","title":"Tensei shitara Slime Datta Ken 3rd Season: Kanwa - Diablo Nikki","title_alt":"That Time I Got Reincarnated as a Slime Season 3: Digression - Diablo's Journal"},{"id":53580,"type":"TV","title":"Tensei shitara Slime Datta Ken 3rd Season","title_alt":"That Time I Got Reincarnated as a Slime Season 3"},{"id":59493,"type":"TV Special","title":"Tensei shitara Slime Datta Ken: Kanwa - Luminous Memories","title_alt":"That Time I Got Reincarnated as a Slime Season 3: Digression - Luminous Memories"},{"id":59971,"type":"Movie","title":"Tensei shitara Slime Datta Ken Movie 2: Soukai no Namida-hen","title_alt":"That Time I Got Reincarnated as a Slime the Movie: Tears of the Azure Sea"},{"id":59970,"type":"TV","title":"Tensei shitara Slime Datta Ken 4th Season","title_alt":"That Time I Got Reincarnated as a Slime Season 4"},{"id":63129,"type":"TV","title":"Tensei shitara Slime Datta Ken 4th Season Part 2","title_alt":"That Time I Got Reincarnated as a Slime Season 4 Part 2"}],"56876":[{"id":50739,"type":"TV","title":"Otonari no Tenshi-sama ni Itsunomanika Dame Ningen ni Sareteita Ken","title_alt":"The Angel Next Door Spoils Me Rotten"},{"id":56876,"type":"TV","title":"Otonari no Tenshi-sama ni Itsunomanika Dame Ningen ni Sareteita Ken 2","title_alt":"The Angel Next Door Spoils Me Rotten Season 2"}],"59708":[{"id":30813,"type":"PV","title":"Youkoso Jitsuryoku Shijou Shugi no Kyoushitsu e PV"},{"id":35507,"type":"TV","title":"Youkoso Jitsuryoku Shijou Shugi no Kyoushitsu e","title_alt":"Classroom of the Elite"},{"id":51096,"type":"TV","title":"Youkoso Jitsuryoku Shijou Shugi no Kyoushitsu e 2nd Season","title_alt":"Classroom of the Elite II"},{"id":51180,"type":"TV","title":"Youkoso Jitsuryoku Shijou Shugi no Kyoushitsu e 3rd Season","title_alt":"Classroom of the Elite III"},{"id":59708,"type":"TV","title":"Youkoso Jitsuryoku Shijou Shugi no Kyoushitsu e 4th Season: 2-nensei-hen 1 Gakki","title_alt":"Classroom of the Elite IV"}],"59970":[{"id":37430,"type":"TV","title":"Tensei shitara Slime Datta Ken","title_alt":"That Time I Got Reincarnated as a Slime"},{"id":39607,"type":"TV Special","title":"Tensei shitara Slime Datta Ken: Kanwa - Veldora Nikki","title_alt":"That Time I Got Reincarnated as a Slime: Tales - Veldora's Journal"},{"id":38793,"type":"OVA","title":"Tensei shitara Slime Datta Ken OVA","title_alt":"That Time I Got Reincarnated as a Slime OAD"},{"id":45753,"type":"TV Special","title":"Tensei shitara Slime Datta Ken: Kanwa - Hinata Sakaguchi","title_alt":"That Time I Got Reincarnated as a Slime Season 2: Digression - Hinata Sakaguchi"},{"id":39551,"type":"TV","title":"Tensei shitara Slime Datta Ken 2nd Season","title_alt":"That Time I Got Reincarnated as a Slime Season 2"},{"id":41488,"type":"TV","title":"Tensura Nikki: Tensei shitara Slime Datta Ken","title_alt":"The Slime Diaries"},{"id":49318,"type":"TV Special","title":"Tensei shitara Slime Datta Ken 2nd Season: Kanwa - Veldora Nikki 2","title_alt":"That Time I Got Reincarnated as a Slime Season 2: Tales - Veldora's Journal 2"},{"id":41487,"type":"TV","title":"Tensei shitara Slime Datta Ken 2nd Season Part 2","title_alt":"That Time I Got Reincarnated as a Slime Season 2 Part 2"},{"id":51309,"type":"ONA","title":"Sukuwareru Ramiris"},{"id":54050,"type":"CM","title":"He Wei Dao x Guan Yu Wo Zhuan Sheng Bian Cheng Shi Lai Mu Zhe Dang Shi"},{"id":49877,"type":"Movie","title":"Tensei shitara Slime Datta Ken Movie: Guren no Kizuna-hen","title_alt":"That Time I Got Reincarnated as a Slime: The Movie - Scarlet Bond"},{"id":55720,"type":"ONA","title":"Genjitsu no Rimuru: Sunshine in the Slime"},{"id":57434,"type":"Special","title":"Tensei shitara Slime Datta Ken Movie: Guren no Kizuna-hen Specials"},{"id":54565,"type":"ONA","title":"Tensei shitara Slime Datta Ken: Coleus no Yume","title_alt":"That Time I Got Reincarnated as a Slime: Visions of Coleus"},{"id":58592,"type":"TV Special","title":"Tensei shitara Slime Datta Ken 3rd Season: Kanwa - Diablo Nikki","title_alt":"That Time I Got Reincarnated as a Slime Season 3: Digression - Diablo's Journal"},{"id":53580,"type":"TV","title":"Tensei shitara Slime Datta Ken 3rd Season","title_alt":"That Time I Got Reincarnated as a Slime Season 3"},{"id":59493,"type":"TV Special","title":"Tensei shitara Slime Datta Ken: Kanwa - Luminous Memories","title_alt":"That Time I Got Reincarnated as a Slime Season 3: Digression - Luminous Memories"},{"id":59971,"type":"Movie","title":"Tensei shitara Slime Datta Ken Movie 2: Soukai no Namida-hen","title_alt":"That Time I Got Reincarnated as a Slime the Movie: Tears of the Azure Sea"},{"id":59970,"type":"TV","title":"Tensei shitara Slime Datta Ken 4th Season","title_alt":"That Time I Got Reincarnated as a Slime Season 4"},{"id":63129,"type":"TV","title":"Tensei shitara Slime Datta Ken 4th Season Part 2","title_alt":"That Time I Got Reincarnated as a Slime Season 4 Part 2"}],"61316":[{"id":31240,"type":"TV","title":"Re:Zero kara Hajimeru Isekai Seikatsu","title_alt":"Re:ZERO -Starting Life in Another World-"},{"id":33142,"type":"TV Special","title":"Re:Zero kara Hajimeru Break Time"},{"id":33569,"type":"Special","title":"Re:Petit kara Hajimeru Isekai Seikatsu"},{"id":42776,"type":"PV","title":"Re:Zero kara Hajimeru Isekai Seikatsu - Memory Snow Announcement"},{"id":38389,"type":"CM","title":"He Wei Dao x Re:Cong Ling Kaishi de Yi Shijie Shenghuo"},{"id":39921,"type":"Movie","title":"Re:Zero kara Hajimeru Isekai Seikatsu - Memory Snow - Manner Movie"},{"id":36286,"type":"Movie","title":"Re:Zero kara Hajimeru Isekai Seikatsu - Memory Snow","title_alt":"Re:ZERO -Starting Life in Another World- Memory Snow"},{"id":41590,"type":"Movie","title":"Re:Zero kara Hajimeru Isekai Seikatsu - Hyouketsu no Kizuna - Manner Movie"},{"id":38414,"type":"Movie","title":"Re:Zero kara Hajimeru Isekai Seikatsu - Hyouketsu no Kizuna","title_alt":"Re:ZERO -Starting Life in Another World- The Frozen Bond"},{"id":39587,"type":"TV","title":"Re:Zero kara Hajimeru Isekai Seikatsu 2nd Season","title_alt":"Re:ZERO -Starting Life in Another World- Season 2"},{"id":42364,"type":"Special","title":"Re:Zero kara Hajimeru Break Time 2nd Season"},{"id":42203,"type":"TV","title":"Re:Zero kara Hajimeru Isekai Seikatsu 2nd Season Part 2","title_alt":"Re:ZERO -Starting Life in Another World- Season 2 Part 2"},{"id":54857,"type":"TV","title":"Re:Zero kara Hajimeru Isekai Seikatsu 3rd Season","title_alt":"Re:ZERO -Starting Life in Another World- Season 3"},{"id":60012,"type":"Special","title":"Re:Zero kara Hajimeru Break Time 3rd Season"},{"id":63830,"type":"Special","title":"Re:Zero kara Hajimeru Break Time 4th Season"},{"id":61316,"type":"TV","title":"Re:Zero kara Hajimeru Isekai Seikatsu 4th Season","title_alt":"Re:ZERO -Starting Life in Another World- Season 4"}],"61967":[{"id":33950,"type":"OVA","title":"Black Clover: Jump Festa 2016 Special"},{"id":34572,"type":"TV","title":"Black Clover"},{"id":39864,"type":"Special","title":"Petit Clover Advance"},{"id":38768,"type":"Special","title":"Black Clover: Jump Festa 2018 Special","title_alt":"Black Clover: The All Magic Knights Thanksgiving Festa"},{"id":40031,"type":"ONA","title":"Mugyutto! Black Clover","title_alt":"Squishy! Black Clover"},{"id":48585,"type":"Movie","title":"Black Clover: Mahou Tei no Ken","title_alt":"Black Clover: Sword of the Wizard King"},{"id":61967,"type":"TV","title":"Black Clover 2nd Season","title_alt":"Black Clover Season 2"}],"63129":[{"id":37430,"type":"TV","title":"Tensei shitara Slime Datta Ken","title_alt":"That Time I Got Reincarnated as a Slime"},{"id":39607,"type":"TV Special","title":"Tensei shitara Slime Datta Ken: Kanwa - Veldora Nikki","title_alt":"That Time I Got Reincarnated as a Slime: Tales - Veldora's Journal"},{"id":38793,"type":"OVA","title":"Tensei shitara Slime Datta Ken OVA","title_alt":"That Time I Got Reincarnated as a Slime OAD"},{"id":45753,"type":"TV Special","title":"Tensei shitara Slime Datta Ken: Kanwa - Hinata Sakaguchi","title_alt":"That Time I Got Reincarnated as a Slime Season 2: Digression - Hinata Sakaguchi"},{"id":39551,"type":"TV","title":"Tensei shitara Slime Datta Ken 2nd Season","title_alt":"That Time I Got Reincarnated as a Slime Season 2"},{"id":41488,"type":"TV","title":"Tensura Nikki: Tensei shitara Slime Datta Ken","title_alt":"The Slime Diaries"},{"id":49318,"type":"TV Special","title":"Tensei shitara Slime Datta Ken 2nd Season: Kanwa - Veldora Nikki 2","title_alt":"That Time I Got Reincarnated as a Slime Season 2: Tales - Veldora's Journal 2"},{"id":41487,"type":"TV","title":"Tensei shitara Slime Datta Ken 2nd Season Part 2","title_alt":"That Time I Got Reincarnated as a Slime Season 2 Part 2"},{"id":51309,"type":"ONA","title":"Sukuwareru Ramiris"},{"id":54050,"type":"CM","title":"He Wei Dao x Guan Yu Wo Zhuan Sheng Bian Cheng Shi Lai Mu Zhe Dang Shi"},{"id":49877,"type":"Movie","title":"Tensei shitara Slime Datta Ken Movie: Guren no Kizuna-hen","title_alt":"That Time I Got Reincarnated as a Slime: The Movie - Scarlet Bond"},{"id":55720,"type":"ONA","title":"Genjitsu no Rimuru: Sunshine in the Slime"},{"id":57434,"type":"Special","title":"Tensei shitara Slime Datta Ken Movie: Guren no Kizuna-hen Specials"},{"id":54565,"type":"ONA","title":"Tensei shitara Slime Datta Ken: Coleus no Yume","title_alt":"That Time I Got Reincarnated as a Slime: Visions of Coleus"},{"id":58592,"type":"TV Special","title":"Tensei shitara Slime Datta Ken 3rd Season: Kanwa - Diablo Nikki","title_alt":"That Time I Got Reincarnated as a Slime Season 3: Digression - Diablo's Journal"},{"id":53580,"type":"TV","title":"Tensei shitara Slime Datta Ken 3rd Season","title_alt":"That Time I Got Reincarnated as a Slime Season 3"},{"id":59493,"type":"TV Special","title":"Tensei shitara Slime Datta Ken: Kanwa - Luminous Memories","title_alt":"That Time I Got Reincarnated as a Slime Season 3: Digression - Luminous Memories"},{"id":59971,"type":"Movie","title":"Tensei shitara Slime Datta Ken Movie 2: Soukai no Namida-hen","title_alt":"That Time I Got Reincarnated as a Slime the Movie: Tears of the Azure Sea"},{"id":59970,"type":"TV","title":"Tensei shitara Slime Datta Ken 4th Season","title_alt":"That Time I Got Reincarnated as a Slime Season 4"},{"id":63129,"type":"TV","title":"Tensei shitara Slime Datta Ken 4th Season Part 2","title_alt":"That Time I Got Reincarnated as a Slime Season 4 Part 2"}]}} \ No newline at end of file diff --git a/internal/jikan/client.go b/internal/jikan/client.go index f4ebfe0..f638a0e 100644 --- a/internal/jikan/client.go +++ b/internal/jikan/client.go @@ -9,28 +9,21 @@ import ( "time" "mal/internal/database" - "mal/internal/watchorder" ) type Client struct { httpClient *http.Client baseURL string db database.Querier - watchOrders *watchorder.Store mu sync.Mutex lastReqTime time.Time } -func NewClient(db database.Querier, watchOrders *watchorder.Store) *Client { - if watchOrders == nil { - watchOrders = watchorder.EmptyStore() - } - +func NewClient(db database.Querier) *Client { return &Client{ - httpClient: &http.Client{Timeout: 10 * time.Second}, - baseURL: "https://api.jikan.moe/v4", - db: db, - watchOrders: watchOrders, + httpClient: &http.Client{Timeout: 10 * time.Second}, + baseURL: "https://api.jikan.moe/v4", + db: db, } } diff --git a/internal/jikan/relations.go b/internal/jikan/relations.go index 198ece9..413075e 100644 --- a/internal/jikan/relations.go +++ b/internal/jikan/relations.go @@ -2,10 +2,17 @@ package jikan import ( "context" + "errors" + "fmt" "log" "strings" + "time" + + "mal/internal/watchorder" ) +const chiakiWatchOrderURL = "https://chiaki.site/?/tools/watch_order/id/%d" +const watchOrderCacheTTL = time.Hour * 24 const maxWatchOrderEntries = 120 func watchOrderTypeLabel(value string) string { @@ -25,6 +32,49 @@ func isAllowedWatchOrderType(value string) bool { return normalized == "tv" || normalized == "movie" } +func relationCacheKey(id int) string { + return fmt.Sprintf("relations:watch-order:%d", id) +} + +func (c *Client) getWatchOrder(ctx context.Context, id int) (watchorder.WatchOrderResult, error) { + cacheKey := relationCacheKey(id) + + var cached watchorder.WatchOrderResult + if c.getCache(ctx, cacheKey, &cached) { + return cached, nil + } + + watchOrderURL := fmt.Sprintf(chiakiWatchOrderURL, id) + requestCtx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + result, err := watchorder.FetchWatchOrder(requestCtx, c.httpClient, watchOrderURL) + if err != nil { + var statusError *watchorder.HTTPStatusError + if errors.Is(err, watchorder.ErrWatchOrderMarkupNotFound) { + log.Printf("relations: watch-order markup missing for %d (%s): %v", id, watchOrderURL, err) + } else if errors.As(err, &statusError) { + log.Printf( + "relations: watch-order http error for %d (%s): status=%d server=%q cf_ray=%q location=%q content_type=%q body=%q", + id, + watchOrderURL, + statusError.StatusCode, + statusError.Server, + statusError.CFRay, + statusError.Location, + statusError.ContentType, + statusError.BodyPreview, + ) + } else { + log.Printf("relations: watch-order fetch failed for %d (%s): %v", id, watchOrderURL, err) + } + return watchorder.WatchOrderResult{}, err + } + + c.setCache(ctx, cacheKey, result, watchOrderCacheTTL) + return result, nil +} + func (c *Client) currentOnlyRelation(ctx context.Context, id int) ([]RelationEntry, error) { currentAnime, err := c.GetAnimeByID(ctx, id) if err != nil { @@ -40,16 +90,16 @@ func (c *Client) currentOnlyRelation(ctx context.Context, id int) ([]RelationEnt } func (c *Client) GetFullRelations(ctx context.Context, id int) ([]RelationEntry, error) { - watchOrder, found := c.watchOrders.Get(id) - if !found { - log.Printf("relations: no local watch-order data for %d", id) + result, err := c.getWatchOrder(ctx, id) + if err != nil { + log.Printf("relations: using current-only fallback for %d: %v", id, err) return c.currentOnlyRelation(ctx, id) } seen := make(map[int]bool) - relations := make([]RelationEntry, 0, len(watchOrder)+1) + relations := make([]RelationEntry, 0, len(result.WatchOrder)+1) - for _, watchOrderEntry := range watchOrder { + for _, watchOrderEntry := range result.WatchOrder { if len(relations) >= maxWatchOrderEntries { break } diff --git a/internal/watchorder/store.go b/internal/watchorder/store.go deleted file mode 100644 index 42c1158..0000000 --- a/internal/watchorder/store.go +++ /dev/null @@ -1,72 +0,0 @@ -package watchorder - -import ( - "encoding/json" - "fmt" - "os" - "strconv" -) - -type Store struct { - byID map[int][]WatchOrderEntry -} - -func EmptyStore() *Store { - return &Store{byID: make(map[int][]WatchOrderEntry)} -} - -func (s *Store) Len() int { - if s == nil { - return 0 - } - - return len(s.byID) -} - -func (s *Store) Get(id int) ([]WatchOrderEntry, bool) { - if s == nil { - return nil, false - } - - entries, ok := s.byID[id] - if !ok { - return nil, false - } - - return entries, true -} - -func LoadFromFile(path string) (*Store, error) { - content, err := os.ReadFile(path) - if err != nil { - return nil, fmt.Errorf("failed to read watch-order file %q: %w", path, err) - } - - rawMessages := make(map[string]json.RawMessage) - if err := json.Unmarshal(content, &rawMessages); err != nil { - return nil, fmt.Errorf("failed to parse watch-order file %q: %w", path, err) - } - - raw := make(map[string][]WatchOrderEntry) - if wrappedData, ok := rawMessages["data"]; ok && len(rawMessages) == 1 { - if err := json.Unmarshal(wrappedData, &raw); err != nil { - return nil, fmt.Errorf("failed to parse watch-order data in file %q: %w", path, err) - } - } else { - if err := json.Unmarshal(content, &raw); err != nil { - return nil, fmt.Errorf("failed to parse watch-order file %q: %w", path, err) - } - } - - byID := make(map[int][]WatchOrderEntry, len(raw)) - for key, entries := range raw { - id, err := strconv.Atoi(key) - if err != nil { - return nil, fmt.Errorf("invalid anime id key %q in watch-order file %q: %w", key, path, err) - } - - byID[id] = entries - } - - return &Store{byID: byID}, nil -} diff --git a/internal/watchorder/store_test.go b/internal/watchorder/store_test.go deleted file mode 100644 index 5edccf1..0000000 --- a/internal/watchorder/store_test.go +++ /dev/null @@ -1,72 +0,0 @@ -package watchorder - -import ( - "os" - "path/filepath" - "testing" -) - -func TestLoadFromFile_Success(t *testing.T) { - temporaryDirectory := t.TempDir() - filePath := filepath.Join(temporaryDirectory, "watch_order.json") - - content := `{ - "1": [{"id": 1, "type": "TV", "title": "One"}], - "2": [{"id": 2, "type": "Movie", "title": "Two"}] -}` - - if err := os.WriteFile(filePath, []byte(content), 0o644); err != nil { - t.Fatalf("failed to write file: %v", err) - } - - store, err := LoadFromFile(filePath) - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - - if store.Len() != 2 { - t.Fatalf("expected 2 ids, got %d", store.Len()) - } - - entries, ok := store.Get(1) - if !ok { - t.Fatalf("expected id 1 to exist") - } - - if len(entries) != 1 || entries[0].ID != 1 { - t.Fatalf("unexpected entries for id 1: %+v", entries) - } -} - -func TestLoadFromFile_InvalidIDKey(t *testing.T) { - temporaryDirectory := t.TempDir() - filePath := filepath.Join(temporaryDirectory, "watch_order.json") - - if err := os.WriteFile(filePath, []byte(`{"abc": []}`), 0o644); err != nil { - t.Fatalf("failed to write file: %v", err) - } - - _, err := LoadFromFile(filePath) - if err == nil { - t.Fatalf("expected error for invalid id key") - } -} - -func TestLoadFromFile_WrappedPayload(t *testing.T) { - temporaryDirectory := t.TempDir() - filePath := filepath.Join(temporaryDirectory, "watch_order.json") - - content := `{"data":{"10":[{"id":10,"type":"TV","title":"Ten"}]}}` - if err := os.WriteFile(filePath, []byte(content), 0o644); err != nil { - t.Fatalf("failed to write file: %v", err) - } - - store, err := LoadFromFile(filePath) - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - - if store.Len() != 1 { - t.Fatalf("expected 1 id, got %d", store.Len()) - } -} diff --git a/internal/watchorder/types.go b/internal/watchorder/types.go deleted file mode 100644 index 87f394f..0000000 --- a/internal/watchorder/types.go +++ /dev/null @@ -1,8 +0,0 @@ -package watchorder - -type WatchOrderEntry struct { - ID int `json:"id"` - Type string `json:"type"` - Title string `json:"title"` - TitleAlt string `json:"title_alt,omitempty"` -} diff --git a/internal/watchorder/watch_order.go b/internal/watchorder/watch_order.go new file mode 100644 index 0000000..54d6e12 --- /dev/null +++ b/internal/watchorder/watch_order.go @@ -0,0 +1,397 @@ +package watchorder + +import ( + "context" + "errors" + "fmt" + "io" + "net/http" + "regexp" + "strconv" + "strings" + + "github.com/PuerkitoBio/goquery" +) + +const defaultUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36" + +var idPattern = regexp.MustCompile(`/id/(\d+)`) +var malLinkPattern = regexp.MustCompile(`myanimelist\.net/anime/(\d+)`) + +var ErrInvalidWatchOrderURL = errors.New("invalid watch order url") +var ErrWatchOrderMarkupNotFound = errors.New("watch order markup not found") + +type HTTPStatusError struct { + StatusCode int + URL string + Server string + CFRay string + Location string + ContentType string + BodyPreview string +} + +func (e *HTTPStatusError) Error() string { + return fmt.Sprintf( + "unexpected status code: %d (url=%s server=%s cf_ray=%s location=%s content_type=%s body=%q)", + e.StatusCode, + e.URL, + e.Server, + e.CFRay, + e.Location, + e.ContentType, + e.BodyPreview, + ) +} + +type WatchOrderEntry struct { + ID int `json:"id"` + Type string `json:"type"` + Title string `json:"title"` + TitleAlt string `json:"title_alt,omitempty"` +} + +type WatchOrderResult struct { + ID int `json:"id"` + WatchOrder []WatchOrderEntry `json:"watch_order"` +} + +type watchOrderRow struct { + id int + typeID int + title string + alternativeTitle string +} + +func parseRootID(url string) (int, error) { + match := idPattern.FindStringSubmatch(url) + if len(match) != 2 { + return 0, ErrInvalidWatchOrderURL + } + + id, err := strconv.Atoi(match[1]) + if err != nil { + return 0, ErrInvalidWatchOrderURL + } + + return id, nil +} + +func addCommonHeaders(request *http.Request) { + request.Header.Set("User-Agent", defaultUserAgent) + request.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8") + request.Header.Set("Accept-Language", "en-US,en;q=0.9") + request.Header.Set("Referer", "https://chiaki.site/") + request.Header.Set("Cache-Control", "no-cache") +} + +func fetchDocument(ctx context.Context, httpClient *http.Client, url string) (*goquery.Document, error) { + client := httpClient + if client == nil { + client = http.DefaultClient + } + + request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + addCommonHeaders(request) + + response, err := client.Do(request) + if err != nil { + return nil, fmt.Errorf("request failed: %w", err) + } + defer response.Body.Close() + + if response.StatusCode != http.StatusOK { + body, _ := io.ReadAll(io.LimitReader(response.Body, 512)) + return nil, &HTTPStatusError{ + StatusCode: response.StatusCode, + URL: url, + Server: strings.TrimSpace(response.Header.Get("Server")), + CFRay: strings.TrimSpace(response.Header.Get("CF-Ray")), + Location: strings.TrimSpace(response.Header.Get("Location")), + ContentType: strings.TrimSpace(response.Header.Get("Content-Type")), + BodyPreview: strings.Join(strings.Fields(strings.TrimSpace(string(body))), " "), + } + } + + document, err := goquery.NewDocumentFromReader(response.Body) + if err != nil { + return nil, fmt.Errorf("failed to parse html: %w", err) + } + + return document, nil +} + +func extractTypeLabelsByID(doc *goquery.Document) map[int]string { + typeLabels := make(map[int]string) + + doc.Find("#wo_type_filter label").Each(func(_ int, selection *goquery.Selection) { + input := selection.Find("input[type='checkbox']") + rawID, exists := input.Attr("value") + if !exists { + return + } + + typeID, err := strconv.Atoi(strings.TrimSpace(rawID)) + if err != nil { + return + } + + label := strings.TrimSpace(selection.Text()) + if label == "" { + return + } + + typeLabels[typeID] = label + }) + + return typeLabels +} + +func parseAttrInt(selection *goquery.Selection, attrName string) (int, bool) { + rawValue, exists := selection.Attr(attrName) + if !exists { + return 0, false + } + + value, err := strconv.Atoi(strings.TrimSpace(rawValue)) + if err != nil { + return 0, false + } + + return value, true +} + +func extractRows(doc *goquery.Document) []watchOrderRow { + rows := make([]watchOrderRow, 0) + + doc.Find("tr[data-id]").Each(func(_ int, selection *goquery.Selection) { + id, ok := parseAttrInt(selection, "data-id") + if !ok { + return + } + + typeID, ok := parseAttrInt(selection, "data-type") + if !ok { + return + } + + title := strings.TrimSpace(selection.Find(".wo_title").First().Text()) + alternativeTitle := strings.TrimSpace(selection.Find(".uk-text-small").First().Text()) + + rows = append(rows, watchOrderRow{ + id: id, + typeID: typeID, + title: title, + alternativeTitle: alternativeTitle, + }) + }) + + return rows +} + +func hasWatchOrderTable(doc *goquery.Document) bool { + return doc.Find("#wo_list").Length() > 0 +} + +func shouldTryProxy(err error) bool { + var statusError *HTTPStatusError + if errors.As(err, &statusError) { + return statusError.StatusCode == http.StatusForbidden || statusError.StatusCode == http.StatusTooManyRequests || statusError.StatusCode == http.StatusServiceUnavailable + } + + return false +} + +func toJinaProxyURL(url string) string { + trimmed := strings.TrimPrefix(strings.TrimPrefix(url, "https://"), "http://") + return "https://r.jina.ai/http://" + trimmed +} + +func fetchProxyText(ctx context.Context, httpClient *http.Client, url string) (string, error) { + client := httpClient + if client == nil { + client = http.DefaultClient + } + + request, err := http.NewRequestWithContext(ctx, http.MethodGet, toJinaProxyURL(url), nil) + if err != nil { + return "", fmt.Errorf("failed to create proxy request: %w", err) + } + + addCommonHeaders(request) + + response, err := client.Do(request) + if err != nil { + return "", fmt.Errorf("proxy request failed: %w", err) + } + defer response.Body.Close() + + if response.StatusCode != http.StatusOK { + return "", fmt.Errorf("proxy status %d", response.StatusCode) + } + + body, err := io.ReadAll(io.LimitReader(response.Body, 2*1024*1024)) + if err != nil { + return "", fmt.Errorf("failed to read proxy response: %w", err) + } + + return string(body), nil +} + +func parseJinaEntries(text string) []WatchOrderEntry { + lines := strings.Split(text, "\n") + entries := make([]WatchOrderEntry, 0) + seen := make(map[int]bool) + + for index, line := range lines { + trimmed := strings.TrimSpace(line) + if trimmed == "" { + continue + } + + if !strings.Contains(trimmed, "myanimelist.net/anime/") || !strings.Contains(trimmed, "|") { + continue + } + + idMatch := malLinkPattern.FindStringSubmatch(trimmed) + if len(idMatch) != 2 { + continue + } + + id, err := strconv.Atoi(idMatch[1]) + if err != nil || seen[id] { + continue + } + + parts := strings.Split(trimmed, "|") + if len(parts) < 2 { + continue + } + + typeName := strings.TrimSpace(parts[1]) + if typeName == "" { + continue + } + + title, titleAlt := titleFromContext(lines, index) + entries = append(entries, WatchOrderEntry{ + ID: id, + Type: typeName, + Title: title, + TitleAlt: titleAlt, + }) + seen[id] = true + } + + return entries +} + +func isNoiseTitleLine(value string) bool { + lower := strings.ToLower(strings.TrimSpace(value)) + if lower == "" { + return true + } + + if strings.HasPrefix(lower, "title:") || strings.HasPrefix(lower, "url source:") || strings.HasPrefix(lower, "markdown content:") { + return true + } + + if strings.Contains(lower, "/ watch order") { + return true + } + + if strings.HasPrefix(lower, "http://") || strings.HasPrefix(lower, "https://") { + return true + } + + return false +} + +func titleFromContext(lines []string, metaIndex int) (string, string) { + collected := make([]string, 0, 2) + + for idx := metaIndex - 1; idx >= 0 && len(collected) < 2; idx-- { + candidate := strings.TrimSpace(lines[idx]) + if candidate == "" { + continue + } + + if isNoiseTitleLine(candidate) { + continue + } + + if strings.Contains(candidate, "myanimelist.net/anime/") { + continue + } + + collected = append(collected, candidate) + } + + if len(collected) == 0 { + return "", "" + } + + if len(collected) == 1 { + return collected[0], "" + } + + return collected[1], collected[0] +} + +func fetchViaProxy(ctx context.Context, httpClient *http.Client, url string, rootID int) (WatchOrderResult, error) { + proxyText, err := fetchProxyText(ctx, httpClient, url) + if err != nil { + return WatchOrderResult{}, err + } + + entries := parseJinaEntries(proxyText) + if len(entries) == 0 { + return WatchOrderResult{}, ErrWatchOrderMarkupNotFound + } + + return WatchOrderResult{ID: rootID, WatchOrder: entries}, nil +} + +func FetchWatchOrder(ctx context.Context, httpClient *http.Client, url string) (WatchOrderResult, error) { + rootID, err := parseRootID(url) + if err != nil { + return WatchOrderResult{}, err + } + + doc, err := fetchDocument(ctx, httpClient, url) + if err != nil { + if shouldTryProxy(err) { + return fetchViaProxy(ctx, httpClient, url, rootID) + } + return WatchOrderResult{}, err + } + + if !hasWatchOrderTable(doc) { + return fetchViaProxy(ctx, httpClient, url, rootID) + } + + rows := extractRows(doc) + if len(rows) == 0 { + return WatchOrderResult{ID: rootID, WatchOrder: []WatchOrderEntry{}}, nil + } + + typeByID := extractTypeLabelsByID(doc) + + entries := make([]WatchOrderEntry, 0, len(rows)) + for _, row := range rows { + typeName := strings.TrimSpace(typeByID[row.typeID]) + + entries = append(entries, WatchOrderEntry{ + ID: row.id, + Type: typeName, + Title: row.title, + TitleAlt: row.alternativeTitle, + }) + } + + return WatchOrderResult{ID: rootID, WatchOrder: entries}, nil +} diff --git a/internal/watchorder/watch_order_test.go b/internal/watchorder/watch_order_test.go new file mode 100644 index 0000000..7c61b16 --- /dev/null +++ b/internal/watchorder/watch_order_test.go @@ -0,0 +1,212 @@ +package watchorder + +import ( + "context" + "errors" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" +) + +func testServer(body string) *httptest.Server { + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + _, _ = w.Write([]byte(body)) + }) + + return httptest.NewServer(handler) +} + +func testHTMLWithMetadata() string { + return ` + + + +
+ + +
+ + + + +
+ Naruto Movie 1 + Naruto the Movie 1 +
+ +` +} + +func testHTMLEmptyRows() string { + return ` + + + +
+ + +
+
+ +` +} + +func TestFetchWatchOrder_OutputShape(t *testing.T) { + server := testServer(testHTMLWithMetadata()) + defer server.Close() + + url := server.URL + "/?/tools/watch_order/id/442" + result, err := FetchWatchOrder(context.Background(), &http.Client{Timeout: time.Second}, url) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + if result.ID != 442 { + t.Fatalf("expected root id 442, got %d", result.ID) + } + + if len(result.WatchOrder) != 1 { + t.Fatalf("expected 1 watch_order entry, got %d", len(result.WatchOrder)) + } + + entry := result.WatchOrder[0] + if entry.ID != 442 { + t.Fatalf("expected entry id 442, got %d", entry.ID) + } + if entry.Type != "Movie" { + t.Fatalf("expected type Movie, got %q", entry.Type) + } + if entry.Title != "Naruto Movie 1" { + t.Fatalf("expected title Naruto Movie 1, got %q", entry.Title) + } + if entry.TitleAlt != "Naruto the Movie 1" { + t.Fatalf("expected title_alt Naruto the Movie 1, got %q", entry.TitleAlt) + } +} + +func TestFetchWatchOrder_NoRowsReturnsEmpty(t *testing.T) { + server := testServer(testHTMLEmptyRows()) + defer server.Close() + + url := server.URL + "/?/tools/watch_order/id/1535" + result, err := FetchWatchOrder(context.Background(), &http.Client{Timeout: time.Second}, url) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + if result.ID != 1535 { + t.Fatalf("expected root id 1535, got %d", result.ID) + } + + if len(result.WatchOrder) != 0 { + t.Fatalf("expected no entries, got %d", len(result.WatchOrder)) + } +} + +func TestFetchWatchOrder_MissingMarkupFallsBackToProxy(t *testing.T) { + proxyPayload := `Title: Jujutsu Kaisen / Watch Order +URL Source: https://chiaki.site/?/tools/watch_order/id/40748 + +Markdown Content: +Jujutsu Kaisen + + Oct 3, 2020 – Mar 27, 2021 | TV | 24ep × 23min. | ★8.51 | [](https://myanimelist.net/anime/40748) +Jujutsu Kaisen 0 Movie + +Jujutsu Kaisen 0 + + Dec 24, 2021 | Movie | 1ep × 1hr. 44min. | ★8.36 | [](https://myanimelist.net/anime/48561) +` + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if strings.HasPrefix(r.URL.Path, "/http/") { + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(proxyPayload)) + return + } + + w.WriteHeader(http.StatusForbidden) + _, _ = w.Write([]byte("blocked")) + })) + defer server.Close() + + transport := http.DefaultTransport + testClient := &http.Client{ + Timeout: time.Second, + Transport: roundTripFunc(func(request *http.Request) (*http.Response, error) { + if strings.HasPrefix(request.URL.Host, "r.jina.ai") { + proxyURL := server.URL + "/http/" + strings.TrimPrefix(request.URL.Path, "/") + proxyRequest, err := http.NewRequestWithContext(request.Context(), request.Method, proxyURL, nil) + if err != nil { + return nil, err + } + return transport.RoundTrip(proxyRequest) + } + + blockedURL := server.URL + request.URL.Path + blockedRequest, err := http.NewRequestWithContext(request.Context(), request.Method, blockedURL, nil) + if err != nil { + return nil, err + } + return transport.RoundTrip(blockedRequest) + }), + } + + result, err := FetchWatchOrder(context.Background(), testClient, "https://chiaki.site/?/tools/watch_order/id/40748") + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + if len(result.WatchOrder) != 2 { + t.Fatalf("expected 2 proxy entries, got %d", len(result.WatchOrder)) + } + + if result.WatchOrder[0].ID != 40748 || result.WatchOrder[0].Type != "TV" { + t.Fatalf("unexpected first entry: %+v", result.WatchOrder[0]) + } + + if result.WatchOrder[1].ID != 48561 || result.WatchOrder[1].Type != "Movie" { + t.Fatalf("unexpected second entry: %+v", result.WatchOrder[1]) + } +} + +func TestFetchWatchOrder_HTTPStatusErrorIncludesContext(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Server", "cloudflare") + w.Header().Set("CF-Ray", "abc123") + w.Header().Set("Content-Type", "text/html; charset=utf-8") + w.WriteHeader(http.StatusForbidden) + _, _ = w.Write([]byte("access denied")) + })) + defer server.Close() + + url := server.URL + "/?/tools/watch_order/id/1" + _, err := fetchDocument(context.Background(), &http.Client{Timeout: time.Second}, url) + if err == nil { + t.Fatalf("expected error, got nil") + } + + var statusError *HTTPStatusError + if !errors.As(err, &statusError) { + t.Fatalf("expected HTTPStatusError, got %T", err) + } + + if statusError.StatusCode != http.StatusForbidden { + t.Fatalf("expected 403, got %d", statusError.StatusCode) + } + if statusError.CFRay != "abc123" { + t.Fatalf("expected cf-ray abc123, got %q", statusError.CFRay) + } + if !strings.Contains(statusError.BodyPreview, "access denied") { + t.Fatalf("expected body preview to include access denied, got %q", statusError.BodyPreview) + } +} + +type roundTripFunc func(*http.Request) (*http.Response, error) + +func (f roundTripFunc) RoundTrip(request *http.Request) (*http.Response, error) { + return f(request) +}