From 6edf47c553f3c9c290611840fd556767f622ebf3 Mon Sep 17 00:00:00 2001 From: mkelvers Date: Sat, 11 Apr 2026 22:06:26 +0200 Subject: [PATCH] core: harden watch-order relations --- go.mod | 8 +- go.sum | 74 +++++- internal/jikan/relations.go | 296 +++++++----------------- internal/jikan/relations_test.go | 48 ++++ internal/watchorder/watch_order.go | 184 +++++++++++++++ internal/watchorder/watch_order_test.go | 105 +++++++++ 6 files changed, 505 insertions(+), 210 deletions(-) create mode 100644 internal/jikan/relations_test.go create mode 100644 internal/watchorder/watch_order.go create mode 100644 internal/watchorder/watch_order_test.go diff --git a/go.mod b/go.mod index b182bee..63b6804 100644 --- a/go.mod +++ b/go.mod @@ -3,8 +3,14 @@ module mal go 1.24.0 require ( + github.com/PuerkitoBio/goquery v1.11.0 github.com/a-h/templ v0.3.1001 github.com/google/uuid v1.6.0 github.com/mattn/go-sqlite3 v1.14.40 - golang.org/x/crypto v0.31.0 + golang.org/x/crypto v0.45.0 +) + +require ( + github.com/andybalholm/cascadia v1.3.3 // indirect + golang.org/x/net v0.47.0 // indirect ) diff --git a/go.sum b/go.sum index f2490e2..f8d2911 100644 --- a/go.sum +++ b/go.sum @@ -1,12 +1,80 @@ +github.com/PuerkitoBio/goquery v1.11.0 h1:jZ7pwMQXIITcUXNH83LLk+txlaEy6NVOfTuP43xxfqw= +github.com/PuerkitoBio/goquery v1.11.0/go.mod h1:wQHgxUOU3JGuj3oD/QFfxUdlzW6xPHfqyHre6VMY4DQ= github.com/a-h/templ v0.3.1001 h1:yHDTgexACdJttyiyamcTHXr2QkIeVF1MukLy44EAhMY= github.com/a-h/templ v0.3.1001/go.mod h1:oCZcnKRf5jjsGpf2yELzQfodLphd2mwecwG4Crk5HBo= +github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM= +github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= -github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/mattn/go-sqlite3 v1.14.40 h1:f7+saIsbq4EF86mUqe0uiecQOJYMOdfi5uATADmUG94= github.com/mattn/go-sqlite3 v1.14.40/go.mod h1:pjEuOr8IwzLJP2MfGeTb0A35jauH+C2kbHKBr7yXKVQ= -golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= +golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= +golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q= +golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= +golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= +golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= +golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= +golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/internal/jikan/relations.go b/internal/jikan/relations.go index 080d9fd..0fb68fa 100644 --- a/internal/jikan/relations.go +++ b/internal/jikan/relations.go @@ -2,245 +2,129 @@ package jikan import ( "context" - "slices" - "sort" + "fmt" "strings" "time" + + "mal/internal/watchorder" ) -var canonicalRelationOrder = []string{ - "prequel", - "sequel", - "parent story", - "full story", - "alternative version", - "alternative setting", -} +const chiakiWatchOrderURL = "https://chiaki.site/?/tools/watch_order/id/%d" +const watchOrderCacheTTL = time.Hour * 24 +const maxWatchOrderEntries = 120 -var extraRelationOrder = []string{ - "side story", - "spin-off", - "summary", - "other", -} - -var relationPriorityOrder = append( - append([]string{}, canonicalRelationOrder...), - extraRelationOrder..., -) - -func relationKey(rel string) string { - key := strings.ToLower(strings.TrimSpace(rel)) - key = strings.ReplaceAll(key, "_", " ") - key = strings.Join(strings.Fields(key), " ") - - switch key { - case "prequels": - return "prequel" - case "sequels": - return "sequel" - case "side stories": - return "side story" - case "spin off", "spinoff": - return "spin-off" +func watchOrderTypeLabel(value string) string { + normalized := strings.ToLower(strings.TrimSpace(value)) + switch normalized { + case "tv": + return "TV" + case "movie": + return "Movie" default: - return key + return strings.TrimSpace(value) } } -func relationLabel(rel string) string { - key := relationKey(rel) - switch key { - case "prequel": - return "Prequels" - case "sequel": - return "Sequels" - case "parent story": - return "Parent story" - case "full story": - return "Full story" - case "alternative version": - return "Alternative version" - case "alternative setting": - return "Alternative setting" - case "side story": - return "Side story" - case "spin-off": - return "Spin-off" - case "summary": - return "Summary" - case "other": - return "Other" - default: - return strings.TrimSpace(rel) - } +func isAllowedWatchOrderType(value string) bool { + normalized := strings.ToLower(strings.TrimSpace(value)) + return normalized == "tv" || normalized == "movie" } -func isCanonicalRelation(rel string) bool { - return slices.Contains(canonicalRelationOrder, relationKey(rel)) +func relationCacheKey(id int) string { + return fmt.Sprintf("relations:watch-order:%d", id) } -func isExtraRelation(rel string) bool { - return slices.Contains(extraRelationOrder, relationKey(rel)) -} +func (c *Client) getWatchOrder(ctx context.Context, id int) (watchorder.WatchOrderResult, error) { + cacheKey := relationCacheKey(id) -func isFranchiseRelation(rel string) bool { - return isCanonicalRelation(rel) || isExtraRelation(rel) -} - -func relationOrder(rel string) int { - key := relationKey(rel) - for i, allowed := range relationPriorityOrder { - if key == allowed { - return i - } - } - return len(relationPriorityOrder) + 1 -} - -func relationAiredAt(anime Anime) (time.Time, bool) { - from := strings.TrimSpace(anime.Aired.From) - if from != "" { - if parsed, err := time.Parse(time.RFC3339, from); err == nil { - return parsed, true - } - if parsed, err := time.Parse("2006-01-02", from); err == nil { - return parsed, true - } + var cached watchorder.WatchOrderResult + if c.getCache(ctx, cacheKey, &cached) { + return cached, nil } - if anime.Year > 0 { - return time.Date(anime.Year, time.January, 1, 0, 0, 0, 0, time.UTC), true + watchOrderURL := fmt.Sprintf(chiakiWatchOrderURL, id) + requestCtx, cancel := context.WithTimeout(ctx, 8*time.Second) + defer cancel() + + result, err := watchorder.FetchWatchOrder(requestCtx, c.httpClient, watchOrderURL) + if err != nil { + return watchorder.WatchOrderResult{}, err } - return time.Time{}, false + c.setCache(ctx, cacheKey, result, watchOrderCacheTTL) + return result, nil } -func sortRelationEntriesChronological(entries []RelationEntry) { - sort.SliceStable(entries, func(i int, j int) bool { - left := entries[i] - right := entries[j] - - leftAiredAt, leftHasAiredAt := relationAiredAt(left.Anime) - rightAiredAt, rightHasAiredAt := relationAiredAt(right.Anime) - - if leftHasAiredAt != rightHasAiredAt { - return leftHasAiredAt - } - - if leftHasAiredAt && !leftAiredAt.Equal(rightAiredAt) { - return leftAiredAt.Before(rightAiredAt) - } - - leftRelationOrder := relationOrder(left.Relation) - rightRelationOrder := relationOrder(right.Relation) - if leftRelationOrder != rightRelationOrder { - return leftRelationOrder < rightRelationOrder - } - - leftTitle := strings.ToLower(left.Anime.DisplayTitle()) - rightTitle := strings.ToLower(right.Anime.DisplayTitle()) - if leftTitle != rightTitle { - return leftTitle < rightTitle - } - - return left.Anime.MalID < right.Anime.MalID - }) -} - -func relationEntries(ctx context.Context, c *Client, anime Anime) ([]RelationEntry, error) { - entries := make([]RelationEntry, 0) - - for _, group := range anime.Relations { - if !isFranchiseRelation(group.Relation) { - continue - } - - for _, entry := range group.Entry { - if entry.Type != "anime" { - continue - } - - relAnime, err := c.GetAnimeByID(ctx, entry.MalID) - if err != nil { - return nil, err - } - - entries = append(entries, RelationEntry{ - Anime: relAnime, - Relation: relationLabel(group.Relation), - IsCurrent: false, - IsExtra: !isCanonicalRelation(group.Relation), - }) - } - } - - return entries, nil -} - -func relationMap(ctx context.Context, c *Client, id int) (map[int]RelationEntry, error) { +func (c *Client) currentOnlyRelation(ctx context.Context, id int) ([]RelationEntry, error) { currentAnime, err := c.GetAnimeByID(ctx, id) if err != nil { return nil, err } - result := map[int]RelationEntry{ - currentAnime.MalID: { - Anime: currentAnime, - Relation: "Current", - IsCurrent: true, - IsExtra: false, - }, + return []RelationEntry{{ + Anime: currentAnime, + Relation: "Current", + IsCurrent: true, + IsExtra: false, + }}, nil +} + +func (c *Client) GetFullRelations(ctx context.Context, id int) ([]RelationEntry, error) { + result, err := c.getWatchOrder(ctx, id) + if err != nil { + return c.currentOnlyRelation(ctx, id) } - queue := []Anime{currentAnime} - visited := map[int]bool{currentAnime.MalID: true} + seen := make(map[int]bool) + relations := make([]RelationEntry, 0, len(result.WatchOrder)+1) - for len(queue) > 0 { - anime := queue[0] - queue = queue[1:] + for _, watchOrderEntry := range result.WatchOrder { + if len(relations) >= maxWatchOrderEntries { + break + } - entries, err := relationEntries(ctx, c, anime) + if !isAllowedWatchOrderType(watchOrderEntry.Type) { + continue + } + + if seen[watchOrderEntry.ID] { + continue + } + + anime, err := c.GetAnimeByID(ctx, watchOrderEntry.ID) + if err != nil { + continue + } + + seen[watchOrderEntry.ID] = true + relations = append(relations, RelationEntry{ + Anime: anime, + Relation: watchOrderTypeLabel(watchOrderEntry.Type), + IsCurrent: watchOrderEntry.ID == id, + IsExtra: false, + }) + if watchOrderEntry.ID == id { + relations[len(relations)-1].Relation = "Current" + } + } + + if !seen[id] { + currentAnime, err := c.GetAnimeByID(ctx, id) if err != nil { return nil, err } - for _, rel := range entries { - existing, exists := result[rel.Anime.MalID] - if !exists { - result[rel.Anime.MalID] = rel - } else if !existing.IsCurrent { - if existing.IsExtra && !rel.IsExtra { - // Prefer canonical timeline links over extras when both point to the same anime. - result[rel.Anime.MalID] = rel - } else if existing.IsExtra && rel.IsExtra && relationOrder(rel.Relation) < relationOrder(existing.Relation) { - // Keep the most specific extra label when multiple extra relations exist. - result[rel.Anime.MalID] = rel - } - } - - if !rel.IsExtra && !visited[rel.Anime.MalID] { - visited[rel.Anime.MalID] = true - queue = append(queue, rel.Anime) - } - } + relations = append([]RelationEntry{{ + Anime: currentAnime, + Relation: "Current", + IsCurrent: true, + IsExtra: false, + }}, relations...) } - return result, nil -} - -func (c *Client) GetFullRelations(ctx context.Context, id int) ([]RelationEntry, error) { - relationByID, err := relationMap(ctx, c, id) - if err != nil { - return nil, err - } - - ordered := make([]RelationEntry, 0, len(relationByID)) - for _, entry := range relationByID { - ordered = append(ordered, entry) - } - - sortRelationEntriesChronological(ordered) - - return ordered, nil + if len(relations) == 0 { + return c.currentOnlyRelation(ctx, id) + } + + return relations, nil } diff --git a/internal/jikan/relations_test.go b/internal/jikan/relations_test.go new file mode 100644 index 0000000..e69bcae --- /dev/null +++ b/internal/jikan/relations_test.go @@ -0,0 +1,48 @@ +package jikan + +import "testing" + +func TestIsAllowedWatchOrderType(t *testing.T) { + tests := []struct { + name string + input string + want bool + }{ + {name: "tv", input: "tv", want: true}, + {name: "movie", input: "movie", want: true}, + {name: "case and whitespace", input: " TV ", want: true}, + {name: "tv special", input: "tv special", want: false}, + {name: "ova", input: "ova", want: false}, + {name: "empty", input: "", want: false}, + } + + for _, testCase := range tests { + t.Run(testCase.name, func(t *testing.T) { + got := isAllowedWatchOrderType(testCase.input) + if got != testCase.want { + t.Fatalf("expected %v, got %v", testCase.want, got) + } + }) + } +} + +func TestWatchOrderTypeLabel(t *testing.T) { + tests := []struct { + name string + input string + want string + }{ + {name: "tv", input: "tv", want: "TV"}, + {name: "movie", input: "movie", want: "Movie"}, + {name: "trimmed passthrough", input: " tv special ", want: "tv special"}, + } + + for _, testCase := range tests { + t.Run(testCase.name, func(t *testing.T) { + got := watchOrderTypeLabel(testCase.input) + if got != testCase.want { + t.Fatalf("expected %q, got %q", testCase.want, got) + } + }) + } +} diff --git a/internal/watchorder/watch_order.go b/internal/watchorder/watch_order.go new file mode 100644 index 0000000..a847f51 --- /dev/null +++ b/internal/watchorder/watch_order.go @@ -0,0 +1,184 @@ +package watchorder + +import ( + "context" + "errors" + "fmt" + "net/http" + "regexp" + "strconv" + "strings" + + "github.com/PuerkitoBio/goquery" +) + +const defaultUserAgent = "anime-relations-scraper/1.0 (+https://github.com/mkelvers/anime-relations)" + +var idPattern = regexp.MustCompile(`/id/(\d+)`) + +var ErrInvalidWatchOrderURL = errors.New("invalid watch order url") + +type WatchOrderEntry struct { + ID int `json:"id"` + Type string `json:"type"` + Title string `json:"title"` + TitleAlt string `json:"title_alt,omitempty"` +} + +type WatchOrderResult struct { + ID int `json:"id"` + WatchOrder []WatchOrderEntry `json:"watch_order"` +} + +type watchOrderRow struct { + id int + typeID int + title string + alternativeTitle string +} + +func parseRootID(url string) (int, error) { + match := idPattern.FindStringSubmatch(url) + if len(match) != 2 { + return 0, ErrInvalidWatchOrderURL + } + + id, err := strconv.Atoi(match[1]) + if err != nil { + return 0, ErrInvalidWatchOrderURL + } + + return id, nil +} + +func fetchDocument(ctx context.Context, httpClient *http.Client, url string) (*goquery.Document, error) { + client := httpClient + if client == nil { + client = http.DefaultClient + } + + request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, fmt.Errorf("failed to create request: %w", err) + } + + request.Header.Set("User-Agent", defaultUserAgent) + + response, err := client.Do(request) + if err != nil { + return nil, fmt.Errorf("request failed: %w", err) + } + defer response.Body.Close() + + if response.StatusCode != http.StatusOK { + return nil, fmt.Errorf("unexpected status code: %d", response.StatusCode) + } + + document, err := goquery.NewDocumentFromReader(response.Body) + if err != nil { + return nil, fmt.Errorf("failed to parse html: %w", err) + } + + return document, nil +} + +func extractTypeLabelsByID(doc *goquery.Document) map[int]string { + typeLabels := make(map[int]string) + + doc.Find("#wo_type_filter label").Each(func(_ int, selection *goquery.Selection) { + input := selection.Find("input[type='checkbox']") + rawID, exists := input.Attr("value") + if !exists { + return + } + + typeID, err := strconv.Atoi(strings.TrimSpace(rawID)) + if err != nil { + return + } + + label := strings.TrimSpace(selection.Text()) + if label == "" { + return + } + + typeLabels[typeID] = label + }) + + return typeLabels +} + +func parseAttrInt(selection *goquery.Selection, attrName string) (int, bool) { + rawValue, exists := selection.Attr(attrName) + if !exists { + return 0, false + } + + value, err := strconv.Atoi(strings.TrimSpace(rawValue)) + if err != nil { + return 0, false + } + + return value, true +} + +func extractRows(doc *goquery.Document) []watchOrderRow { + rows := make([]watchOrderRow, 0) + + doc.Find("tr[data-id]").Each(func(_ int, selection *goquery.Selection) { + id, ok := parseAttrInt(selection, "data-id") + if !ok { + return + } + + typeID, ok := parseAttrInt(selection, "data-type") + if !ok { + return + } + + title := strings.TrimSpace(selection.Find(".wo_title").First().Text()) + alternativeTitle := strings.TrimSpace(selection.Find(".uk-text-small").First().Text()) + + rows = append(rows, watchOrderRow{ + id: id, + typeID: typeID, + title: title, + alternativeTitle: alternativeTitle, + }) + }) + + return rows +} + +func FetchWatchOrder(ctx context.Context, httpClient *http.Client, url string) (WatchOrderResult, error) { + rootID, err := parseRootID(url) + if err != nil { + return WatchOrderResult{}, err + } + + doc, err := fetchDocument(ctx, httpClient, url) + if err != nil { + return WatchOrderResult{}, err + } + + rows := extractRows(doc) + if len(rows) == 0 { + return WatchOrderResult{ID: rootID, WatchOrder: []WatchOrderEntry{}}, nil + } + + typeByID := extractTypeLabelsByID(doc) + + entries := make([]WatchOrderEntry, 0, len(rows)) + for _, row := range rows { + typeName := strings.TrimSpace(typeByID[row.typeID]) + + entries = append(entries, WatchOrderEntry{ + ID: row.id, + Type: typeName, + Title: row.title, + TitleAlt: row.alternativeTitle, + }) + } + + return WatchOrderResult{ID: rootID, WatchOrder: entries}, nil +} diff --git a/internal/watchorder/watch_order_test.go b/internal/watchorder/watch_order_test.go new file mode 100644 index 0000000..5949321 --- /dev/null +++ b/internal/watchorder/watch_order_test.go @@ -0,0 +1,105 @@ +package watchorder + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + "time" +) + +func testServer(body string) *httptest.Server { + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + _, _ = w.Write([]byte(body)) + }) + + return httptest.NewServer(handler) +} + +func testHTMLWithMetadata() string { + return ` + + + +
+ + +
+ + + + +
+ Naruto Movie 1 + Naruto the Movie 1 +
+ +` +} + +func testHTMLEmptyRows() string { + return ` + + + +
+ + +
+
+ +` +} + +func TestFetchWatchOrder_OutputShape(t *testing.T) { + server := testServer(testHTMLWithMetadata()) + defer server.Close() + + url := server.URL + "/?/tools/watch_order/id/442" + result, err := FetchWatchOrder(context.Background(), &http.Client{Timeout: time.Second}, url) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + if result.ID != 442 { + t.Fatalf("expected root id 442, got %d", result.ID) + } + + if len(result.WatchOrder) != 1 { + t.Fatalf("expected 1 watch_order entry, got %d", len(result.WatchOrder)) + } + + entry := result.WatchOrder[0] + if entry.ID != 442 { + t.Fatalf("expected entry id 442, got %d", entry.ID) + } + if entry.Type != "Movie" { + t.Fatalf("expected type Movie, got %q", entry.Type) + } + if entry.Title != "Naruto Movie 1" { + t.Fatalf("expected title Naruto Movie 1, got %q", entry.Title) + } + if entry.TitleAlt != "Naruto the Movie 1" { + t.Fatalf("expected title_alt Naruto the Movie 1, got %q", entry.TitleAlt) + } +} + +func TestFetchWatchOrder_NoRowsReturnsEmpty(t *testing.T) { + server := testServer(testHTMLEmptyRows()) + defer server.Close() + + url := server.URL + "/?/tools/watch_order/id/1535" + result, err := FetchWatchOrder(context.Background(), &http.Client{Timeout: time.Second}, url) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + if result.ID != 1535 { + t.Fatalf("expected root id 1535, got %d", result.ID) + } + + if len(result.WatchOrder) != 0 { + t.Fatalf("expected no entries, got %d", len(result.WatchOrder)) + } +}