From 9115e16334bb883a1f797b1a4d0313b4a7277141 Mon Sep 17 00:00:00 2001 From: mkelvers Date: Sat, 11 Apr 2026 22:32:56 +0200 Subject: [PATCH] core: use local watch-order store --- cmd/server/main.go | 18 +- internal/jikan/client.go | 15 +- internal/jikan/relations.go | 60 +------ internal/jikan/relations_test.go | 21 +++ internal/watchorder/store.go | 72 ++++++++ internal/watchorder/store_test.go | 72 ++++++++ internal/watchorder/types.go | 8 + internal/watchorder/watch_order.go | 230 ------------------------ internal/watchorder/watch_order_test.go | 160 ----------------- 9 files changed, 206 insertions(+), 450 deletions(-) create mode 100644 internal/watchorder/store.go create mode 100644 internal/watchorder/store_test.go create mode 100644 internal/watchorder/types.go delete mode 100644 internal/watchorder/watch_order.go delete mode 100644 internal/watchorder/watch_order_test.go diff --git a/cmd/server/main.go b/cmd/server/main.go index 3c96946..7bdad88 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -16,6 +16,7 @@ import ( "mal/internal/features/auth" "mal/internal/jikan" "mal/internal/server" + "mal/internal/watchorder" "mal/internal/worker" ) @@ -39,7 +40,22 @@ func main() { queries := database.New(db) authService := auth.NewService(queries) - jikanClient := jikan.NewClient(queries) + + watchOrderFile := os.Getenv("WATCH_ORDER_FILE") + if watchOrderFile == "" { + watchOrderFile = "./data/watch_order.json" + } + + watchOrderStore := watchorder.EmptyStore() + loadedStore, err := watchorder.LoadFromFile(watchOrderFile) + if err != nil { + log.Printf("watch-order: failed to load %s: %v", watchOrderFile, err) + } else { + watchOrderStore = loadedStore + log.Printf("watch-order: loaded %d entries from %s", watchOrderStore.Len(), watchOrderFile) + } + + jikanClient := jikan.NewClient(queries, watchOrderStore) // Start background workers relationsWorker := worker.New(queries, jikanClient) diff --git a/internal/jikan/client.go b/internal/jikan/client.go index f638a0e..f4ebfe0 100644 --- a/internal/jikan/client.go +++ b/internal/jikan/client.go @@ -9,21 +9,28 @@ import ( "time" "mal/internal/database" + "mal/internal/watchorder" ) type Client struct { httpClient *http.Client baseURL string db database.Querier + watchOrders *watchorder.Store mu sync.Mutex lastReqTime time.Time } -func NewClient(db database.Querier) *Client { +func NewClient(db database.Querier, watchOrders *watchorder.Store) *Client { + if watchOrders == nil { + watchOrders = watchorder.EmptyStore() + } + return &Client{ - httpClient: &http.Client{Timeout: 10 * time.Second}, - baseURL: "https://api.jikan.moe/v4", - db: db, + httpClient: &http.Client{Timeout: 10 * time.Second}, + baseURL: "https://api.jikan.moe/v4", + db: db, + watchOrders: watchOrders, } } diff --git a/internal/jikan/relations.go b/internal/jikan/relations.go index 308772f..198ece9 100644 --- a/internal/jikan/relations.go +++ b/internal/jikan/relations.go @@ -2,17 +2,10 @@ package jikan import ( "context" - "errors" - "fmt" "log" "strings" - "time" - - "mal/internal/watchorder" ) -const chiakiWatchOrderURL = "https://chiaki.site/?/tools/watch_order/id/%d" -const watchOrderCacheTTL = time.Hour * 24 const maxWatchOrderEntries = 120 func watchOrderTypeLabel(value string) string { @@ -32,49 +25,6 @@ func isAllowedWatchOrderType(value string) bool { return normalized == "tv" || normalized == "movie" } -func relationCacheKey(id int) string { - return fmt.Sprintf("relations:watch-order:%d", id) -} - -func (c *Client) getWatchOrder(ctx context.Context, id int) (watchorder.WatchOrderResult, error) { - cacheKey := relationCacheKey(id) - - var cached watchorder.WatchOrderResult - if c.getCache(ctx, cacheKey, &cached) { - return cached, nil - } - - watchOrderURL := fmt.Sprintf(chiakiWatchOrderURL, id) - requestCtx, cancel := context.WithTimeout(ctx, 8*time.Second) - defer cancel() - - result, err := watchorder.FetchWatchOrder(requestCtx, c.httpClient, watchOrderURL) - if err != nil { - var statusError *watchorder.HTTPStatusError - if errors.Is(err, watchorder.ErrWatchOrderMarkupNotFound) { - log.Printf("relations: watch-order markup missing for %d (%s): %v", id, watchOrderURL, err) - } else if errors.As(err, &statusError) { - log.Printf( - "relations: watch-order http error for %d (%s): status=%d server=%q cf_ray=%q location=%q content_type=%q body=%q", - id, - watchOrderURL, - statusError.StatusCode, - statusError.Server, - statusError.CFRay, - statusError.Location, - statusError.ContentType, - statusError.BodyPreview, - ) - } else { - log.Printf("relations: watch-order fetch failed for %d (%s): %v", id, watchOrderURL, err) - } - return watchorder.WatchOrderResult{}, err - } - - c.setCache(ctx, cacheKey, result, watchOrderCacheTTL) - return result, nil -} - func (c *Client) currentOnlyRelation(ctx context.Context, id int) ([]RelationEntry, error) { currentAnime, err := c.GetAnimeByID(ctx, id) if err != nil { @@ -90,16 +40,16 @@ func (c *Client) currentOnlyRelation(ctx context.Context, id int) ([]RelationEnt } func (c *Client) GetFullRelations(ctx context.Context, id int) ([]RelationEntry, error) { - result, err := c.getWatchOrder(ctx, id) - if err != nil { - log.Printf("relations: using current-only fallback for %d: %v", id, err) + watchOrder, found := c.watchOrders.Get(id) + if !found { + log.Printf("relations: no local watch-order data for %d", id) return c.currentOnlyRelation(ctx, id) } seen := make(map[int]bool) - relations := make([]RelationEntry, 0, len(result.WatchOrder)+1) + relations := make([]RelationEntry, 0, len(watchOrder)+1) - for _, watchOrderEntry := range result.WatchOrder { + for _, watchOrderEntry := range watchOrder { if len(relations) >= maxWatchOrderEntries { break } diff --git a/internal/jikan/relations_test.go b/internal/jikan/relations_test.go index e69bcae..125517a 100644 --- a/internal/jikan/relations_test.go +++ b/internal/jikan/relations_test.go @@ -46,3 +46,24 @@ func TestWatchOrderTypeLabel(t *testing.T) { }) } } + +func TestAllowedWatchOrderTypeFromDataset(t *testing.T) { + tests := []struct { + name string + input string + want bool + }{ + {name: "label tv", input: "TV", want: true}, + {name: "label movie", input: "Movie", want: true}, + {name: "label special", input: "Special", want: false}, + } + + for _, testCase := range tests { + t.Run(testCase.name, func(t *testing.T) { + got := isAllowedWatchOrderType(testCase.input) + if got != testCase.want { + t.Fatalf("expected %v, got %v", testCase.want, got) + } + }) + } +} diff --git a/internal/watchorder/store.go b/internal/watchorder/store.go new file mode 100644 index 0000000..42c1158 --- /dev/null +++ b/internal/watchorder/store.go @@ -0,0 +1,72 @@ +package watchorder + +import ( + "encoding/json" + "fmt" + "os" + "strconv" +) + +type Store struct { + byID map[int][]WatchOrderEntry +} + +func EmptyStore() *Store { + return &Store{byID: make(map[int][]WatchOrderEntry)} +} + +func (s *Store) Len() int { + if s == nil { + return 0 + } + + return len(s.byID) +} + +func (s *Store) Get(id int) ([]WatchOrderEntry, bool) { + if s == nil { + return nil, false + } + + entries, ok := s.byID[id] + if !ok { + return nil, false + } + + return entries, true +} + +func LoadFromFile(path string) (*Store, error) { + content, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("failed to read watch-order file %q: %w", path, err) + } + + rawMessages := make(map[string]json.RawMessage) + if err := json.Unmarshal(content, &rawMessages); err != nil { + return nil, fmt.Errorf("failed to parse watch-order file %q: %w", path, err) + } + + raw := make(map[string][]WatchOrderEntry) + if wrappedData, ok := rawMessages["data"]; ok && len(rawMessages) == 1 { + if err := json.Unmarshal(wrappedData, &raw); err != nil { + return nil, fmt.Errorf("failed to parse watch-order data in file %q: %w", path, err) + } + } else { + if err := json.Unmarshal(content, &raw); err != nil { + return nil, fmt.Errorf("failed to parse watch-order file %q: %w", path, err) + } + } + + byID := make(map[int][]WatchOrderEntry, len(raw)) + for key, entries := range raw { + id, err := strconv.Atoi(key) + if err != nil { + return nil, fmt.Errorf("invalid anime id key %q in watch-order file %q: %w", key, path, err) + } + + byID[id] = entries + } + + return &Store{byID: byID}, nil +} diff --git a/internal/watchorder/store_test.go b/internal/watchorder/store_test.go new file mode 100644 index 0000000..5edccf1 --- /dev/null +++ b/internal/watchorder/store_test.go @@ -0,0 +1,72 @@ +package watchorder + +import ( + "os" + "path/filepath" + "testing" +) + +func TestLoadFromFile_Success(t *testing.T) { + temporaryDirectory := t.TempDir() + filePath := filepath.Join(temporaryDirectory, "watch_order.json") + + content := `{ + "1": [{"id": 1, "type": "TV", "title": "One"}], + "2": [{"id": 2, "type": "Movie", "title": "Two"}] +}` + + if err := os.WriteFile(filePath, []byte(content), 0o644); err != nil { + t.Fatalf("failed to write file: %v", err) + } + + store, err := LoadFromFile(filePath) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + if store.Len() != 2 { + t.Fatalf("expected 2 ids, got %d", store.Len()) + } + + entries, ok := store.Get(1) + if !ok { + t.Fatalf("expected id 1 to exist") + } + + if len(entries) != 1 || entries[0].ID != 1 { + t.Fatalf("unexpected entries for id 1: %+v", entries) + } +} + +func TestLoadFromFile_InvalidIDKey(t *testing.T) { + temporaryDirectory := t.TempDir() + filePath := filepath.Join(temporaryDirectory, "watch_order.json") + + if err := os.WriteFile(filePath, []byte(`{"abc": []}`), 0o644); err != nil { + t.Fatalf("failed to write file: %v", err) + } + + _, err := LoadFromFile(filePath) + if err == nil { + t.Fatalf("expected error for invalid id key") + } +} + +func TestLoadFromFile_WrappedPayload(t *testing.T) { + temporaryDirectory := t.TempDir() + filePath := filepath.Join(temporaryDirectory, "watch_order.json") + + content := `{"data":{"10":[{"id":10,"type":"TV","title":"Ten"}]}}` + if err := os.WriteFile(filePath, []byte(content), 0o644); err != nil { + t.Fatalf("failed to write file: %v", err) + } + + store, err := LoadFromFile(filePath) + if err != nil { + t.Fatalf("expected no error, got %v", err) + } + + if store.Len() != 1 { + t.Fatalf("expected 1 id, got %d", store.Len()) + } +} diff --git a/internal/watchorder/types.go b/internal/watchorder/types.go new file mode 100644 index 0000000..87f394f --- /dev/null +++ b/internal/watchorder/types.go @@ -0,0 +1,8 @@ +package watchorder + +type WatchOrderEntry struct { + ID int `json:"id"` + Type string `json:"type"` + Title string `json:"title"` + TitleAlt string `json:"title_alt,omitempty"` +} diff --git a/internal/watchorder/watch_order.go b/internal/watchorder/watch_order.go deleted file mode 100644 index 23ff4f6..0000000 --- a/internal/watchorder/watch_order.go +++ /dev/null @@ -1,230 +0,0 @@ -package watchorder - -import ( - "context" - "errors" - "fmt" - "io" - "net/http" - "regexp" - "strconv" - "strings" - - "github.com/PuerkitoBio/goquery" -) - -const defaultUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36" - -var idPattern = regexp.MustCompile(`/id/(\d+)`) - -var ErrInvalidWatchOrderURL = errors.New("invalid watch order url") -var ErrWatchOrderMarkupNotFound = errors.New("watch order markup not found") - -type HTTPStatusError struct { - StatusCode int - URL string - Server string - CFRay string - Location string - ContentType string - BodyPreview string -} - -func (e *HTTPStatusError) Error() string { - return fmt.Sprintf( - "unexpected status code: %d (url=%s server=%s cf_ray=%s location=%s content_type=%s body=%q)", - e.StatusCode, - e.URL, - e.Server, - e.CFRay, - e.Location, - e.ContentType, - e.BodyPreview, - ) -} - -type WatchOrderEntry struct { - ID int `json:"id"` - Type string `json:"type"` - Title string `json:"title"` - TitleAlt string `json:"title_alt,omitempty"` -} - -type WatchOrderResult struct { - ID int `json:"id"` - WatchOrder []WatchOrderEntry `json:"watch_order"` -} - -type watchOrderRow struct { - id int - typeID int - title string - alternativeTitle string -} - -func parseRootID(url string) (int, error) { - match := idPattern.FindStringSubmatch(url) - if len(match) != 2 { - return 0, ErrInvalidWatchOrderURL - } - - id, err := strconv.Atoi(match[1]) - if err != nil { - return 0, ErrInvalidWatchOrderURL - } - - return id, nil -} - -func fetchDocument(ctx context.Context, httpClient *http.Client, url string) (*goquery.Document, error) { - client := httpClient - if client == nil { - client = http.DefaultClient - } - - request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) - if err != nil { - return nil, fmt.Errorf("failed to create request: %w", err) - } - - request.Header.Set("User-Agent", defaultUserAgent) - request.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8") - request.Header.Set("Accept-Language", "en-US,en;q=0.9") - request.Header.Set("Referer", "https://chiaki.site/") - request.Header.Set("Cache-Control", "no-cache") - - response, err := client.Do(request) - if err != nil { - return nil, fmt.Errorf("request failed: %w", err) - } - defer response.Body.Close() - - if response.StatusCode != http.StatusOK { - body, _ := io.ReadAll(io.LimitReader(response.Body, 512)) - return nil, &HTTPStatusError{ - StatusCode: response.StatusCode, - URL: url, - Server: strings.TrimSpace(response.Header.Get("Server")), - CFRay: strings.TrimSpace(response.Header.Get("CF-Ray")), - Location: strings.TrimSpace(response.Header.Get("Location")), - ContentType: strings.TrimSpace(response.Header.Get("Content-Type")), - BodyPreview: strings.Join(strings.Fields(strings.TrimSpace(string(body))), " "), - } - } - - document, err := goquery.NewDocumentFromReader(response.Body) - if err != nil { - return nil, fmt.Errorf("failed to parse html: %w", err) - } - - return document, nil -} - -func extractTypeLabelsByID(doc *goquery.Document) map[int]string { - typeLabels := make(map[int]string) - - doc.Find("#wo_type_filter label").Each(func(_ int, selection *goquery.Selection) { - input := selection.Find("input[type='checkbox']") - rawID, exists := input.Attr("value") - if !exists { - return - } - - typeID, err := strconv.Atoi(strings.TrimSpace(rawID)) - if err != nil { - return - } - - label := strings.TrimSpace(selection.Text()) - if label == "" { - return - } - - typeLabels[typeID] = label - }) - - return typeLabels -} - -func parseAttrInt(selection *goquery.Selection, attrName string) (int, bool) { - rawValue, exists := selection.Attr(attrName) - if !exists { - return 0, false - } - - value, err := strconv.Atoi(strings.TrimSpace(rawValue)) - if err != nil { - return 0, false - } - - return value, true -} - -func extractRows(doc *goquery.Document) []watchOrderRow { - rows := make([]watchOrderRow, 0) - - doc.Find("tr[data-id]").Each(func(_ int, selection *goquery.Selection) { - id, ok := parseAttrInt(selection, "data-id") - if !ok { - return - } - - typeID, ok := parseAttrInt(selection, "data-type") - if !ok { - return - } - - title := strings.TrimSpace(selection.Find(".wo_title").First().Text()) - alternativeTitle := strings.TrimSpace(selection.Find(".uk-text-small").First().Text()) - - rows = append(rows, watchOrderRow{ - id: id, - typeID: typeID, - title: title, - alternativeTitle: alternativeTitle, - }) - }) - - return rows -} - -func hasWatchOrderTable(doc *goquery.Document) bool { - return doc.Find("#wo_list").Length() > 0 -} - -func FetchWatchOrder(ctx context.Context, httpClient *http.Client, url string) (WatchOrderResult, error) { - rootID, err := parseRootID(url) - if err != nil { - return WatchOrderResult{}, err - } - - doc, err := fetchDocument(ctx, httpClient, url) - if err != nil { - return WatchOrderResult{}, err - } - - if !hasWatchOrderTable(doc) { - return WatchOrderResult{}, ErrWatchOrderMarkupNotFound - } - - rows := extractRows(doc) - if len(rows) == 0 { - return WatchOrderResult{ID: rootID, WatchOrder: []WatchOrderEntry{}}, nil - } - - typeByID := extractTypeLabelsByID(doc) - - entries := make([]WatchOrderEntry, 0, len(rows)) - for _, row := range rows { - typeName := strings.TrimSpace(typeByID[row.typeID]) - - entries = append(entries, WatchOrderEntry{ - ID: row.id, - Type: typeName, - Title: row.title, - TitleAlt: row.alternativeTitle, - }) - } - - return WatchOrderResult{ID: rootID, WatchOrder: entries}, nil -} diff --git a/internal/watchorder/watch_order_test.go b/internal/watchorder/watch_order_test.go deleted file mode 100644 index 077e8fa..0000000 --- a/internal/watchorder/watch_order_test.go +++ /dev/null @@ -1,160 +0,0 @@ -package watchorder - -import ( - "context" - "errors" - "net/http" - "net/http/httptest" - "strings" - "testing" - "time" -) - -func testServer(body string) *httptest.Server { - handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "text/html; charset=utf-8") - _, _ = w.Write([]byte(body)) - }) - - return httptest.NewServer(handler) -} - -func testHTMLWithMetadata() string { - return ` - - - -
- - -
- - - - -
- Naruto Movie 1 - Naruto the Movie 1 -
- -` -} - -func testHTMLEmptyRows() string { - return ` - - - -
- - -
-
- -` -} - -func testHTMLWithoutWatchOrderTable() string { - return ` - - - -

challenge page

- -` -} - -func TestFetchWatchOrder_OutputShape(t *testing.T) { - server := testServer(testHTMLWithMetadata()) - defer server.Close() - - url := server.URL + "/?/tools/watch_order/id/442" - result, err := FetchWatchOrder(context.Background(), &http.Client{Timeout: time.Second}, url) - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - - if result.ID != 442 { - t.Fatalf("expected root id 442, got %d", result.ID) - } - - if len(result.WatchOrder) != 1 { - t.Fatalf("expected 1 watch_order entry, got %d", len(result.WatchOrder)) - } - - entry := result.WatchOrder[0] - if entry.ID != 442 { - t.Fatalf("expected entry id 442, got %d", entry.ID) - } - if entry.Type != "Movie" { - t.Fatalf("expected type Movie, got %q", entry.Type) - } - if entry.Title != "Naruto Movie 1" { - t.Fatalf("expected title Naruto Movie 1, got %q", entry.Title) - } - if entry.TitleAlt != "Naruto the Movie 1" { - t.Fatalf("expected title_alt Naruto the Movie 1, got %q", entry.TitleAlt) - } -} - -func TestFetchWatchOrder_NoRowsReturnsEmpty(t *testing.T) { - server := testServer(testHTMLEmptyRows()) - defer server.Close() - - url := server.URL + "/?/tools/watch_order/id/1535" - result, err := FetchWatchOrder(context.Background(), &http.Client{Timeout: time.Second}, url) - if err != nil { - t.Fatalf("expected no error, got %v", err) - } - - if result.ID != 1535 { - t.Fatalf("expected root id 1535, got %d", result.ID) - } - - if len(result.WatchOrder) != 0 { - t.Fatalf("expected no entries, got %d", len(result.WatchOrder)) - } -} - -func TestFetchWatchOrder_MissingMarkupReturnsError(t *testing.T) { - server := testServer(testHTMLWithoutWatchOrderTable()) - defer server.Close() - - url := server.URL + "/?/tools/watch_order/id/1535" - _, err := FetchWatchOrder(context.Background(), &http.Client{Timeout: time.Second}, url) - if !errors.Is(err, ErrWatchOrderMarkupNotFound) { - t.Fatalf("expected ErrWatchOrderMarkupNotFound, got %v", err) - } -} - -func TestFetchWatchOrder_HTTPStatusErrorIncludesContext(t *testing.T) { - server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Server", "cloudflare") - w.Header().Set("CF-Ray", "abc123") - w.Header().Set("Content-Type", "text/html; charset=utf-8") - w.WriteHeader(http.StatusForbidden) - _, _ = w.Write([]byte("access denied")) - })) - defer server.Close() - - url := server.URL + "/?/tools/watch_order/id/1" - _, err := FetchWatchOrder(context.Background(), &http.Client{Timeout: time.Second}, url) - if err == nil { - t.Fatalf("expected error, got nil") - } - - var statusError *HTTPStatusError - if !errors.As(err, &statusError) { - t.Fatalf("expected HTTPStatusError, got %T", err) - } - - if statusError.StatusCode != http.StatusForbidden { - t.Fatalf("expected 403, got %d", statusError.StatusCode) - } - if statusError.CFRay != "abc123" { - t.Fatalf("expected cf-ray abc123, got %q", statusError.CFRay) - } - if !strings.Contains(statusError.BodyPreview, "access denied") { - t.Fatalf("expected body preview to include access denied, got %q", statusError.BodyPreview) - } -}