core: use local watch-order store

This commit is contained in:
2026-04-11 22:32:56 +02:00
parent 600698e12a
commit 9115e16334
9 changed files with 206 additions and 450 deletions

View File

@@ -16,6 +16,7 @@ import (
"mal/internal/features/auth" "mal/internal/features/auth"
"mal/internal/jikan" "mal/internal/jikan"
"mal/internal/server" "mal/internal/server"
"mal/internal/watchorder"
"mal/internal/worker" "mal/internal/worker"
) )
@@ -39,7 +40,22 @@ func main() {
queries := database.New(db) queries := database.New(db)
authService := auth.NewService(queries) authService := auth.NewService(queries)
jikanClient := jikan.NewClient(queries)
watchOrderFile := os.Getenv("WATCH_ORDER_FILE")
if watchOrderFile == "" {
watchOrderFile = "./data/watch_order.json"
}
watchOrderStore := watchorder.EmptyStore()
loadedStore, err := watchorder.LoadFromFile(watchOrderFile)
if err != nil {
log.Printf("watch-order: failed to load %s: %v", watchOrderFile, err)
} else {
watchOrderStore = loadedStore
log.Printf("watch-order: loaded %d entries from %s", watchOrderStore.Len(), watchOrderFile)
}
jikanClient := jikan.NewClient(queries, watchOrderStore)
// Start background workers // Start background workers
relationsWorker := worker.New(queries, jikanClient) relationsWorker := worker.New(queries, jikanClient)

View File

@@ -9,21 +9,28 @@ import (
"time" "time"
"mal/internal/database" "mal/internal/database"
"mal/internal/watchorder"
) )
type Client struct { type Client struct {
httpClient *http.Client httpClient *http.Client
baseURL string baseURL string
db database.Querier db database.Querier
watchOrders *watchorder.Store
mu sync.Mutex mu sync.Mutex
lastReqTime time.Time lastReqTime time.Time
} }
func NewClient(db database.Querier) *Client { func NewClient(db database.Querier, watchOrders *watchorder.Store) *Client {
if watchOrders == nil {
watchOrders = watchorder.EmptyStore()
}
return &Client{ return &Client{
httpClient: &http.Client{Timeout: 10 * time.Second}, httpClient: &http.Client{Timeout: 10 * time.Second},
baseURL: "https://api.jikan.moe/v4", baseURL: "https://api.jikan.moe/v4",
db: db, db: db,
watchOrders: watchOrders,
} }
} }

View File

@@ -2,17 +2,10 @@ package jikan
import ( import (
"context" "context"
"errors"
"fmt"
"log" "log"
"strings" "strings"
"time"
"mal/internal/watchorder"
) )
const chiakiWatchOrderURL = "https://chiaki.site/?/tools/watch_order/id/%d"
const watchOrderCacheTTL = time.Hour * 24
const maxWatchOrderEntries = 120 const maxWatchOrderEntries = 120
func watchOrderTypeLabel(value string) string { func watchOrderTypeLabel(value string) string {
@@ -32,49 +25,6 @@ func isAllowedWatchOrderType(value string) bool {
return normalized == "tv" || normalized == "movie" return normalized == "tv" || normalized == "movie"
} }
// relationCacheKey builds the cache key under which the watch-order result
// for the given id is stored.
func relationCacheKey(id int) string {
	const keyFormat = "relations:watch-order:%d"
	return fmt.Sprintf(keyFormat, id)
}
// getWatchOrder returns the watch order for the anime with the given id.
//
// The cache is consulted first under the id's relation cache key. On a miss
// the chiaki watch-order page is fetched with an 8-second timeout derived from
// ctx. A failed fetch is logged according to its kind (missing markup, HTTP
// status error, anything else) and the error is returned together with a zero
// result. A successful fetch is stored in the cache for watchOrderCacheTTL
// before being returned.
func (c *Client) getWatchOrder(ctx context.Context, id int) (watchorder.WatchOrderResult, error) {
	key := relationCacheKey(id)

	var fromCache watchorder.WatchOrderResult
	if hit := c.getCache(ctx, key, &fromCache); hit {
		return fromCache, nil
	}

	pageURL := fmt.Sprintf(chiakiWatchOrderURL, id)

	fetchCtx, stop := context.WithTimeout(ctx, 8*time.Second)
	defer stop()

	fetched, err := watchorder.FetchWatchOrder(fetchCtx, c.httpClient, pageURL)
	if err == nil {
		c.setCache(ctx, key, fetched, watchOrderCacheTTL)
		return fetched, nil
	}

	// Pick the log line that carries the most useful detail for this failure.
	var httpErr *watchorder.HTTPStatusError
	switch {
	case errors.Is(err, watchorder.ErrWatchOrderMarkupNotFound):
		log.Printf("relations: watch-order markup missing for %d (%s): %v", id, pageURL, err)
	case errors.As(err, &httpErr):
		log.Printf(
			"relations: watch-order http error for %d (%s): status=%d server=%q cf_ray=%q location=%q content_type=%q body=%q",
			id,
			pageURL,
			httpErr.StatusCode,
			httpErr.Server,
			httpErr.CFRay,
			httpErr.Location,
			httpErr.ContentType,
			httpErr.BodyPreview,
		)
	default:
		log.Printf("relations: watch-order fetch failed for %d (%s): %v", id, pageURL, err)
	}

	return watchorder.WatchOrderResult{}, err
}
func (c *Client) currentOnlyRelation(ctx context.Context, id int) ([]RelationEntry, error) { func (c *Client) currentOnlyRelation(ctx context.Context, id int) ([]RelationEntry, error) {
currentAnime, err := c.GetAnimeByID(ctx, id) currentAnime, err := c.GetAnimeByID(ctx, id)
if err != nil { if err != nil {
@@ -90,16 +40,16 @@ func (c *Client) currentOnlyRelation(ctx context.Context, id int) ([]RelationEnt
} }
func (c *Client) GetFullRelations(ctx context.Context, id int) ([]RelationEntry, error) { func (c *Client) GetFullRelations(ctx context.Context, id int) ([]RelationEntry, error) {
result, err := c.getWatchOrder(ctx, id) watchOrder, found := c.watchOrders.Get(id)
if err != nil { if !found {
log.Printf("relations: using current-only fallback for %d: %v", id, err) log.Printf("relations: no local watch-order data for %d", id)
return c.currentOnlyRelation(ctx, id) return c.currentOnlyRelation(ctx, id)
} }
seen := make(map[int]bool) seen := make(map[int]bool)
relations := make([]RelationEntry, 0, len(result.WatchOrder)+1) relations := make([]RelationEntry, 0, len(watchOrder)+1)
for _, watchOrderEntry := range result.WatchOrder { for _, watchOrderEntry := range watchOrder {
if len(relations) >= maxWatchOrderEntries { if len(relations) >= maxWatchOrderEntries {
break break
} }

View File

@@ -46,3 +46,24 @@ func TestWatchOrderTypeLabel(t *testing.T) {
}) })
} }
} }
// TestAllowedWatchOrderTypeFromDataset checks isAllowedWatchOrderType against
// type labels written the way the dataset spells them ("TV", "Movie",
// "Special").
func TestAllowedWatchOrderTypeFromDataset(t *testing.T) {
	type scenario struct {
		name  string
		input string
		want  bool
	}

	scenarios := []scenario{
		{"label tv", "TV", true},
		{"label movie", "Movie", true},
		{"label special", "Special", false},
	}

	for i := range scenarios {
		sc := scenarios[i]
		t.Run(sc.name, func(t *testing.T) {
			if got := isAllowedWatchOrderType(sc.input); got != sc.want {
				t.Fatalf("expected %v, got %v", sc.want, got)
			}
		})
	}
}

View File

@@ -0,0 +1,72 @@
package watchorder
import (
"encoding/json"
"fmt"
"os"
"strconv"
)
// Store holds watch-order entries grouped by anime id.
type Store struct {
	// byID maps an anime id to its list of watch-order entries.
	byID map[int][]WatchOrderEntry
}
// EmptyStore returns a new Store that contains no entries.
func EmptyStore() *Store {
	store := new(Store)
	store.byID = map[int][]WatchOrderEntry{}
	return store
}
// Len reports how many ids the store holds. A nil store has length 0.
func (s *Store) Len() int {
	if s != nil {
		return len(s.byID)
	}
	return 0
}
// Get looks up the watch-order entries stored for id. The boolean result is
// false when the store is nil or holds nothing for that id.
func (s *Store) Get(id int) ([]WatchOrderEntry, bool) {
	if s == nil {
		return nil, false
	}
	if list, present := s.byID[id]; present {
		return list, true
	}
	return nil, false
}
// LoadFromFile reads the JSON watch-order dataset at path and builds a Store
// from it.
//
// Two layouts are accepted: a top-level object that maps anime ids (written
// as integer string keys) to arrays of entries, or that same object wrapped as
// the sole "data" member of an outer object.
//
// An error is returned when the file cannot be read, when it is not valid
// JSON of one of those shapes, when a key is not an integer, or when two
// different keys denote the same id (for example "1" and "01").
func LoadFromFile(path string) (*Store, error) {
	content, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("failed to read watch-order file %q: %w", path, err)
	}

	// First pass: decode only the top-level keys so the wrapped layout can be
	// recognised without committing to an entry type yet.
	rawMessages := make(map[string]json.RawMessage)
	if err := json.Unmarshal(content, &rawMessages); err != nil {
		return nil, fmt.Errorf("failed to parse watch-order file %q: %w", path, err)
	}

	raw := make(map[string][]WatchOrderEntry)
	if wrappedData, ok := rawMessages["data"]; ok && len(rawMessages) == 1 {
		if err := json.Unmarshal(wrappedData, &raw); err != nil {
			return nil, fmt.Errorf("failed to parse watch-order data in file %q: %w", path, err)
		}
	} else {
		if err := json.Unmarshal(content, &raw); err != nil {
			return nil, fmt.Errorf("failed to parse watch-order file %q: %w", path, err)
		}
	}

	byID := make(map[int][]WatchOrderEntry, len(raw))
	// keyByID remembers which textual key produced each id so that collisions
	// can be reported instead of silently overwriting one another.
	keyByID := make(map[int]string, len(raw))
	for key, entries := range raw {
		id, err := strconv.Atoi(key)
		if err != nil {
			return nil, fmt.Errorf("invalid anime id key %q in watch-order file %q: %w", key, path, err)
		}

		// strconv.Atoi accepts signs and leading zeros, so distinct keys can
		// collapse onto one id. Map iteration order is random, so without this
		// check the surviving entry would differ from run to run.
		if previous, duplicate := keyByID[id]; duplicate {
			first, second := previous, key
			if second < first {
				first, second = second, first
			}
			return nil, fmt.Errorf("duplicate anime id %d (keys %q and %q) in watch-order file %q", id, first, second, path)
		}

		keyByID[id] = key
		byID[id] = entries
	}

	return &Store{byID: byID}, nil
}

View File

@@ -0,0 +1,72 @@
package watchorder
import (
"os"
"path/filepath"
"testing"
)
// TestLoadFromFile_Success loads a flat two-id dataset and checks both the
// number of ids and the entries stored for id 1.
func TestLoadFromFile_Success(t *testing.T) {
	const content = `{
"1": [{"id": 1, "type": "TV", "title": "One"}],
"2": [{"id": 2, "type": "Movie", "title": "Two"}]
}`

	filePath := filepath.Join(t.TempDir(), "watch_order.json")
	if writeErr := os.WriteFile(filePath, []byte(content), 0o644); writeErr != nil {
		t.Fatalf("failed to write file: %v", writeErr)
	}

	store, loadErr := LoadFromFile(filePath)
	if loadErr != nil {
		t.Fatalf("expected no error, got %v", loadErr)
	}

	if count := store.Len(); count != 2 {
		t.Fatalf("expected 2 ids, got %d", count)
	}

	entries, found := store.Get(1)
	switch {
	case !found:
		t.Fatalf("expected id 1 to exist")
	case len(entries) != 1 || entries[0].ID != 1:
		t.Fatalf("unexpected entries for id 1: %+v", entries)
	}
}
// TestLoadFromFile_InvalidIDKey expects LoadFromFile to fail when a key of
// the dataset is not an integer.
func TestLoadFromFile_InvalidIDKey(t *testing.T) {
	filePath := filepath.Join(t.TempDir(), "watch_order.json")

	payload := []byte(`{"abc": []}`)
	if writeErr := os.WriteFile(filePath, payload, 0o644); writeErr != nil {
		t.Fatalf("failed to write file: %v", writeErr)
	}

	if _, loadErr := LoadFromFile(filePath); loadErr == nil {
		t.Fatalf("expected error for invalid id key")
	}
}
// TestLoadFromFile_WrappedPayload loads a dataset nested under a top-level
// "data" member and checks that its single id is counted.
func TestLoadFromFile_WrappedPayload(t *testing.T) {
	const content = `{"data":{"10":[{"id":10,"type":"TV","title":"Ten"}]}}`

	filePath := filepath.Join(t.TempDir(), "watch_order.json")
	if writeErr := os.WriteFile(filePath, []byte(content), 0o644); writeErr != nil {
		t.Fatalf("failed to write file: %v", writeErr)
	}

	store, loadErr := LoadFromFile(filePath)
	if loadErr != nil {
		t.Fatalf("expected no error, got %v", loadErr)
	}

	if count := store.Len(); count != 1 {
		t.Fatalf("expected 1 id, got %d", count)
	}
}

View File

@@ -0,0 +1,8 @@
package watchorder
// WatchOrderEntry describes one title inside a watch order.
type WatchOrderEntry struct {
	// ID is the numeric id of the title.
	ID int `json:"id"`
	// Type is the title's type label, e.g. "TV" or "Movie".
	Type string `json:"type"`
	// Title is the title's name.
	Title string `json:"title"`
	// TitleAlt is an alternative title; it is left out of the JSON when empty.
	TitleAlt string `json:"title_alt,omitempty"`
}

View File

@@ -1,230 +0,0 @@
package watchorder
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"regexp"
"strconv"
"strings"
"github.com/PuerkitoBio/goquery"
)
const defaultUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
var idPattern = regexp.MustCompile(`/id/(\d+)`)
var ErrInvalidWatchOrderURL = errors.New("invalid watch order url")
var ErrWatchOrderMarkupNotFound = errors.New("watch order markup not found")
// HTTPStatusError describes a response whose status code was not accepted,
// together with the response details kept for diagnostics.
type HTTPStatusError struct {
	StatusCode  int    // status code of the response
	URL         string // URL that was requested
	Server      string // value of the Server response header
	CFRay       string // value of the CF-Ray response header
	Location    string // value of the Location response header
	ContentType string // value of the Content-Type response header
	BodyPreview string // short excerpt of the response body
}

// Error renders the status code and all captured response details on a
// single line.
func (e *HTTPStatusError) Error() string {
	const layout = "unexpected status code: %d (url=%s server=%s cf_ray=%s location=%s content_type=%s body=%q)"
	return fmt.Sprintf(layout, e.StatusCode, e.URL, e.Server, e.CFRay, e.Location, e.ContentType, e.BodyPreview)
}
// WatchOrderEntry describes one title inside a watch order.
type WatchOrderEntry struct {
	// ID is the numeric id of the title.
	ID int `json:"id"`
	// Type is the title's type label, e.g. "TV" or "Movie".
	Type string `json:"type"`
	// Title is the title's name.
	Title string `json:"title"`
	// TitleAlt is an alternative title; it is left out of the JSON when empty.
	TitleAlt string `json:"title_alt,omitempty"`
}
// WatchOrderResult is the watch order obtained for one root id.
type WatchOrderResult struct {
	// ID is the root id the watch order was requested for.
	ID int `json:"id"`
	// WatchOrder lists the entries of the watch order.
	WatchOrder []WatchOrderEntry `json:"watch_order"`
}
// watchOrderRow is the raw data read from one table row of the watch-order
// page, before the numeric type id is translated into a label.
type watchOrderRow struct {
	// id is the value of the row's data-id attribute.
	id int
	// typeID is the value of the row's data-type attribute.
	typeID int
	// title is the text of the row's first .wo_title element.
	title string
	// alternativeTitle is the text of the row's first .uk-text-small element.
	alternativeTitle string
}
// parseRootID extracts the numeric id that idPattern captures from url.
// ErrInvalidWatchOrderURL is returned when the pattern does not match or the
// captured digits do not fit in an int.
func parseRootID(url string) (int, error) {
	groups := idPattern.FindStringSubmatch(url)
	if len(groups) == 2 {
		if id, convErr := strconv.Atoi(groups[1]); convErr == nil {
			return id, nil
		}
	}
	return 0, ErrInvalidWatchOrderURL
}
// fetchDocument issues a GET request for url with browser-like headers and
// parses the response body as HTML.
//
// http.DefaultClient is used when httpClient is nil. Any status other than
// 200 OK yields an *HTTPStatusError carrying selected response headers and up
// to 512 bytes of the body with runs of whitespace collapsed to single spaces.
func fetchDocument(ctx context.Context, httpClient *http.Client, url string) (*goquery.Document, error) {
	if httpClient == nil {
		httpClient = http.DefaultClient
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}

	for _, header := range [...][2]string{
		{"User-Agent", defaultUserAgent},
		{"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"},
		{"Accept-Language", "en-US,en;q=0.9"},
		{"Referer", "https://chiaki.site/"},
		{"Cache-Control", "no-cache"},
	} {
		req.Header.Set(header[0], header[1])
	}

	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("request failed: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusOK {
		doc, parseErr := goquery.NewDocumentFromReader(resp.Body)
		if parseErr != nil {
			return nil, fmt.Errorf("failed to parse html: %w", parseErr)
		}
		return doc, nil
	}

	// The preview is best effort: a read error simply leaves it shorter.
	preview, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
	trimmedHeader := func(name string) string {
		return strings.TrimSpace(resp.Header.Get(name))
	}

	return nil, &HTTPStatusError{
		StatusCode:  resp.StatusCode,
		URL:         url,
		Server:      trimmedHeader("Server"),
		CFRay:       trimmedHeader("CF-Ray"),
		Location:    trimmedHeader("Location"),
		ContentType: trimmedHeader("Content-Type"),
		BodyPreview: strings.Join(strings.Fields(strings.TrimSpace(string(preview))), " "),
	}
}
// extractTypeLabelsByID reads the type filter of the page ("#wo_type_filter
// label") and returns a map from each checkbox's numeric value to the
// trimmed text of its label. Labels without a value attribute, with a
// non-numeric value, or with empty text are skipped.
func extractTypeLabelsByID(doc *goquery.Document) map[int]string {
	labels := map[int]string{}

	doc.Find("#wo_type_filter label").Each(func(_ int, labelNode *goquery.Selection) {
		value, hasValue := labelNode.Find("input[type='checkbox']").Attr("value")
		if !hasValue {
			return
		}

		id, convErr := strconv.Atoi(strings.TrimSpace(value))
		if convErr != nil {
			return
		}

		if text := strings.TrimSpace(labelNode.Text()); text != "" {
			labels[id] = text
		}
	})

	return labels
}
// parseAttrInt reads attribute attrName from selection and converts its
// trimmed value to an int. The boolean is false when the attribute is absent
// or is not an integer.
func parseAttrInt(selection *goquery.Selection, attrName string) (int, bool) {
	if raw, found := selection.Attr(attrName); found {
		if number, convErr := strconv.Atoi(strings.TrimSpace(raw)); convErr == nil {
			return number, true
		}
	}
	return 0, false
}
// extractRows collects one watchOrderRow for every "tr[data-id]" element of
// doc, in document order. Rows whose data-id or data-type attribute is missing
// or not an integer are skipped.
func extractRows(doc *goquery.Document) []watchOrderRow {
	collected := []watchOrderRow{}

	doc.Find("tr[data-id]").Each(func(_ int, tr *goquery.Selection) {
		id, idOK := parseAttrInt(tr, "data-id")
		if !idOK {
			return
		}

		typeID, typeOK := parseAttrInt(tr, "data-type")
		if !typeOK {
			return
		}

		mainTitle := tr.Find(".wo_title").First().Text()
		altTitle := tr.Find(".uk-text-small").First().Text()

		collected = append(collected, watchOrderRow{
			id:               id,
			typeID:           typeID,
			title:            strings.TrimSpace(mainTitle),
			alternativeTitle: strings.TrimSpace(altTitle),
		})
	})

	return collected
}
// hasWatchOrderTable reports whether doc contains an element with id
// "wo_list".
func hasWatchOrderTable(doc *goquery.Document) bool {
	tables := doc.Find("#wo_list")
	return tables.Length() >= 1
}
// FetchWatchOrder downloads the watch-order page at url and converts its
// table rows into a WatchOrderResult whose ID is the root id parsed from url.
//
// It fails when url carries no id, when the page cannot be fetched, or with
// ErrWatchOrderMarkupNotFound when the page has no watch-order table. A page
// whose table has no usable rows yields an empty, non-nil WatchOrder slice.
func FetchWatchOrder(ctx context.Context, httpClient *http.Client, url string) (WatchOrderResult, error) {
	var none WatchOrderResult

	rootID, err := parseRootID(url)
	if err != nil {
		return none, err
	}

	doc, err := fetchDocument(ctx, httpClient, url)
	if err != nil {
		return none, err
	}

	if !hasWatchOrderTable(doc) {
		return none, ErrWatchOrderMarkupNotFound
	}

	result := WatchOrderResult{ID: rootID, WatchOrder: []WatchOrderEntry{}}

	rows := extractRows(doc)
	if len(rows) == 0 {
		return result, nil
	}

	// Rows carry a numeric type id; the filter labels translate it to text.
	labels := extractTypeLabelsByID(doc)

	result.WatchOrder = make([]WatchOrderEntry, len(rows))
	for i, row := range rows {
		result.WatchOrder[i] = WatchOrderEntry{
			ID:       row.id,
			Type:     strings.TrimSpace(labels[row.typeID]),
			Title:    row.title,
			TitleAlt: row.alternativeTitle,
		}
	}

	return result, nil
}

View File

@@ -1,160 +0,0 @@
package watchorder
import (
"context"
"errors"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
)
// testServer starts an httptest server that answers every request with body,
// served as "text/html; charset=utf-8". The caller must close the server.
func testServer(body string) *httptest.Server {
	return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/html; charset=utf-8")
		_, _ = w.Write([]byte(body))
	}))
}
// testHTMLWithMetadata returns a page fixture with a type filter (TV=1,
// Movie=3) and a watch-order table holding one movie row with id 442.
func testHTMLWithMetadata() string {
	const page = `
<!doctype html>
<html>
<body>
<div id="wo_type_filter">
<label><input type="checkbox" value="1" checked> TV</label>
<label><input type="checkbox" value="3" checked> Movie</label>
</div>
<table id="wo_list">
<tr data-id="442" data-anilist-id="442" data-type="3">
<td>
<span class="wo_title">Naruto Movie 1</span>
<span class="uk-text-small">Naruto the Movie 1</span>
</td>
</tr>
</table>
</body>
</html>`
	return page
}
// testHTMLEmptyRows returns a page fixture that has the type filter and a
// watch-order table, but no rows inside the table.
func testHTMLEmptyRows() string {
	const page = `
<!doctype html>
<html>
<body>
<div id="wo_type_filter">
<label><input type="checkbox" value="1" checked> TV</label>
<label><input type="checkbox" value="3" checked> Movie</label>
</div>
<table id="wo_list"></table>
</body>
</html>`
	return page
}
// testHTMLWithoutWatchOrderTable returns a page fixture that contains only a
// "challenge page" paragraph and no watch-order table.
func testHTMLWithoutWatchOrderTable() string {
	const page = `
<!doctype html>
<html>
<body>
<p>challenge page</p>
</body>
</html>`
	return page
}
// TestFetchWatchOrder_OutputShape fetches the single-row fixture and checks
// the root id and every field of the one resulting entry.
func TestFetchWatchOrder_OutputShape(t *testing.T) {
	srv := testServer(testHTMLWithMetadata())
	defer srv.Close()

	client := &http.Client{Timeout: time.Second}
	got, err := FetchWatchOrder(context.Background(), client, srv.URL+"/?/tools/watch_order/id/442")
	if err != nil {
		t.Fatalf("expected no error, got %v", err)
	}

	if got.ID != 442 {
		t.Fatalf("expected root id 442, got %d", got.ID)
	}
	if count := len(got.WatchOrder); count != 1 {
		t.Fatalf("expected 1 watch_order entry, got %d", count)
	}

	// Fatalf stops the test, so only the first mismatch is ever reported.
	switch first := got.WatchOrder[0]; {
	case first.ID != 442:
		t.Fatalf("expected entry id 442, got %d", first.ID)
	case first.Type != "Movie":
		t.Fatalf("expected type Movie, got %q", first.Type)
	case first.Title != "Naruto Movie 1":
		t.Fatalf("expected title Naruto Movie 1, got %q", first.Title)
	case first.TitleAlt != "Naruto the Movie 1":
		t.Fatalf("expected title_alt Naruto the Movie 1, got %q", first.TitleAlt)
	}
}
// TestFetchWatchOrder_NoRowsReturnsEmpty checks that a page with an empty
// watch-order table yields the root id and no entries, without an error.
func TestFetchWatchOrder_NoRowsReturnsEmpty(t *testing.T) {
	srv := testServer(testHTMLEmptyRows())
	defer srv.Close()

	client := &http.Client{Timeout: time.Second}
	got, err := FetchWatchOrder(context.Background(), client, srv.URL+"/?/tools/watch_order/id/1535")
	if err != nil {
		t.Fatalf("expected no error, got %v", err)
	}

	if got.ID != 1535 {
		t.Fatalf("expected root id 1535, got %d", got.ID)
	}
	if count := len(got.WatchOrder); count != 0 {
		t.Fatalf("expected no entries, got %d", count)
	}
}
// TestFetchWatchOrder_MissingMarkupReturnsError checks that a page without
// the watch-order table is reported as ErrWatchOrderMarkupNotFound.
func TestFetchWatchOrder_MissingMarkupReturnsError(t *testing.T) {
	srv := testServer(testHTMLWithoutWatchOrderTable())
	defer srv.Close()

	client := &http.Client{Timeout: time.Second}
	_, err := FetchWatchOrder(context.Background(), client, srv.URL+"/?/tools/watch_order/id/1535")

	if matched := errors.Is(err, ErrWatchOrderMarkupNotFound); !matched {
		t.Fatalf("expected ErrWatchOrderMarkupNotFound, got %v", err)
	}
}
// TestFetchWatchOrder_HTTPStatusErrorIncludesContext serves a 403 response
// and checks that the returned *HTTPStatusError carries the status code, the
// CF-Ray header and a preview of the body.
func TestFetchWatchOrder_HTTPStatusErrorIncludesContext(t *testing.T) {
	forbidden := func(w http.ResponseWriter, _ *http.Request) {
		headers := w.Header()
		headers.Set("Server", "cloudflare")
		headers.Set("CF-Ray", "abc123")
		headers.Set("Content-Type", "text/html; charset=utf-8")
		w.WriteHeader(http.StatusForbidden)
		_, _ = w.Write([]byte("<html><body>access denied</body></html>"))
	}

	srv := httptest.NewServer(http.HandlerFunc(forbidden))
	defer srv.Close()

	client := &http.Client{Timeout: time.Second}
	_, err := FetchWatchOrder(context.Background(), client, srv.URL+"/?/tools/watch_order/id/1")
	if err == nil {
		t.Fatalf("expected error, got nil")
	}

	var httpErr *HTTPStatusError
	if !errors.As(err, &httpErr) {
		t.Fatalf("expected HTTPStatusError, got %T", err)
	}

	switch {
	case httpErr.StatusCode != http.StatusForbidden:
		t.Fatalf("expected 403, got %d", httpErr.StatusCode)
	case httpErr.CFRay != "abc123":
		t.Fatalf("expected cf-ray abc123, got %q", httpErr.CFRay)
	case !strings.Contains(httpErr.BodyPreview, "access denied"):
		t.Fatalf("expected body preview to include access denied, got %q", httpErr.BodyPreview)
	}
}