244 lines
5.6 KiB
Go
244 lines
5.6 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"flag"
|
|
"fmt"
|
|
"net/http"
|
|
"os"
|
|
"path/filepath"
|
|
"regexp"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
"mal/internal/watchorder"
|
|
)
|
|
|
|
// defaultUserAgent is the browser-style User-Agent header value that
// fetchDocument attaches to every outgoing request.
const defaultUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
|
|
|
|
// idPattern matches an "/id/<digits>" path segment and captures the digits
// as its single submatch. It is compiled once at package scope.
var idPattern = regexp.MustCompile(`/id/(\d+)`)
|
|
|
|
// seedPayload mirrors the JSON document read from the seed file: an object
// whose "ids" member is a list of integer ids.
type seedPayload struct {
	// IDs is decoded from the top-level "ids" array.
	IDs []int `json:"ids"`
}
|
|
|
|
type outputPayload struct {
|
|
Data map[string][]watchorder.WatchOrderEntry `json:"data"`
|
|
}
|
|
|
|
func parseRootID(url string) (int, error) {
|
|
match := idPattern.FindStringSubmatch(url)
|
|
if len(match) != 2 {
|
|
return 0, fmt.Errorf("invalid watch-order url: %s", url)
|
|
}
|
|
|
|
id, err := strconv.Atoi(match[1])
|
|
if err != nil {
|
|
return 0, fmt.Errorf("invalid watch-order id in url %s: %w", url, err)
|
|
}
|
|
|
|
return id, nil
|
|
}
|
|
|
|
func fetchDocument(ctx context.Context, client *http.Client, url string) (*goquery.Document, error) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
req.Header.Set("User-Agent", defaultUserAgent)
|
|
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8")
|
|
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
|
|
req.Header.Set("Referer", "https://chiaki.site/")
|
|
req.Header.Set("Cache-Control", "no-cache")
|
|
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("status %d", resp.StatusCode)
|
|
}
|
|
|
|
return goquery.NewDocumentFromReader(resp.Body)
|
|
}
|
|
|
|
func parseRows(doc *goquery.Document) []watchorder.WatchOrderEntry {
|
|
entries := make([]watchorder.WatchOrderEntry, 0)
|
|
|
|
doc.Find("tr[data-id]").Each(func(_ int, selection *goquery.Selection) {
|
|
rawID, ok := selection.Attr("data-id")
|
|
if !ok {
|
|
return
|
|
}
|
|
|
|
id, err := strconv.Atoi(strings.TrimSpace(rawID))
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
typeLabel := ""
|
|
rawTypeID, hasType := selection.Attr("data-type")
|
|
if hasType {
|
|
typeID := strings.TrimSpace(rawTypeID)
|
|
typeLabel = mapTypeByID(doc, typeID)
|
|
}
|
|
|
|
title := strings.TrimSpace(selection.Find(".wo_title").First().Text())
|
|
titleAlt := strings.TrimSpace(selection.Find(".uk-text-small").First().Text())
|
|
|
|
entries = append(entries, watchorder.WatchOrderEntry{
|
|
ID: id,
|
|
Type: typeLabel,
|
|
Title: title,
|
|
TitleAlt: titleAlt,
|
|
})
|
|
})
|
|
|
|
return entries
|
|
}
|
|
|
|
func mapTypeByID(doc *goquery.Document, typeID string) string {
|
|
label := ""
|
|
doc.Find("#wo_type_filter label").EachWithBreak(func(_ int, selection *goquery.Selection) bool {
|
|
input := selection.Find("input[type='checkbox']")
|
|
value, ok := input.Attr("value")
|
|
if ok && strings.TrimSpace(value) == typeID {
|
|
label = strings.TrimSpace(selection.Text())
|
|
return false
|
|
}
|
|
return true
|
|
})
|
|
|
|
return label
|
|
}
|
|
|
|
// parseIDList parses a comma-separated list of integers such as "3, 1,2".
//
// Whitespace around each item is ignored and empty items (for example from a
// trailing comma) are dropped. Items are returned in input order without any
// de-duplication. A blank value yields an empty, non-nil slice. The first item
// that is not a valid integer aborts parsing and is reported in the error; the
// returned slice is nil in that case.
func parseIDList(value string) ([]int, error) {
	if strings.TrimSpace(value) == "" {
		return []int{}, nil
	}

	fields := strings.Split(value, ",")
	parsed := make([]int, 0, len(fields))

	for i := range fields {
		token := strings.TrimSpace(fields[i])
		if len(token) == 0 {
			continue
		}

		n, convErr := strconv.Atoi(token)
		if convErr != nil {
			return nil, fmt.Errorf("invalid id %q: %w", token, convErr)
		}
		parsed = append(parsed, n)
	}

	return parsed, nil
}
|
|
|
|
func loadSeedIDs(path string) ([]int, error) {
|
|
if strings.TrimSpace(path) == "" {
|
|
return []int{}, nil
|
|
}
|
|
|
|
content, err := os.ReadFile(path)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
payload := seedPayload{}
|
|
if err := json.Unmarshal(content, &payload); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return payload.IDs, nil
|
|
}
|
|
|
|
// sortAndUnique returns the strictly positive values of ids in ascending
// order with duplicates removed.
//
// The input slice is left untouched and the result is always a fresh,
// non-nil slice.
func sortAndUnique(ids []int) []int {
	// Sort a private copy so equal values become adjacent and the caller's
	// slice keeps its order.
	ordered := make([]int, len(ids))
	copy(ordered, ids)
	sort.Ints(ordered)

	result := make([]int, 0, len(ids))
	for _, candidate := range ordered {
		if candidate <= 0 {
			continue
		}
		if last := len(result) - 1; last >= 0 && result[last] == candidate {
			continue // duplicate of the value just kept
		}
		result = append(result, candidate)
	}

	return result
}
|
|
|
|
func main() {
|
|
outputPath := flag.String("out", "data/watch_order.json", "output json file path")
|
|
seedPath := flag.String("seed", "tmp/watch_order_seed_ids.json", "seed json file path with {\"ids\": [...]} ")
|
|
idList := flag.String("ids", "", "comma-separated MAL ids")
|
|
flag.Parse()
|
|
|
|
idsFromFlag, err := parseIDList(*idList)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
idsFromSeed, err := loadSeedIDs(*seedPath)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: failed to load seed ids: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
allIDs := sortAndUnique(append(idsFromSeed, idsFromFlag...))
|
|
if len(allIDs) == 0 {
|
|
fmt.Fprintln(os.Stderr, "error: no ids provided (use -seed and/or -ids)")
|
|
os.Exit(1)
|
|
}
|
|
|
|
httpClient := &http.Client{Timeout: 12 * time.Second}
|
|
ctx := context.Background()
|
|
|
|
data := make(map[string][]watchorder.WatchOrderEntry, len(allIDs))
|
|
for _, id := range allIDs {
|
|
url := fmt.Sprintf("https://chiaki.site/?/tools/watch_order/id/%d", id)
|
|
if _, err := parseRootID(url); err != nil {
|
|
continue
|
|
}
|
|
|
|
doc, err := fetchDocument(ctx, httpClient, url)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
if doc.Find("#wo_list").Length() == 0 {
|
|
continue
|
|
}
|
|
|
|
data[strconv.Itoa(id)] = parseRows(doc)
|
|
}
|
|
|
|
encoded, err := json.Marshal(outputPayload{Data: data})
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: failed to encode output: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
outputDirectory := filepath.Dir(*outputPath)
|
|
if err := os.MkdirAll(outputDirectory, 0o755); err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: failed to create data directory: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
if err := os.WriteFile(*outputPath, encoded, 0o644); err != nil {
|
|
fmt.Fprintf(os.Stderr, "error: failed to write output %q: %v\n", *outputPath, err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
fmt.Printf("wrote watch-order dataset for %d ids to %s\n", len(data), *outputPath)
|
|
}
|