diff --git a/integrations/animeschedule/animeschedule.go b/integrations/animeschedule/animeschedule.go
index 6d8b2e4..9febb58 100644
--- a/integrations/animeschedule/animeschedule.go
+++ b/integrations/animeschedule/animeschedule.go
@@ -278,36 +278,16 @@ func addCommonHeaders(request *http.Request) {
 }
 
 func fetchDocument(ctx context.Context, httpClient *http.Client, url string) (*goquery.Document, string, error) {
-	client := httpClient
-	if client == nil {
-		client = http.DefaultClient
-	}
-
-	request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
-	if err != nil {
-		return nil, url, fmt.Errorf("failed to create request: %w", err)
-	}
-	addCommonHeaders(request)
-
-	response, err := client.Do(request)
-	if err != nil {
-		return nil, url, fmt.Errorf("request failed: %w", err)
-	}
-	defer func() { _ = response.Body.Close() }()
-
-	if response.StatusCode != http.StatusOK {
-		body, _ := io.ReadAll(io.LimitReader(response.Body, netutil.Bytes512))
-		return nil, url, &HTTPStatusError{
+	document, response, err := netutil.FetchHTMLDocument(ctx, httpClient, url, addCommonHeaders, func(response *http.Response, body []byte) error {
+		return &HTTPStatusError{
 			StatusCode:  response.StatusCode,
 			URL:         url,
 			ContentType: strings.TrimSpace(response.Header.Get("Content-Type")),
 			BodyPreview: strings.Join(strings.Fields(strings.TrimSpace(string(body))), " "),
 		}
-	}
-
-	document, err := goquery.NewDocumentFromReader(response.Body)
+	})
 	if err != nil {
-		return nil, url, fmt.Errorf("failed to parse html: %w", err)
+		return nil, url, err
 	}
 
 	return document, response.Request.URL.String(), nil
diff --git a/integrations/watchorder/watch_order.go b/integrations/watchorder/watch_order.go
index dbb6d57..7671b09 100644
--- a/integrations/watchorder/watch_order.go
+++ b/integrations/watchorder/watch_order.go
@@ -86,28 +86,8 @@ func addCommonHeaders(request *http.Request) {
 }
 
 func fetchDocument(ctx context.Context, httpClient *http.Client, url string) (*goquery.Document, error) {
-	client := httpClient
-	if client == nil {
-		client = http.DefaultClient
-	}
-
-	request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
-	if err != nil {
-		return nil, fmt.Errorf("failed to create request: %w", err)
-	}
-
-	addCommonHeaders(request)
-
-	response, err := client.Do(request)
-	if err != nil {
-		return nil, fmt.Errorf("request failed: %w", err)
-	}
-	defer func() { _ = response.Body.Close() }()
-
-	if response.StatusCode != http.StatusOK {
-		// limit body read for error context; avoid reading large error pages
-		body, _ := io.ReadAll(io.LimitReader(response.Body, netutil.Bytes512))
-		return nil, &HTTPStatusError{
+	document, _, err := netutil.FetchHTMLDocument(ctx, httpClient, url, addCommonHeaders, func(response *http.Response, body []byte) error {
+		return &HTTPStatusError{
 			StatusCode:  response.StatusCode,
 			URL:         url,
 			Server:      strings.TrimSpace(response.Header.Get("Server")),
@@ -116,14 +96,8 @@ func fetchDocument(ctx context.Context, httpClient *http.Client, url string) (*g
 			ContentType: strings.TrimSpace(response.Header.Get("Content-Type")),
 			BodyPreview: strings.Join(strings.Fields(strings.TrimSpace(string(body))), " "),
 		}
-	}
-
-	document, err := goquery.NewDocumentFromReader(response.Body)
-	if err != nil {
-		return nil, fmt.Errorf("failed to parse html: %w", err)
-	}
-
-	return document, nil
+	})
+	return document, err
 }
 
 func extractTypeLabelsByID(doc *goquery.Document) map[int]string {
diff --git a/pkg/net/document.go b/pkg/net/document.go
new file mode 100644
index 0000000..02eebff
--- /dev/null
+++ b/pkg/net/document.go
@@ -0,0 +1,70 @@
+package netutil
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+
+	"github.com/PuerkitoBio/goquery"
+)
+
+// FetchHTMLDocument issues a GET request for url and parses a 200 response
+// body as HTML.
+//
+// A nil httpClient falls back to http.DefaultClient. prepareRequest, when
+// non-nil, is called on the request before it is sent. For any status other
+// than 200, up to Bytes512 bytes of the body are read and handed to
+// buildStatusError, whose result becomes the returned error; if
+// buildStatusError is nil or returns nil, a generic status error is returned
+// instead so that a failed fetch never yields a nil document with a nil error.
+//
+// The response is returned once the request has succeeded at the transport
+// level, but its body is already closed by the time this function returns;
+// use it only for metadata such as headers or the final request URL.
+func FetchHTMLDocument(
+	ctx context.Context,
+	httpClient *http.Client,
+	url string,
+	prepareRequest func(*http.Request),
+	buildStatusError func(*http.Response, []byte) error,
+) (*goquery.Document, *http.Response, error) {
+	client := httpClient
+	if client == nil {
+		client = http.DefaultClient
+	}
+
+	request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+	if err != nil {
+		return nil, nil, fmt.Errorf("failed to create request: %w", err)
+	}
+	if prepareRequest != nil {
+		prepareRequest(request)
+	}
+
+	response, err := client.Do(request)
+	if err != nil {
+		return nil, nil, fmt.Errorf("request failed: %w", err)
+	}
+	defer func() { _ = response.Body.Close() }()
+
+	if response.StatusCode != http.StatusOK {
+		// Read error deliberately ignored: the body is only extra context for the status error.
+		body, _ := io.ReadAll(io.LimitReader(response.Body, Bytes512))
+		var statusErr error
+		if buildStatusError != nil {
+			statusErr = buildStatusError(response, body)
+		}
+		if statusErr == nil {
+			statusErr = fmt.Errorf("unexpected status code %d for %s", response.StatusCode, url)
+		}
+		return nil, response, statusErr
+	}
+
+	document, err := goquery.NewDocumentFromReader(response.Body)
+	if err != nil {
+		return nil, response, fmt.Errorf("failed to parse html: %w", err)
+	}
+
+	return document, response, nil
+}