chore: merge code-smell-cleanup
This commit is contained in:
@@ -11,6 +11,7 @@ import {
|
||||
} from "../utils/cookies";
|
||||
import { delay } from "../utils/delay";
|
||||
import { solveEbayChallenge } from "../utils/ebay-challenge";
|
||||
import { fetchHtml, HttpError, RateLimitError } from "../utils/http";
|
||||
import { logger } from "../utils/logger";
|
||||
import { classifyUnstableListings } from "../utils/unstable";
|
||||
|
||||
@@ -326,17 +327,6 @@ function parseEbayPrice(
|
||||
return { cents, currency };
|
||||
}
|
||||
|
||||
/**
 * Error carrying the HTTP status code and URL of a failed request.
 *
 * `status` is the original field name; `statusCode` is a read-only alias so
 * handlers that read either property see the same value.
 */
class HttpError extends Error {
  /**
   * @param message - Human-readable failure description
   * @param status - HTTP status code of the failed response
   * @param url - URL that was requested
   */
  constructor(
    message: string,
    public readonly status: number,
    public readonly url: string,
  ) {
    super(message);
    // Restore the prototype chain: when classes are down-levelled, `super()`
    // on Error returns a plain Error and `instanceof HttpError` would fail.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = "HttpError";
  }

  /** Alias of `status`. */
  get statusCode(): number {
    return this.status;
  }
}
|
||||
|
||||
// ----------------------------- Parsing -----------------------------
|
||||
|
||||
/**
|
||||
@@ -953,9 +943,9 @@ export default async function fetchEbayItems(
|
||||
logger.log(`Parsed ${filteredListings.length} eBay listings.`);
|
||||
return finalizeResults(filteredListings);
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
console.error(
|
||||
`Failed to fetch eBay search (${err.status}): ${err.message}`,
|
||||
if (err instanceof HttpError || err instanceof RateLimitError) {
|
||||
logger.warn(
|
||||
`Failed to fetch eBay search (${err instanceof HttpError ? err.statusCode : 429}): ${err.message}`,
|
||||
);
|
||||
return finalizeResults([]);
|
||||
}
|
||||
|
||||
@@ -12,9 +12,8 @@ import {
|
||||
formatCookiesForHeader,
|
||||
parseCookieString,
|
||||
} from "../utils/cookies";
|
||||
import { delay } from "../utils/delay";
|
||||
import { formatCentsToCurrency } from "../utils/format";
|
||||
import { isRecord } from "../utils/http";
|
||||
import { fetchHtml, HttpError, isRecord, RateLimitError } from "../utils/http";
|
||||
import { logger } from "../utils/logger";
|
||||
import { classifyUnstableListings } from "../utils/unstable";
|
||||
|
||||
@@ -219,17 +218,6 @@ export async function ensureFacebookCookies(): Promise<Cookie[]> {
|
||||
return ensureCookies(FACEBOOK_COOKIE_CONFIG);
|
||||
}
|
||||
|
||||
/**
 * Error carrying the HTTP status code and URL of a failed request.
 *
 * `status` is the original field name; `statusCode` is a read-only alias so
 * handlers that read either property see the same value.
 */
class HttpError extends Error {
  /**
   * @param message - Human-readable failure description
   * @param status - HTTP status code of the failed response
   * @param url - URL that was requested
   */
  constructor(
    message: string,
    public readonly status: number,
    public readonly url: string,
  ) {
    super(message);
    // Restore the prototype chain: when classes are down-levelled, `super()`
    // on Error returns a plain Error and `instanceof HttpError` would fail.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = "HttpError";
  }

  /** Alias of `status`. */
  get statusCode(): number {
    return this.status;
  }
}
|
||||
|
||||
// ----------------------------- Extraction Metrics -----------------------------
|
||||
|
||||
/**
|
||||
@@ -274,112 +262,21 @@ function logExtractionMetrics(success: boolean, itemId?: string) {
|
||||
|
||||
// ----------------------------- HTTP Client -----------------------------
|
||||
|
||||
/**
|
||||
Fetch HTML with a basic retry strategy and simple rate-limit delay between calls.
|
||||
- Retries on 429 and 5xx
|
||||
- Respects X-RateLimit-Reset when present (seconds)
|
||||
- Supports custom cookies for Facebook authentication
|
||||
*/
|
||||
async function fetchHtml(
|
||||
url: string,
|
||||
DELAY_MS: number,
|
||||
opts?: {
|
||||
maxRetries?: number;
|
||||
retryBaseMs?: number;
|
||||
onRateInfo?: (remaining: string | null, reset: string | null) => void;
|
||||
cookies?: string;
|
||||
},
|
||||
): Promise<{ html: HTMLString; responseUrl: string }> {
|
||||
const maxRetries = opts?.maxRetries ?? 3;
|
||||
const retryBaseMs = opts?.retryBaseMs ?? 500;
|
||||
let lastRateLimitError: HttpError | null = null;
|
||||
|
||||
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
||||
try {
|
||||
const headers: Record<string, string> = {
|
||||
accept:
|
||||
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
||||
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
|
||||
"accept-encoding": "gzip, deflate, br",
|
||||
"cache-control": "no-cache",
|
||||
"upgrade-insecure-requests": "1",
|
||||
"sec-fetch-dest": "document",
|
||||
"sec-fetch-mode": "navigate",
|
||||
"sec-fetch-site": "none",
|
||||
"sec-fetch-user": "?1",
|
||||
"user-agent":
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
||||
};
|
||||
|
||||
// Add cookies if provided
|
||||
if (opts?.cookies) {
|
||||
headers.cookie = opts.cookies;
|
||||
}
|
||||
|
||||
const res = await fetch(url, {
|
||||
method: "GET",
|
||||
headers,
|
||||
});
|
||||
|
||||
const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
|
||||
const rateLimitReset = res.headers.get("X-RateLimit-Reset");
|
||||
opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);
|
||||
|
||||
if (!res.ok) {
|
||||
// Respect 429 reset if provided
|
||||
if (res.status === 429) {
|
||||
lastRateLimitError = new HttpError(
|
||||
`Request failed with status ${res.status}`,
|
||||
res.status,
|
||||
url,
|
||||
);
|
||||
const resetSeconds = rateLimitReset
|
||||
? Number(rateLimitReset)
|
||||
: Number.NaN;
|
||||
const waitMs = Number.isFinite(resetSeconds)
|
||||
? Math.max(0, resetSeconds * 1000)
|
||||
: (attempt + 1) * retryBaseMs;
|
||||
if (attempt >= maxRetries) {
|
||||
throw lastRateLimitError;
|
||||
}
|
||||
await delay(waitMs);
|
||||
continue;
|
||||
}
|
||||
// For Facebook, 400 often means authentication required
|
||||
// Don't retry 4xx client errors except 429
|
||||
if (res.status >= 400 && res.status < 500 && res.status !== 429) {
|
||||
throw new HttpError(
|
||||
`Request failed with status ${res.status} (Facebook may require authentication cookies for access)`,
|
||||
res.status,
|
||||
url,
|
||||
);
|
||||
}
|
||||
// Retry on 5xx
|
||||
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
|
||||
await delay((attempt + 1) * retryBaseMs);
|
||||
continue;
|
||||
}
|
||||
throw new HttpError(
|
||||
`Request failed with status ${res.status}`,
|
||||
res.status,
|
||||
url,
|
||||
);
|
||||
}
|
||||
|
||||
const html = await res.text();
|
||||
// Respect per-request delay to keep at or under REQUESTS_PER_SECOND
|
||||
await delay(DELAY_MS);
|
||||
return { html, responseUrl: res.url || url };
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
throw err;
|
||||
}
|
||||
if (attempt >= maxRetries) throw err;
|
||||
await delay((attempt + 1) * retryBaseMs);
|
||||
}
|
||||
}
|
||||
|
||||
throw lastRateLimitError ?? new Error("Exhausted retries without response");
|
||||
/**
 * Build the header set sent with Facebook page requests: a fixed group of
 * browser-style navigation headers followed by the supplied cookie string.
 *
 * @param cookies - Raw value to send as the `cookie` header
 * @returns A new header map (lower-case names)
 */
function createFacebookHeaders(cookies: string): Record<string, string> {
  const browserHeaders: Record<string, string> = {
    accept:
      "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
    "cache-control": "no-cache",
    "upgrade-insecure-requests": "1",
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "none",
    "sec-fetch-user": "?1",
    "user-agent":
      "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  };

  // The cookie entry is appended last so key order matches a single literal.
  return { ...browserHeaders, cookie: cookies };
}
|
||||
|
||||
// ----------------------------- Parsing -----------------------------
|
||||
@@ -1157,6 +1054,8 @@ export default async function fetchFacebookItems(
|
||||
try {
|
||||
const response = await fetchHtml(searchUrl, DELAY_MS, {
|
||||
maxRetries: 3,
|
||||
includeResponseUrl: true,
|
||||
headers: createFacebookHeaders(cookiesHeader),
|
||||
onRateInfo: (remaining, reset) => {
|
||||
if (remaining && reset) {
|
||||
logger.log(
|
||||
@@ -1164,22 +1063,29 @@ export default async function fetchFacebookItems(
|
||||
);
|
||||
}
|
||||
},
|
||||
cookies: cookiesHeader,
|
||||
});
|
||||
searchHtml = response.html;
|
||||
searchResponseUrl = response.responseUrl;
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
logger.warn(
|
||||
`\nFacebook marketplace access failed (${err.status}): ${err.message}`,
|
||||
`\nFacebook marketplace access failed (${err.statusCode}): ${err.message}`,
|
||||
);
|
||||
if (err.status === 400 || err.status === 401 || err.status === 403) {
|
||||
if (
|
||||
err.statusCode === 400 ||
|
||||
err.statusCode === 401 ||
|
||||
err.statusCode === 403
|
||||
) {
|
||||
logger.warn(
|
||||
"This might indicate invalid or expired cookies. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.",
|
||||
);
|
||||
}
|
||||
return finalizeResults([]);
|
||||
}
|
||||
if (err instanceof RateLimitError) {
|
||||
logger.warn(`\nFacebook marketplace access rate limited: ${err.message}`);
|
||||
return finalizeResults([]);
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
|
||||
@@ -1261,6 +1167,8 @@ export async function fetchFacebookItem(
|
||||
let itemResponseUrl = itemUrl;
|
||||
try {
|
||||
const response = await fetchHtml(itemUrl, 1000, {
|
||||
includeResponseUrl: true,
|
||||
headers: createFacebookHeaders(cookiesHeader),
|
||||
onRateInfo: (remaining, reset) => {
|
||||
if (remaining && reset) {
|
||||
logger.log(
|
||||
@@ -1268,18 +1176,17 @@ export async function fetchFacebookItem(
|
||||
);
|
||||
}
|
||||
},
|
||||
cookies: cookiesHeader,
|
||||
});
|
||||
itemHtml = response.html;
|
||||
itemResponseUrl = response.responseUrl;
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
logger.warn(
|
||||
`\nFacebook marketplace item access failed (${err.status}): ${err.message}`,
|
||||
`\nFacebook marketplace item access failed (${err.statusCode}): ${err.message}`,
|
||||
);
|
||||
|
||||
// Enhanced error handling based on status codes
|
||||
switch (err.status) {
|
||||
switch (err.statusCode) {
|
||||
case 400:
|
||||
case 401:
|
||||
case 403:
|
||||
@@ -1305,10 +1212,19 @@ export async function fetchFacebookItem(
|
||||
);
|
||||
break;
|
||||
default:
|
||||
logger.warn(`Unexpected error status: ${err.status}`);
|
||||
logger.warn(`Unexpected error status: ${err.statusCode}`);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
if (err instanceof RateLimitError) {
|
||||
logger.warn(
|
||||
`\nFacebook marketplace item rate limited for item ${itemId}: ${err.message}`,
|
||||
);
|
||||
logger.warn(
|
||||
"Rate limited: Too many requests. Facebook is blocking access temporarily.",
|
||||
);
|
||||
return null;
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ import {
|
||||
formatCookiesForHeader,
|
||||
loadCookiesOptional,
|
||||
} from "../utils/cookies";
|
||||
import { delay } from "../utils/delay";
|
||||
import { formatCentsToCurrency } from "../utils/format";
|
||||
import {
|
||||
fetchHtml,
|
||||
@@ -568,78 +569,6 @@ export function parseSearch(
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
 * Parse a listing page into a typed object (backward compatible).
 *
 * Reads the Apollo state from the page via `extractApolloState`, selects the
 * entry whose `url` and `title` are both strings via `findApolloListingKey`,
 * and maps its fields onto `KijijiListingDetails`.
 *
 * @param htmlString - HTML of the listing page
 * @param BASE_URL - Prefix applied to listing URLs that do not start with "http"
 * @returns The parsed listing, or `null` when the Apollo state is missing, no
 *   matching entry is found, the entry is not a record, or it lacks a URL or
 *   title
 */
function _parseListing(
  htmlString: HTMLString,
  BASE_URL: string,
): KijijiListingDetails | null {
  const apolloState = extractApolloState(htmlString);
  if (!apolloState) return null;

  const listingKey = findApolloListingKey(
    apolloState,
    (value) => typeof value.url === "string" && typeof value.title === "string",
  );
  if (!listingKey) return null;

  const root = apolloState[listingKey];
  if (!isRecord(root)) return null;

  const {
    url,
    title,
    description,
    price,
    type,
    status,
    activationDate,
    endDate,
    metrics,
    location,
  } = root as ApolloListingRoot;

  // Convert a raw value to a number, discarding null/undefined and anything
  // that does not convert to a finite number (NaN, ±Infinity).
  const toFiniteOrUndefined = (raw: unknown): number | undefined => {
    if (raw == null) return undefined;
    const numeric = Number(raw);
    return Number.isFinite(numeric) ? numeric : undefined;
  };

  // Validate the amount BEFORE formatting so a non-numeric amount never
  // reaches the formatter and never produces a price object without cents.
  const cents = toFiniteOrUndefined(price?.amount);
  const amountFormatted =
    cents !== undefined ? formatCentsToCurrency(cents, "en-CA") : undefined;

  const numberOfViews = toFiniteOrUndefined(metrics?.views);

  // Relative URLs are resolved against BASE_URL; non-string URLs are rejected.
  let listingUrl = "";
  if (typeof url === "string") {
    listingUrl = url.startsWith("http") ? url : `${BASE_URL}${url}`;
  }

  if (!listingUrl || !title) return null;

  return {
    url: listingUrl,
    title,
    description,
    listingPrice: amountFormatted
      ? {
          amountFormatted,
          cents,
          currency: price?.currency,
        }
      : undefined,
    listingType: type,
    listingStatus: status,
    creationDate: activationDate,
    endDate,
    numberOfViews,
    address: location?.address ?? null,
  };
}
|
||||
|
||||
/**
|
||||
* Parse a listing page into a detailed object with all available fields
|
||||
*/
|
||||
@@ -938,9 +867,7 @@ export default async function fetchKijijiItems(
|
||||
const batchPromises = batch.map(async (link, batchIndex) => {
|
||||
try {
|
||||
if (batchIndex > 0) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, DELAY_MS * batchIndex),
|
||||
);
|
||||
await delay(DELAY_MS * batchIndex);
|
||||
}
|
||||
|
||||
const html = await fetchHtml(link, 0, {
|
||||
@@ -962,11 +889,11 @@ export default async function fetchKijijiItems(
|
||||
return parsed;
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
console.error(
|
||||
logger.warn(
|
||||
`\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`,
|
||||
);
|
||||
} else {
|
||||
console.error(
|
||||
logger.warn(
|
||||
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`,
|
||||
);
|
||||
}
|
||||
@@ -984,7 +911,7 @@ export default async function fetchKijijiItems(
|
||||
results.push(...batchResults);
|
||||
|
||||
if (i + CONCURRENT_REQUESTS < newListingLinks.length) {
|
||||
await new Promise((resolve) => setTimeout(resolve, DELAY_MS));
|
||||
await delay(DELAY_MS);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import type { HTMLString } from "../types/common";
|
||||
import { delay } from "./delay";
|
||||
|
||||
/** Custom error class for HTTP-related failures */
|
||||
@@ -60,10 +61,57 @@ export function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
/**
 * Calculate exponential backoff delay with jitter.
 *
 * The base delay doubles with each attempt (`baseMs * 2 ** attempt`); up to
 * 10% of that delay is added as jitter, and the total is capped at 30 seconds.
 *
 * @param attempt - Zero-based retry attempt number
 * @param baseMs - Delay for attempt 0, in milliseconds
 * @param jitter - Source of a jitter factor in [0, 1); defaults to
 *   `Math.random`. Injectable so callers and tests can make the delay
 *   deterministic.
 * @returns Delay in milliseconds, never more than 30000
 */
function calculateBackoffDelay(
  attempt: number,
  baseMs: number,
  jitter: () => number = Math.random,
): number {
  const exponentialDelay = baseMs * 2 ** attempt;
  const jitterDelay = jitter() * 0.1 * exponentialDelay; // 10% jitter
  return Math.min(exponentialDelay + jitterDelay, 30000); // Cap at 30 seconds
}
|
||||
|
||||
// Upper bound, in milliseconds, on the wait derived from a rate-limit reset header.
const MAX_RATE_LIMIT_WAIT_MS = 30_000;
|
||||
// Reset values up to this many seconds (86,400 = one day) are read as a delay
// from now; larger values are read as an epoch timestamp in seconds.
const MAX_DELTA_RESET_SECONDS = 86_400;
|
||||
|
||||
/**
 * Combine default and caller-supplied headers into one map keyed by
 * lower-cased header name, so that e.g. `Accept` and `accept` cannot coexist.
 *
 * @param defaultHeaders - Baseline headers
 * @param customHeaders - Optional overrides; on a name clash (ignoring case)
 *   the custom value wins
 * @returns A new object; neither input is modified
 */
function mergeHeaders(
  defaultHeaders: Record<string, string>,
  customHeaders?: Record<string, string>,
): Record<string, string> {
  const normalised: Record<string, string> = {};

  // Layers are applied in order, so later layers overwrite earlier ones.
  const layers = [defaultHeaders, customHeaders ?? {}];
  layers.forEach((layer) => {
    Object.entries(layer).forEach(([name, value]) => {
      normalised[name.toLowerCase()] = value;
    });
  });

  return normalised;
}
|
||||
|
||||
/**
 * Work out how long to wait after a rate-limited response.
 *
 * The reset header is interpreted as seconds: values up to
 * `MAX_DELTA_RESET_SECONDS` are taken as a delay from now, larger values as an
 * epoch timestamp in seconds. The result is clamped to
 * `[0, MAX_RATE_LIMIT_WAIT_MS]`.
 *
 * @param resetHeader - Raw reset header value, or `null` when absent
 * @param fallbackWaitMs - Wait to use when the header is missing, blank, or
 *   not a finite number; returned unchanged
 * @returns Milliseconds to wait before retrying
 */
function calculateRateLimitWaitMs(
  resetHeader: string | null,
  fallbackWaitMs: number,
): number {
  // Trim first: Number("   ") is 0, which would otherwise be read as
  // "retry immediately" instead of falling back to backoff.
  const trimmedHeader = resetHeader?.trim();
  if (!trimmedHeader) return fallbackWaitMs;

  const resetValue = Number(trimmedHeader);
  if (!Number.isFinite(resetValue)) return fallbackWaitMs;

  const waitMs =
    resetValue <= MAX_DELTA_RESET_SECONDS
      ? resetValue * 1000
      : resetValue * 1000 - Date.now();

  return Math.min(Math.max(0, waitMs), MAX_RATE_LIMIT_WAIT_MS);
}
|
||||
|
||||
/** Result type when includeResponseUrl is true */
|
||||
export interface FetchHtmlResult {
|
||||
/** The fetched HTML body. */
html: HTMLString;
|
||||
/** URL reported by the response, or the requested URL when the response reports none. */
responseUrl: string;
|
||||
}
|
||||
|
||||
/** Options for fetchHtml */
|
||||
@@ -73,6 +121,8 @@ export interface FetchHtmlOptions {
|
||||
timeoutMs?: number;
|
||||
onRateInfo?: (remaining: string | null, reset: string | null) => void;
|
||||
headers?: Record<string, string>;
|
||||
includeResponseUrl?: boolean;
|
||||
jitter?: () => number;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -80,14 +130,24 @@ export interface FetchHtmlOptions {
|
||||
* @param url - The URL to fetch
|
||||
* @param delayMs - Delay in milliseconds between requests (rate limiting)
|
||||
* @param opts - Optional fetch options
|
||||
* @returns The HTML content as a string
|
||||
* @returns The HTML content as a string, or an object with html and responseUrl
|
||||
* @throws HttpError, NetworkError, or RateLimitError on failure
|
||||
*/
|
||||
export async function fetchHtml(
|
||||
url: string,
|
||||
delayMs: number,
|
||||
opts: FetchHtmlOptions & { includeResponseUrl: true },
|
||||
): Promise<FetchHtmlResult>;
|
||||
export async function fetchHtml(
|
||||
url: string,
|
||||
delayMs: number,
|
||||
opts?: FetchHtmlOptions,
|
||||
): Promise<string> {
|
||||
): Promise<HTMLString>;
|
||||
export async function fetchHtml(
|
||||
url: string,
|
||||
delayMs: number,
|
||||
opts?: FetchHtmlOptions,
|
||||
): Promise<HTMLString | FetchHtmlResult> {
|
||||
const maxRetries = opts?.maxRetries ?? 3;
|
||||
const retryBaseMs = opts?.retryBaseMs ?? 1000;
|
||||
const timeoutMs = opts?.timeoutMs ?? 30000;
|
||||
@@ -118,13 +178,17 @@ export async function fetchHtml(
|
||||
const controller = new AbortController();
|
||||
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
|
||||
|
||||
const res = await fetch(url, {
|
||||
method: "GET",
|
||||
headers: { ...defaultHeaders, ...opts?.headers },
|
||||
signal: controller.signal,
|
||||
});
|
||||
|
||||
clearTimeout(timeoutId);
|
||||
const res = await (async () => {
|
||||
try {
|
||||
return await fetch(url, {
|
||||
method: "GET",
|
||||
headers: mergeHeaders(defaultHeaders, opts?.headers),
|
||||
signal: controller.signal,
|
||||
});
|
||||
} finally {
|
||||
clearTimeout(timeoutId);
|
||||
}
|
||||
})();
|
||||
|
||||
const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
|
||||
const rateLimitReset = res.headers.get("X-RateLimit-Reset");
|
||||
@@ -136,12 +200,17 @@ export async function fetchHtml(
|
||||
const resetSeconds = rateLimitReset
|
||||
? Number(rateLimitReset)
|
||||
: Number.NaN;
|
||||
const waitMs = Number.isFinite(resetSeconds)
|
||||
? Math.max(0, resetSeconds * 1000)
|
||||
: calculateBackoffDelay(attempt, retryBaseMs);
|
||||
const waitMs = calculateRateLimitWaitMs(
|
||||
rateLimitReset,
|
||||
calculateBackoffDelay(
|
||||
attempt,
|
||||
retryBaseMs,
|
||||
opts?.jitter ?? Math.random,
|
||||
),
|
||||
);
|
||||
|
||||
if (attempt < maxRetries) {
|
||||
await new Promise((resolve) => setTimeout(resolve, waitMs));
|
||||
await delay(waitMs);
|
||||
continue;
|
||||
}
|
||||
throw new RateLimitError(
|
||||
@@ -153,8 +222,12 @@ export async function fetchHtml(
|
||||
|
||||
// Retry on server errors
|
||||
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
|
||||
await delay(
|
||||
calculateBackoffDelay(
|
||||
attempt,
|
||||
retryBaseMs,
|
||||
opts?.jitter ?? Math.random,
|
||||
),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
@@ -170,7 +243,9 @@ export async function fetchHtml(
|
||||
|
||||
// Respect per-request delay to maintain rate limiting
|
||||
await delay(delayMs);
|
||||
return html;
|
||||
return opts?.includeResponseUrl
|
||||
? { html, responseUrl: res.url || url }
|
||||
: html;
|
||||
} catch (err) {
|
||||
// Re-throw known errors
|
||||
if (
|
||||
@@ -183,8 +258,12 @@ export async function fetchHtml(
|
||||
|
||||
if (err instanceof Error && err.name === "AbortError") {
|
||||
if (attempt < maxRetries) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
|
||||
await delay(
|
||||
calculateBackoffDelay(
|
||||
attempt,
|
||||
retryBaseMs,
|
||||
opts?.jitter ?? Math.random,
|
||||
),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
@@ -193,8 +272,12 @@ export async function fetchHtml(
|
||||
|
||||
// Network or other errors
|
||||
if (attempt < maxRetries) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, calculateBackoffDelay(attempt, retryBaseMs)),
|
||||
await delay(
|
||||
calculateBackoffDelay(
|
||||
attempt,
|
||||
retryBaseMs,
|
||||
opts?.jitter ?? Math.random,
|
||||
),
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -33,6 +33,7 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () => Promise.resolve("<html><body></body></html>"),
|
||||
}),
|
||||
) as unknown as typeof fetch;
|
||||
@@ -70,6 +71,7 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
@@ -90,10 +92,26 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
// A 429 on every attempt must yield an empty result set rather than a throw.
test("returns empty results when eBay rate-limits the request", async () => {
  // Every header lookup answers "0", so any reset header read is a zero wait.
  const respondRateLimited = () =>
    Promise.resolve({
      ok: false,
      status: 429,
      headers: { get: () => "0" },
      text: () => Promise.resolve(""),
    });
  global.fetch = mock(respondRateLimited) as unknown as typeof fetch;

  const results = await fetchEbayItems("laptop", 1000);

  expect(results).toEqual([]);
});
|
||||
|
||||
test("deduplicates repeated item links from the same card", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
@@ -120,6 +138,7 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
@@ -152,6 +171,7 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
@@ -194,6 +214,7 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
@@ -295,6 +316,7 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
@@ -324,6 +346,7 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
@@ -353,6 +376,7 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
@@ -382,6 +406,7 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
@@ -424,6 +449,7 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
@@ -456,6 +482,7 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
@@ -488,6 +515,7 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
@@ -521,6 +549,7 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
@@ -548,6 +577,7 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
@@ -580,6 +610,7 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
@@ -610,6 +641,7 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
@@ -655,6 +687,7 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
@@ -693,6 +726,7 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
headers: { get: () => null },
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
|
||||
@@ -38,4 +38,87 @@ describe("fetchHtml", () => {
|
||||
|
||||
expect(scheduledDelays).not.toContain(1000);
|
||||
});
|
||||
|
||||
// With includeResponseUrl set, fetchHtml resolves to { html, responseUrl }
// and responseUrl is the URL reported by the response.
test("fetchHtml returns responseUrl when includeResponseUrl is true", async () => {
  process.env.NODE_ENV = "test";

  const respondOk = () =>
    Promise.resolve({
      ok: true,
      status: 200,
      url: "https://example.test/final",
      headers: { get: () => null },
      text: () => Promise.resolve("<html></html>"),
    });
  global.fetch = mock(respondOk) as unknown as typeof fetch;

  const { html, responseUrl } = await fetchHtml("https://example.test", 0, {
    includeResponseUrl: true,
  });

  expect(html).toBe("<html></html>");
  expect(responseUrl).toBe("https://example.test/final");
});
|
||||
|
||||
// A 429 whose X-RateLimit-Reset is an epoch timestamp far in the future must
// be waited out for at most 30 seconds, not for the raw header value.
test("rate limit epoch reset uses bounded wait", async () => {
  // Remember what this test replaces so it can be put back afterwards and the
  // test does not depend on cleanup performed elsewhere.
  const previousNodeEnv = process.env.NODE_ENV;
  const realSetTimeout = globalThis.setTimeout;
  const realClearTimeout = globalThis.clearTimeout;

  try {
    process.env.NODE_ENV = "production";
    const scheduledDelays: number[] = [];
    const farFutureEpochSeconds = Math.floor(Date.now() / 1000) + 315_360_000;
    let calls = 0;

    // First call is rate limited, every later call succeeds.
    global.fetch = mock(() => {
      calls += 1;
      return Promise.resolve({
        ok: calls > 1,
        status: calls > 1 ? 200 : 429,
        url: "https://example.test",
        headers: {
          get: (name: string) =>
            name === "X-RateLimit-Reset" ? String(farFutureEpochSeconds) : null,
        },
        text: () => Promise.resolve("<html></html>"),
      });
    }) as unknown as typeof fetch;

    // Record every requested delay and fire handlers immediately, except the
    // request-timeout timer (recognised by its 1_234_567 ms value), which must
    // never fire.
    globalThis.setTimeout = mock((handler: TimerHandler, timeout?: number) => {
      scheduledDelays.push(Number(timeout));
      if (timeout !== 1_234_567 && typeof handler === "function") {
        handler();
      }
      return 0 as unknown as ReturnType<typeof setTimeout>;
    }) as unknown as typeof setTimeout;
    globalThis.clearTimeout = mock(() => {}) as unknown as typeof clearTimeout;

    await fetchHtml("https://example.test", 0, {
      maxRetries: 1,
      timeoutMs: 1_234_567,
    });

    expect(scheduledDelays).toContain(30_000);
    expect(scheduledDelays).not.toContain(farFutureEpochSeconds * 1000);
  } finally {
    globalThis.setTimeout = realSetTimeout;
    globalThis.clearTimeout = realClearTimeout;
    // Assigning undefined to an env var stores the string "undefined".
    if (previousNodeEnv === undefined) {
      delete process.env.NODE_ENV;
    } else {
      process.env.NODE_ENV = previousNodeEnv;
    }
  }
});
|
||||
|
||||
// A caller-supplied `Accept` must replace the default `accept` and be sent
// under the lower-case name only.
test("custom Accept header overrides default accept without duplicate casing", async () => {
  process.env.NODE_ENV = "test";
  const customAccept = "text/plain";
  let requestHeaders: HeadersInit | undefined;

  // Capture the headers passed to fetch, then answer with an empty page.
  const captureAndRespond = (
    _url: string | URL | Request,
    init?: RequestInit,
  ) => {
    requestHeaders = init?.headers;
    return Promise.resolve({
      ok: true,
      status: 200,
      url: "https://example.test",
      headers: { get: () => null },
      text: () => Promise.resolve("<html></html>"),
    });
  };
  global.fetch = mock(captureAndRespond) as unknown as typeof fetch;

  await fetchHtml("https://example.test", 0, {
    headers: { Accept: customAccept },
  });

  expect(requestHeaders).toBeDefined();
  const sentHeaders = requestHeaders as Record<string, string>;
  expect(sentHeaders.accept).toBe(customAccept);
  expect(sentHeaders.Accept).toBeUndefined();
});
|
||||
});
|
||||
|
||||
@@ -1,11 +1,6 @@
|
||||
// Test setup for Bun test runner
|
||||
// This file is loaded before any tests run due to bunfig.toml preload
|
||||
|
||||
// Mock fetch globally for tests
|
||||
global.fetch =
|
||||
global.fetch ||
|
||||
(() => {
|
||||
throw new Error("fetch is not available in test environment");
|
||||
});
|
||||
|
||||
// Add any global test utilities here
|
||||
global.fetch = Object.assign(
|
||||
() => {
|
||||
throw new Error("Tests must mock fetch explicitly");
|
||||
},
|
||||
{ preconnect: fetch.preconnect },
|
||||
) as typeof fetch;
|
||||
|
||||
Reference in New Issue
Block a user