chore: merge code-smell-cleanup

This commit is contained in:
2026-04-30 21:08:34 -04:00
12 changed files with 779 additions and 366 deletions

View File

@@ -11,6 +11,7 @@ import {
} from "../utils/cookies";
import { delay } from "../utils/delay";
import { solveEbayChallenge } from "../utils/ebay-challenge";
import { fetchHtml, HttpError, RateLimitError } from "../utils/http";
import { logger } from "../utils/logger";
import { classifyUnstableListings } from "../utils/unstable";
@@ -326,17 +327,6 @@ function parseEbayPrice(
return { cents, currency };
}
/**
 * Error describing an HTTP request that finished with a failing status.
 *
 * Carries the numeric response status and the URL that was requested so
 * callers can log or branch on them.
 */
class HttpError extends Error {
  /** HTTP status code of the failed response. */
  public readonly status: number;
  /** URL the failing request was sent to. */
  public readonly url: string;

  constructor(message: string, status: number, url: string) {
    super(message);
    this.status = status;
    this.url = url;
    // Override the default "Error" name so logs identify the error kind.
    this.name = "HttpError";
  }
}
// ----------------------------- Parsing -----------------------------
/**
@@ -953,9 +943,9 @@ export default async function fetchEbayItems(
logger.log(`Parsed ${filteredListings.length} eBay listings.`);
return finalizeResults(filteredListings);
} catch (err) {
if (err instanceof HttpError) {
console.error(
`Failed to fetch eBay search (${err.status}): ${err.message}`,
if (err instanceof HttpError || err instanceof RateLimitError) {
logger.warn(
`Failed to fetch eBay search (${err instanceof HttpError ? err.statusCode : 429}): ${err.message}`,
);
return finalizeResults([]);
}

View File

@@ -12,9 +12,8 @@ import {
formatCookiesForHeader,
parseCookieString,
} from "../utils/cookies";
import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format";
import { isRecord } from "../utils/http";
import { fetchHtml, HttpError, isRecord, RateLimitError } from "../utils/http";
import { logger } from "../utils/logger";
import { classifyUnstableListings } from "../utils/unstable";
@@ -219,17 +218,6 @@ export async function ensureFacebookCookies(): Promise<Cookie[]> {
return ensureCookies(FACEBOOK_COOKIE_CONFIG);
}
/**
 * Failure raised for an HTTP response with a non-success status.
 *
 * Exposes the response status code and the requested URL alongside the
 * standard error message.
 */
class HttpError extends Error {
  /** Status code returned by the server. */
  public readonly status: number;
  /** Address of the request that failed. */
  public readonly url: string;

  constructor(message: string, status: number, url: string) {
    super(message);
    this.status = status;
    this.url = url;
    // Replace the inherited "Error" name so the error type shows up in logs.
    this.name = "HttpError";
  }
}
// ----------------------------- Extraction Metrics -----------------------------
/**
@@ -274,112 +262,21 @@ function logExtractionMetrics(success: boolean, itemId?: string) {
// ----------------------------- HTTP Client -----------------------------
/**
 * Fetch a page as HTML text with retries and a fixed post-request delay.
 *
 * Behavior:
 * - Sends a GET with browser-like headers; attaches `opts.cookies` as the
 *   `cookie` header only when it is a non-empty string.
 * - Reports the `X-RateLimit-Remaining` / `X-RateLimit-Reset` header values
 *   to `opts.onRateInfo` for every response received.
 * - 429: waits `X-RateLimit-Reset` seconds when that header parses to a
 *   finite number, otherwise `(attempt + 1) * retryBaseMs`, then retries.
 * - Other 4xx: throws immediately without retrying.
 * - 5xx: retries after `(attempt + 1) * retryBaseMs` while attempts remain.
 * - Errors that are not `HttpError` (e.g. a rejected `fetch`) are retried
 *   with the same linear backoff until `maxRetries` is reached.
 *
 * @param url - Address to request.
 * @param DELAY_MS - Milliseconds to wait after a successful response before returning.
 * @param opts - Optional settings: `maxRetries` (default 3), `retryBaseMs`
 *   (default 500), `onRateInfo` callback, and `cookies` header string.
 * @returns The response body and the final response URL (falls back to `url`
 *   when the response reports no URL).
 * @throws HttpError when the final response has a non-OK status.
 */
async function fetchHtml(
url: string,
DELAY_MS: number,
opts?: {
maxRetries?: number;
retryBaseMs?: number;
onRateInfo?: (remaining: string | null, reset: string | null) => void;
cookies?: string;
},
): Promise<{ html: HTMLString; responseUrl: string }> {
const maxRetries = opts?.maxRetries ?? 3;
const retryBaseMs = opts?.retryBaseMs ?? 500;
// Remembered so the fallback throw after the loop can surface the last 429.
let lastRateLimitError: HttpError | null = null;
// attempt 0 is the initial request; up to maxRetries further attempts follow.
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
const headers: Record<string, string> = {
accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
"accept-encoding": "gzip, deflate, br",
"cache-control": "no-cache",
"upgrade-insecure-requests": "1",
"sec-fetch-dest": "document",
"sec-fetch-mode": "navigate",
"sec-fetch-site": "none",
"sec-fetch-user": "?1",
"user-agent":
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
};
// Add cookies if provided
if (opts?.cookies) {
headers.cookie = opts.cookies;
}
const res = await fetch(url, {
method: "GET",
headers,
});
// Forward rate-limit headers to the caller regardless of response status.
const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
const rateLimitReset = res.headers.get("X-RateLimit-Reset");
opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);
if (!res.ok) {
// Respect 429 reset if provided
if (res.status === 429) {
lastRateLimitError = new HttpError(
`Request failed with status ${res.status}`,
res.status,
url,
);
// NOTE(review): the header value is treated as a number of seconds to
// wait; confirm the server does not send an epoch timestamp here.
const resetSeconds = rateLimitReset
? Number(rateLimitReset)
: Number.NaN;
const waitMs = Number.isFinite(resetSeconds)
? Math.max(0, resetSeconds * 1000)
: (attempt + 1) * retryBaseMs;
// Out of attempts: surface the 429 instead of waiting again.
if (attempt >= maxRetries) {
throw lastRateLimitError;
}
await delay(waitMs);
continue;
}
// For Facebook, 400 often means authentication required
// Don't retry 4xx client errors except 429
if (res.status >= 400 && res.status < 500 && res.status !== 429) {
throw new HttpError(
`Request failed with status ${res.status} (Facebook may require authentication cookies for access)`,
res.status,
url,
);
}
// Retry on 5xx
if (res.status >= 500 && res.status < 600 && attempt < maxRetries) {
await delay((attempt + 1) * retryBaseMs);
continue;
}
// Any other non-OK status, or a 5xx on the last attempt.
throw new HttpError(
`Request failed with status ${res.status}`,
res.status,
url,
);
}
const html = await res.text();
// Respect per-request delay to keep at or under REQUESTS_PER_SECOND
await delay(DELAY_MS);
return { html, responseUrl: res.url || url };
} catch (err) {
// HttpError already encodes the retry decision made above; never retry it here.
if (err instanceof HttpError) {
throw err;
}
// Any other failure (e.g. fetch rejecting) is retried with linear backoff.
if (attempt >= maxRetries) throw err;
await delay((attempt + 1) * retryBaseMs);
}
}
// Fallback after the loop; the final attempt above appears to always return or throw.
throw lastRateLimitError ?? new Error("Exhausted retries without response");
/**
 * Build the browser-like request headers used for Facebook Marketplace pages.
 *
 * A fresh object is returned on every call, so callers may mutate it freely.
 *
 * @param cookies - Raw `Cookie` header value (e.g. `"name=value; other=value"`).
 *   When empty, the `cookie` header is omitted rather than sent blank.
 * @returns Header name/value pairs suitable for passing to a fetch-style client.
 */
function createFacebookHeaders(cookies: string): Record<string, string> {
  const headers: Record<string, string> = {
    accept:
      "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
    "cache-control": "no-cache",
    "upgrade-insecure-requests": "1",
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "none",
    "sec-fetch-user": "?1",
    "user-agent":
      "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  };
  // Only attach the header when there is an actual cookie value, so an
  // unauthenticated request does not carry an empty `cookie:` header.
  if (cookies) {
    headers.cookie = cookies;
  }
  return headers;
}
// ----------------------------- Parsing -----------------------------
@@ -1157,6 +1054,8 @@ export default async function fetchFacebookItems(
try {
const response = await fetchHtml(searchUrl, DELAY_MS, {
maxRetries: 3,
includeResponseUrl: true,
headers: createFacebookHeaders(cookiesHeader),
onRateInfo: (remaining, reset) => {
if (remaining && reset) {
logger.log(
@@ -1164,22 +1063,29 @@ export default async function fetchFacebookItems(
);
}
},
cookies: cookiesHeader,
});
searchHtml = response.html;
searchResponseUrl = response.responseUrl;
} catch (err) {
if (err instanceof HttpError) {
logger.warn(
`\nFacebook marketplace access failed (${err.status}): ${err.message}`,
`\nFacebook marketplace access failed (${err.statusCode}): ${err.message}`,
);
if (err.status === 400 || err.status === 401 || err.status === 403) {
if (
err.statusCode === 400 ||
err.statusCode === 401 ||
err.statusCode === 403
) {
logger.warn(
"This might indicate invalid or expired cookies. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.",
);
}
return finalizeResults([]);
}
if (err instanceof RateLimitError) {
logger.warn(`\nFacebook marketplace access rate limited: ${err.message}`);
return finalizeResults([]);
}
throw err;
}
@@ -1261,6 +1167,8 @@ export async function fetchFacebookItem(
let itemResponseUrl = itemUrl;
try {
const response = await fetchHtml(itemUrl, 1000, {
includeResponseUrl: true,
headers: createFacebookHeaders(cookiesHeader),
onRateInfo: (remaining, reset) => {
if (remaining && reset) {
logger.log(
@@ -1268,18 +1176,17 @@ export async function fetchFacebookItem(
);
}
},
cookies: cookiesHeader,
});
itemHtml = response.html;
itemResponseUrl = response.responseUrl;
} catch (err) {
if (err instanceof HttpError) {
logger.warn(
`\nFacebook marketplace item access failed (${err.status}): ${err.message}`,
`\nFacebook marketplace item access failed (${err.statusCode}): ${err.message}`,
);
// Enhanced error handling based on status codes
switch (err.status) {
switch (err.statusCode) {
case 400:
case 401:
case 403:
@@ -1305,10 +1212,19 @@ export async function fetchFacebookItem(
);
break;
default:
logger.warn(`Unexpected error status: ${err.status}`);
logger.warn(`Unexpected error status: ${err.statusCode}`);
}
return null;
}
if (err instanceof RateLimitError) {
logger.warn(
`\nFacebook marketplace item rate limited for item ${itemId}: ${err.message}`,
);
logger.warn(
"Rate limited: Too many requests. Facebook is blocking access temporarily.",
);
return null;
}
throw err;
}

View File

@@ -11,6 +11,7 @@ import {
formatCookiesForHeader,
loadCookiesOptional,
} from "../utils/cookies";
import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format";
import {
fetchHtml,
@@ -568,78 +569,6 @@ export function parseSearch(
return results;
}
/**
 * Legacy (backward-compatible) parser that turns a listing page into a
 * `KijijiListingDetails` object.
 *
 * Returns `null` when the Apollo state cannot be extracted, when no entry
 * with string `url` and `title` fields is found, when that entry is not a
 * record, or when the resulting URL or title is empty.
 */
function _parseListing(
  htmlString: HTMLString,
  BASE_URL: string,
): KijijiListingDetails | null {
  const state = extractApolloState(htmlString);
  if (!state) {
    return null;
  }

  const key = findApolloListingKey(state, (candidate) => {
    return (
      typeof candidate.url === "string" && typeof candidate.title === "string"
    );
  });
  if (!key) {
    return null;
  }

  const entry = state[key];
  if (!isRecord(entry)) {
    return null;
  }

  const {
    url: rawUrl,
    title,
    description,
    price,
    type: listingType,
    status: listingStatus,
    activationDate: creationDate,
    endDate,
    metrics,
    location,
  } = entry as ApolloListingRoot;

  // Keep only numeric values that are actually finite.
  const finiteOrUndefined = (value: number | undefined): number | undefined =>
    value !== undefined && Number.isFinite(value) ? value : undefined;

  let cents: number | undefined;
  if (price?.amount != null) {
    cents = Number(price.amount);
  }

  let amountFormatted: string | undefined;
  if (cents !== undefined) {
    amountFormatted = formatCentsToCurrency(cents, "en-CA");
  }

  let viewCount: number | undefined;
  if (metrics?.views != null) {
    viewCount = Number(metrics.views);
  }

  // Relative URLs are resolved against the site base; non-string URLs become "".
  let listingUrl = "";
  if (typeof rawUrl === "string") {
    listingUrl = rawUrl.startsWith("http") ? rawUrl : `${BASE_URL}${rawUrl}`;
  }

  if (!listingUrl || !title) {
    return null;
  }

  const listingPrice = amountFormatted
    ? {
        amountFormatted,
        cents: finiteOrUndefined(cents),
        currency: price?.currency,
      }
    : undefined;

  return {
    url: listingUrl,
    title,
    description,
    listingPrice,
    listingType,
    listingStatus,
    creationDate,
    endDate,
    numberOfViews: finiteOrUndefined(viewCount),
    address: location?.address ?? null,
  };
}
/**
* Parse a listing page into a detailed object with all available fields
*/
@@ -938,9 +867,7 @@ export default async function fetchKijijiItems(
const batchPromises = batch.map(async (link, batchIndex) => {
try {
if (batchIndex > 0) {
await new Promise((resolve) =>
setTimeout(resolve, DELAY_MS * batchIndex),
);
await delay(DELAY_MS * batchIndex);
}
const html = await fetchHtml(link, 0, {
@@ -962,11 +889,11 @@ export default async function fetchKijijiItems(
return parsed;
} catch (err) {
if (err instanceof HttpError) {
console.error(
logger.warn(
`\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`,
);
} else {
console.error(
logger.warn(
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`,
);
}
@@ -984,7 +911,7 @@ export default async function fetchKijijiItems(
results.push(...batchResults);
if (i + CONCURRENT_REQUESTS < newListingLinks.length) {
await new Promise((resolve) => setTimeout(resolve, DELAY_MS));
await delay(DELAY_MS);
}
}