refactor: clean kijiji scraper internals

This commit is contained in:
2026-04-30 20:48:15 -04:00
parent f95b974c7e
commit 31866de787

View File

@@ -11,6 +11,7 @@ import {
formatCookiesForHeader, formatCookiesForHeader,
loadCookiesOptional, loadCookiesOptional,
} from "../utils/cookies"; } from "../utils/cookies";
import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format"; import { formatCentsToCurrency } from "../utils/format";
import { import {
fetchHtml, fetchHtml,
@@ -568,78 +569,6 @@ export function parseSearch(
return results; return results;
} }
/**
 * Parse a listing page into the compact, backward-compatible listing shape.
 *
 * The Apollo state is extracted from the page, the listing entry is located
 * via `findApolloListingKey` (using a predicate that requires string `url`
 * and `title` fields), and that entry is mapped onto `KijijiListingDetails`.
 *
 * @param htmlString - HTML of the listing page.
 * @param BASE_URL - Prefix prepended to listing URLs that do not start with "http".
 * @returns The mapped listing, or `null` when the Apollo state cannot be
 *   extracted, no listing key is found, the entry is not a record, or the
 *   resolved URL or the title is empty.
 */
function _parseListing(
  htmlString: HTMLString,
  BASE_URL: string,
): KijijiListingDetails | null {
  const state = extractApolloState(htmlString);
  if (!state) {
    return null;
  }

  const key = findApolloListingKey(state, (candidate) => {
    return (
      typeof candidate.url === "string" && typeof candidate.title === "string"
    );
  });
  if (!key) {
    return null;
  }

  const entry = state[key];
  if (!isRecord(entry)) {
    return null;
  }
  const listing = entry as ApolloListingRoot;
  const { price, metrics, title } = listing;

  // Numeric fields are only reported when they convert to a finite number.
  const finiteOrUndefined = (value: number | undefined): number | undefined =>
    value !== undefined && Number.isFinite(value) ? value : undefined;

  // The price amount is converted with Number() and treated as cents.
  let rawCents: number | undefined;
  if (price?.amount != null) {
    rawCents = Number(price.amount);
  }
  const priceLabel =
    rawCents === undefined
      ? undefined
      : formatCentsToCurrency(rawCents, "en-CA");

  let rawViews: number | undefined;
  if (metrics?.views != null) {
    rawViews = Number(metrics.views);
  }

  // URLs that already start with "http" are kept; anything else is prefixed.
  let absoluteUrl = "";
  if (typeof listing.url === "string") {
    absoluteUrl = listing.url.startsWith("http")
      ? listing.url
      : `${BASE_URL}${listing.url}`;
  }

  if (absoluteUrl === "" || !title) {
    return null;
  }

  return {
    url: absoluteUrl,
    title,
    description: listing.description,
    // A price is only reported when a formatted label could be produced.
    listingPrice: priceLabel
      ? {
          amountFormatted: priceLabel,
          cents: finiteOrUndefined(rawCents),
          currency: price?.currency,
        }
      : undefined,
    listingType: listing.type,
    listingStatus: listing.status,
    creationDate: listing.activationDate,
    endDate: listing.endDate,
    numberOfViews: finiteOrUndefined(rawViews),
    address: listing.location?.address ?? null,
  };
}
/** /**
* Parse a listing page into a detailed object with all available fields * Parse a listing page into a detailed object with all available fields
*/ */
@@ -928,9 +857,7 @@ export default async function fetchKijijiItems(
const batchPromises = batch.map(async (link, batchIndex) => { const batchPromises = batch.map(async (link, batchIndex) => {
try { try {
if (batchIndex > 0) { if (batchIndex > 0) {
await new Promise((resolve) => await delay(DELAY_MS * batchIndex);
setTimeout(resolve, DELAY_MS * batchIndex),
);
} }
const html = await fetchHtml(link, 0, { const html = await fetchHtml(link, 0, {
@@ -952,11 +879,11 @@ export default async function fetchKijijiItems(
return parsed; return parsed;
} catch (err) { } catch (err) {
if (err instanceof HttpError) { if (err instanceof HttpError) {
console.error( logger.warn(
`\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`, `\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`,
); );
} else { } else {
console.error( logger.warn(
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`, `\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`,
); );
} }
@@ -974,7 +901,7 @@ export default async function fetchKijijiItems(
results.push(...batchResults); results.push(...batchResults);
if (i + CONCURRENT_REQUESTS < newListingLinks.length) { if (i + CONCURRENT_REQUESTS < newListingLinks.length) {
await new Promise((resolve) => setTimeout(resolve, DELAY_MS)); await delay(DELAY_MS);
} }
} }