refactor: clean kijiji scraper internals

This commit is contained in:
2026-04-30 20:48:15 -04:00
parent f95b974c7e
commit 31866de787

View File

@@ -11,6 +11,7 @@ import {
formatCookiesForHeader,
loadCookiesOptional,
} from "../utils/cookies";
import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format";
import {
fetchHtml,
@@ -568,78 +569,6 @@ export function parseSearch(
return results;
}
/**
Parse a listing page into a typed object (backward compatible).
*/
function _parseListing(
htmlString: HTMLString,
BASE_URL: string,
): KijijiListingDetails | null {
const apolloState = extractApolloState(htmlString);
if (!apolloState) return null;
const listingKey = findApolloListingKey(
apolloState,
(value) => typeof value.url === "string" && typeof value.title === "string",
);
if (!listingKey) return null;
const root = apolloState[listingKey];
if (!isRecord(root)) return null;
const {
url,
title,
description,
price,
type,
status,
activationDate,
endDate,
metrics,
location,
} = root as ApolloListingRoot;
const cents = price?.amount != null ? Number(price.amount) : undefined;
const amountFormatted =
cents != null ? formatCentsToCurrency(cents, "en-CA") : undefined;
const numberOfViews =
metrics?.views != null ? Number(metrics.views) : undefined;
const listingUrl =
typeof url === "string"
? url.startsWith("http")
? url
: `${BASE_URL}${url}`
: "";
if (!listingUrl || !title) return null;
return {
url: listingUrl,
title,
description,
listingPrice: amountFormatted
? {
amountFormatted,
cents:
cents !== undefined && Number.isFinite(cents) ? cents : undefined,
currency: price?.currency,
}
: undefined,
listingType: type,
listingStatus: status,
creationDate: activationDate,
endDate,
numberOfViews:
numberOfViews !== undefined && Number.isFinite(numberOfViews)
? numberOfViews
: undefined,
address: location?.address ?? null,
};
}
/**
* Parse a listing page into a detailed object with all available fields
*/
@@ -928,9 +857,7 @@ export default async function fetchKijijiItems(
const batchPromises = batch.map(async (link, batchIndex) => {
try {
if (batchIndex > 0) {
await new Promise((resolve) =>
setTimeout(resolve, DELAY_MS * batchIndex),
);
await delay(DELAY_MS * batchIndex);
}
const html = await fetchHtml(link, 0, {
@@ -952,11 +879,11 @@ export default async function fetchKijijiItems(
return parsed;
} catch (err) {
if (err instanceof HttpError) {
console.error(
logger.warn(
`\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`,
);
} else {
console.error(
logger.warn(
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`,
);
}
@@ -974,7 +901,7 @@ export default async function fetchKijijiItems(
results.push(...batchResults);
if (i + CONCURRENT_REQUESTS < newListingLinks.length) {
await new Promise((resolve) => setTimeout(resolve, DELAY_MS));
await delay(DELAY_MS);
}
}