refactor: clean kijiji scraper internals
This commit is contained in:
@@ -11,6 +11,7 @@ import {
|
||||
formatCookiesForHeader,
|
||||
loadCookiesOptional,
|
||||
} from "../utils/cookies";
|
||||
import { delay } from "../utils/delay";
|
||||
import { formatCentsToCurrency } from "../utils/format";
|
||||
import {
|
||||
fetchHtml,
|
||||
@@ -568,78 +569,6 @@ export function parseSearch(
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
 * Parse a listing page into a typed object (backward compatible).
 *
 * Reads the Apollo state embedded in the page HTML, asks
 * `findApolloListingKey` for an entry whose `url` and `title` are both
 * strings, and maps that entry's fields onto `KijijiListingDetails`.
 *
 * @param htmlString - HTML of the listing page.
 * @param BASE_URL - Prefix prepended to a listing URL that does not start
 *   with "http".
 * @returns The parsed listing, or `null` when the Apollo state, a matching
 *   entry, the listing URL, or the title is unavailable.
 */
function _parseListing(
  htmlString: HTMLString,
  BASE_URL: string,
): KijijiListingDetails | null {
  const apolloState = extractApolloState(htmlString);
  if (!apolloState) return null;

  const listingKey = findApolloListingKey(
    apolloState,
    (value) => typeof value.url === "string" && typeof value.title === "string",
  );
  if (!listingKey) return null;

  const root = apolloState[listingKey];
  if (!isRecord(root)) return null;

  const {
    url,
    title,
    description,
    price,
    type,
    status,
    activationDate,
    endDate,
    metrics,
    location,
  } = root as ApolloListingRoot;

  // Absolute URLs are kept as-is; anything else is treated as a path on BASE_URL.
  let listingUrl = "";
  if (typeof url === "string") {
    listingUrl = url.startsWith("http") ? url : `${BASE_URL}${url}`;
  }

  // Bail out before doing any formatting work for an unusable listing.
  if (!listingUrl || !title) return null;

  // Only a finite amount is accepted; a non-numeric amount converts to NaN and
  // must not reach the formatter, otherwise the listing would carry a
  // formatted price with no backing `cents` value.
  const rawCents = price?.amount != null ? Number(price.amount) : undefined;
  const cents =
    rawCents !== undefined && Number.isFinite(rawCents) ? rawCents : undefined;
  const amountFormatted =
    cents !== undefined ? formatCentsToCurrency(cents, "en-CA") : undefined;

  const rawViews = metrics?.views != null ? Number(metrics.views) : undefined;
  const numberOfViews =
    rawViews !== undefined && Number.isFinite(rawViews) ? rawViews : undefined;

  return {
    url: listingUrl,
    title,
    description,
    // An empty formatted string is treated the same as "no price".
    listingPrice: amountFormatted
      ? {
          amountFormatted,
          cents,
          currency: price?.currency,
        }
      : undefined,
    listingType: type,
    listingStatus: status,
    creationDate: activationDate,
    endDate,
    numberOfViews,
    address: location?.address ?? null,
  };
}
|
||||
|
||||
/**
|
||||
* Parse a listing page into a detailed object with all available fields
|
||||
*/
|
||||
@@ -928,9 +857,7 @@ export default async function fetchKijijiItems(
|
||||
const batchPromises = batch.map(async (link, batchIndex) => {
|
||||
try {
|
||||
if (batchIndex > 0) {
|
||||
await new Promise((resolve) =>
|
||||
setTimeout(resolve, DELAY_MS * batchIndex),
|
||||
);
|
||||
await delay(DELAY_MS * batchIndex);
|
||||
}
|
||||
|
||||
const html = await fetchHtml(link, 0, {
|
||||
@@ -952,11 +879,11 @@ export default async function fetchKijijiItems(
|
||||
return parsed;
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
console.error(
|
||||
logger.warn(
|
||||
`\nFailed to fetch ${link}\n - ${err.statusCode} ${err.message}`,
|
||||
);
|
||||
} else {
|
||||
console.error(
|
||||
logger.warn(
|
||||
`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`,
|
||||
);
|
||||
}
|
||||
@@ -974,7 +901,7 @@ export default async function fetchKijijiItems(
|
||||
results.push(...batchResults);
|
||||
|
||||
if (i + CONCURRENT_REQUESTS < newListingLinks.length) {
|
||||
await new Promise((resolve) => setTimeout(resolve, DELAY_MS));
|
||||
await delay(DELAY_MS);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user