From 46a8ac92cf003408d1f268dd3d98bfce3863bd2b Mon Sep 17 00:00:00 2001 From: Dmytro Stanchiev Date: Wed, 17 Sep 2025 22:03:24 -0400 Subject: [PATCH] feat: extract Kijiji scraping logic into reusable function This commit extracts the Kijiji scraping functionality into a reusable function `fetchKijijiItems`. This allows for easier integration into other parts of the application and improves code modularity. The function accepts search query, requests per second, and base URL as parameters, enabling customizable scraping. --- src/index.ts | 6 ++++++ src/kijiji.ts | 43 ++++++++++++++++++++++++++----------------- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/index.ts b/src/index.ts index e69de29..60e340d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -0,0 +1,6 @@ +import fetchKijijiItems from "./kijiji"; + +const SEARCH_QUERY = "playstation 5"; +const items = await fetchKijijiItems(SEARCH_QUERY); + +console.log(items); diff --git a/src/kijiji.ts b/src/kijiji.ts index 6c29d97..bf5629b 100644 --- a/src/kijiji.ts +++ b/src/kijiji.ts @@ -51,11 +51,6 @@ type ListingDetails = { // ----------------------------- Config ----------------------------- -const REQUESTS_PER_SECOND = 1; -const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); -const BASE_URL = "https://www.kijiji.ca"; -const SEARCH_QUERY = "playstation 5"; - // ----------------------------- Utilities ----------------------------- /** @@ -105,6 +100,7 @@ class HttpError extends Error { */ async function fetchHtml( url: string, + DELAY_MS: number, opts?: { maxRetries?: number; retryBaseMs?: number; @@ -191,7 +187,10 @@ function extractApolloState(htmlString: HTMLString): ApolloRecord | null { Parse search page apollo state into SearchListing[]. Filters keys likely to be listing entities and ensures url/title exist. */ -function parseSearch(htmlString: HTMLString): SearchListing[] { +function parseSearch( + htmlString: HTMLString, + BASE_URL: string, +): SearchListing[] { const apolloState = extractApolloState(htmlString); if (!apolloState) return []; @@ -217,7 +216,10 @@ function parseSearch(htmlString: HTMLString): SearchListing[] { /** Parse a listing page into a typed object. */ -function parseListing(htmlString: HTMLString): ListingDetails | null { +function parseListing( + htmlString: HTMLString, + BASE_URL: string, +): ListingDetails | null { const apolloState = extractApolloState(htmlString); if (!apolloState) return null; @@ -280,11 +282,17 @@ function parseListing(htmlString: HTMLString): ListingDetails | null { // ----------------------------- Main ----------------------------- -async function main() { +export default async function fetchKijijiItems( + SEARCH_QUERY: string, + REQUESTS_PER_SECOND = 1, + BASE_URL = "https://www.kijiji.ca", +) { + const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); + const searchUrl = `${BASE_URL}/b-canada/${encodeURIComponent(SEARCH_QUERY)}/k0l0?dc=true&view=list`; console.log(`Fetching search: ${searchUrl}`); - const searchHtml = await fetchHtml(searchUrl, { + const searchHtml = await fetchHtml(searchUrl, DELAY_MS, { onRateInfo: (remaining, reset) => { if (remaining && reset) { console.log( @@ -294,7 +302,7 @@ async function main() { }, }); - const searchResults = parseSearch(searchHtml); + const searchResults = parseSearch(searchHtml, BASE_URL); if (searchResults.length === 0) { console.warn("No search results parsed from page."); return; @@ -312,7 +320,7 @@ async function main() { const items: ListingDetails[] = []; for (const link of listingLinks) { try { - const html = await fetchHtml(link, { + const html = await fetchHtml(link, DELAY_MS, { onRateInfo: (remaining, reset) => { if (remaining && reset) { console.log( @@ -321,7 +329,7 @@ async function main() { } }, }); - const parsed = parseListing(html); + const parsed = parseListing(html, BASE_URL); if (parsed) items.push(parsed); } catch (err) { if (err instanceof HttpError) { @@ -335,10 +343,11 @@ async function main() { } console.log(`Parsed ${items.length} listings.`); - console.log(items); + return items; + // console.log(items); } -void main().catch((err) => { - console.error("Fatal error:", err); - process.exitCode = 1; -}); +// void main().catch((err) => { +// console.error("Fatal error:", err); +// process.exitCode = 1; +// });