From 23dc9cae4c9ed5649b5432e250887cfb62befe1f Mon Sep 17 00:00:00 2001
From: Dmytro Stanchiev
Date: Wed, 17 Sep 2025 20:43:07 -0400
Subject: [PATCH] feat: scrape listing details from Kijiji ads

---
NOTE(review): line breaks and indentation were reconstructed from a flattened
copy of this patch; the hunk line counts agree with the @@ headers and the
diffstat. The type arguments (<T>, Promise<string>, Promise<void>) are
presumed -- confirm against the commit. The author e-mail was absent from the
copy and has not been restored.

 src/kijiji.ts | 171 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 154 insertions(+), 17 deletions(-)

diff --git a/src/kijiji.ts b/src/kijiji.ts
index 508b267..d566ec3 100644
--- a/src/kijiji.ts
+++ b/src/kijiji.ts
@@ -13,12 +13,31 @@ interface ApolloSearchState {
   };
 }
 
+interface ApolloListingState {
+  [key: string]: any;
+}
+
+function formatCentsToCurrency(num: number | string, locale = "en-US") {
+  if (typeof num === "string") num = parseInt(num);
+  const numberInDollars = num / 100;
+
+  const formatter = new Intl.NumberFormat(locale, {
+    minimumFractionDigits: 2,
+    maximumFractionDigits: 2,
+    useGrouping: true,
+  });
+
+  return formatter.format(numberInDollars);
+}
+
 const searchQuery = "playstation 5";
+const REQUESTS_PER_SECOND = 1;
+const DELAY_MS = 1000 / REQUESTS_PER_SECOND;
 
 // const exampleSearchHTML = Bun.file("./example-kijiji-search.html");
 // const exampleSearchHTMLData = await exampleSearchHTML.text();
 
-function extractSearchListingsFromNextData(htmlString: string) {
+function parseSearch(htmlString: string) {
   const { document } = parseHTML(htmlString);
   const nextData = document.getElementById("__NEXT_DATA__");
 
@@ -44,18 +63,15 @@ function extractSearchListingsFromNextData(htmlString: string) {
       }
     }
 
-    const searchListings: (SearchListing | undefined)[] = listingsKeys.map(
-      (key) => {
-        const listing = apolloState[key];
-        if (!listing) return undefined;
-        return {
-          listingLink: listing.url,
-          name: listing.title,
-        };
-      },
-    );
+    const searchListings: SearchListing[] = listingsKeys.map((key) => {
+      const listing = apolloState[key];
+      return {
+        listingLink: listing!.url,
+        name: listing!.title,
+      };
+    });
 
-    console.log(searchListings);
+    // console.log(searchListings);
 
     return searchListings;
   } catch (error) {
@@ -64,8 +80,9 @@ function extractSearchListingsFromNextData(htmlString: string) {
   }
 }
 
-const makeKijijiRequest = async (url: string): Promise<string> => {
-  const request = await fetch(url, {
+const makeKijijiRequest = async <T>(url: string): Promise<T> => {
+  console.log(`Making a request at ${new Date()}`);
+  const response = await fetch(url, {
     headers: {
       accept:
         "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
@@ -85,9 +102,129 @@ const makeKijijiRequest = async (url: string): Promise<string> => {
     method: "GET",
   });
 
-  return await request.text();
+  const rateLimitRemaining = response.headers.get("X-RateLimit-Remaining");
+  const rateLimitReset = response.headers.get("X-RateLimit-Reset");
+
+  if (rateLimitRemaining !== null && rateLimitReset !== null) {
+    console.log(
+      `Rate limit remaining: ${rateLimitRemaining}, Reset in: ${rateLimitReset} seconds`,
+    );
+  }
+
+  const data: T = (await response.text()) as T;
+  return data;
 };
 
-// https://www.kijiji.ca/b-canada/${searchQuery}/k0l0?dc=true&view=list
+async function delay(ms: number): Promise<void> {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
 
-// extractSearchListingsFromNextData(await kijijiRequest.text());
+// const exampleListing = await Bun.file("./examples/apollo_listing.json").json();
+// const exampleListingApolloState =
+//   exampleListing.props.pageProps.__APOLLO_STATE__;
+
+const parseListing = (htmlString: string) => {
+  const { document } = parseHTML(htmlString);
+  const nextData = document.getElementById("__NEXT_DATA__");
+
+  if (!nextData) {
+    console.error("Could not find __NEXT_DATA__ script element.");
+    return [];
+  }
+
+  if (!nextData.textContent) {
+    console.error("__NEXT_DATA__ element is empty!");
+    return [];
+  }
+  const jsonData = JSON.parse(nextData.textContent);
+  const apolloState: ApolloListingState =
+    jsonData.props.pageProps.__APOLLO_STATE__;
+
+  const getListingId = (apolloState: { [key: string]: any }):
+    | string
+    | undefined => {
+    const apolloStateKeys = Object.keys(apolloState);
+    const key = apolloStateKeys.find((key) => key.includes("Listing"));
+    if (!key) return undefined;
+    return key;
+  };
+
+  const listingKey = getListingId(apolloState);
+
+  if (!listingKey) {
+    throw new Error("No listing key found in listing apolloState!");
+  }
+
+  const {
+    url,
+    title,
+    description,
+    price,
+    type,
+    status,
+    activationDate,
+    endDate,
+    metrics,
+    // attributes,
+    location,
+  } = apolloState[listingKey];
+
+  const listingObject = {
+    url,
+    title,
+    description,
+    listingPrice: {
+      amount: formatCentsToCurrency(price.amount),
+      currency: price.currency,
+    },
+    listingType: type,
+    listingStatus: status,
+    creationDate: activationDate,
+    endDate,
+    numberOfViews: metrics.views,
+    // condition: attributes.all.find(
+    //   (attr: { [key: string]: unknown }) => attr.canonicalName === "condition",
+    // ).canonicalValues[0],
+    address: location.address,
+  };
+
+  return listingObject;
+};
+
+const searchHtml: string = await makeKijijiRequest(
+  `https://www.kijiji.ca/b-canada/${searchQuery}/k0l0?dc=true&view=list`,
+);
+
+const searchResults = parseSearch(searchHtml);
+
+// if (searchResults.length === 0) {
+//   throw new Error("Search didn't return an HTML!")
+// }
+// console.log(searchResults);
+
+const fetchAllWithRateLimit = async (links: string[]) => {
+  const results: string[] = [];
+  for (const link of links) {
+    try {
+      const data: string = await makeKijijiRequest(link);
+      // console.log(data);
+      results.push(data);
+    } catch (error) {
+      console.error(`Failed to fetch data from ${link}:`, error);
+    }
+    await delay(DELAY_MS);
+  }
+  return results;
+};
+
+const listingsLinks: string[] = searchResults.map((item) => {
+  return item.listingLink;
+});
+
+// console.log(listingsLinks);
+
+const fetchResults = await fetchAllWithRateLimit(listingsLinks);
+
+const itemsData = fetchResults.map((itemHtml) => parseListing(itemHtml));
+
+console.log(itemsData);