feat: scrape listing details from Kijiji ads

This commit is contained in:
2025-09-17 20:43:07 -04:00
parent 257edf394c
commit 23dc9cae4c

View File

@@ -13,12 +13,31 @@ interface ApolloSearchState {
}; };
} }
/**
 * Apollo state object taken from a listing page's `__NEXT_DATA__` payload
 * (`props.pageProps.__APOLLO_STATE__`).
 *
 * Entries are indexed by string key and left untyped (`any`); `parseListing`
 * reads the entry whose key contains "Listing".
 */
interface ApolloListingState {
[key: string]: any;
}
/**
 * Formats an amount given in cents as a grouped number with exactly two
 * decimal places (no currency symbol is added).
 *
 * @param num - Amount in cents; a string is converted with `parseInt` first.
 * @param locale - Locale passed to `Intl.NumberFormat`; defaults to "en-US".
 * @returns The amount divided by 100, formatted for the given locale.
 */
function formatCentsToCurrency(num: number | string, locale = "en-US") {
  const cents = typeof num === "string" ? parseInt(num) : num;
  return new Intl.NumberFormat(locale, {
    useGrouping: true,
    minimumFractionDigits: 2,
    maximumFractionDigits: 2,
  }).format(cents / 100);
}
// Search term interpolated into the Kijiji search URL.
const searchQuery = "playstation 5";
// Throttle settings: DELAY_MS is the pause used between listing requests.
const REQUESTS_PER_SECOND = 1;
const DELAY_MS = 1000 / REQUESTS_PER_SECOND;
// const exampleSearchHTML = Bun.file("./example-kijiji-search.html");
// const exampleSearchHTMLData = await exampleSearchHTML.text();
function extractSearchListingsFromNextData(htmlString: string) { function parseSearch(htmlString: string) {
const { document } = parseHTML(htmlString); const { document } = parseHTML(htmlString);
const nextData = document.getElementById("__NEXT_DATA__"); const nextData = document.getElementById("__NEXT_DATA__");
@@ -44,18 +63,15 @@ function extractSearchListingsFromNextData(htmlString: string) {
} }
} }
const searchListings: (SearchListing | undefined)[] = listingsKeys.map( const searchListings: SearchListing[] = listingsKeys.map((key) => {
(key) => { const listing = apolloState[key];
const listing = apolloState[key]; return {
if (!listing) return undefined; listingLink: listing!.url,
return { name: listing!.title,
listingLink: listing.url, };
name: listing.title, });
};
},
);
console.log(searchListings); // console.log(searchListings);
return searchListings; return searchListings;
} catch (error) { } catch (error) {
@@ -64,8 +80,9 @@ function extractSearchListingsFromNextData(htmlString: string) {
} }
} }
const makeKijijiRequest = async (url: string): Promise<string> => { const makeKijijiRequest = async <T>(url: string): Promise<T> => {
const request = await fetch(url, { console.log(`Making a request at ${new Date()}`);
const response = await fetch(url, {
headers: { headers: {
accept: accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
@@ -85,9 +102,129 @@ const makeKijijiRequest = async (url: string): Promise<string> => {
method: "GET", method: "GET",
}); });
return await request.text(); const rateLimitRemaining = response.headers.get("X-RateLimit-Remaining");
const rateLimitReset = response.headers.get("X-RateLimit-Reset");
if (rateLimitRemaining !== null && rateLimitReset !== null) {
console.log(
`Rate limit remaining: ${rateLimitRemaining}, Reset in: ${rateLimitReset} seconds`,
);
}
const data: T = (await response.text()) as T;
return data;
}; };
// https://www.kijiji.ca/b-canada/${searchQuery}/k0l0?dc=true&view=list

/**
 * Returns a promise that resolves (with no value) once a `setTimeout` of
 * `ms` milliseconds fires.
 *
 * @param ms - Timeout passed to `setTimeout`, in milliseconds.
 */
async function delay(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
// extractSearchListingsFromNextData(await kijijiRequest.text()); // const exampleListing = await Bun.file("./examples/apollo_listing.json").json();
// const exampleListingApolloState =
// exampleListing.props.pageProps.__APOLLO_STATE__;
/**
 * Extracts the details of a single listing from a listing page's HTML.
 *
 * Reads the JSON held in the `__NEXT_DATA__` element, takes
 * `props.pageProps.__APOLLO_STATE__` from it, and uses the first state key
 * containing "Listing" as the listing record.
 *
 * @param htmlString - Raw HTML of a listing page.
 * @returns The listing summary object, or an empty array when the
 *   `__NEXT_DATA__` element is missing or empty (existing contract, kept so
 *   callers are unaffected). Fields absent from the record are `undefined`
 *   instead of causing a crash.
 * @throws Error when the Apollo state or a "Listing" key cannot be found.
 *   Errors raised by `JSON.parse` on malformed JSON propagate unchanged.
 */
const parseListing = (htmlString: string) => {
  const { document } = parseHTML(htmlString);
  const nextData = document.getElementById("__NEXT_DATA__");
  if (!nextData) {
    console.error("Could not find __NEXT_DATA__ script element.");
    return [];
  }
  if (!nextData.textContent) {
    console.error("__NEXT_DATA__ element is empty!");
    return [];
  }

  const jsonData = JSON.parse(nextData.textContent);
  // Optional chaining: a page with an unexpected payload shape should produce
  // a descriptive error rather than "cannot read properties of undefined".
  const apolloState: ApolloListingState | undefined =
    jsonData?.props?.pageProps?.__APOLLO_STATE__;
  if (!apolloState) {
    throw new Error("No __APOLLO_STATE__ found in listing page data!");
  }

  // NOTE(review): this takes the first key that merely contains "Listing";
  // confirm no other entity keys in the state can match before the listing.
  const listingKey = Object.keys(apolloState).find((key) =>
    key.includes("Listing"),
  );
  if (!listingKey) {
    throw new Error("No listing key found in listing apolloState!");
  }

  const {
    url,
    title,
    description,
    price,
    type,
    status,
    activationDate,
    endDate,
    metrics,
    // attributes,
    location,
  } = apolloState[listingKey] ?? {};

  const listingObject = {
    url,
    title,
    description,
    listingPrice: {
      // Only format when an amount exists; formatting a missing amount would
      // otherwise yield the string "NaN" (or throw when `price` is absent).
      amount:
        price?.amount != null ? formatCentsToCurrency(price.amount) : undefined,
      currency: price?.currency,
    },
    listingType: type,
    listingStatus: status,
    creationDate: activationDate,
    endDate,
    numberOfViews: metrics?.views,
    // condition: attributes.all.find(
    // (attr: { [key: string]: unknown }) => attr.canonicalName === "condition",
    // ).canonicalValues[0],
    address: location?.address,
  };
  return listingObject;
};
// Fetch the search results page. The query is percent-encoded so characters
// such as "/", "?" or "#" in the search term cannot change the URL structure.
const searchHtml: string = await makeKijijiRequest(
  `https://www.kijiji.ca/b-canada/${encodeURIComponent(searchQuery)}/k0l0?dc=true&view=list`,
);
// Turn the search page HTML into the list of listings to visit.
const searchResults = parseSearch(searchHtml);
// if (searchResults.length === 0) {
// throw new Error("Search didn't return an HTML!")
// }
// console.log(searchResults);
/**
 * Fetches the given links one at a time, waiting `DELAY_MS` between
 * consecutive requests.
 *
 * A failed request is logged and skipped, so the returned array may be
 * shorter than `links` and its indices do not necessarily line up with it.
 *
 * @param links - URLs to request, in order.
 * @returns The response bodies of the requests that succeeded, in order.
 */
const fetchAllWithRateLimit = async (links: string[]): Promise<string[]> => {
  const results: string[] = [];
  let isFirstRequest = true;
  for (const link of links) {
    // Pause only between requests: sleeping after the final one would just
    // postpone returning the results.
    if (!isFirstRequest) await delay(DELAY_MS);
    isFirstRequest = false;
    try {
      const data: string = await makeKijijiRequest(link);
      results.push(data);
    } catch (error) {
      console.error(`Failed to fetch data from ${link}:`, error);
    }
  }
  return results;
};
// Collect the URL of every listing returned by the search.
const listingsLinks: string[] = searchResults.map(
  ({ listingLink }) => listingLink,
);
// console.log(listingsLinks);

// Download each listing page via the rate-limited fetcher, then parse the
// details out of every page that was retrieved.
const fetchResults = await fetchAllWithRateLimit(listingsLinks);
const itemsData = fetchResults.map((listingHtml) => parseListing(listingHtml));
console.log(itemsData);