feat: scrape listing details from Kijiji ads
This commit is contained in:
171
src/kijiji.ts
171
src/kijiji.ts
@@ -13,12 +13,31 @@ interface ApolloSearchState {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Apollo cache object read from a listing page's
 * `props.pageProps.__APOLLO_STATE__` payload.
 *
 * Entries are looked up by cache key; their shape is not declared in this
 * file, so every value is left untyped.
 */
interface ApolloListingState {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- entry shape is not declared here
  [cacheKey: string]: any;
}
|
||||||
|
|
||||||
|
/**
 * Formats an amount expressed in cents as a grouped decimal string in dollars.
 *
 * The result carries no currency symbol: only digits, grouping separators and
 * exactly two fraction digits (e.g. `123456` becomes `"1,234.56"` for `en-US`).
 *
 * @param num - Amount in cents. Strings are parsed as base-10 integers, so
 *   anything after the leading run of digits is ignored.
 * @param locale - Locale tag handed to `Intl.NumberFormat`.
 * @returns The formatted dollar amount. Input that cannot be parsed produces
 *   the formatter's rendering of `NaN`.
 */
function formatCentsToCurrency(num: number | string, locale = "en-US"): string {
  // Explicit radix: without it a "0x"-prefixed string would be read as hexadecimal.
  const cents = typeof num === "string" ? Number.parseInt(num, 10) : num;
  const numberInDollars = cents / 100;

  const formatter = new Intl.NumberFormat(locale, {
    minimumFractionDigits: 2,
    maximumFractionDigits: 2,
    useGrouping: true,
  });

  return formatter.format(numberInDollars);
}
|
||||||
|
|
||||||
const searchQuery = "playstation 5";
|
const searchQuery = "playstation 5";
|
||||||
|
// Upper bound on how many requests are sent per second.
const REQUESTS_PER_SECOND = 1;

// Pause between consecutive requests, in milliseconds, derived from the rate above.
const DELAY_MS = 1000 / REQUESTS_PER_SECOND;
|
||||||
|
|
||||||
// const exampleSearchHTML = Bun.file("./example-kijiji-search.html");
|
// const exampleSearchHTML = Bun.file("./example-kijiji-search.html");
|
||||||
// const exampleSearchHTMLData = await exampleSearchHTML.text();
|
// const exampleSearchHTMLData = await exampleSearchHTML.text();
|
||||||
|
|
||||||
function extractSearchListingsFromNextData(htmlString: string) {
|
function parseSearch(htmlString: string) {
|
||||||
const { document } = parseHTML(htmlString);
|
const { document } = parseHTML(htmlString);
|
||||||
const nextData = document.getElementById("__NEXT_DATA__");
|
const nextData = document.getElementById("__NEXT_DATA__");
|
||||||
|
|
||||||
@@ -44,18 +63,15 @@ function extractSearchListingsFromNextData(htmlString: string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const searchListings: (SearchListing | undefined)[] = listingsKeys.map(
|
const searchListings: SearchListing[] = listingsKeys.map((key) => {
|
||||||
(key) => {
|
const listing = apolloState[key];
|
||||||
const listing = apolloState[key];
|
return {
|
||||||
if (!listing) return undefined;
|
listingLink: listing!.url,
|
||||||
return {
|
name: listing!.title,
|
||||||
listingLink: listing.url,
|
};
|
||||||
name: listing.title,
|
});
|
||||||
};
|
|
||||||
},
|
|
||||||
);
|
|
||||||
|
|
||||||
console.log(searchListings);
|
// console.log(searchListings);
|
||||||
|
|
||||||
return searchListings;
|
return searchListings;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@@ -64,8 +80,9 @@ function extractSearchListingsFromNextData(htmlString: string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const makeKijijiRequest = async (url: string): Promise<string> => {
|
const makeKijijiRequest = async <T>(url: string): Promise<T> => {
|
||||||
const request = await fetch(url, {
|
console.log(`Making a request at ${new Date()}`);
|
||||||
|
const response = await fetch(url, {
|
||||||
headers: {
|
headers: {
|
||||||
accept:
|
accept:
|
||||||
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
||||||
@@ -85,9 +102,129 @@ const makeKijijiRequest = async (url: string): Promise<string> => {
|
|||||||
method: "GET",
|
method: "GET",
|
||||||
});
|
});
|
||||||
|
|
||||||
return await request.text();
|
const rateLimitRemaining = response.headers.get("X-RateLimit-Remaining");
|
||||||
|
const rateLimitReset = response.headers.get("X-RateLimit-Reset");
|
||||||
|
|
||||||
|
if (rateLimitRemaining !== null && rateLimitReset !== null) {
|
||||||
|
console.log(
|
||||||
|
`Rate limit remaining: ${rateLimitRemaining}, Reset in: ${rateLimitReset} seconds`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
const data: T = (await response.text()) as T;
|
||||||
|
return data;
|
||||||
};
|
};
|
||||||
|
|
||||||
// https://www.kijiji.ca/b-canada/${searchQuery}/k0l0?dc=true&view=list
|
/**
 * Pauses the caller for `ms` milliseconds when awaited.
 *
 * @param ms - Duration handed straight to `setTimeout`.
 * @returns A promise that resolves with no value once the timer fires.
 */
async function delay(ms: number): Promise<void> {
  await new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||||
|
|
||||||
// extractSearchListingsFromNextData(await kijijiRequest.text());
|
// const exampleListing = await Bun.file("./examples/apollo_listing.json").json();
|
||||||
|
// const exampleListingApolloState =
|
||||||
|
// exampleListing.props.pageProps.__APOLLO_STATE__;
|
||||||
|
|
||||||
|
/**
 * Extracts the details of a single listing from the HTML of a listing page.
 *
 * The page's `__NEXT_DATA__` script element is parsed as JSON, the Apollo
 * state is read from `props.pageProps.__APOLLO_STATE__`, and the first entry
 * whose key contains "Listing" is mapped to a flat object.
 *
 * @param htmlString - Raw HTML of the listing page.
 * @returns The mapped listing object. When the `__NEXT_DATA__` element is
 *   missing or empty, or carries no Apollo state, the problem is logged and an
 *   empty array is returned instead.
 * @throws Error when the Apollo state has no key containing "Listing".
 * @throws SyntaxError when the `__NEXT_DATA__` content is not valid JSON
 *   (propagated from `JSON.parse`).
 */
const parseListing = (htmlString: string) => {
  const { document } = parseHTML(htmlString);
  const nextData = document.getElementById("__NEXT_DATA__");

  // NOTE(review): the "nothing to parse" paths return `[]` although the success
  // path returns a single object. Kept as-is so existing callers see the same
  // values; consider switching to `undefined` together with the call sites.
  if (!nextData) {
    console.error("Could not find __NEXT_DATA__ script element.");
    return [];
  }

  if (!nextData.textContent) {
    console.error("__NEXT_DATA__ element is empty!");
    return [];
  }

  const jsonData = JSON.parse(nextData.textContent);
  const apolloState: ApolloListingState | undefined =
    jsonData?.props?.pageProps?.__APOLLO_STATE__;

  // Without this guard a page lacking the Apollo payload would fail below with
  // an opaque TypeError from Object.keys(undefined).
  if (!apolloState) {
    console.error("__NEXT_DATA__ does not contain an Apollo state.");
    return [];
  }

  // The listing entry is identified purely by its cache key containing "Listing".
  const listingKey = Object.keys(apolloState).find((key) =>
    key.includes("Listing"),
  );

  if (!listingKey) {
    throw new Error("No listing key found in listing apolloState!");
  }

  const {
    url,
    title,
    description,
    price,
    type,
    status,
    activationDate,
    endDate,
    metrics,
    // attributes,
    location,
  } = apolloState[listingKey] ?? {};

  const listingObject = {
    url,
    title,
    description,
    listingPrice: {
      // A listing without a price amount yields `undefined` rather than
      // crashing the whole run (or being formatted as NaN).
      amount:
        price?.amount == null ? undefined : formatCentsToCurrency(price.amount),
      currency: price?.currency,
    },
    listingType: type,
    listingStatus: status,
    creationDate: activationDate,
    endDate,
    numberOfViews: metrics?.views,
    // condition: attributes.all.find(
    //   (attr: { [key: string]: unknown }) => attr.canonicalName === "condition",
    // ).canonicalValues[0],
    address: location?.address,
  };

  return listingObject;
};
|
||||||
|
|
||||||
|
// Download the search-results page. The query is percent-encoded so that
// characters such as "/", "?", "#" or "&" inside it cannot change the path or
// query string of the request URL.
const searchHtml: string = await makeKijijiRequest(
  `https://www.kijiji.ca/b-canada/${encodeURIComponent(searchQuery)}/k0l0?dc=true&view=list`,
);

// Turn the page into the list of search hits that is mapped to links further down.
const searchResults = parseSearch(searchHtml);
|
||||||
|
|
||||||
|
// if (searchResults.length === 0) {
|
||||||
|
// throw new Error("Search didn't return an HTML!")
|
||||||
|
// }
|
||||||
|
// console.log(searchResults);
|
||||||
|
|
||||||
|
/**
 * Downloads the given links one after another, waiting `DELAY_MS` between
 * consecutive requests.
 *
 * A link whose request fails is logged and skipped, so the result can be
 * shorter than `links` and holds only the bodies of successful requests, in
 * request order.
 *
 * @param links - URLs to fetch, in order.
 * @returns The response bodies of the requests that succeeded.
 */
const fetchAllWithRateLimit = async (links: string[]): Promise<string[]> => {
  const results: string[] = [];

  for (const [index, link] of links.entries()) {
    try {
      const data: string = await makeKijijiRequest(link);
      results.push(data);
    } catch (error) {
      console.error(`Failed to fetch data from ${link}:`, error);
    }

    // The pause only spaces out requests; nothing follows the last link, so
    // waiting there would just delay the result.
    const isLastLink = index === links.length - 1;
    if (!isLastLink) {
      await delay(DELAY_MS);
    }
  }

  return results;
};
|
||||||
|
|
||||||
|
// Collect the detail-page link of every search hit.
const listingsLinks: string[] = searchResults.map(
  ({ listingLink }) => listingLink,
);

// console.log(listingsLinks);

// Download the detail pages through the throttled fetcher, then parse each one.
const fetchResults = await fetchAllWithRateLimit(listingsLinks);
const itemsData = fetchResults.map((pageHtml) => parseListing(pageHtml));

console.log(itemsData);
|
||||||
|
|||||||
Reference in New Issue
Block a user