import { parseHTML } from "linkedom";

// ----------------------------- Types -----------------------------

/** Raw HTML document text, as returned by {@link fetchHtml}. */
type HTMLString = string;

/** One entry extracted from a search results page. */
type SearchListing = {
  name: string;
  listingLink: string;
};

/** The `__APOLLO_STATE__` object: entity key -> entity payload. */
type ApolloRecord = Record<string, unknown>;

/** Fields read from a listing entity on a search page. */
interface ApolloSearchItem {
  url?: string;
  title?: string;
  [k: string]: unknown;
}

/** Fields read from the listing entity on a listing detail page. */
interface ApolloListingRoot {
  url?: string;
  title?: string;
  description?: string;
  price?: { amount?: number | string; currency?: string };
  type?: string;
  status?: string;
  activationDate?: string;
  endDate?: string;
  metrics?: { views?: number | string };
  location?: { address?: string | null };
  [k: string]: unknown;
}

/** Normalized listing produced by {@link parseListing}. */
type ListingDetails = {
  url: string;
  title: string;
  description?: string;
  listingPrice?: {
    amountFormatted: string;
    cents?: number;
    currency?: string;
  };
  listingType?: string;
  listingStatus?: string;
  creationDate?: string;
  endDate?: string;
  numberOfViews?: number;
  address?: string | null;
};

/** Tuning knobs accepted by {@link fetchHtml}. */
type FetchHtmlOptions = {
  /** Number of retries after the first attempt (default 3). */
  maxRetries?: number;
  /** Base for the linear backoff: attempt N waits `N * retryBaseMs` (default 500). */
  retryBaseMs?: number;
  /** Upper bound on a wait derived from `X-RateLimit-Reset` (default 60000). */
  maxRateLimitWaitMs?: number;
  /** Called with the raw rate-limit headers of every response received. */
  onRateInfo?: (remaining: string | null, reset: string | null) => void;
};

// ----------------------------- Config -----------------------------

const REQUESTS_PER_SECOND = 1;
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
const BASE_URL = "https://www.kijiji.ca";
const SEARCH_QUERY = "playstation 5";

// ----------------------------- Utilities -----------------------------

/**
 * Converts an amount in cents to a grouped, two-decimal string
 * (e.g. `123456` -> `"1,234.56"` for `en-US`).
 *
 * @param num - Amount in cents; strings are parsed as base-10 integers.
 * @param locale - BCP 47 locale used for grouping and decimal separators.
 * @returns The formatted amount, or `""` when `num` is missing or not a
 *   finite number.
 */
function formatCentsToCurrency(
  num: number | string | undefined,
  locale = "en-US",
): string {
  if (num == null) return "";
  const cents = typeof num === "string" ? Number.parseInt(num, 10) : num;
  // Rejects NaN and +/-Infinity, which Intl would otherwise render as text.
  if (!Number.isFinite(cents)) return "";
  const formatter = new Intl.NumberFormat(locale, {
    minimumFractionDigits: 2,
    maximumFractionDigits: 2,
    useGrouping: true,
  });
  return formatter.format(cents / 100);
}

/** Type guard for plain keyed objects; `null` and arrays are rejected. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Coerces a number or numeric string to a finite number.
 *
 * @returns `undefined` for blank strings, non-numeric input, NaN and
 *   +/-Infinity, so callers never have to special-case `Number("") === 0`.
 */
function toFiniteNumber(value: unknown): number | undefined {
  if (typeof value === "number") {
    return Number.isFinite(value) ? value : undefined;
  }
  if (typeof value === "string" && value.trim() !== "") {
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : undefined;
  }
  return undefined;
}

/** Returns `value` when it is a string, otherwise `undefined`. */
function optionalString(value: unknown): string | undefined {
  return typeof value === "string" ? value : undefined;
}

/**
 * Resolves a possibly-relative listing URL against `BASE_URL`.
 *
 * @returns The absolute URL, or `null` when `url` is blank or cannot be parsed.
 */
function toAbsoluteUrl(url: string): string | null {
  if (url.trim() === "") return null;
  try {
    return new URL(url, BASE_URL).href;
  } catch {
    return null;
  }
}

/** Resolves after `ms` milliseconds. */
function delay(ms: number): Promise<void> {
  return new Promise<void>((resolve) => setTimeout(resolve, ms));
}

/** Error thrown by {@link fetchHtml} for a non-2xx response. */
class HttpError extends Error {
  constructor(
    message: string,
    public readonly status: number,
    public readonly url: string,
  ) {
    super(message);
    this.name = "HttpError";
  }
}

// ----------------------------- HTTP Client -----------------------------

/**
 * Computes how long to wait after a 429 response.
 *
 * The header is interpreted as "seconds until reset", as the log messages in
 * this file assume. The result is capped at `maxWaitMs` so an unexpected
 * value (NOTE(review): e.g. an epoch timestamp - confirm the header's actual
 * format) cannot stall the run, and a missing, zero or negative value falls
 * back to `fallbackMs` instead of retrying immediately.
 */
function rateLimitWaitMs(
  resetHeader: string | null,
  fallbackMs: number,
  maxWaitMs: number,
): number {
  const resetSeconds = resetHeader ? Number(resetHeader) : NaN;
  if (!Number.isFinite(resetSeconds) || resetSeconds <= 0) return fallbackMs;
  return Math.min(resetSeconds * 1000, maxWaitMs);
}

/**
 * Fetches a page as HTML with a bounded retry strategy.
 *
 * - Network failures (including a failed body read), 429 and 5xx responses are
 *   retried up to `maxRetries` times with linear backoff.
 * - Any other non-2xx status fails immediately; it is not retried.
 * - After a successful response the function sleeps `DELAY_MS` so sequential
 *   callers stay at or under `REQUESTS_PER_SECOND`.
 *
 * @param url - Absolute URL to request.
 * @param opts - Optional retry/backoff settings and rate-limit observer.
 * @returns The response body text.
 * @throws HttpError when the final response is non-2xx.
 * @throws The underlying error when the last attempt fails at network level.
 */
async function fetchHtml(
  url: string,
  opts?: FetchHtmlOptions,
): Promise<HTMLString> {
  const maxRetries = opts?.maxRetries ?? 3;
  const retryBaseMs = opts?.retryBaseMs ?? 500;
  const maxRateLimitWaitMs = opts?.maxRateLimitWaitMs ?? 60_000;

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    const isLastAttempt = attempt >= maxRetries;
    const backoffMs = (attempt + 1) * retryBaseMs;

    let res: Response;
    let html: string | undefined;
    try {
      res = await fetch(url, {
        method: "GET",
        headers: {
          accept:
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
          "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
          "cache-control": "no-cache",
          "upgrade-insecure-requests": "1",
          "user-agent":
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
        },
      });
      html = res.ok ? await res.text() : undefined;
    } catch (err) {
      // Only transport-level failures land here; HTTP status handling is
      // deliberately kept outside this try so it cannot be retried by accident.
      if (isLastAttempt) throw err;
      await delay(backoffMs);
      continue;
    }

    const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
    const rateLimitReset = res.headers.get("X-RateLimit-Reset");
    opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);

    if (html !== undefined) {
      // Respect per-request delay to keep at or under REQUESTS_PER_SECOND.
      await delay(DELAY_MS);
      return html;
    }

    const isRateLimited = res.status === 429;
    const isServerError = res.status >= 500 && res.status < 600;
    if (isLastAttempt || !(isRateLimited || isServerError)) {
      throw new HttpError(
        `Request failed with status ${res.status}`,
        res.status,
        url,
      );
    }

    await delay(
      isRateLimited
        ? rateLimitWaitMs(rateLimitReset, backoffMs, maxRateLimitWaitMs)
        : backoffMs,
    );
  }

  // Reached only when the loop body never ran (negative maxRetries).
  throw new Error("Exhausted retries without response");
}

// ----------------------------- Parsing -----------------------------

/**
 * Extracts `props.pageProps.__APOLLO_STATE__` from the `__NEXT_DATA__` script
 * of a Kijiji page.
 *
 * @returns The Apollo state object, or `null` when the script element is
 *   missing or empty, its content is not valid JSON, or the expected path does
 *   not lead to an object.
 */
function extractApolloState(htmlString: HTMLString): ApolloRecord | null {
  const { document } = parseHTML(htmlString);
  const payload = document.getElementById("__NEXT_DATA__")?.textContent;
  if (!payload) return null;

  let parsed: unknown;
  try {
    parsed = JSON.parse(payload);
  } catch {
    return null;
  }

  // Walk the path one level at a time so no step relies on `any`.
  const props = isRecord(parsed) ? parsed["props"] : undefined;
  const pageProps = isRecord(props) ? props["pageProps"] : undefined;
  const apollo = isRecord(pageProps)
    ? pageProps["__APOLLO_STATE__"]
    : undefined;
  return isRecord(apollo) ? apollo : null;
}

/**
 * Parses a search results page into `SearchListing[]`.
 *
 * Entities are selected by the heuristic that their Apollo key contains
 * "Listing"; entries without a string `url` and `title`, or whose URL cannot
 * be resolved, are skipped.
 */
function parseSearch(htmlString: HTMLString): SearchListing[] {
  const apolloState = extractApolloState(htmlString);
  if (!apolloState) return [];

  const results: SearchListing[] = [];
  for (const [key, value] of Object.entries(apolloState)) {
    if (!key.includes("Listing") || !isRecord(value)) continue;

    const item = value as ApolloSearchItem;
    if (typeof item.url !== "string" || typeof item.title !== "string") {
      continue;
    }

    const listingLink = toAbsoluteUrl(item.url);
    if (listingLink) results.push({ listingLink, name: item.title });
  }
  return results;
}

/**
 * Parses a listing detail page into a `ListingDetails` object.
 *
 * The listing entity is taken to be the first Apollo key containing "Listing".
 *
 * @returns The normalized listing, or `null` when the Apollo state, the
 *   listing entity, a resolvable URL or a non-empty title is missing.
 */
function parseListing(htmlString: HTMLString): ListingDetails | null {
  const apolloState = extractApolloState(htmlString);
  if (!apolloState) return null;

  const listingKey = Object.keys(apolloState).find((k) =>
    k.includes("Listing"),
  );
  if (!listingKey) return null;

  const root = apolloState[listingKey];
  if (!isRecord(root)) return null;

  const {
    url,
    title,
    description,
    price,
    type,
    status,
    activationDate,
    endDate,
    metrics,
    location,
  } = root as ApolloListingRoot;

  const listingUrl = typeof url === "string" ? toAbsoluteUrl(url) : null;
  if (!listingUrl || typeof title !== "string" || title === "") return null;

  // A price is reported only when the amount is a usable number.
  const cents = toFiniteNumber(price?.amount);
  const listingPrice =
    cents === undefined
      ? undefined
      : {
          amountFormatted: formatCentsToCurrency(cents),
          cents,
          currency: optionalString(price?.currency),
        };

  return {
    url: listingUrl,
    title,
    description: optionalString(description),
    listingPrice,
    listingType: optionalString(type),
    listingStatus: optionalString(status),
    creationDate: optionalString(activationDate),
    endDate: optionalString(endDate),
    numberOfViews: toFiniteNumber(metrics?.views),
    address: location?.address ?? null,
  };
}

// ----------------------------- Main -----------------------------

/** Builds an `onRateInfo` callback that logs both headers when present. */
function logRateInfo(
  label: string,
): (remaining: string | null, reset: string | null) => void {
  return (remaining, reset) => {
    if (remaining && reset) {
      console.log(
        `${label} - Rate limit remaining: ${remaining}, reset in: ${reset}s`,
      );
    }
  };
}

/**
 * Runs one search for `SEARCH_QUERY`, then fetches and parses every distinct
 * listing sequentially. A failure on one listing is logged and skipped; a
 * failure of the search request itself propagates to the caller.
 */
async function main(): Promise<void> {
  const searchUrl = `${BASE_URL}/b-canada/${encodeURIComponent(SEARCH_QUERY)}/k0l0?dc=true&view=list`;

  console.log(`Fetching search: ${searchUrl}`);
  const searchHtml = await fetchHtml(searchUrl, {
    onRateInfo: logRateInfo("Search"),
  });

  const searchResults = parseSearch(searchHtml);
  if (searchResults.length === 0) {
    console.warn("No search results parsed from page.");
    return;
  }

  // Deduplicate links
  const listingLinks = [...new Set(searchResults.map((r) => r.listingLink))];

  console.log(
    `Found ${listingLinks.length} listing links. Fetching details...`,
  );

  const onItemRateInfo = logRateInfo("Item");
  const items: ListingDetails[] = [];
  // Sequential on purpose: fetchHtml paces requests to REQUESTS_PER_SECOND.
  for (const link of listingLinks) {
    try {
      const html = await fetchHtml(link, { onRateInfo: onItemRateInfo });
      const parsed = parseListing(html);
      if (parsed) {
        items.push(parsed);
      } else {
        console.warn(`Could not parse listing data from ${link}`);
      }
    } catch (err: unknown) {
      let reason: string;
      if (err instanceof HttpError) {
        reason = `${err.status} ${err.message}`;
      } else if (err instanceof Error) {
        reason = err.message || err.name;
      } else {
        reason = String(err);
      }
      console.error(`Failed to fetch ${link} - ${reason}`);
    }
  }

  console.log(`Parsed ${items.length} listings.`);
  console.log(items);
}

void main().catch((err: unknown) => {
  console.error("Fatal error:", err);
  process.exitCode = 1;
});