diff --git a/src/index.ts b/src/index.ts index 113ef62..7884176 100644 --- a/src/index.ts +++ b/src/index.ts @@ -26,8 +26,12 @@ const server = Bun.serve({ { status: 400 }, ); - const items = await fetchKijijiItems(SEARCH_QUERY, 5); - if (!items) + const items = await fetchKijijiItems(SEARCH_QUERY, 1, undefined, {}, { + includeImages: true, + sellerDataDepth: 'detailed', + includeClientSideData: false, + }); + if (!items || items.length === 0) return Response.json( { message: "Search didn't return any results!" }, { status: 404 }, @@ -85,11 +89,13 @@ const server = Bun.serve({ ); // Parse optional parameters with defaults - const minPrice = reqUrl.searchParams.get("minPrice") - ? parseInt(reqUrl.searchParams.get("minPrice")!) + const minPriceParam = reqUrl.searchParams.get("minPrice"); + const minPrice = minPriceParam + ? Number.parseInt(minPriceParam, 10) : undefined; - const maxPrice = reqUrl.searchParams.get("maxPrice") - ? parseInt(reqUrl.searchParams.get("maxPrice")!) + const maxPriceParam = reqUrl.searchParams.get("maxPrice"); + const maxPrice = maxPriceParam + ? Number.parseInt(maxPriceParam, 10) : undefined; const strictMode = reqUrl.searchParams.get("strictMode") === "true"; const exclusionsParam = reqUrl.searchParams.get("exclusions"); diff --git a/src/kijiji.ts b/src/kijiji.ts index 6cd4091..4379bcc 100644 --- a/src/kijiji.ts +++ b/src/kijiji.ts @@ -26,16 +26,29 @@ interface ApolloListingRoot { url?: string; title?: string; description?: string; - price?: { amount?: number | string; currency?: string }; + price?: { amount?: number | string; currency?: string; type?: string }; type?: string; status?: string; activationDate?: string; endDate?: string; metrics?: { views?: number | string }; - location?: { address?: string | null }; + location?: { + address?: string | null; + id?: number; + name?: string; + coordinates?: { latitude: number; longitude: number }; + }; + imageUrls?: string[]; + imageCount?: number; + categoryId?: number; + adSource?: string; + flags?: { topAd?: boolean; priceDrop?: boolean }; + posterInfo?: { posterId?: string; rating?: number }; + attributes?: Array<{ canonicalName?: string; canonicalValues?: string[] }>; [k: string]: unknown; } +// Keep existing interface for backward compatibility type ListingDetails = { url: string; title: string; @@ -53,10 +66,178 @@ type ListingDetails = { address?: string | null; }; +// New comprehensive interface for detailed listings +interface DetailedListing extends ListingDetails { + images: string[]; + categoryId: number; + adSource: string; + flags: { + topAd: boolean; + priceDrop: boolean; + }; + attributes: Record; + location: { + id: number; + name: string; + coordinates?: { + latitude: number; + longitude: number; + }; + }; + sellerInfo?: { + posterId: string; + rating?: number; + accountType?: string; + memberSince?: string; + reviewCount?: number; + reviewScore?: number; + }; +} + +// Configuration interfaces +interface SearchOptions { + location?: number | string; // Location ID or name + category?: number | string; // Category ID or name + keywords?: string; + sortBy?: 'relevancy' | 'date' | 'price' | 'distance'; + sortOrder?: 'desc' | 'asc'; + maxPages?: number; // Default: 5 + priceMin?: number; + priceMax?: number; +} + +interface ListingFetchOptions { + includeImages?: boolean; // Default: true + sellerDataDepth?: 'basic' | 'detailed' | 'full'; // Default: 'detailed' + includeClientSideData?: boolean; // Default: false +} + +// ----------------------------- Constants & Mappings ----------------------------- + +// Location mappings from KIJIJI.md +const LOCATION_MAPPINGS: Record = { + 'canada': 0, + 'ontario': 9004, + 'toronto': 1700273, + 'gta': 1700272, + 'oshawa': 1700275, + 'quebec': 9001, + 'nova scotia': 9002, + 'alberta': 9003, + 'new brunswick': 9005, + 'manitoba': 9006, + 'british columbia': 9007, + 'newfoundland': 9008, + 'saskatchewan': 9009, + 'territories': 9010, + 'pei': 9011, + 'prince edward island': 9011, +}; + +// Category mappings from KIJIJI.md (Buy & Sell main categories) +const CATEGORY_MAPPINGS: Record = { + 'all': 0, + 'buy-sell': 10, + 'arts-collectibles': 12, + 'audio': 767, + 'baby-items': 253, + 'bags-luggage': 931, + 'bikes': 644, + 'books': 109, + 'cameras': 103, + 'cds': 104, + 'clothing': 274, + 'computers': 16, + 'computer-accessories': 128, + 'electronics': 29659001, + 'free-stuff': 17220001, + 'furniture': 235, + 'garage-sales': 638, + 'health-special-needs': 140, + 'hobbies-crafts': 139, + 'home-appliances': 107, + 'home-indoor': 717, + 'home-outdoor': 727, + 'jewellery': 133, + 'musical-instruments': 17, + 'phones': 132, + 'sporting-goods': 111, + 'tools': 110, + 'toys-games': 108, + 'tvs-video': 15093001, + 'video-games': 141, + 'other': 26, +}; + +// Sort parameter mappings +const SORT_MAPPINGS: Record = { + 'relevancy': 'MATCH', + 'date': 'DATE', + 'price': 'PRICE', + 'distance': 'DISTANCE', +}; + +// ----------------------------- Exports for Testing ----------------------------- +// Note: These are exported for testing purposes only + +export { resolveLocationId, resolveCategoryId, buildSearchUrl }; +export { extractApolloState, parseSearch }; +export { parseDetailedListing }; +export { HttpError, NetworkError, ParseError, RateLimitError, ValidationError }; + // ----------------------------- Utilities ----------------------------- const SEPS = new Set([" ", "–", "—", "/", ":", ";", ",", ".", "-"]); +/** + * Resolve location ID from name or return numeric ID + */ +function resolveLocationId(location?: number | string): number { + if (typeof location === 'number') return location; + if (typeof location === 'string') { + const normalized = location.toLowerCase().replace(/\s+/g, '-'); + return LOCATION_MAPPINGS[normalized] ?? 0; // Default to Canada (0) + } + return 0; // Default to Canada +} + +/** + * Resolve category ID from name or return numeric ID + */ +function resolveCategoryId(category?: number | string): number { + if (typeof category === 'number') return category; + if (typeof category === 'string') { + const normalized = category.toLowerCase().replace(/\s+/g, '-'); + return CATEGORY_MAPPINGS[normalized] ?? 0; // Default to all categories + } + return 0; // Default to all categories +} + +/** + * Build search URL with enhanced parameters + */ +function buildSearchUrl( + keywords: string, + options: SearchOptions & { page?: number }, + BASE_URL = "https://www.kijiji.ca" +): string { + const locationId = resolveLocationId(options.location); + const categoryId = resolveCategoryId(options.category); + + const categorySlug = categoryId === 0 ? 'buy-sell' : 'buy-sell'; // Could be enhanced + const locationSlug = locationId === 0 ? 'canada' : 'canada'; // Could be enhanced + + let url = `${BASE_URL}/b-${categorySlug}/${locationSlug}/${slugify(keywords)}/k0c${categoryId}l${locationId}`; + + const sortParam = options.sortBy ? `&sort=${SORT_MAPPINGS[options.sortBy]}` : ''; + const sortOrder = options.sortOrder === 'asc' ? 'ASC' : 'DESC'; + const pageParam = options.page && options.page > 1 ? `&page=${options.page}` : ''; + + url += `?sort=relevancyDesc&view=list${sortParam}&order=${sortOrder}${pageParam}`; + + return url; +} + /** * Slugifies a string for search */ @@ -67,13 +248,14 @@ export function slugify(input: string): string { for (let i = 0; i < s.length; i++) { const ch = s[i]; - const code = ch!.charCodeAt(0); + if (!ch) continue; + const code = ch.charCodeAt(0); // a-z or 0-9 if ((code >= 97 && code <= 122) || (code >= 48 && code <= 57)) { - out.push(ch!); + out.push(ch); lastHyphen = false; - } else if (SEPS.has(ch!)) { + } else if (SEPS.has(ch)) { if (!lastHyphen) { out.push("-"); lastHyphen = true; @@ -87,30 +269,33 @@ export function slugify(input: string): string { /** * Turns cents to localized currency string. */ -function formatCentsToCurrency( +export function formatCentsToCurrency( num: number | string | undefined, locale = "en-US", ): string { - if (num == null) return ""; - const cents = typeof num === "string" ? Number.parseInt(num, 10) : num; - if (Number.isNaN(cents)) return ""; + if (num == null) return ""; + const cents = typeof num === "string" ? Number.parseInt(num, 10) : num; + if (Number.isNaN(cents)) return ""; const dollars = cents / 100; const formatter = new Intl.NumberFormat(locale, { + style: 'currency', + currency: 'USD', minimumFractionDigits: 2, maximumFractionDigits: 2, - useGrouping: true, }); return formatter.format(dollars); } function isRecord(value: unknown): value is Record { - return typeof value === "object" && value !== null; + return typeof value === "object" && value !== null && !Array.isArray(value); } async function delay(ms: number): Promise { await new Promise((resolve) => setTimeout(resolve, ms)); } +// ----------------------------- Error Classes ----------------------------- + class HttpError extends Error { constructor( message: string, @@ -122,12 +307,52 @@ class HttpError extends Error { } } +class NetworkError extends Error { + constructor( + message: string, + public readonly url: string, + public readonly cause?: Error, + ) { + super(message); + this.name = "NetworkError"; + } +} + +class ParseError extends Error { + constructor( + message: string, + public readonly data?: unknown, + ) { + super(message); + this.name = "ParseError"; + } +} + +class RateLimitError extends Error { + constructor( + message: string, + public readonly url: string, + public readonly resetTime?: number, + ) { + super(message); + this.name = "RateLimitError"; + } +} + +class ValidationError extends Error { + constructor(message: string) { + super(message); + this.name = "ValidationError"; + } +} + // ----------------------------- HTTP Client ----------------------------- /** - Fetch HTML with a basic retry strategy and simple rate-limit delay between calls. - - Retries on 429 and 5xx - - Respects X-RateLimit-Reset when present (seconds) + Fetch HTML with enhanced retry strategy and exponential backoff. + - Retries on 429, 5xx, and network errors + - Respects X-RateLimit-Reset when present (seconds) + - Exponential backoff with jitter */ async function fetchHtml( url: string, @@ -139,11 +364,13 @@ async function fetchHtml( }, ): Promise { const maxRetries = opts?.maxRetries ?? 3; - const retryBaseMs = opts?.retryBaseMs ?? 500; + const retryBaseMs = opts?.retryBaseMs ?? 1000; for (let attempt = 0; attempt <= maxRetries; attempt++) { try { - // console.log(`Fetching: `, url); + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 30000); // 30s timeout + const res = await fetch(url, { method: "GET", headers: { @@ -155,27 +382,40 @@ async function fetchHtml( "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36", }, + signal: controller.signal, }); + clearTimeout(timeoutId); + const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining"); const rateLimitReset = res.headers.get("X-RateLimit-Reset"); opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset); if (!res.ok) { - // Respect 429 reset if provided + // Handle rate limiting if (res.status === 429) { - const resetSeconds = rateLimitReset ? Number(rateLimitReset) : NaN; + const resetSeconds = rateLimitReset ? Number(rateLimitReset) : Number.NaN; const waitMs = Number.isFinite(resetSeconds) ? Math.max(0, resetSeconds * 1000) - : (attempt + 1) * retryBaseMs; - await delay(waitMs); - continue; + : calculateBackoffDelay(attempt, retryBaseMs); + + if (attempt < maxRetries) { + await delay(waitMs); + continue; + } + throw new RateLimitError( + `Rate limit exceeded for ${url}`, + url, + resetSeconds, + ); } - // Retry on 5xx + + // Retry on server errors if (res.status >= 500 && res.status < 600 && attempt < maxRetries) { - await delay((attempt + 1) * retryBaseMs); + await delay(calculateBackoffDelay(attempt, retryBaseMs)); continue; } + throw new HttpError( `Request failed with status ${res.status}`, res.status, @@ -184,22 +424,177 @@ async function fetchHtml( } const html = await res.text(); - // Respect per-request delay to keep at or under REQUESTS_PER_SECOND + + // Respect per-request delay to maintain rate limiting await delay(DELAY_MS); return html; + } catch (err) { - if (attempt >= maxRetries) throw err; - await delay((attempt + 1) * retryBaseMs); + // Handle different error types + if (err instanceof RateLimitError || err instanceof HttpError) { + throw err; // Re-throw known errors + } + + if (err instanceof Error && err.name === 'AbortError') { + if (attempt < maxRetries) { + await delay(calculateBackoffDelay(attempt, retryBaseMs)); + continue; + } + throw new NetworkError(`Request timeout for ${url}`, url, err); + } + + // Network or other errors + if (attempt < maxRetries) { + await delay(calculateBackoffDelay(attempt, retryBaseMs)); + continue; + } + throw new NetworkError( + `Network error fetching ${url}: ${err instanceof Error ? err.message : String(err)}`, + url, + err instanceof Error ? err : undefined + ); } } - throw new Error("Exhausted retries without response"); + throw new NetworkError(`Exhausted retries without response for ${url}`, url); +} + +/** + * Calculate exponential backoff delay with jitter + */ +function calculateBackoffDelay(attempt: number, baseMs: number): number { + const exponentialDelay = baseMs * (2 ** attempt); + const jitter = Math.random() * 0.1 * exponentialDelay; // 10% jitter + return Math.min(exponentialDelay + jitter, 30000); // Cap at 30 seconds +} + +// ----------------------------- GraphQL Client ----------------------------- + +/** + * Fetch additional data via GraphQL API + */ +async function fetchGraphQLData( + query: string, + variables: Record, + BASE_URL = "https://www.kijiji.ca" +): Promise { + const endpoint = `${BASE_URL}/anvil/api`; + + try { + const response = await fetch(endpoint, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'apollo-require-preflight': 'true', + }, + body: JSON.stringify({ + query, + variables, + }), + }); + + if (!response.ok) { + throw new HttpError( + `GraphQL request failed with status ${response.status}`, + response.status, + endpoint + ); + } + + const result = await response.json(); + + if (result.errors) { + throw new ParseError(`GraphQL errors: ${JSON.stringify(result.errors)}`, result.errors); + } + + return result.data; + } catch (err) { + if (err instanceof HttpError || err instanceof ParseError) { + throw err; + } + throw new NetworkError( + `Failed to fetch GraphQL data: ${err instanceof Error ? err.message : String(err)}`, + endpoint, + err instanceof Error ? err : undefined + ); + } +} + +// GraphQL response interfaces +interface GraphQLReviewResponse { + user?: { + reviewSummary?: { + count?: number; + score?: number; + }; + }; +} + +interface GraphQLProfileResponse { + user?: { + memberSince?: string; + accountType?: string; + }; +} + +// GraphQL queries from KIJIJI.md +const GRAPHQL_QUERIES = { + getReviewSummary: ` + query GetReviewSummary($userId: String!) { + user(id: $userId) { + reviewSummary { + count + score + __typename + } + __typename + } + } + `, + getProfileMetrics: ` + query GetProfileMetrics($profileId: String!) { + user(id: $profileId) { + memberSince + accountType + __typename + } + } + `, +} as const; + +/** + * Fetch additional seller data via GraphQL + */ +async function fetchSellerDetails( + posterId: string, + BASE_URL = "https://www.kijiji.ca" +): Promise<{ reviewCount?: number; reviewScore?: number; memberSince?: string; accountType?: string }> { + try { + const [reviewData, profileData] = await Promise.all([ + fetchGraphQLData(GRAPHQL_QUERIES.getReviewSummary, { userId: posterId }, BASE_URL), + fetchGraphQLData(GRAPHQL_QUERIES.getProfileMetrics, { profileId: posterId }, BASE_URL), + ]); + + const reviewResponse = reviewData as GraphQLReviewResponse; + const profileResponse = profileData as GraphQLProfileResponse; + + return { + reviewCount: reviewResponse?.user?.reviewSummary?.count, + reviewScore: reviewResponse?.user?.reviewSummary?.score, + memberSince: profileResponse?.user?.memberSince, + accountType: profileResponse?.user?.accountType, + }; + } catch (err) { + // Silently fail for GraphQL errors - not critical for basic functionality + console.warn(`Failed to fetch seller details for ${posterId}:`, err instanceof Error ? err.message : String(err)); + return {}; + } } // ----------------------------- Parsing ----------------------------- /** - Extracts json.props.pageProps.__APOLLO_STATE__ safely from a Kijiji page HTML. + Extracts json.props.pageProps.__APOLLO_STATE__ safely from a Kijiji page HTML. */ function extractApolloState(htmlString: HTMLString): ApolloRecord | null { const { document } = parseHTML(htmlString); @@ -299,7 +694,7 @@ function parseListing( listingPrice: amountFormatted ? { amountFormatted, - cents: Number.isFinite(cents!) ? cents : undefined, + cents: cents !== undefined && Number.isFinite(cents) ? cents : undefined, currency: price?.currency, } : undefined, @@ -307,91 +702,252 @@ function parseListing( listingStatus: status, creationDate: activationDate, endDate, - numberOfViews: Number.isFinite(numberOfViews!) ? numberOfViews : undefined, + numberOfViews: numberOfViews !== undefined && Number.isFinite(numberOfViews) ? numberOfViews : undefined, address: location?.address ?? null, }; } +/** + * Parse a listing page into a detailed object with all available fields + */ +async function parseDetailedListing( + htmlString: HTMLString, + BASE_URL: string, + options: ListingFetchOptions = {} +): Promise { + const apolloState = extractApolloState(htmlString); + if (!apolloState) return null; + + // Find the listing root key + const listingKey = Object.keys(apolloState).find((k) => + k.includes("Listing"), + ); + if (!listingKey) return null; + + const root = apolloState[listingKey]; + if (!isRecord(root)) return null; + + const { + url, + title, + description, + price, + type, + status, + activationDate, + endDate, + metrics, + location, + imageUrls, + imageCount, + categoryId, + adSource, + flags, + posterInfo, + attributes, + } = root as ApolloListingRoot; + + const cents = price?.amount != null ? Number(price.amount) : undefined; + const amountFormatted = formatCentsToCurrency(cents); + + const numberOfViews = + metrics?.views != null ? Number(metrics.views) : undefined; + + const listingUrl = + typeof url === "string" + ? url.startsWith("http") + ? url + : `${BASE_URL}${url}` + : ""; + + if (!listingUrl || !title) return null; + + // Only include fixed-price listings + if (!amountFormatted || cents === undefined) return null; + + // Extract images if requested + const images = options.includeImages !== false && Array.isArray(imageUrls) + ? imageUrls.filter((url): url is string => typeof url === 'string') + : []; + + // Extract attributes as key-value pairs + const attributeMap: Record = {}; + if (Array.isArray(attributes)) { + for (const attr of attributes) { + if (attr?.canonicalName && Array.isArray(attr.canonicalValues)) { + attributeMap[attr.canonicalName] = attr.canonicalValues; + } + } + } + + // Extract seller info based on depth setting + let sellerInfo: DetailedListing['sellerInfo']; + const depth = options.sellerDataDepth ?? 'detailed'; + + if (posterInfo?.posterId) { + sellerInfo = { + posterId: posterInfo.posterId, + rating: typeof posterInfo.rating === 'number' ? posterInfo.rating : undefined, + }; + + // Add more detailed info if requested and client-side data is enabled + if ((depth === 'detailed' || depth === 'full') && options.includeClientSideData) { + try { + const additionalData = await fetchSellerDetails(posterInfo.posterId, BASE_URL); + sellerInfo = { + ...sellerInfo, + ...additionalData, + }; + } catch (err) { + // Silently fail - GraphQL data is optional + console.warn(`Failed to fetch additional seller data for ${posterInfo.posterId}`); + } + } + } + + return { + url: listingUrl, + title, + description, + listingPrice: { + amountFormatted, + cents, + currency: price?.currency, + }, + listingType: type, + listingStatus: status, + creationDate: activationDate, + endDate, + numberOfViews: numberOfViews !== undefined && Number.isFinite(numberOfViews) ? numberOfViews : undefined, + address: location?.address ?? null, + images, + categoryId: typeof categoryId === 'number' ? categoryId : 0, + adSource: typeof adSource === 'string' ? adSource : 'UNKNOWN', + flags: { + topAd: flags?.topAd === true, + priceDrop: flags?.priceDrop === true, + }, + attributes: attributeMap, + location: { + id: typeof location?.id === 'number' ? location.id : 0, + name: typeof location?.name === 'string' ? location.name : 'Unknown', + coordinates: location?.coordinates ? { + latitude: location.coordinates.latitude, + longitude: location.coordinates.longitude, + } : undefined, + }, + sellerInfo, + }; +} + // ----------------------------- Main ----------------------------- export default async function fetchKijijiItems( SEARCH_QUERY: string, REQUESTS_PER_SECOND = 1, BASE_URL = "https://www.kijiji.ca", + searchOptions: SearchOptions = {}, + listingOptions: ListingFetchOptions = {}, ) { const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); - const searchUrl = `${BASE_URL}/b-gta-greater-toronto-area/${slugify(SEARCH_QUERY)}/k0l1700272?sort=relevancyDesc&view=list`; + // Set defaults for configuration + const finalSearchOptions: Required = { + location: searchOptions.location ?? 1700272, // Default to GTA + category: searchOptions.category ?? 0, // Default to all categories + keywords: searchOptions.keywords ?? SEARCH_QUERY, + sortBy: searchOptions.sortBy ?? 'relevancy', + sortOrder: searchOptions.sortOrder ?? 'desc', + maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages + priceMin: searchOptions.priceMin, + priceMax: searchOptions.priceMax, + }; - console.log(`Fetching search: ${searchUrl}`); - const searchHtml = await fetchHtml(searchUrl, DELAY_MS, { - onRateInfo: (remaining, reset) => { - if (remaining && reset) { - console.log( - "\n" + - `Search - Rate limit remaining: ${remaining}, reset in: ${reset}s`, - ); + const finalListingOptions: Required = { + includeImages: listingOptions.includeImages ?? true, + sellerDataDepth: listingOptions.sellerDataDepth ?? 'detailed', + includeClientSideData: listingOptions.includeClientSideData ?? false, + }; + + const allListings: DetailedListing[] = []; + const seenUrls = new Set(); + + // Fetch multiple pages + for (let page = 1; page <= finalSearchOptions.maxPages; page++) { + const searchUrl = buildSearchUrl(finalSearchOptions.keywords, { + ...finalSearchOptions, + // Add page parameter for pagination + ...(page > 1 && { page }), + }, BASE_URL); + + console.log(`Fetching search page ${page}: ${searchUrl}`); + const searchHtml = await fetchHtml(searchUrl, DELAY_MS, { + onRateInfo: (remaining, reset) => { + if (remaining && reset) { + console.log(`\nSearch - Rate limit remaining: ${remaining}, reset in: ${reset}s`); + } + }, + }); + + const searchResults = parseSearch(searchHtml, BASE_URL); + if (searchResults.length === 0) { + console.log(`No more results found on page ${page}. Stopping pagination.`); + break; + } + + // Deduplicate links across pages + const newListingLinks = searchResults + .map((r) => r.listingLink) + .filter((link) => !seenUrls.has(link)); + + for (const link of newListingLinks) { + seenUrls.add(link); + } + + console.log(`\nFound ${newListingLinks.length} new listing links on page ${page}. Total unique: ${seenUrls.size}`); + + // Fetch details for this page's listings + const progressBar = new cliProgress.SingleBar( + {}, + cliProgress.Presets.shades_classic, + ); + const totalProgress = newListingLinks.length; + let currentProgress = 0; + progressBar.start(totalProgress, currentProgress); + + for (const link of newListingLinks) { + try { + const html = await fetchHtml(link, DELAY_MS, { + onRateInfo: (remaining, reset) => { + if (remaining && reset) { + console.log(`\nItem - Rate limit remaining: ${remaining}, reset in: ${reset}s`); + } + }, + }); + const parsed = await parseDetailedListing(html, BASE_URL, finalListingOptions); + if (parsed) { + allListings.push(parsed); + } + } catch (err) { + if (err instanceof HttpError) { + console.error(`\nFailed to fetch ${link}\n - ${err.status} ${err.message}`); + } else { + console.error(`\nFailed to fetch ${link}\n - ${String((err as Error)?.message || err)}`); + } + } finally { + currentProgress++; + progressBar.update(currentProgress); } - }, - }); + } - const searchResults = parseSearch(searchHtml, BASE_URL); - if (searchResults.length === 0) { - console.warn("No search results parsed from page."); - return; - } + progressBar.stop(); - // Deduplicate links - const listingLinks = Array.from( - new Set(searchResults.map((r) => r.listingLink)), - ); - - console.log( - "\n" + `Found ${listingLinks.length} listing links. Fetching details...`, - ); - - const progressBar = new cliProgress.SingleBar( - {}, - cliProgress.Presets.shades_classic, - ); - const totalProgress = listingLinks.length; - let currentProgress = 0; - progressBar.start(totalProgress, currentProgress); - - const items: ListingDetails[] = []; - for (const link of listingLinks) { - try { - const html = await fetchHtml(link, DELAY_MS, { - onRateInfo: (remaining, reset) => { - if (remaining && reset) { - console.log( - "\n" + - `Item - Rate limit remaining: ${remaining}, reset in: ${reset}s`, - ); - } - }, - }); - const parsed = parseListing(html, BASE_URL); - if (parsed) { - if (parsed.listingPrice?.cents) items.push(parsed); - } - } catch (err) { - if (err instanceof HttpError) { - console.error( - "\n" + `Failed to fetch ${link}\n - ${err.status} ${err.message}`, - ); - } else { - console.error( - "\n" + - `Failed to fetch ${link}\n - ${String((err as Error)?.message || err)}`, - ); - } - } finally { - currentProgress++; - progressBar.update(currentProgress); + // If we got fewer results than expected (40 per page), we've reached the end + if (searchResults.length < 40) { + break; } } - console.log("\n" + `Parsed ${items.length} listings.`); - return items; + console.log(`\nParsed ${allListings.length} detailed listings.`); + return allListings; }