/* eslint-disable @typescript-eslint/no-explicit-any */
import { parseHTML } from "linkedom";
import type { HTMLString } from "../types/common";
import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format";
import { isRecord } from "../utils/http";

// ----------------------------- Types -----------------------------

/**
 * One listing extracted from an eBay search results page.
 *
 * Only `url`, `title`, `listingPrice`, `listingType`, `listingStatus` and
 * `address` are populated by the parser in this module; the remaining
 * optional fields are never set here.
 */
export interface EbayListingDetails {
  url: string;
  title: string;
  description?: string;
  listingPrice?: {
    /** The price text exactly as it appeared on the page. */
    amountFormatted: string;
    /** The parsed amount multiplied by 100 and rounded. */
    cents?: number;
    /** "CAD" or "USD" as detected by `parseEbayPrice`. */
    currency?: string;
  };
  listingType?: string;
  listingStatus?: string;
  creationDate?: string;
  endDate?: string;
  numberOfViews?: number;
  address?: string | null;
}

// ----------------------------- Utilities -----------------------------

/** Titles shorter than this (after clean-up) are treated as noise and skipped. */
const MIN_TITLE_LENGTH = 10;

/** Accessibility text eBay appends to link labels ("Opens in a new window or tab"). */
const UI_SUFFIX_PATTERN = /opens in a new (?:window|tab)/i;

/**
 * First numeric amount in a price string: either digits with optional
 * thousands separators and decimals ("1,234.56") or a bare fraction (".99").
 * Anchoring on a digit/dot prevents stray commas from being taken as the amount.
 */
const AMOUNT_PATTERN = /\d[\d,]*(?:\.\d+)?|\.\d+/;

/** Canadian-dollar markers: "CAD", "C$", "C $", "CA$", "CA $". */
const CAD_PATTERN = /CAD|\bCA?\s?\$/i;

/**
 * Parse eBay currency string like "$1.50 CAD" or "CA $1.50" into cents.
 *
 * @param priceText - Raw price text taken from the page.
 * @returns The first amount found, in cents, with its currency ("CAD" when a
 *   Canadian marker is present, otherwise "USD"), or `null` when no amount
 *   can be read.
 */
function parseEbayPrice(
  priceText: string,
): { cents: number; currency: string } | null {
  if (!priceText || typeof priceText !== "string") return null;

  const cleaned = priceText.trim();

  const amountMatch = cleaned.match(AMOUNT_PATTERN);
  if (!amountMatch) return null;

  // Drop thousands separators before converting
  const dollars = Number.parseFloat(amountMatch[0].replace(/,/g, ""));
  if (!Number.isFinite(dollars)) return null;

  const cents = Math.round(dollars * 100);

  // Anything without an explicit Canadian marker is reported as USD
  const currency = CAD_PATTERN.test(cleaned) ? "CAD" : "USD";

  return { cents, currency };
}

/** Error raised when the search request returns a non-2xx response. */
class HttpError extends Error {
  constructor(
    message: string,
    public readonly status: number,
    public readonly url: string,
  ) {
    super(message);
    this.name = "HttpError";
  }
}

/**
 * Turn an href taken from the results page into an absolute URL.
 *
 * Handles absolute ("http…"), protocol-relative ("//…"), root-relative
 * ("/itm/…") and bare relative ("itm/…") forms.
 */
function toAbsoluteEbayUrl(href: string): string {
  if (href.startsWith("http")) return href;
  if (href.startsWith("//")) return `https:${href}`;
  // NOTE(review): relative links resolve against ebay.com although the search
  // itself is issued to ebay.ca — confirm this is intended.
  return href.startsWith("/")
    ? `https://www.ebay.com${href}`
    : `https://www.ebay.com/${href}`;
}

/** Cut a title at the first "Opens in a new window/tab" marker, if present. */
function stripUiSuffix(title: string): string {
  const markerIndex = title.search(UI_SUFFIX_PATTERN);
  return markerIndex === -1 ? title : title.substring(0, markerIndex).trim();
}

// ----------------------------- Parsing -----------------------------

/**
 * Parse eBay search page HTML and extract listings using DOM selectors.
 *
 * @param htmlString - HTML of a search results page.
 * @param keywords - In strict mode a title must contain at least one of these
 *   (case-insensitive).
 * @param exclusions - Listings whose title contains any of these
 *   (case-insensitive) are dropped. Empty strings are ignored.
 * @param strictMode - Enables the keyword filter.
 * @returns Listings with a usable title and a parseable price, de-duplicated
 *   by URL. A failure while parsing one listing is logged and skipped.
 */
function parseEbayListings(
  htmlString: HTMLString,
  keywords: string[],
  exclusions: string[],
  strictMode: boolean,
): EbayListingDetails[] {
  const { document } = parseHTML(htmlString);
  const results: EbayListingDetails[] = [];
  // Several anchors inside one result card can point at the same item
  const seenUrls = new Set<string>();

  // Normalise the filter terms once instead of once per listing.
  // An empty exclusion would match every title, so it is discarded.
  const loweredExclusions = exclusions
    .map((term) => term.toLowerCase())
    .filter((term) => term.length > 0);
  const loweredKeywords = keywords.map((term) => term.toLowerCase());

  // Find all listing links by looking for eBay item URLs (/itm/)
  const linkElements = document.querySelectorAll('a[href*="itm/"]');

  for (const linkElement of linkElements) {
    try {
      const rawHref = linkElement.getAttribute("href");
      if (!rawHref) continue;

      const href = toAbsoluteEbayUrl(rawHref);
      if (seenUrls.has(href)) continue;

      // Walk up three levels to reach the item container, falling back to two
      const container =
        linkElement.parentElement?.parentElement?.parentElement ??
        linkElement.parentElement?.parentElement;
      if (!container) continue;

      // Extract title - look for heading or title-related elements near the link
      let titleElement = container.querySelector(
        'h3, [role="heading"], .s-item__title span',
      );

      // If no direct title element, take the first nearby span/div whose text
      // looks like a title (medium length, no price, no "item" boilerplate)
      if (!titleElement) {
        for (const candidate of container.querySelectorAll("span, div")) {
          const text = candidate.textContent?.trim();
          if (
            text &&
            text.length > 10 &&
            text.length < 200 &&
            !text.includes("$") &&
            !text.includes("item")
          ) {
            titleElement = candidate;
            break;
          }
        }
      }

      const rawTitle = titleElement?.textContent?.trim();
      if (!rawTitle) continue;

      // Clean up eBay UI strings that get included in titles
      const title = stripUiSuffix(rawTitle);

      // If the title became empty or too short after cleaning, skip this item
      if (title.length < MIN_TITLE_LENGTH) continue;

      // Skip irrelevant eBay ads
      if (title === "Shop on eBay") continue;

      // Extract price - look for eBay's price classes
      let priceElement = container.querySelector(
        '[class*="s-item__price"], .s-item__price, [class*="price"]',
      );

      // If no direct price class, look for short text containing $ that does
      // not look like a product description
      if (!priceElement) {
        const priceCandidates = container.querySelectorAll(
          "span, div, b, em, strong",
        );
        for (const el of priceCandidates) {
          const text = el.textContent?.trim();
          if (
            text &&
            text.includes("$") &&
            text.length < 100 &&
            !text.includes("laptop") &&
            !text.includes("computer") &&
            !text.includes("intel") &&
            !text.includes("core") &&
            !text.includes("ram") &&
            !text.includes("ssd") &&
            !/\d{4}/.test(text) && // Avoid years like "2024"
            !text.includes('"') // Avoid measurements
          ) {
            priceElement = el;
            break;
          }
        }
      }

      // For discounted items, eBay shows both original and sale price.
      // Prefer sale/current price over original/strikethrough price.
      if (priceElement) {
        const priceContainer =
          priceElement.closest('[class*="s-item__price"]') ||
          priceElement.parentElement;

        if (priceContainer) {
          // All candidate price elements, including strikethrough ones
          const allPriceElements = priceContainer.querySelectorAll(
            '[class*="s-item__price"], span, b, em, strong, s, del, strike',
          );

          // Keep only elements whose text starts with a currency symbol.
          // NOTE(review): text such as "C $12.00" starts with a letter and is
          // not collected here, so the sale-price preference below does not
          // apply to it — confirm whether that is intended.
          const actualPrices: HTMLElement[] = [];
          for (const el of allPriceElements) {
            const text = el.textContent?.trim();
            if (
              text &&
              /^\s*[$£€¥]/u.test(text) &&
              text.length < 50 &&
              !/\d{4}/.test(text)
            ) {
              actualPrices.push(el);
            }
          }

          if (actualPrices.length > 1) {
            // Prices that are NOT struck through (by tag, class or ancestor)
            const nonStrikethroughPrices = actualPrices.filter((el) => {
              const tagName = el.tagName.toLowerCase();
              const isStruck =
                tagName === "s" ||
                tagName === "del" ||
                tagName === "strike" ||
                el.classList.contains("s-strikethrough") ||
                el.classList.contains("u-flStrike") ||
                Boolean(el.closest("s, del, strike"));
              return !isStruck;
            });

            priceElement =
              nonStrikethroughPrices.length > 0
                ? // Use the first non-strikethrough price (sale price)
                  nonStrikethroughPrices[0]
                : // Fallback: use the last price (likely the most current)
                  actualPrices[actualPrices.length - 1];
          }
        }
      }

      const priceText = priceElement?.textContent?.trim();
      if (!priceText) continue;

      // Parse price into cents and currency
      const priceInfo = parseEbayPrice(priceText);
      if (!priceInfo) continue;

      const loweredTitle = title.toLowerCase();

      // Apply exclusion filters
      if (loweredExclusions.some((term) => loweredTitle.includes(term))) {
        continue;
      }

      // Apply strict mode filter (title must contain at least one keyword)
      if (
        strictMode &&
        !loweredKeywords.some((term) => loweredTitle.includes(term))
      ) {
        continue;
      }

      const listing: EbayListingDetails = {
        url: href,
        title,
        listingPrice: {
          amountFormatted: priceText,
          cents: priceInfo.cents,
          currency: priceInfo.currency,
        },
        listingType: "OFFER", // eBay listings are typically offers
        listingStatus: "ACTIVE",
        address: null, // eBay doesn't typically show detailed addresses in search results
      };

      seenUrls.add(href);
      results.push(listing);
    } catch (err) {
      // Best effort: one malformed card must not abort the whole page
      console.warn(`Error parsing eBay listing: ${err}`);
    }
  }

  return results;
}

// ----------------------------- Main -----------------------------

/**
 * Fetch one eBay search results page and return the listings found on it.
 *
 * @param SEARCH_QUERY - Text sent as the `_nkw` search parameter.
 * @param REQUESTS_PER_SECOND - Used to derive the pause applied after the
 *   request. Non-positive or NaN values fall back to 1.
 * @param opts - Optional filters:
 *   - `minPrice` / `maxPrice`: inclusive bounds compared against the parsed
 *     listing price **in cents**.
 *   - `strictMode`: keep only titles containing at least one keyword.
 *   - `exclusions`: drop titles containing any of these terms.
 *   - `keywords`: terms for strict mode; defaults to `[SEARCH_QUERY]`.
 *   - `buyItNowOnly`: adds `LH_BIN=1` (default `true`).
 *   - `canadaOnly`: adds `LH_PrefLoc=1` (default `true`).
 * @returns The filtered listings, or an empty array when the request returns
 *   a non-2xx status.
 * @throws Any error other than a non-2xx response (for example a network
 *   failure raised by `fetch`) is rethrown.
 */
export default async function fetchEbayItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND = 1,
  opts: {
    minPrice?: number;
    maxPrice?: number;
    strictMode?: boolean;
    exclusions?: string[];
    keywords?: string[];
    buyItNowOnly?: boolean;
    canadaOnly?: boolean;
  } = {},
): Promise<EbayListingDetails[]> {
  const {
    minPrice = 0,
    maxPrice = Number.MAX_SAFE_INTEGER,
    strictMode = false,
    exclusions = [],
    keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
    buyItNowOnly = true,
    canadaOnly = true,
  } = opts;

  // Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
  const urlParams = new URLSearchParams({
    _nkw: SEARCH_QUERY,
    _sacat: "0",
    _from: "R40",
  });

  if (buyItNowOnly) {
    urlParams.set("LH_BIN", "1");
  }

  if (canadaOnly) {
    urlParams.set("LH_PrefLoc", "1");
  }

  const searchUrl = `https://www.ebay.ca/sch/i.html?${urlParams.toString()}`;

  // A rate of 0 or NaN would otherwise produce an Infinity/NaN delay
  const requestsPerSecond = REQUESTS_PER_SECOND > 0 ? REQUESTS_PER_SECOND : 1;
  const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));

  console.log(`Fetching eBay search: ${searchUrl}`);

  try {
    // Use custom headers modeled after real browser requests to bypass bot detection
    const headers: Record<string, string> = {
      "User-Agent":
        "Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
      Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
      "Accept-Language": "en-US,en;q=0.5",
      "Accept-Encoding": "gzip, deflate, br",
      Referer: "https://www.ebay.ca/",
      Connection: "keep-alive",
      "Upgrade-Insecure-Requests": "1",
      "Sec-Fetch-Dest": "document",
      "Sec-Fetch-Mode": "navigate",
      "Sec-Fetch-Site": "same-origin",
      "Sec-Fetch-User": "?1",
      Priority: "u=0, i",
    };

    const res = await fetch(searchUrl, {
      method: "GET",
      headers,
    });

    if (!res.ok) {
      throw new HttpError(
        `Request failed with status ${res.status}`,
        res.status,
        searchUrl,
      );
    }

    const searchHtml = await res.text();
    // Respect per-request delay to keep at or under REQUESTS_PER_SECOND
    await delay(DELAY_MS);

    console.log(`\nParsing eBay listings...`);

    const listings = parseEbayListings(
      searchHtml,
      keywords,
      exclusions,
      strictMode,
    );

    // Filter by price range (additional safety check).
    // Compare against undefined explicitly so a 0-cent price is not discarded
    // by truthiness and the callback returns a real boolean.
    const filteredListings = listings.filter((listing) => {
      const cents = listing.listingPrice?.cents;
      return cents !== undefined && cents >= minPrice && cents <= maxPrice;
    });

    console.log(`Parsed ${filteredListings.length} eBay listings.`);
    return filteredListings;
  } catch (err) {
    if (err instanceof HttpError) {
      console.error(
        `Failed to fetch eBay search (${err.status}): ${err.message}`,
      );
      return [];
    }
    throw err;
  }
}