From fa7ac59c456033c330c1f573a1d37875f124a906 Mon Sep 17 00:00:00 2001
From: Dmytro Stanchiev
Date: Thu, 2 Oct 2025 13:52:29 -0400
Subject: [PATCH] feat: ebay parser

Signed-off-by: Dmytro Stanchiev
---
 src/ebay.ts  | 487 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/index.ts |  62 ++++++
 2 files changed, 549 insertions(+)
 create mode 100644 src/ebay.ts

diff --git a/src/ebay.ts b/src/ebay.ts
new file mode 100644
index 0000000..0fc9320
--- /dev/null
+++ b/src/ebay.ts
@@ -0,0 +1,487 @@
+/* eslint-disable @typescript-eslint/no-explicit-any */
+import { parseHTML } from "linkedom";
+import cliProgress from "cli-progress";
+
+// ----------------------------- Types -----------------------------
+
+type HTMLString = string;
+
+type ListingDetails = {
+  url: string;
+  title: string;
+  description?: string;
+  listingPrice?: {
+    amountFormatted: string;
+    cents?: number;
+    currency?: string;
+  };
+  listingType?: string;
+  listingStatus?: string;
+  creationDate?: string;
+  endDate?: string;
+  numberOfViews?: number;
+  address?: string | null;
+};
+
+// ----------------------------- Utilities -----------------------------
+
+/** Type guard: true for any non-null value whose `typeof` is "object". */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null;
+}
+
+/** Resolves after `ms` milliseconds. */
+async function delay(ms: number): Promise<void> {
+  await new Promise<void>((resolve) => setTimeout(resolve, ms));
+}
+
+/**
+ * Turns cents to localized currency string.
+ */
+function formatCentsToCurrency(
+  num: number | string | undefined,
+  locale = "en-US",
+): string {
+  if (num == null) return "";
+  const cents = typeof num === "string" ? Number.parseInt(num, 10) : num;
+  if (Number.isNaN(cents)) return "";
+  const dollars = cents / 100;
+  const formatter = new Intl.NumberFormat(locale, {
+    minimumFractionDigits: 2,
+    maximumFractionDigits: 2,
+    useGrouping: true,
+  });
+  return formatter.format(dollars);
+}
+
+/**
+ * Parse an eBay price string such as "$1.50 CAD", "C $1.50" or "CA $1.50"
+ * into integer cents plus a currency code.
+ *
+ * Only CAD is detected explicitly; every other input is reported as USD.
+ * Returns null when the text contains no number.
+ */
+function parseEbayPrice(
+  priceText: string,
+): { cents: number; currency: string } | null {
+  if (!priceText || typeof priceText !== "string") return null;
+
+  const cleaned = priceText.trim();
+
+  // First number in the string. It has to contain a digit up front (or be a
+  // bare ".99") so stray punctuation such as the comma in "Price, $5" is
+  // never mistaken for the amount.
+  const amountMatch = cleaned.match(/\d[\d,]*(?:\.\d+)?|\.\d+/);
+  if (!amountMatch) return null;
+
+  const dollars = Number.parseFloat(amountMatch[0].replace(/,/g, ""));
+  if (Number.isNaN(dollars)) return null;
+
+  const cents = Math.round(dollars * 100);
+
+  // "CAD", "C $", "C$", "CA $" and "CA$" all denote Canadian dollars.
+  const currency = /CAD|\bCA?\s*\$/i.test(cleaned) ? "CAD" : "USD";
+
+  return { cents, currency };
+}
+
+class HttpError extends Error {
+  constructor(
+    message: string,
+    public readonly status: number,
+    public readonly url: string,
+  ) {
+    super(message);
+    this.name = "HttpError";
+  }
+}
+
+// ----------------------------- HTTP Client -----------------------------
+
+/**
+  Fetch HTML with a basic retry strategy and simple rate-limit delay between calls.
+  - Retries on 429, 5xx and thrown fetch errors, at most `maxRetries` times
+  - Respects X-RateLimit-Reset when present (seconds)
+  - Any other non-2xx status is thrown as HttpError without retrying
+*/
+async function fetchHtml(
+  url: string,
+  DELAY_MS: number,
+  opts?: {
+    maxRetries?: number;
+    retryBaseMs?: number;
+    onRateInfo?: (remaining: string | null, reset: string | null) => void;
+  },
+): Promise<HTMLString> {
+  const maxRetries = opts?.maxRetries ?? 3;
+  const retryBaseMs = opts?.retryBaseMs ?? 500;
+
+  for (let attempt = 0; attempt <= maxRetries; attempt++) {
+    const isLastAttempt = attempt >= maxRetries;
+    try {
+      const res = await fetch(url, {
+        method: "GET",
+        headers: {
+          accept:
+            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
+          "accept-language": "en-CA,en-US;q=0.9,en;q=0.8",
+          "cache-control": "no-cache",
+          "upgrade-insecure-requests": "1",
+          "user-agent":
+            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
+        },
+      });
+
+      const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
+      const rateLimitReset = res.headers.get("X-RateLimit-Reset");
+      opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);
+
+      if (!res.ok) {
+        const retryable =
+          res.status === 429 || (res.status >= 500 && res.status < 600);
+        if (!retryable || isLastAttempt) {
+          throw new HttpError(
+            `Request failed with status ${res.status}`,
+            res.status,
+            url,
+          );
+        }
+        // Respect 429 reset if provided, otherwise back off linearly
+        const resetSeconds =
+          res.status === 429 && rateLimitReset ? Number(rateLimitReset) : NaN;
+        const waitMs = Number.isFinite(resetSeconds)
+          ? Math.max(0, resetSeconds * 1000)
+          : (attempt + 1) * retryBaseMs;
+        await delay(waitMs);
+        continue;
+      }
+
+      const html = await res.text();
+      // Respect per-request delay to keep at or under REQUESTS_PER_SECOND
+      await delay(DELAY_MS);
+      return html;
+    } catch (err) {
+      // HTTP statuses were already classified above; only thrown fetch
+      // errors (network failures) are retried from here.
+      if (err instanceof HttpError || isLastAttempt) throw err;
+      await delay((attempt + 1) * retryBaseMs);
+    }
+  }
+
+  throw new Error("Exhausted retries without response");
+}
+
+// ----------------------------- Parsing -----------------------------
+
+/** eBay UI string that gets appended to titles ("Opens in a new window or tab"). */
+const NEW_WINDOW_SUFFIX = /opens in a new (?:window|tab)/i;
+
+/** Text that starts with a currency symbol, optionally prefixed by a code ("C $", "US $"). */
+const PRICE_TEXT = /^\s*(?:[A-Z]{1,3}\s*)?[$£€¥]/u;
+
+/** Words that mark a "$"-containing text as a product description rather than a price. */
+const PRODUCT_WORDS = ["laptop", "computer", "intel", "core", "ram", "ssd"];
+
+/** True when the element is marked as struck through (an original, pre-sale price). */
+function isStruckThrough(el: Element): boolean {
+  return (
+    el.classList.contains("s-strikethrough") ||
+    el.classList.contains("u-flStrike") ||
+    el.closest("s, del, strike") !== null
+  );
+}
+
+/**
+  Parse eBay search page HTML and extract listings using DOM selectors.
+  - Listings are found through their item links (href containing "itm/")
+  - Titles containing any exclusion are dropped (case-insensitive)
+  - In strict mode the title must contain at least one keyword
+  - Each item URL (ignoring query string) is returned at most once
+*/
+function parseEbayListings(
+  htmlString: HTMLString,
+  keywords: string[],
+  exclusions: string[],
+  strictMode: boolean,
+): ListingDetails[] {
+  const { document } = parseHTML(htmlString);
+  const results: ListingDetails[] = [];
+
+  // Normalise the filters once. Empty entries are dropped: an empty string is
+  // a substring of every title and would exclude (or accept) everything.
+  const normalize = (terms: string[]): string[] =>
+    terms.map((term) => term.trim().toLowerCase()).filter(Boolean);
+  const wantedTerms = normalize(keywords);
+  const bannedTerms = normalize(exclusions);
+
+  // The same item can be matched through more than one link on the page.
+  const seenItems = new Set<string>();
+
+  // Find all listing links by looking for eBay item URLs (/itm/)
+  const linkElements = document.querySelectorAll('a[href*="itm/"]');
+
+  for (const linkElement of linkElements) {
+    try {
+      // Get href attribute
+      let href = linkElement.getAttribute("href");
+      if (!href) continue;
+
+      // Make href absolute
+      if (!href.startsWith("http")) {
+        href = href.startsWith("//")
+          ? `https:${href}`
+          : `https://www.ebay.com${href}`;
+      }
+
+      // Find the container - go up several levels to find the item container
+      // Modern eBay uses complex nested structures
+      const container =
+        linkElement.parentElement?.parentElement?.parentElement ??
+        linkElement.parentElement?.parentElement;
+      if (!container) continue;
+
+      // Extract title - look for heading or title-related elements near the link
+      let titleElement = container.querySelector(
+        'h3, [role="heading"], .s-item__title span',
+      );
+
+      // If no direct title element, fall back to the first plausible text element
+      if (!titleElement) {
+        for (const candidate of container.querySelectorAll("span, div")) {
+          const text = candidate.textContent?.trim();
+          if (
+            text &&
+            text.length > 10 &&
+            text.length < 200 &&
+            !text.includes("$") &&
+            !text.includes("item")
+          ) {
+            titleElement = candidate;
+            break;
+          }
+        }
+      }
+
+      const rawTitle = titleElement?.textContent?.trim();
+      if (!rawTitle) continue;
+
+      // Cut off eBay UI strings that get appended to titles
+      const suffixAt = rawTitle.search(NEW_WINDOW_SUFFIX);
+      const title =
+        suffixAt === -1 ? rawTitle : rawTitle.slice(0, suffixAt).trim();
+
+      // Skip titles too short to be real and irrelevant eBay ads
+      if (title.length < 10 || title === "Shop on eBay") continue;
+
+      // Extract price - look for eBay's price classes first
+      let priceElement = container.querySelector(
+        '[class*="s-item__price"], .s-item__price, [class*="price"]',
+      );
+
+      // If no direct price class, look for short texts containing $ (but not titles)
+      if (!priceElement) {
+        const candidates = container.querySelectorAll(
+          "span, div, b, em, strong",
+        );
+        for (const el of candidates) {
+          const text = el.textContent?.trim();
+          if (
+            text &&
+            text.includes("$") &&
+            text.length < 100 &&
+            !PRODUCT_WORDS.some((word) => text.includes(word)) &&
+            !/\d{4}/.test(text) && // Avoid years like "2024"
+            !text.includes('"') // Avoid measurements
+          ) {
+            priceElement = el;
+            break;
+          }
+        }
+      }
+
+      // For discounted items, eBay shows both original and sale price
+      // Prefer sale/current price over original/strikethrough price
+      if (priceElement) {
+        const priceContainer =
+          priceElement.closest('[class*="s-item__price"]') ||
+          priceElement.parentElement;
+
+        if (priceContainer) {
+          // Keep only elements that actually contain prices (not labels)
+          const actualPrices = Array.from(
+            priceContainer.querySelectorAll(
+              '[class*="s-item__price"], span, b, em, strong, s, del, strike',
+            ),
+          ).filter((el) => {
+            const text = el.textContent?.trim();
+            return (
+              !!text &&
+              PRICE_TEXT.test(text) &&
+              text.length < 50 &&
+              !/\d{4}/.test(text)
+            );
+          });
+
+          if (actualPrices.length > 1) {
+            // First price that is not struck through, else the last one
+            // (likely the most current)
+            priceElement =
+              actualPrices.find((el) => !isStruckThrough(el)) ??
+              actualPrices[actualPrices.length - 1] ??
+              priceElement;
+          }
+        }
+      }
+
+      const priceText = priceElement?.textContent?.trim();
+      if (!priceText) continue;
+
+      // Parse price into cents and currency
+      const priceInfo = parseEbayPrice(priceText);
+      if (!priceInfo) continue;
+
+      const lowerTitle = title.toLowerCase();
+
+      // Apply exclusion filters
+      if (bannedTerms.some((term) => lowerTitle.includes(term))) continue;
+
+      // Apply strict mode filter (title must contain at least one keyword)
+      if (strictMode && !wantedTerms.some((term) => lowerTitle.includes(term))) {
+        continue;
+      }
+
+      // Emit every item only once, whichever of its links is seen first
+      const itemKey = href.replace(/[?#].*$/, "");
+      if (seenItems.has(itemKey)) continue;
+      seenItems.add(itemKey);
+
+      results.push({
+        url: href,
+        title,
+        listingPrice: {
+          amountFormatted: priceText,
+          cents: priceInfo.cents,
+          currency: priceInfo.currency,
+        },
+        listingType: "OFFER", // eBay listings are typically offers
+        listingStatus: "ACTIVE",
+        address: null, // eBay doesn't typically show detailed addresses in search results
+      });
+    } catch (err) {
+      console.warn(`Error parsing eBay listing: ${err}`);
+      continue;
+    }
+  }
+
+  return results;
+}
+
+// ----------------------------- Main -----------------------------
+
+/**
+ * Fetch the first ebay.ca search results page for a query and return the
+ * listings parsed from it.
+ *
+ * @param SEARCH_QUERY Search phrase sent to eBay.
+ * @param REQUESTS_PER_SECOND Request-rate ceiling; sets the pause after the request.
+ * @param opts.minPrice Inclusive lower bound, compared against the price in cents.
+ * @param opts.maxPrice Inclusive upper bound, compared against the price in cents.
+ * @param opts.strictMode When true, titles must contain at least one keyword.
+ * @param opts.exclusions Titles containing any of these are dropped.
+ * @param opts.keywords Keywords for strict mode; defaults to the search query.
+ * @returns The matching listings, or [] when eBay answers with a non-2xx status.
+ */
+export default async function fetchEbayItems(
+  SEARCH_QUERY: string,
+  REQUESTS_PER_SECOND = 1,
+  opts: {
+    minPrice?: number;
+    maxPrice?: number;
+    strictMode?: boolean;
+    exclusions?: string[];
+    keywords?: string[];
+  } = {},
+): Promise<ListingDetails[]> {
+  const {
+    minPrice = 0,
+    maxPrice = Number.MAX_SAFE_INTEGER,
+    strictMode = false,
+    exclusions = [],
+    keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
+  } = opts;
+
+  // Build eBay search URL - use Canadian site and tracking parameters like real browser
+  const searchUrl = `https://www.ebay.ca/sch/i.html?_nkw=${encodeURIComponent(SEARCH_QUERY)}&_sacat=0&_from=R40&_trksid=p4432023.m570.l1313`;
+
+  // A zero, negative or non-finite rate falls back to one request per second
+  const requestsPerSecond =
+    Number.isFinite(REQUESTS_PER_SECOND) && REQUESTS_PER_SECOND > 0
+      ? REQUESTS_PER_SECOND
+      : 1;
+  const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));
+
+  console.log(`Fetching eBay search: ${searchUrl}`);
+
+  try {
+    // Use custom headers modeled after real browser requests to bypass bot detection
+    const headers: Record<string, string> = {
+      "User-Agent":
+        "Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
+      Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+      "Accept-Language": "en-US,en;q=0.5",
+      "Accept-Encoding": "gzip, deflate, br",
+      Referer: "https://www.ebay.ca/",
+      Connection: "keep-alive",
+      "Upgrade-Insecure-Requests": "1",
+      "Sec-Fetch-Dest": "document",
+      "Sec-Fetch-Mode": "navigate",
+      "Sec-Fetch-Site": "same-origin",
+      "Sec-Fetch-User": "?1",
+      Priority: "u=0, i",
+    };
+
+    const res = await fetch(searchUrl, {
+      method: "GET",
+      headers,
+    });
+
+    if (!res.ok) {
+      throw new HttpError(
+        `Request failed with status ${res.status}`,
+        res.status,
+        searchUrl,
+      );
+    }
+
+    const searchHtml = await res.text();
+    // Respect per-request delay to keep at or under REQUESTS_PER_SECOND
+    await delay(DELAY_MS);
+
+    console.log(`\nParsing eBay listings...`);
+
+    const listings = parseEbayListings(
+      searchHtml,
+      keywords,
+      exclusions,
+      strictMode,
+    );
+
+    // Filter by price range (additional safety check)
+    const filteredListings = listings.filter((listing) => {
+      const cents = listing.listingPrice?.cents;
+      // Compare against undefined explicitly so a price of 0 is not dropped
+      return cents !== undefined && cents >= minPrice && cents <= maxPrice;
+    });
+
+    console.log(`Parsed ${filteredListings.length} eBay listings.`);
+    return filteredListings;
+  } catch (err) {
+    if (err instanceof HttpError) {
+      console.error(
+        `Failed to fetch eBay search (${err.status}): ${err.message}`,
+      );
+      return [];
+    }
+    throw err;
+  }
+}
diff --git a/src/index.ts b/src/index.ts
index 63a7a54..113ef62 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1,5 +1,6 @@
 import fetchKijijiItems from "@/kijiji";
 import fetchFacebookItems from "@/facebook";
+import fetchEbayItems from "@/ebay";
 
 const PORT = process.env.PORT || 4005;
 
@@ -69,5 +70,66 @@ const server = Bun.serve({
       }
     },
 
+    "/api/ebay": async (req: Request) => {
+      const reqUrl = new URL(req.url);
+
+      const SEARCH_QUERY =
+        req.headers.get("query") || reqUrl.searchParams.get("q") || null;
+      if (!SEARCH_QUERY)
+        return Response.json(
+          {
+            message:
+              "Request didn't have 'query' header or 'q' search parameter!",
+          },
+          { status: 400 },
+        );
+
+      // Parse optional parameters with defaults
+      const parseIntParam = (name: string): number | undefined => {
+        const raw = reqUrl.searchParams.get(name);
+        return raw ? Number.parseInt(raw, 10) : undefined;
+      };
+      // Split a comma-separated parameter; empty entries ("a,,b") are dropped
+      const parseListParam = (name: string): string[] =>
+        (reqUrl.searchParams.get(name) ?? "")
+          .split(",")
+          .map((s) => s.trim())
+          .filter(Boolean);
+
+      const minPrice = parseIntParam("minPrice");
+      const maxPrice = parseIntParam("maxPrice");
+      if (Number.isNaN(minPrice) || Number.isNaN(maxPrice))
+        return Response.json(
+          { message: "'minPrice' and 'maxPrice' must be integers!" },
+          { status: 400 },
+        );
+      const strictMode = reqUrl.searchParams.get("strictMode") === "true";
+      const exclusions = parseListParam("exclusions");
+      const keywordsParam = parseListParam("keywords");
+      const keywords = keywordsParam.length > 0 ? keywordsParam : [SEARCH_QUERY];
+
+      try {
+        const items = await fetchEbayItems(SEARCH_QUERY, 5, {
+          minPrice,
+          maxPrice,
+          strictMode,
+          exclusions,
+          keywords,
+        });
+        if (!items || items.length === 0)
+          return Response.json(
+            { message: "Search didn't return any results!" },
+            { status: 404 },
+          );
+        return Response.json(items, { status: 200 });
+      } catch (error) {
+        console.error("eBay scraping error:", error);
+        // Request validation happened above, so a failure here is server-side
+        const errorMessage =
+          error instanceof Error ? error.message : "Unknown error occurred";
+        return Response.json({ message: errorMessage }, { status: 500 });
+      }
+    },
+
     // Wildcard route for all routes that start with "/api/" and aren't otherwise matched
     "/api/*": Response.json({ message: "Not found" }, { status: 404 }),