feat: ebay parser

Signed-off-by: Dmytro Stanchiev <git@dmytros.dev>
This commit is contained in:
2025-10-02 13:52:29 -04:00
parent 8c52efe5e7
commit fa7ac59c45
2 changed files with 497 additions and 0 deletions

445
src/ebay.ts Normal file
View File

@@ -0,0 +1,445 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import { parseHTML } from "linkedom";
import cliProgress from "cli-progress";
// ----------------------------- Types -----------------------------
/** Raw HTML markup held as a plain string. */
type HTMLString = string;
/**
 * One listing extracted from a search results page.
 * Only `url` and `title` are required; every other field is optional.
 */
type ListingDetails = {
/** Link to the listing; the eBay parser makes it absolute before storing it. */
url: string;
/** Listing title text. */
title: string;
/** Free-form description; not set by the eBay search parser. */
description?: string;
listingPrice?: {
/** Price text as scraped from the page (stored unmodified). */
amountFormatted: string;
/** Price in hundredths of the currency unit (dollars * 100, rounded). */
cents?: number;
/** Currency code; the eBay price parser emits "USD" or "CAD". */
currency?: string;
};
/** Kind of listing; the eBay parser always stores "OFFER". */
listingType?: string;
/** Listing state; the eBay parser always stores "ACTIVE". */
listingStatus?: string;
/** Not set by the eBay search parser. */
creationDate?: string;
/** Not set by the eBay search parser. */
endDate?: string;
/** Not set by the eBay search parser. */
numberOfViews?: number;
/** Seller/item location; the eBay parser always stores null. */
address?: string | null;
};
// ----------------------------- Utilities -----------------------------
/**
 * Type guard: true for any non-null value whose `typeof` is "object"
 * (plain objects, arrays, class instances, ...).
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}
/**
 * Resolve after roughly `ms` milliseconds (timer-based sleep).
 */
async function delay(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Format an amount given in cents as a localized decimal string with exactly
 * two fraction digits and digit grouping (no currency symbol is added).
 *
 * @param num    Amount in cents; strings are read with base-10 `parseInt`.
 * @param locale BCP 47 locale used for formatting (default "en-US").
 * @returns The formatted amount, or "" when `num` is null/undefined or not a number.
 */
function formatCentsToCurrency(
  num: number | string | undefined,
  locale = "en-US",
): string {
  if (num === undefined || num === null) {
    return "";
  }

  let cents: number;
  if (typeof num === "string") {
    cents = Number.parseInt(num, 10);
  } else {
    cents = num;
  }
  if (Number.isNaN(cents)) {
    return "";
  }

  return (cents / 100).toLocaleString(locale, {
    minimumFractionDigits: 2,
    maximumFractionDigits: 2,
    useGrouping: true,
  });
}
/**
 * Parse an eBay price string such as "$1.50 CAD", "C $1.50" or "CA $1.50"
 * into an integer number of cents plus a currency code.
 *
 * Only the first numeric amount in the text is used, so a range like
 * "$10.00 to $20.00" yields the lower bound.
 *
 * @param priceText Price text as scraped from the page.
 * @returns `{ cents, currency }`, or `null` when the input is empty, not a
 *          string, or contains no usable number. `currency` is "CAD" when a
 *          Canadian marker is present and "USD" otherwise.
 */
function parseEbayPrice(priceText: string): { cents: number; currency: string } | null {
  if (!priceText || typeof priceText !== "string") return null;

  const cleaned = priceText.trim();

  // The amount must start with a digit so stray punctuation (e.g. the comma in
  // "Now, $5.00") is never mistaken for the number.
  const amountMatch = cleaned.match(/\d[\d,]*(?:\.\d+)?/);
  if (!amountMatch) return null;

  const dollars = Number.parseFloat(amountMatch[0].replace(/,/g, ""));
  if (!Number.isFinite(dollars)) return null;

  const cents = Math.round(dollars * 100);

  // Canadian markers: the ISO code anywhere (any case), or a "C"/"CA" prefix
  // directly before the dollar sign with optional whitespace
  // ("C $", "C$", "CA $", "CA$").
  const isCanadian = /CAD/i.test(cleaned) || /\bCA?\s?\$/.test(cleaned);

  return { cents, currency: isCanadian ? "CAD" : "USD" };
}
/**
 * Error raised for an HTTP response with a non-success status.
 * Carries the status code and the URL that was requested.
 */
class HttpError extends Error {
  public readonly status: number;
  public readonly url: string;

  constructor(message: string, status: number, url: string) {
    super(message);
    this.status = status;
    this.url = url;
    this.name = "HttpError";
  }
}
// ----------------------------- HTTP Client -----------------------------
/**
 * Fetch HTML with a basic retry strategy and a fixed delay after each
 * successful call.
 *
 * - Retries on 429 and 5xx responses and on errors thrown by `fetch` or while
 *   reading the body, up to `maxRetries` extra attempts.
 * - On 429, waits `X-RateLimit-Reset` seconds when that header is a finite
 *   number; otherwise uses a linear back-off of `(attempt + 1) * retryBaseMs`.
 * - Any other non-OK status fails immediately without retrying.
 *
 * @param url      Page to fetch.
 * @param DELAY_MS Milliseconds to wait after a successful response, before returning.
 * @param opts     `maxRetries` (default 3), `retryBaseMs` (default 500) and an
 *                 optional `onRateInfo` callback that receives the raw
 *                 `X-RateLimit-Remaining` / `X-RateLimit-Reset` header values
 *                 of every response.
 * @returns The response body as text.
 * @throws HttpError when the final response has a non-OK status.
 * @throws The underlying error when the request keeps failing after all retries.
 */
async function fetchHtml(
  url: string,
  DELAY_MS: number,
  opts?: {
    maxRetries?: number;
    retryBaseMs?: number;
    onRateInfo?: (remaining: string | null, reset: string | null) => void;
  },
): Promise<HTMLString> {
  const maxRetries = opts?.maxRetries ?? 3;
  const retryBaseMs = opts?.retryBaseMs ?? 500;

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    const canRetry = attempt < maxRetries;
    const backoffMs = (attempt + 1) * retryBaseMs;

    try {
      const res = await fetch(url, {
        method: "GET",
        headers: {
          accept:
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
          "accept-language": "en-CA,en-US;q=0.9,en;q=0.8",
          "cache-control": "no-cache",
          "upgrade-insecure-requests": "1",
          "user-agent":
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
        },
      });

      const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
      const rateLimitReset = res.headers.get("X-RateLimit-Reset");
      opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);

      if (!res.ok) {
        // Respect the 429 reset hint if provided; only sleep when another
        // attempt will actually follow.
        if (res.status === 429 && canRetry) {
          const resetSeconds = rateLimitReset ? Number(rateLimitReset) : NaN;
          const waitMs = Number.isFinite(resetSeconds)
            ? Math.max(0, resetSeconds * 1000)
            : backoffMs;
          await delay(waitMs);
          continue;
        }

        // Retry on 5xx
        if (res.status >= 500 && res.status < 600 && canRetry) {
          await delay(backoffMs);
          continue;
        }

        throw new HttpError(
          `Request failed with status ${res.status}`,
          res.status,
          url,
        );
      }

      const html = await res.text();

      // Respect per-request delay to keep at or under the caller's request rate
      await delay(DELAY_MS);
      return html;
    } catch (err) {
      // An HttpError is a final verdict on the response (non-retryable status
      // or retries used up); it must not be swallowed and retried here.
      if (err instanceof HttpError || !canRetry) throw err;
      await delay(backoffMs);
    }
  }

  // Not reachable in practice (the last attempt always returns or throws);
  // kept so every code path visibly ends in a return or a throw.
  throw new Error("Exhausted retries without response");
}
// ----------------------------- Parsing -----------------------------
/**
 * Parse eBay search page HTML and extract listings using DOM selectors.
 *
 * Every anchor whose href contains "itm/" is treated as a candidate listing.
 * Title and price are looked up in an ancestor element of that anchor. A
 * candidate is dropped when no title (at least 10 characters) or no parsable
 * price is found, or when it fails the exclusion / strict-mode filters.
 *
 * @param htmlString Search results page markup.
 * @param keywords   Terms used by strict mode (case-insensitive substring match).
 * @param exclusions Terms that reject a listing when contained in its title
 *                   (case-insensitive). Blank terms are ignored.
 * @param strictMode When true, the title must contain at least one keyword.
 * @returns One entry per distinct listing URL, in document order.
 */
function parseEbayListings(
  htmlString: HTMLString,
  keywords: string[],
  exclusions: string[],
  strictMode: boolean
): ListingDetails[] {
  const { document } = parseHTML(htmlString);
  const results: ListingDetails[] = [];

  // URLs already emitted. Several anchors can point at the same item URL;
  // without this the same listing would be returned more than once.
  const seenUrls = new Set<string>();

  // Lower-case the filter terms once instead of once per listing. Blank
  // exclusions are dropped: every string contains "", so a blank term would
  // otherwise reject every listing.
  const loweredExclusions = exclusions
    .filter((term) => term.trim().length > 0)
    .map((term) => term.toLowerCase());
  const loweredKeywords = keywords.map((term) => term.toLowerCase());

  // eBay UI strings that get appended to the title text
  const uiStrings = [
    "Opens in a new window",
    "Opens in a new tab",
    "Opens in a new window or tab",
    "opens in a new window",
    "opens in a new tab",
    "opens in a new window or tab",
  ];

  // Find all listing links by looking for eBay item URLs (/itm/)
  const linkElements = document.querySelectorAll('a[href*="itm/"]');

  for (const linkElement of linkElements) {
    try {
      let href = linkElement.getAttribute("href");
      if (!href) continue;

      // Make href absolute. A path without a leading slash gets one so it is
      // not glued directly onto the host name.
      if (!href.startsWith("http")) {
        if (href.startsWith("//")) {
          href = `https:${href}`;
        } else {
          href = `https://www.ebay.com${href.startsWith("/") ? "" : "/"}${href}`;
        }
      }

      if (seenUrls.has(href)) continue;

      // Find the container - go up several levels to find the item container
      let container = linkElement.parentElement?.parentElement?.parentElement;
      if (!container) {
        // Try a different level
        container = linkElement.parentElement?.parentElement;
      }
      if (!container) continue;

      // Extract title - look for heading or title-related elements near the link
      let titleElement = container.querySelector('h3, [role="heading"], .s-item__title span');

      // If no direct title element, fall back to the first span/div whose text
      // looks like a title (medium length, no price, no "item" label)
      if (!titleElement) {
        const nearbySpans = container.querySelectorAll("span, div");
        for (const span of nearbySpans) {
          const text = span.textContent?.trim();
          if (text && text.length > 10 && text.length < 200 && !text.includes("$") && !text.includes("item")) {
            titleElement = span;
            break;
          }
        }
      }

      let title = titleElement?.textContent?.trim();

      if (title) {
        // Cut the title at the first UI string found (only one is removed)
        for (const uiString of uiStrings) {
          const uiIndex = title.indexOf(uiString);
          if (uiIndex !== -1) {
            title = title.substring(0, uiIndex).trim();
            break;
          }
        }
      }

      // Skip items with a missing or too-short title
      if (!title || title.length < 10) continue;

      // Skip irrelevant eBay ads
      if (title === "Shop on eBay") continue;

      // Extract price - look for eBay's price classes
      let priceElement = container.querySelector('[class*="s-item__price"], .s-item__price, [class*="price"]');

      // If no direct price class, look for short text containing $ that does
      // not look like a product description
      if (!priceElement) {
        const spansAndElements = container.querySelectorAll("span, div, b, em, strong");
        for (const el of spansAndElements) {
          const text = el.textContent?.trim();
          if (text && text.includes("$") && text.length < 100 &&
              !text.includes("laptop") && !text.includes("computer") && !text.includes("intel") &&
              !text.includes("core") && !text.includes("ram") && !text.includes("ssd") &&
              !/\d{4}/.test(text) && // Avoid years like "2024"
              !text.includes('"') // Avoid measurements
          ) {
            priceElement = el;
            break;
          }
        }
      }

      // For discounted items, eBay shows both original and sale price.
      // Prefer sale/current price over original/strikethrough price.
      if (priceElement) {
        const priceContainer = priceElement.closest('[class*="s-item__price"]') || priceElement.parentElement;
        if (priceContainer) {
          // Look for all price elements within this container, including strikethrough prices
          const allPriceElements = priceContainer.querySelectorAll('[class*="s-item__price"], span, b, em, strong, s, del, strike');

          // Keep only elements whose text starts with a currency symbol (not labels)
          const actualPrices: HTMLElement[] = [];
          for (const el of allPriceElements) {
            const text = el.textContent?.trim();
            if (text && /^\s*[\$£¥]/u.test(text) && text.length < 50 && !/\d{4}/.test(text)) {
              actualPrices.push(el);
            }
          }

          if (actualPrices.length > 1) {
            // First, look for prices that are NOT struck through
            const nonStrikethroughPrices = actualPrices.filter(el => {
              const tagName = el.tagName.toLowerCase();
              const styles = el.classList.contains("s-strikethrough") || el.classList.contains("u-flStrike") ||
                            el.closest("s, del, strike");
              return tagName !== "s" && tagName !== "del" && tagName !== "strike" && !styles;
            });

            if (nonStrikethroughPrices.length > 0) {
              // Use the first non-strikethrough price (sale price)
              priceElement = nonStrikethroughPrices[0];
            } else {
              // Fallback: use the last price (likely the most current)
              priceElement = actualPrices[actualPrices.length - 1];
            }
          }
        }
      }

      const priceText = priceElement?.textContent?.trim();
      if (!priceText) continue;

      // Parse price into cents and currency
      const priceInfo = parseEbayPrice(priceText);
      if (!priceInfo) continue;

      const titleLower = title.toLowerCase();

      // Apply exclusion filters
      if (loweredExclusions.some((exclusion) => titleLower.includes(exclusion))) {
        continue;
      }

      // Apply strict mode filter (title must contain at least one keyword)
      if (strictMode && !loweredKeywords.some((keyword) => titleLower.includes(keyword))) {
        continue;
      }

      const listing: ListingDetails = {
        url: href,
        title,
        listingPrice: {
          amountFormatted: priceText,
          cents: priceInfo.cents,
          currency: priceInfo.currency,
        },
        listingType: "OFFER", // eBay listings are typically offers
        listingStatus: "ACTIVE",
        address: null, // eBay doesn't typically show detailed addresses in search results
      };

      seenUrls.add(href);
      results.push(listing);
    } catch (err) {
      // Best effort: one malformed card must not abort the whole page
      console.warn(`Error parsing eBay listing: ${err}`);
      continue;
    }
  }

  return results;
}
// ----------------------------- Main -----------------------------
/**
 * Run one eBay (ebay.ca) search and return the listings parsed from the
 * first results page.
 *
 * @param SEARCH_QUERY        Search keywords; URL-encoded before use.
 * @param REQUESTS_PER_SECOND Request-rate budget; the function waits
 *                            `1000 / REQUESTS_PER_SECOND` ms (at least 1 ms)
 *                            after the request completes.
 * @param opts.minPrice   Lowest accepted price in cents (default 0).
 * @param opts.maxPrice   Highest accepted price in cents (default: no limit).
 * @param opts.strictMode Require the title to contain at least one keyword.
 * @param opts.exclusions Terms that reject a listing when found in its title.
 * @param opts.keywords   Strict-mode keywords (default: the search query).
 * @returns Listings inside the price range. Listings without a price, or
 *          priced at 0, are dropped. An empty array is returned when the
 *          search request fails with a non-OK HTTP status.
 * @throws Any non-HTTP error (network failure, parser failure) is rethrown.
 */
export default async function fetchEbayItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND = 1,
  opts: {
    minPrice?: number;
    maxPrice?: number;
    strictMode?: boolean;
    exclusions?: string[];
    keywords?: string[];
  } = {},
): Promise<ListingDetails[]> {
  const {
    strictMode = false,
    exclusions = [],
    keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
  } = opts;

  // Destructuring defaults only replace `undefined`. A NaN bound (e.g. from a
  // failed parseInt upstream) would make every comparison false and silently
  // drop all listings, so non-finite bounds fall back to the defaults.
  const minPrice =
    typeof opts.minPrice === "number" && Number.isFinite(opts.minPrice)
      ? opts.minPrice
      : 0;
  const maxPrice =
    typeof opts.maxPrice === "number" && Number.isFinite(opts.maxPrice)
      ? opts.maxPrice
      : Number.MAX_SAFE_INTEGER;

  // Build eBay search URL - use Canadian site and tracking parameters like a
  // real browser. Parameters are joined with a plain "&"; a stray "^" before
  // each "&" would become part of the preceding parameter value.
  const searchUrl = `https://www.ebay.ca/sch/i.html?_nkw=${encodeURIComponent(SEARCH_QUERY)}&_sacat=0&_from=R40&_trksid=p4432023.m570.l1313`;

  const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));

  console.log(`Fetching eBay search: ${searchUrl}`);

  try {
    // Use custom headers modeled after real browser requests to bypass bot detection
    const headers: Record<string, string> = {
      'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0',
      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
      'Accept-Language': 'en-US,en;q=0.5',
      'Accept-Encoding': 'gzip, deflate, br',
      'Referer': 'https://www.ebay.ca/',
      'Connection': 'keep-alive',
      'Upgrade-Insecure-Requests': '1',
      'Sec-Fetch-Dest': 'document',
      'Sec-Fetch-Mode': 'navigate',
      'Sec-Fetch-Site': 'same-origin',
      'Sec-Fetch-User': '?1',
      'Priority': 'u=0, i'
    };

    const res = await fetch(searchUrl, {
      method: "GET",
      headers,
    });

    if (!res.ok) {
      throw new HttpError(
        `Request failed with status ${res.status}`,
        res.status,
        searchUrl,
      );
    }

    const searchHtml = await res.text();

    // Respect per-request delay to keep at or under REQUESTS_PER_SECOND
    await delay(DELAY_MS);

    console.log(`\nParsing eBay listings...`);
    const listings = parseEbayListings(searchHtml, keywords, exclusions, strictMode);

    // Filter by price range (additional safety check)
    const filteredListings = listings.filter((listing) => {
      const cents = listing.listingPrice?.cents;
      // Missing, zero or NaN prices are rejected
      if (!cents) return false;
      return cents >= minPrice && cents <= maxPrice;
    });

    console.log(`Parsed ${filteredListings.length} eBay listings.`);
    return filteredListings;
  } catch (err) {
    if (err instanceof HttpError) {
      console.error(
        `Failed to fetch eBay search (${err.status}): ${err.message}`,
      );
      return [];
    }
    throw err;
  }
}

View File

@@ -1,5 +1,6 @@
import fetchKijijiItems from "@/kijiji";
import fetchFacebookItems from "@/facebook";
import fetchEbayItems from "@/ebay";
const PORT = process.env.PORT || 4005;
@@ -69,6 +70,57 @@ const server = Bun.serve({
}
},
// GET /api/ebay - search eBay and return the parsed listings as JSON.
// The query comes from the "query" header or the "q" search parameter.
// Optional search parameters: minPrice, maxPrice (integers), strictMode
// ("true" enables it), exclusions and keywords (comma-separated lists).
// Responses: 200 with the listings, 404 when nothing was found, 400 when the
// query is missing or the scrape throws.
"/api/ebay": async (req: Request) => {
  const reqUrl = new URL(req.url);

  const SEARCH_QUERY =
    req.headers.get("query") || reqUrl.searchParams.get("q") || null;
  if (!SEARCH_QUERY)
    return Response.json(
      {
        message:
          "Request didn't have 'query' header or 'q' search parameter!",
      },
      { status: 400 },
    );

  // Read an integer parameter. Missing or non-numeric values become
  // `undefined` so the callee's defaults apply; a NaN would otherwise be
  // passed on as a price bound.
  const readIntParam = (name: string): number | undefined => {
    const raw = reqUrl.searchParams.get(name);
    if (!raw) return undefined;
    const value = Number.parseInt(raw, 10);
    return Number.isNaN(value) ? undefined : value;
  };

  // Read a comma-separated list parameter, trimming entries and dropping
  // blank ones ("a,,b" or a trailing comma would otherwise yield "" terms,
  // and "" is contained in every title).
  const readListParam = (name: string): string[] =>
    (reqUrl.searchParams.get(name) ?? "")
      .split(",")
      .map((s) => s.trim())
      .filter((s) => s.length > 0);

  // Parse optional parameters with defaults
  const minPrice = readIntParam("minPrice");
  const maxPrice = readIntParam("maxPrice");
  const strictMode = reqUrl.searchParams.get("strictMode") === "true";
  const exclusions = readListParam("exclusions");
  const keywordList = readListParam("keywords");
  const keywords = keywordList.length > 0 ? keywordList : [SEARCH_QUERY];

  try {
    const items = await fetchEbayItems(SEARCH_QUERY, 5, {
      minPrice,
      maxPrice,
      strictMode,
      exclusions,
      keywords,
    });
    if (!items || items.length === 0)
      return Response.json(
        { message: "Search didn't return any results!" },
        { status: 404 },
      );
    return Response.json(items, { status: 200 });
  } catch (error) {
    console.error("eBay scraping error:", error);
    const errorMessage = error instanceof Error ? error.message : "Unknown error occurred";
    return Response.json(
      { message: errorMessage },
      { status: 400 },
    );
  }
},
// Wildcard route for all routes that start with "/api/" and aren't otherwise matched
"/api/*": Response.json({ message: "Not found" }, { status: 404 }),