// Listing metadata: 502 lines, 15 KiB, TypeScript
import cliProgress from "cli-progress";
|
|
/* eslint-disable @typescript-eslint/no-explicit-any */
|
|
import { parseHTML } from "linkedom";
|
|
|
|
// ----------------------------- Types -----------------------------
|
|
|
|
/** Alias for a string that carries HTML markup (e.g. a fetched page body). */
type HTMLString = string;
|
|
|
|
/**
 * One scraped marketplace listing.
 *
 * Only `url` and `title` are mandatory; every other field is optional.
 */
type ListingDetails = {
  /** Link to the listing. */
  url: string;
  /** Listing title text. */
  title: string;
  /** Free-form description, when one is available. */
  description?: string;
  /** Price information for the listing. */
  listingPrice?: {
    /** Price label as text (the eBay parser stores the scraped label verbatim). */
    amountFormatted: string;
    /** Amount in cents. */
    cents?: number;
    /** Currency code, e.g. "USD" or "CAD". */
    currency?: string;
  };
  /** Listing kind, e.g. "OFFER". */
  listingType?: string;
  /** Listing state, e.g. "ACTIVE". */
  listingStatus?: string;
  /** Creation date as a string (format not established in this file). */
  creationDate?: string;
  /** End date as a string (format not established in this file). */
  endDate?: string;
  /** View counter, when known. */
  numberOfViews?: number;
  /** Seller/item address; `null` when the source does not expose one. */
  address?: string | null;
};
|
|
|
|
// ----------------------------- Utilities -----------------------------
|
|
|
|
/**
 * Type guard that narrows an unknown value to a string-keyed record.
 *
 * Any non-null value whose `typeof` is "object" passes, so arrays are
 * accepted as well.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null) return false;
  return typeof value === "object";
}
|
|
|
|
/**
 * Returns a promise that resolves (with no value) once `ms` milliseconds
 * have elapsed according to `setTimeout`.
 */
function delay(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
|
|
/**
 * Formats an amount given in cents as a locale-grouped decimal string with
 * exactly two fraction digits (e.g. 123456 -> "1,234.56" for "en-US").
 *
 * No currency symbol is added. String input is read with `parseInt` (base 10).
 *
 * @param num Amount in cents; `undefined`/`null` or unparsable input yields "".
 * @param locale BCP 47 locale tag handed to `Intl.NumberFormat`.
 * @returns The formatted amount, or "" when there is nothing to format.
 */
function formatCentsToCurrency(
  num: number | string | undefined,
  locale = "en-US",
): string {
  if (num == null) return "";

  const wholeCents = typeof num === "number" ? num : Number.parseInt(num, 10);
  if (Number.isNaN(wholeCents)) return "";

  return new Intl.NumberFormat(locale, {
    minimumFractionDigits: 2,
    maximumFractionDigits: 2,
    useGrouping: true,
  }).format(wholeCents / 100);
}
|
|
|
|
/**
 * Parses an eBay price label such as "$1.50 CAD", "CA $1.50", "C $1,234.56"
 * or "US $19.99" into an integer number of cents plus a currency code.
 *
 * Only the first number in the text is used, so a range like
 * "$10.00 to $20.00" yields the lower bound. Commas are treated as thousands
 * separators.
 *
 * Currency detection:
 * - "CAD" (any case), or a "C"/"CA" prefix before "$" ("C $", "CA$", "CA $") -> "CAD"
 * - "GBP" or "£" -> "GBP"
 * - "EUR" or "€" -> "EUR"
 * - anything else -> "USD" (the default)
 *
 * @param priceText Raw price label text.
 * @returns `{ cents, currency }`, or `null` when no amount can be extracted.
 */
function parseEbayPrice(
  priceText: string,
): { cents: number; currency: string } | null {
  // The runtime type check guards against untyped (JS) callers.
  if (!priceText || typeof priceText !== "string") return null;

  const cleaned = priceText.trim();

  // First numeric token. It must start with a digit (so a stray "," is never
  // mistaken for the amount) or be a bare fraction such as ".50".
  const amountMatch = cleaned.match(/\d[\d,]*(?:\.\d+)?|\.\d+/);
  if (!amountMatch) return null;

  const dollars = Number.parseFloat(amountMatch[0].replace(/,/g, ""));
  if (Number.isNaN(dollars)) return null;

  // Round rather than truncate: 19.99 * 100 is 1998.9999999999998 in floating point.
  const cents = Math.round(dollars * 100);

  const upper = cleaned.toUpperCase();
  let currency = "USD";
  if (upper.includes("CAD") || /CA?\s*\$/.test(cleaned)) {
    currency = "CAD";
  } else if (/\bGBP\b/.test(upper) || cleaned.includes("£")) {
    currency = "GBP";
  } else if (/\bEUR\b/.test(upper) || cleaned.includes("€")) {
    currency = "EUR";
  }

  return { cents, currency };
}
|
|
|
|
/**
 * Error raised for an HTTP response with a non-success status.
 * Carries the numeric status code and the URL that was requested.
 */
class HttpError extends Error {
  /** HTTP status code of the failed response. */
  public readonly status: number;
  /** URL that was requested. */
  public readonly url: string;

  constructor(message: string, status: number, url: string) {
    super(message);
    this.status = status;
    this.url = url;
    this.name = "HttpError";
  }
}
|
|
|
|
// ----------------------------- HTTP Client -----------------------------
|
|
|
|
/**
 * Fetches a URL as HTML text with a simple retry strategy, then waits
 * `DELAY_MS` before returning so that back-to-back calls stay rate limited.
 *
 * Retry policy:
 * - 429 and 5xx responses are retried, up to `maxRetries` extra attempts.
 * - Network-level failures (a rejected `fetch`) are retried the same way.
 * - Any other non-OK status (e.g. 404) fails immediately with `HttpError`.
 * - Once retries are exhausted, the last failure is thrown: `HttpError` for
 *   HTTP statuses, the original error for network failures.
 *
 * Back-off is linear: `(attempt + 1) * retryBaseMs`. For 429 responses, a
 * numeric `X-RateLimit-Reset` header overrides it and is interpreted as the
 * number of seconds to wait.
 * NOTE(review): if the server sends an epoch timestamp in that header instead
 * of a duration, the wait will be extremely long — confirm the header format.
 *
 * @param url Address to request with GET.
 * @param DELAY_MS Pause (ms) applied after a successful response.
 * @param opts.maxRetries Extra attempts after the first one (default 3).
 * @param opts.retryBaseMs Base back-off step in ms (default 500).
 * @param opts.onRateInfo Called after every response with the raw
 *   `X-RateLimit-Remaining` and `X-RateLimit-Reset` header values.
 * @returns The response body text.
 * @throws HttpError For a non-retryable status, or a retryable one on the final attempt.
 */
async function fetchHtml(
  url: string,
  DELAY_MS: number,
  opts?: {
    maxRetries?: number;
    retryBaseMs?: number;
    onRateInfo?: (remaining: string | null, reset: string | null) => void;
  },
): Promise<HTMLString> {
  const maxRetries = opts?.maxRetries ?? 3;
  const retryBaseMs = opts?.retryBaseMs ?? 500;

  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    const isLastAttempt = attempt >= maxRetries;
    const linearBackoffMs = (attempt + 1) * retryBaseMs;

    try {
      const res = await fetch(url, {
        method: "GET",
        headers: {
          accept:
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
          "accept-language": "en-CA,en-US;q=0.9,en;q=0.8",
          "cache-control": "no-cache",
          "upgrade-insecure-requests": "1",
          "user-agent":
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
        },
      });

      const rateLimitRemaining = res.headers.get("X-RateLimit-Remaining");
      const rateLimitReset = res.headers.get("X-RateLimit-Reset");
      opts?.onRateInfo?.(rateLimitRemaining, rateLimitReset);

      if (res.ok) {
        const html = await res.text();
        // Respect per-request delay to keep at or under the caller's request rate.
        await delay(DELAY_MS);
        return html;
      }

      const isRateLimited = res.status === 429;
      const isServerError = res.status >= 500 && res.status < 600;

      // Fail fast on statuses a retry cannot fix, and on the final attempt so
      // the caller receives the real status instead of a generic error.
      if ((!isRateLimited && !isServerError) || isLastAttempt) {
        throw new HttpError(
          `Request failed with status ${res.status}`,
          res.status,
          url,
        );
      }

      let waitMs = linearBackoffMs;
      if (isRateLimited && rateLimitReset) {
        const resetSeconds = Number(rateLimitReset);
        if (Number.isFinite(resetSeconds)) {
          waitMs = Math.max(0, resetSeconds * 1000);
        }
      }
      await delay(waitMs);
    } catch (err) {
      // HttpError is a final verdict from the branch above; it must not be
      // swallowed and retried by the network-failure back-off below.
      if (err instanceof HttpError || isLastAttempt) throw err;
      await delay(linearBackoffMs);
    }
  }

  // Only reachable when the loop body never runs (e.g. negative maxRetries).
  throw new Error("Exhausted retries without response");
}
|
|
|
|
// ----------------------------- Parsing -----------------------------
|
|
|
|
/** Matches eBay's accessibility text ("Opens in a new window or tab") in any casing. */
const EBAY_UI_TEXT = /opens in a new (?:window|tab)/i;

/**
 * Matches text that starts like a price: an optional short currency prefix
 * ("C ", "CA ", "US ") followed by a currency symbol.
 */
const PRICE_LABEL_PREFIX = /^(?:[A-Z]{1,3}\s?)?[$£€¥]/u;

/** Words that mark a "$"-containing text as product copy rather than a price. */
const PRODUCT_WORDS = ["laptop", "computer", "intel", "core", "ram", "ssd"];

/** Turns a listing href into an absolute URL; relative links resolve against www.ebay.com. */
function resolveListingUrl(href: string): string {
  if (href.startsWith("http")) return href;
  // URL resolution handles "//host/path", "/path" and "path" alike.
  return new URL(href, "https://www.ebay.com").href;
}

/** Finds the element holding the listing title inside an item container. */
function findListingTitleElement(container: Element): Element | null {
  const heading = container.querySelector(
    'h3, [role="heading"], .s-item__title span',
  );
  if (heading) return heading;

  // Fallback: first span/div whose text looks like a title (medium length,
  // no price marker, no "item" boilerplate).
  for (const candidate of container.querySelectorAll("span, div")) {
    const text = candidate.textContent?.trim();
    if (
      text &&
      text.length > 10 &&
      text.length < 200 &&
      !text.includes("$") &&
      !text.includes("item")
    ) {
      return candidate;
    }
  }
  return null;
}

/** Cuts the title at eBay's "Opens in a new window or tab" text, if present. */
function stripEbayUiText(title: string): string {
  const cutAt = title.search(EBAY_UI_TEXT);
  return cutAt === -1 ? title : title.substring(0, cutAt).trim();
}

/** Finds the element most likely to hold the price inside an item container. */
function findListingPriceElement(container: Element): Element | null {
  const byClass = container.querySelector(
    '[class*="s-item__price"], .s-item__price, [class*="price"]',
  );
  if (byClass) return byClass;

  // Fallback: a short "$" text that does not look like product copy.
  for (const el of container.querySelectorAll("span, div, b, em, strong")) {
    const text = el.textContent?.trim();
    if (
      text?.includes("$") &&
      text.length < 100 &&
      !PRODUCT_WORDS.some((word) => text.includes(word)) &&
      !/\d{4}/.test(text) && // Avoid years like "2024"
      !text.includes('"') // Avoid measurements
    ) {
      return el;
    }
  }
  return null;
}

/** True when the element is rendered as a struck-through (original) price. */
function isStruckThroughPrice(el: Element): boolean {
  const tagName = el.tagName.toLowerCase();
  return (
    tagName === "s" ||
    tagName === "del" ||
    tagName === "strike" ||
    el.classList.contains("s-strikethrough") ||
    el.classList.contains("u-flStrike") ||
    el.closest("s, del, strike") !== null
  );
}

/**
 * For discounted items eBay shows both the original and the sale price.
 * Given the first price element found, returns the one that should be used:
 * the first price that is not struck through, else the last price, else the
 * input unchanged when fewer than two price texts are present.
 */
function preferCurrentPrice(priceElement: Element): Element {
  const scope =
    priceElement.closest('[class*="s-item__price"]') ??
    priceElement.parentElement;
  if (!scope) return priceElement;

  // Keep only elements whose text is an actual price, not a label.
  const prices: Element[] = [];
  for (const el of scope.querySelectorAll(
    '[class*="s-item__price"], span, b, em, strong, s, del, strike',
  )) {
    const text = el.textContent?.trim();
    if (
      text &&
      PRICE_LABEL_PREFIX.test(text) &&
      text.length < 50 &&
      !/\d{4}/.test(text)
    ) {
      prices.push(el);
    }
  }
  if (prices.length <= 1) return priceElement;

  const current = prices.find((el) => !isStruckThroughPrice(el));
  return current ?? prices[prices.length - 1] ?? priceElement;
}

/**
 * Parses eBay search-result HTML and extracts listings using DOM selectors.
 *
 * Every anchor whose href contains "itm/" is treated as a listing link. Its
 * title and price are looked up in an ancestor container (three levels up,
 * falling back to two). A link is skipped when it has no usable title
 * (shorter than 10 characters, or the "Shop on eBay" ad), no parsable price,
 * a title containing any exclusion, or — in strict mode — a title containing
 * none of the keywords. All title matching is case-insensitive.
 *
 * Each URL is emitted at most once, even when several anchors (e.g. image
 * and title links) point at the same listing.
 *
 * @param htmlString Search page markup.
 * @param keywords Terms of which at least one must appear in the title when `strictMode` is on.
 * @param exclusions Terms that reject a listing when found in its title.
 * @param strictMode Enables the keyword requirement.
 * @returns Listings in document order. Errors on a single link are logged
 *   with `console.warn` and do not abort the parse.
 */
function parseEbayListings(
  htmlString: HTMLString,
  keywords: string[],
  exclusions: string[],
  strictMode: boolean,
): ListingDetails[] {
  const { document } = parseHTML(htmlString);
  const results: ListingDetails[] = [];
  const seenUrls = new Set<string>();

  // Lower-case the filters once instead of once per link.
  const loweredExclusions = exclusions.map((term) => term.toLowerCase());
  const loweredKeywords = keywords.map((term) => term.toLowerCase());

  // Find all listing links by looking for eBay item URLs (/itm/)
  for (const linkElement of document.querySelectorAll('a[href*="itm/"]')) {
    try {
      const rawHref = linkElement.getAttribute("href");
      if (!rawHref) continue;

      const url = resolveListingUrl(rawHref);
      if (seenUrls.has(url)) continue;

      // Modern eBay nests the link several levels below the item container.
      const container =
        linkElement.parentElement?.parentElement?.parentElement ??
        linkElement.parentElement?.parentElement;
      if (!container) continue;

      const rawTitle = findListingTitleElement(container)?.textContent?.trim();
      if (!rawTitle) continue;

      const title = stripEbayUiText(rawTitle);
      // Too short to be a real title, or an irrelevant eBay ad.
      if (title.length < 10 || title === "Shop on eBay") continue;

      const firstPrice = findListingPriceElement(container);
      const priceElement = firstPrice ? preferCurrentPrice(firstPrice) : null;
      const priceText = priceElement?.textContent?.trim();
      if (!priceText) continue;

      // Parse price into cents and currency
      const priceInfo = parseEbayPrice(priceText);
      if (!priceInfo) continue;

      const loweredTitle = title.toLowerCase();

      // Apply exclusion filters
      if (loweredExclusions.some((term) => loweredTitle.includes(term))) {
        continue;
      }

      // Apply strict mode filter (title must contain at least one keyword)
      if (
        strictMode &&
        !loweredKeywords.some((term) => loweredTitle.includes(term))
      ) {
        continue;
      }

      // Mark the URL only on success so a later anchor for the same listing
      // can still produce it if this one lacked a usable title or price.
      seenUrls.add(url);
      results.push({
        url,
        title,
        listingPrice: {
          amountFormatted: priceText,
          cents: priceInfo.cents,
          currency: priceInfo.currency,
        },
        listingType: "OFFER", // eBay listings are typically offers
        listingStatus: "ACTIVE",
        address: null, // eBay doesn't typically show detailed addresses in search results
      });
    } catch (err) {
      console.warn(`Error parsing eBay listing: ${err}`);
    }
  }

  return results;
}
|
|
|
|
// ----------------------------- Main -----------------------------
|
|
|
|
/**
 * Runs one eBay.ca search and returns the listings parsed from the first
 * results page, filtered by title rules and price range.
 *
 * The request is a single GET with browser-like headers (no retries). After
 * the response is read, the function waits `1000 / REQUESTS_PER_SECOND` ms so
 * that sequential calls stay under the requested rate.
 *
 * @param SEARCH_QUERY Search terms; URL-encoded into the `_nkw` parameter.
 * @param REQUESTS_PER_SECOND Target request rate. Values that are not a
 *   positive finite number fall back to 1.
 * @param opts.minPrice Lowest accepted price, in cents (default 0).
 * @param opts.maxPrice Highest accepted price, in cents (default unlimited).
 * @param opts.strictMode When true, a title must contain at least one keyword.
 * @param opts.exclusions Terms that reject a listing when found in its title.
 * @param opts.keywords Terms used by strict mode (default: the search query).
 * @returns Matching listings; an empty array when the search request returns
 *   a non-OK HTTP status (the failure is logged with `console.error`).
 * @throws Any non-HTTP error (e.g. a network failure) is rethrown.
 */
export default async function fetchEbayItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND = 1,
  opts: {
    minPrice?: number;
    maxPrice?: number;
    strictMode?: boolean;
    exclusions?: string[];
    keywords?: string[];
  } = {},
): Promise<ListingDetails[]> {
  const {
    minPrice = 0,
    maxPrice = Number.MAX_SAFE_INTEGER,
    strictMode = false,
    exclusions = [],
    keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
  } = opts;

  // Build eBay search URL - use Canadian site and tracking parameters like real browser.
  // Parameters are joined with plain "&"; a "^&" (cmd.exe escaping) would
  // append a caret to every parameter value, including the search query.
  const searchUrl = `https://www.ebay.ca/sch/i.html?_nkw=${encodeURIComponent(SEARCH_QUERY)}&_sacat=0&_from=R40&_trksid=p4432023.m570.l1313`;

  // Guard against 0 / negative / NaN rates, which would otherwise produce an
  // Infinity or NaN timeout.
  const requestsPerSecond =
    Number.isFinite(REQUESTS_PER_SECOND) && REQUESTS_PER_SECOND > 0
      ? REQUESTS_PER_SECOND
      : 1;
  const DELAY_MS = Math.max(1, Math.floor(1000 / requestsPerSecond));

  console.log(`Fetching eBay search: ${searchUrl}`);

  try {
    // Use custom headers modeled after real browser requests to bypass bot detection
    const headers: Record<string, string> = {
      "User-Agent":
        "Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
      Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
      "Accept-Language": "en-US,en;q=0.5",
      "Accept-Encoding": "gzip, deflate, br",
      Referer: "https://www.ebay.ca/",
      Connection: "keep-alive",
      "Upgrade-Insecure-Requests": "1",
      "Sec-Fetch-Dest": "document",
      "Sec-Fetch-Mode": "navigate",
      "Sec-Fetch-Site": "same-origin",
      "Sec-Fetch-User": "?1",
      Priority: "u=0, i",
    };

    const res = await fetch(searchUrl, {
      method: "GET",
      headers,
    });

    if (!res.ok) {
      throw new HttpError(
        `Request failed with status ${res.status}`,
        res.status,
        searchUrl,
      );
    }

    const searchHtml = await res.text();
    // Respect per-request delay to keep at or under REQUESTS_PER_SECOND
    await delay(DELAY_MS);

    console.log("\nParsing eBay listings...");

    const listings = parseEbayListings(
      searchHtml,
      keywords,
      exclusions,
      strictMode,
    );

    // Filter by price range (additional safety check). The explicit type
    // check keeps a 0-cent price eligible when minPrice is 0, instead of
    // dropping it through truthiness.
    const filteredListings = listings.filter((listing) => {
      const cents = listing.listingPrice?.cents;
      return typeof cents === "number" && cents >= minPrice && cents <= maxPrice;
    });

    console.log(`Parsed ${filteredListings.length} eBay listings.`);
    return filteredListings;
  } catch (err) {
    if (err instanceof HttpError) {
      console.error(
        `Failed to fetch eBay search (${err.status}): ${err.message}`,
      );
      return [];
    }
    throw err;
  }
}
|