feat: implement cookie priority hierarchy (URL param > env var > file) for Facebook and eBay scrapers

This commit is contained in:
2026-01-23 15:32:07 -05:00
parent df0c528535
commit cf9784a565
8 changed files with 262 additions and 85 deletions

View File

@@ -101,13 +101,26 @@ function parseEbayListings(
}
// Find the container - go up several levels to find the item container
// Modern eBay uses complex nested structures
let container = linkElement.parentElement?.parentElement?.parentElement;
if (!container) {
// Try a different level
container = linkElement.parentElement?.parentElement;
// Modern eBay uses complex nested structures (often 5-10 levels deep)
let container: Element | null = linkElement;
let depth = 0;
const maxDepth = 15;
// Walk up until we find a list item or results container
while (container && depth < maxDepth) {
const classes = container.className || "";
if (
classes.includes("s-item") ||
classes.includes("srp-results") ||
container.tagName === "LI"
) {
break;
}
container = container.parentElement;
depth++;
}
if (!container) continue;
if (!container || depth >= maxDepth) continue;
// Extract title - look for heading or title-related elements near the link
// Modern eBay often uses h3, span, or div with text content near the link
@@ -168,8 +181,9 @@ function parseEbayListings(
if (title === "Shop on eBay" || title.length < 3) continue;
// Extract price - look for eBay's price classes, preferring sale/discount prices
// Updated for 2026 eBay HTML structure
let priceElement = container.querySelector(
'[class*="s-item__price"], .s-item__price, [class*="price"]',
'[class*="s-item__price"], .s-item__price, .s-card__attribute-row, [class*="price"]',
);
// If no direct price class, look for spans containing $ (but not titles)
@@ -305,6 +319,58 @@ function parseEbayListings(
return results;
}
// ----------------------------- Cookie Loading -----------------------------
/**
 * Resolve the eBay cookie string to send with search requests.
 *
 * Sources are tried in priority order; the first one that is non-blank after
 * trimming wins and is returned (trimmed, otherwise verbatim):
 *   1. `cookiesSource` - e.g. the value of a `cookies` URL parameter
 *   2. the `EBAY_COOKIE` environment variable
 *   3. the text content of the file at `cookiePath`
 *
 * Cookies are optional for eBay: when no source yields a value the function
 * logs a warning describing how to supply them and resolves to `undefined`
 * instead of throwing. Errors raised while reading the file are logged and
 * treated the same as a missing file.
 *
 * @param cookiesSource - Optional cookie string (highest priority)
 * @param cookiePath - Path to the cookie file (default: ./cookies/ebay.json) (lowest priority)
 * @returns Cookie string for the HTTP `Cookie` header, or `undefined` if none was found
 */
async function loadEbayCookies(
  cookiesSource?: string,
  cookiePath = "./cookies/ebay.json",
): Promise<string | undefined> {
  // Priority 1: explicitly supplied cookie string (blank input falls through)
  const fromParam = cookiesSource?.trim();
  if (fromParam) {
    console.log("Loaded eBay cookies from URL parameter");
    return fromParam;
  }

  // Priority 2: environment variable
  const fromEnv = process.env.EBAY_COOKIE?.trim();
  if (fromEnv) {
    console.log("Loaded eBay cookies from EBAY_COOKIE env var");
    return fromEnv;
  }

  // Priority 3: cookie file (fallback)
  // NOTE(review): the default path ends in ".json" but the file content is
  // used verbatim as the header value - confirm the file is expected to hold
  // a plain "name=value; ..." string rather than a JSON document.
  try {
    const file = Bun.file(cookiePath);
    if (await file.exists()) {
      const fromFile = (await file.text()).trim();
      if (fromFile) {
        console.log(`Loaded eBay cookies from ${cookiePath}`);
        return fromFile;
      }
    }
  } catch (e) {
    // Best effort: an unreadable file is treated like a missing one.
    console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
  }

  // Nothing found. Report the path that was actually probed so the hint stays
  // accurate when the caller overrides `cookiePath`.
  console.warn(
    "No eBay cookies found. eBay may block requests without valid session cookies.\n" +
      "Provide cookies via (in priority order):\n" +
      "  1. 'cookies' URL parameter (highest priority), or\n" +
      "  2. EBAY_COOKIE environment variable, or\n" +
      `  3. ${cookiePath} file (lowest priority)\n` +
      'Format: Cookie string like "name1=value1; name2=value2"',
  );
  return undefined;
}
// ----------------------------- Main -----------------------------
export default async function fetchEbayItems(
@@ -318,6 +384,8 @@ export default async function fetchEbayItems(
keywords?: string[];
buyItNowOnly?: boolean;
canadaOnly?: boolean;
cookies?: string; // Optional: Cookie string from URL parameter (highest priority)
cookiePath?: string; // Optional: Path to cookie file (default: ./cookies/ebay.json)
} = {},
) {
const {
@@ -328,8 +396,13 @@ export default async function fetchEbayItems(
keywords = [SEARCH_QUERY], // Default to search query if no keywords provided
buyItNowOnly = true,
canadaOnly = true,
cookies: cookiesSource,
cookiePath,
} = opts;
// Load eBay cookies with priority: URL param > ENV var > file
const cookies = await loadEbayCookies(cookiesSource, cookiePath);
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
const urlParams = new URLSearchParams({
_nkw: SEARCH_QUERY,
@@ -358,7 +431,7 @@ export default async function fetchEbayItems(
"Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100101 Firefox/141.0",
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Encoding": "gzip, deflate, br, zstd",
Referer: "https://www.ebay.ca/",
Connection: "keep-alive",
"Upgrade-Insecure-Requests": "1",
@@ -369,6 +442,11 @@ export default async function fetchEbayItems(
Priority: "u=0, i",
};
// Add cookies if available (helps bypass bot detection)
if (cookies) {
headers.Cookie = cookies;
}
const res = await fetch(searchUrl, {
method: "GET",
headers,

View File

@@ -287,50 +287,65 @@ export function parseFacebookCookieString(cookieString: string): Cookie[] {
}
/**
* Ensure Facebook cookies are available, parsing from env var if needed
* Load Facebook cookies with priority: URL param > ENV var > file
* @param cookiesSource - Optional cookie JSON string from URL parameter (highest priority)
* @param cookiePath - Path to cookie file (default: ./cookies/facebook.json) (lowest priority)
*/
export async function ensureFacebookCookies(
cookiesSource?: string,
cookiePath = "./cookies/facebook.json",
): Promise<Cookie[]> {
// First try to load existing cookies
// Priority 1: URL parameter (if provided)
if (cookiesSource) {
try {
const cookies = await loadFacebookCookies(cookiesSource);
if (cookies.length > 0) {
console.log(
`Loaded ${cookies.length} Facebook cookies from URL parameter`,
);
return cookies;
}
} catch (e) {
console.warn(`Failed to parse cookies from URL parameter: ${e}`);
// Continue to next priority
}
}
// Priority 2: Environment variable
const cookieString = process.env.FACEBOOK_COOKIE;
if (cookieString?.trim()) {
const cookies = parseFacebookCookieString(cookieString);
if (cookies.length > 0) {
console.log(
`Loaded ${cookies.length} Facebook cookies from FACEBOOK_COOKIE env var`,
);
return cookies;
}
console.warn("FACEBOOK_COOKIE env var contains no valid cookies");
// Continue to next priority
}
// Priority 3: Cookie file (fallback)
try {
const existing = await loadFacebookCookies(undefined, cookiePath);
if (existing.length > 0) {
console.log(
`Loaded ${existing.length} Facebook cookies from ${cookiePath}`,
);
return existing;
}
} catch {
// File doesn't exist or is invalid, continue to check env var
} catch (e) {
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
}
// Try to parse from environment variable
const cookieString = process.env.FACEBOOK_COOKIE;
if (!cookieString || !cookieString.trim()) {
throw new Error(
"No valid Facebook cookies found. Either:\n" +
" 1. Set FACEBOOK_COOKIE environment variable with cookie string, or\n" +
" 2. Create ./cookies/facebook.json manually with cookie array",
);
}
// Parse the cookie string
const cookies = parseFacebookCookieString(cookieString);
if (cookies.length === 0) {
throw new Error(
"FACEBOOK_COOKIE environment variable contains no valid cookies. " +
'Expected format: "name1=value1; name2=value2;"',
);
}
// Save to file for future use
try {
await Bun.write(cookiePath, JSON.stringify(cookies, null, 2));
console.log(`Saved ${cookies.length} Facebook cookies to ${cookiePath}`);
} catch (error) {
console.warn(`Could not save cookies to ${cookiePath}: ${error}`);
// Continue anyway, we have the cookies in memory
}
return cookies;
// No cookies found from any source
throw new Error(
"No valid Facebook cookies found. Provide cookies via (in priority order):\n" +
" 1. 'cookies' URL parameter (highest priority), or\n" +
" 2. FACEBOOK_COOKIE environment variable, or\n" +
" 3. ./cookies/facebook.json file (lowest priority)\n" +
'Format: JSON array or cookie string like "name1=value1; name2=value2"',
);
}
/**
@@ -964,22 +979,8 @@ export default async function fetchFacebookItems(
cookiesSource?: string,
cookiePath?: string,
) {
// Load Facebook cookies - required for Facebook Marketplace access
let cookies: Cookie[];
if (cookiesSource) {
// Use provided cookie source (backward compatibility)
cookies = await loadFacebookCookies(cookiesSource);
} else {
// Auto-load from file or parse from env var
cookies = await ensureFacebookCookies(cookiePath);
}
if (cookies.length === 0) {
throw new Error(
"Facebook cookies are required for marketplace access. " +
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
);
}
// Load Facebook cookies with priority: URL param > ENV var > file
const cookies = await ensureFacebookCookies(cookiesSource, cookiePath);
// Format cookies for HTTP header
const domain = "www.facebook.com";