Compare commits

...

5 Commits

Author SHA1 Message Date
e4ab145d70 feat: add cookie support to kijiji scraper
Add optional cookie parameter to bypass bot detection (403 errors).
Cookies can be provided via parameter, KIJIJI_COOKIE env var, or
cookies/kijiji.json file. Supports both JSON array and string formats.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 19:29:13 -05:00
1dce0392e3 refactor: use shared cookie utility in ebay scraper
Replace inline cookie loading with shared utility functions.
Now supports both JSON array and cookie string formats.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 19:29:02 -05:00
251fcbb7d9 refactor: use shared cookie utility in facebook scraper
Replace inline cookie parsing with shared utility functions.
Maintains backward compatibility with existing exports.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 19:28:52 -05:00
9bc57d6b54 refactor: add shared cookie utility to core package
Move cookie parsing logic to a dedicated utility module that can be
shared across all scrapers. Supports both JSON array and cookie string
formats for all input sources (parameter, env var, file).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-23 19:28:44 -05:00
4a467c9f02 fix: support both json and string cookies for facebook 2026-01-23 19:00:51 -05:00
8 changed files with 314 additions and 238 deletions

View File

@@ -41,6 +41,7 @@ export async function kijijiRoute(req: Request): Promise<Response> {
maxPages,
priceMin,
priceMax,
cookies: reqUrl.searchParams.get("cookies") || undefined,
};
try {

View File

@@ -36,7 +36,8 @@ export {
} from "./scrapers/kijiji";
// Export shared types
export * from "./types/common";
// Export shared utilities
export * from "./utils/cookies";
export * from "./utils/delay";
export * from "./utils/format";
// Export shared utilities
export * from "./utils/http";

View File

@@ -1,6 +1,19 @@
import { parseHTML } from "linkedom";
import {
type CookieConfig,
formatCookiesForHeader,
loadCookiesOptional,
} from "../utils/cookies";
import { delay } from "../utils/delay";
// eBay cookie configuration passed to the shared cookie utilities.
// `name` is the label used in log messages, `domain` is the domain given to
// cookies supplied as a plain "name=value" string, and `envVar` / `filePath`
// name the fallback sources consulted when no cookies parameter is given.
const EBAY_COOKIE_CONFIG: CookieConfig = {
name: "eBay",
domain: ".ebay.ca",
envVar: "EBAY_COOKIE",
filePath: "./cookies/ebay.json",
};
// ----------------------------- Types -----------------------------
export interface EbayListingDetails {
@@ -323,54 +336,28 @@ function parseEbayListings(
/**
* Load eBay cookies with priority: URL param > ENV var > file
* @param cookiesSource - Optional cookie string from URL parameter (highest priority)
* @param cookiePath - Path to cookie file (default: ./cookies/ebay.json) (lowest priority)
* @returns Cookie string for HTTP header or undefined if no cookies found
* Uses shared cookie utility for consistent handling across all scrapers
*/
async function loadEbayCookies(
cookiesSource?: string,
cookiePath = "./cookies/ebay.json",
): Promise<string | undefined> {
// Priority 1: URL parameter (if provided)
if (cookiesSource?.trim()) {
console.log("Loaded eBay cookies from URL parameter");
return cookiesSource.trim();
}
const cookies = await loadCookiesOptional(EBAY_COOKIE_CONFIG, cookiesSource);
// Priority 2: Environment variable
const envCookies = process.env.EBAY_COOKIE;
if (envCookies?.trim()) {
console.log("Loaded eBay cookies from EBAY_COOKIE env var");
return envCookies.trim();
}
// Priority 3: Cookie file (fallback)
try {
const file = Bun.file(cookiePath);
if (await file.exists()) {
const content = await file.text();
const trimmed = content.trim();
if (trimmed) {
console.log(`Loaded eBay cookies from ${cookiePath}`);
return trimmed;
}
}
} catch (e) {
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
}
// No cookies found (eBay cookies are optional, just warn)
if (cookies.length === 0) {
console.warn(
"No eBay cookies found. eBay may block requests without valid session cookies.\n" +
"Provide cookies via (in priority order):\n" +
" 1. 'cookies' URL parameter (highest priority), or\n" +
" 2. EBAY_COOKIE environment variable, or\n" +
" 3. ./cookies/ebay.json file (lowest priority)\n" +
'Format: Cookie string like "name1=value1; name2=value2"',
'Format: JSON array or cookie string like "name1=value1; name2=value2"',
);
return undefined;
}
return formatCookiesForHeader(cookies, "www.ebay.ca");
}
// ----------------------------- Main -----------------------------
export default async function fetchEbayItems(
@@ -384,8 +371,7 @@ export default async function fetchEbayItems(
keywords?: string[];
buyItNowOnly?: boolean;
canadaOnly?: boolean;
cookies?: string; // Optional: Cookie string from URL parameter (highest priority)
cookiePath?: string; // Optional: Path to cookie file (default: ./cookies/ebay.json)
cookies?: string; // Optional: Cookie string or JSON (helps bypass bot detection)
} = {},
) {
const {
@@ -397,11 +383,10 @@ export default async function fetchEbayItems(
buyItNowOnly = true,
canadaOnly = true,
cookies: cookiesSource,
cookiePath,
} = opts;
// Load eBay cookies with priority: URL param > ENV var > file
const cookies = await loadEbayCookies(cookiesSource, cookiePath);
const cookies = await loadEbayCookies(cookiesSource);
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
const urlParams = new URLSearchParams({

View File

@@ -1,6 +1,13 @@
import cliProgress from "cli-progress";
import { parseHTML } from "linkedom";
import type { HTMLString } from "../types/common";
import {
type Cookie,
type CookieConfig,
ensureCookies,
formatCookiesForHeader,
parseCookieString,
} from "../utils/cookies";
import { delay } from "../utils/delay";
import { formatCentsToCurrency } from "../utils/format";
import { isRecord } from "../utils/http";
@@ -13,21 +20,13 @@ import { isRecord } from "../utils/http";
* This is by design to respect Facebook's authentication requirements.
*/
// ----------------------------- Types -----------------------------
interface Cookie {
name: string;
value: string;
domain: string;
path: string;
secure?: boolean;
httpOnly?: boolean;
sameSite?: "strict" | "lax" | "none" | "unspecified";
session?: boolean;
expirationDate?: number;
partitionKey?: Record<string, unknown>;
storeId?: string;
}
// Facebook cookie configuration passed to the shared cookie utilities.
// `name` is the label used in log messages, `domain` is the domain given to
// cookies supplied as a plain "name=value" string, and `envVar` / `filePath`
// name the fallback sources consulted when no cookies parameter is given.
const FACEBOOK_COOKIE_CONFIG: CookieConfig = {
name: "Facebook",
domain: ".facebook.com",
envVar: "FACEBOOK_COOKIE",
filePath: "./cookies/facebook.json",
};
interface FacebookAdNode {
node: {
@@ -203,179 +202,24 @@ export interface FacebookListingDetails {
// ----------------------------- Utilities -----------------------------
/**
* Load Facebook cookies from file or string
*/
async function loadFacebookCookies(
cookiesSource?: string,
cookiePath = "./cookies/facebook.json",
): Promise<Cookie[]> {
// First try to load from provided string parameter
if (cookiesSource) {
try {
const cookies = JSON.parse(cookiesSource);
if (Array.isArray(cookies)) {
return cookies.filter(
(cookie): cookie is Cookie =>
cookie &&
typeof cookie.name === "string" &&
typeof cookie.value === "string",
);
}
} catch (e) {
throw new Error(`Invalid cookies JSON provided: ${e}`);
}
}
// Try to load from specified path
try {
const cookiesPath = cookiePath;
const file = Bun.file(cookiesPath);
if (await file.exists()) {
const content = await file.text();
const cookies = JSON.parse(content);
if (Array.isArray(cookies)) {
return cookies.filter(
(cookie): cookie is Cookie =>
cookie &&
typeof cookie.name === "string" &&
typeof cookie.value === "string",
);
}
}
} catch (e) {
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
}
return [];
}
/**
* Parse Facebook cookie string into Cookie array format
* @deprecated Use parseCookieString from utils/cookies instead
*/
export function parseFacebookCookieString(cookieString: string): Cookie[] {
if (!cookieString || !cookieString.trim()) {
return [];
}
return cookieString
.split(";")
.map((pair) => pair.trim())
.filter((pair) => pair.includes("="))
.map((pair) => {
const [name, value] = pair.split("=", 2);
const trimmedName = name.trim();
const trimmedValue = value.trim();
// Skip empty names or values
if (!trimmedName || !trimmedValue) {
return null;
}
return {
name: trimmedName,
value: decodeURIComponent(trimmedValue),
domain: ".facebook.com",
path: "/",
secure: true,
httpOnly: false,
sameSite: "lax" as const,
expirationDate: undefined, // Session cookies
};
})
.filter((cookie): cookie is Cookie => cookie !== null);
return parseCookieString(cookieString, FACEBOOK_COOKIE_CONFIG.domain);
}
/**
* Load Facebook cookies with priority: URL param > ENV var > file
* @param cookiesSource - Optional cookie JSON string from URL parameter (highest priority)
* @param cookiePath - Path to cookie file (default: ./cookies/facebook.json) (lowest priority)
* @param _cookiePath - Deprecated, uses default path from config
*/
export async function ensureFacebookCookies(
cookiesSource?: string,
cookiePath = "./cookies/facebook.json",
_cookiePath?: string,
): Promise<Cookie[]> {
// Priority 1: URL parameter (if provided)
if (cookiesSource) {
try {
const cookies = await loadFacebookCookies(cookiesSource);
if (cookies.length > 0) {
console.log(
`Loaded ${cookies.length} Facebook cookies from URL parameter`,
);
return cookies;
}
} catch (e) {
console.warn(`Failed to parse cookies from URL parameter: ${e}`);
// Continue to next priority
}
}
// Priority 2: Environment variable
const cookieString = process.env.FACEBOOK_COOKIE;
if (cookieString?.trim()) {
const cookies = parseFacebookCookieString(cookieString);
if (cookies.length > 0) {
console.log(
`Loaded ${cookies.length} Facebook cookies from FACEBOOK_COOKIE env var`,
);
return cookies;
}
console.warn("FACEBOOK_COOKIE env var contains no valid cookies");
// Continue to next priority
}
// Priority 3: Cookie file (fallback)
try {
const existing = await loadFacebookCookies(undefined, cookiePath);
if (existing.length > 0) {
console.log(
`Loaded ${existing.length} Facebook cookies from ${cookiePath}`,
);
return existing;
}
} catch (e) {
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
}
// No cookies found from any source
throw new Error(
"No valid Facebook cookies found. Provide cookies via (in priority order):\n" +
" 1. 'cookies' URL parameter (highest priority), or\n" +
" 2. FACEBOOK_COOKIE environment variable, or\n" +
" 3. ./cookies/facebook.json file (lowest priority)\n" +
'Format: JSON array or cookie string like "name1=value1; name2=value2"',
);
}
/**
* Format cookies array into Cookie header string
*/
function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
const validCookies = cookies
.filter((cookie) => {
// Check if cookie applies to this domain
if (cookie.domain.startsWith(".")) {
// Domain cookie (applies to subdomains)
return (
domain.endsWith(cookie.domain.slice(1)) ||
domain === cookie.domain.slice(1)
);
}
// Host-only cookie
return cookie.domain === domain;
})
.filter((cookie) => {
// Check expiration
if (cookie.expirationDate && cookie.expirationDate < Date.now() / 1000) {
return false; // Expired
}
return true;
});
return validCookies
.map((cookie) => `${cookie.name}=${cookie.value}`)
.join("; ");
return ensureCookies(FACEBOOK_COOKIE_CONFIG, cookiesSource);
}
class HttpError extends Error {
@@ -1066,28 +910,13 @@ export default async function fetchFacebookItems(
export async function fetchFacebookItem(
itemId: string,
cookiesSource?: string,
cookiePath?: string,
_cookiePath?: string,
): Promise<FacebookListingDetails | null> {
// Load Facebook cookies - required for Facebook Marketplace access
let cookies: Cookie[];
if (cookiesSource) {
// Use provided cookie source (backward compatibility)
cookies = await loadFacebookCookies(cookiesSource);
} else {
// Auto-load from file or parse from env var
cookies = await ensureFacebookCookies(cookiePath);
}
if (cookies.length === 0) {
throw new Error(
"Facebook cookies are required for marketplace access. " +
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
);
}
const cookies = await ensureFacebookCookies(cookiesSource);
// Format cookies for HTTP header
const domain = "www.facebook.com";
const cookiesHeader = formatCookiesForHeader(cookies, domain);
const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com");
if (!cookiesHeader) {
throw new Error(
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",

View File

@@ -2,6 +2,11 @@ import cliProgress from "cli-progress";
import { parseHTML } from "linkedom";
import unidecode from "unidecode";
import type { HTMLString } from "../types/common";
import {
type CookieConfig,
formatCookiesForHeader,
loadCookiesOptional,
} from "../utils/cookies";
import { formatCentsToCurrency } from "../utils/format";
import {
fetchHtml,
@@ -13,6 +18,14 @@ import {
ValidationError,
} from "../utils/http";
// Kijiji cookie configuration passed to the shared cookie utilities.
// `name` is the label used in log messages, `domain` is the domain given to
// cookies supplied as a plain "name=value" string, and `envVar` / `filePath`
// name the fallback sources consulted when no cookies parameter is given.
const KIJIJI_COOKIE_CONFIG: CookieConfig = {
name: "Kijiji",
domain: ".kijiji.ca",
envVar: "KIJIJI_COOKIE",
filePath: "./cookies/kijiji.json",
};
// ----------------------------- Types -----------------------------
type SearchListing = {
@@ -110,6 +123,7 @@ export interface SearchOptions {
maxPages?: number; // Default: 5
priceMin?: number;
priceMax?: number;
cookies?: string; // Optional: Cookie string or JSON (helps bypass bot detection)
}
export interface ListingFetchOptions {
@@ -691,6 +705,16 @@ export default async function fetchKijijiItems(
) {
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
// Load Kijiji cookies (optional - helps bypass bot detection)
const cookies = await loadCookiesOptional(
KIJIJI_COOKIE_CONFIG,
searchOptions.cookies,
);
const cookieHeader =
cookies.length > 0
? formatCookiesForHeader(cookies, "www.kijiji.ca")
: undefined;
// Set defaults for configuration
const finalSearchOptions: Required<SearchOptions> = {
location: searchOptions.location ?? 1700272, // Default to GTA
@@ -701,6 +725,7 @@ export default async function fetchKijijiItems(
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
priceMin: searchOptions.priceMin as number,
priceMax: searchOptions.priceMax as number,
cookies: searchOptions.cookies ?? "",
};
const finalListingOptions: Required<ListingFetchOptions> = {
@@ -733,6 +758,7 @@ export default async function fetchKijijiItems(
);
}
},
headers: cookieHeader ? { cookie: cookieHeader } : undefined,
});
const searchResults = parseSearch(searchHtml, BASE_URL);
@@ -782,6 +808,7 @@ export default async function fetchKijijiItems(
);
}
},
headers: cookieHeader ? { cookie: cookieHeader } : undefined,
});
const parsed = await parseDetailedListing(
html,

View File

@@ -0,0 +1,227 @@
/**
* Shared cookie handling utilities for marketplace scrapers
*/
/**
 * A single cookie record as produced and consumed by the cookie utilities in
 * this module.
 *
 * NOTE(review): the field set resembles a browser-extension cookie export
 * (e.g. chrome.cookies) — confirm against the export tool actually used.
 * Only `name`, `value`, `domain` and `expirationDate` are read by the code in
 * this module; the remaining fields are carried through untouched.
 */
export interface Cookie {
// Cookie name; emitted as the left-hand side of `name=value` in the header.
name: string;
// Cookie value; emitted verbatim as the right-hand side of `name=value`.
value: string;
// A leading "." marks a domain cookie that also applies to subdomains;
// without it the cookie is treated as host-only (exact host match).
domain: string;
// Set to "/" for cookies parsed from a cookie string; not consulted when
// building the Cookie header in this module.
path: string;
secure?: boolean;
httpOnly?: boolean;
sameSite?: "strict" | "lax" | "none" | "unspecified";
session?: boolean;
// Compared against `Date.now() / 1000`, i.e. interpreted as Unix time in
// seconds. A missing value means the cookie is never treated as expired.
expirationDate?: number;
partitionKey?: Record<string, unknown>;
storeId?: string;
}
/**
 * Per-marketplace settings that tell the cookie loaders where to look for
 * cookies and how to label them.
 */
export interface CookieConfig {
/** Name used in log and error messages (e.g., "Facebook", "Kijiji") */
name: string;
/**
 * Domain assigned to cookies parsed from a "name=value" cookie string
 * (e.g., ".facebook.com", ".kijiji.ca"). Cookies parsed from a JSON array
 * keep whatever domain the JSON supplies.
 */
domain: string;
/** Environment variable name looked up in `process.env` (e.g., "FACEBOOK_COOKIE") */
envVar: string;
/** Path to cookie file, read via `Bun.file` (e.g., "./cookies/facebook.json") */
filePath: string;
}
/**
 * Parse a Cookie-header style string into Cookie records.
 * Supports format: "name1=value1; name2=value2"
 *
 * Each `;`-separated pair is split at its FIRST `=`, so values that contain
 * `=` themselves (e.g. base64 padding) are preserved. Pairs without `=`, or
 * with an empty name or empty value, are skipped.
 *
 * Values are percent-decoded. A value that is not valid percent-encoding
 * (for example a literal `%` as in "100%") is kept as-is instead of letting
 * `decodeURIComponent` throw and abort the whole parse.
 *
 * @param cookieString - Raw cookie string; empty or whitespace-only yields `[]`.
 * @param domain - Domain assigned to every parsed cookie.
 * @returns Parsed cookies with `path: "/"`, `secure: true`, `httpOnly: false`,
 *   `sameSite: "lax"` and no expiration.
 */
export function parseCookieString(
  cookieString: string,
  domain: string,
): Cookie[] {
  if (!cookieString?.trim()) {
    return [];
  }

  const cookies: Cookie[] = [];

  for (const rawPair of cookieString.split(";")) {
    const pair = rawPair.trim();
    const separatorIndex = pair.indexOf("=");
    if (separatorIndex === -1) {
      continue;
    }

    const name = pair.slice(0, separatorIndex).trim();
    const rawValue = pair.slice(separatorIndex + 1).trim();
    if (!name || !rawValue) {
      continue;
    }

    // decodeURIComponent throws URIError on malformed escapes; one bad value
    // must not discard every other cookie in the string.
    let value: string;
    try {
      value = decodeURIComponent(rawValue);
    } catch {
      value = rawValue;
    }

    cookies.push({
      name,
      value,
      domain,
      path: "/",
      secure: true,
      httpOnly: false,
      sameSite: "lax",
      expirationDate: undefined,
    });
  }

  return cookies;
}
/**
 * Parse a JSON document into Cookie records.
 * Supports format: [{"name": "foo", "value": "bar", ...}]
 *
 * Only array entries that are truthy and carry string `name` and `value`
 * properties are kept; no other field is validated. A JSON value that is not
 * an array yields `[]`.
 *
 * @param jsonString - JSON text to parse.
 * @returns The accepted entries, in their original order.
 * @throws SyntaxError (from `JSON.parse`) when `jsonString` is not valid JSON.
 */
export function parseJsonCookies(jsonString: string): Cookie[] {
  const decoded: unknown = JSON.parse(jsonString);
  const accepted: Cookie[] = [];

  if (Array.isArray(decoded)) {
    for (const entry of decoded) {
      const hasNameAndValue =
        entry &&
        typeof entry.name === "string" &&
        typeof entry.value === "string";
      if (hasNameAndValue) {
        accepted.push(entry);
      }
    }
  }

  return accepted;
}
/**
 * Parse cookies from text whose format is not known in advance.
 *
 * The input is first treated as a JSON array; if that yields at least one
 * cookie, those are returned. Otherwise (invalid JSON, or JSON that produced
 * no cookies) the input is parsed as a "name1=value1; name2=value2" string.
 *
 * @param input - Cookie text in either supported format.
 * @param defaultDomain - Domain given to cookies parsed from the string format.
 * @returns The parsed cookies, possibly empty.
 */
export function parseCookiesAuto(
  input: string,
  defaultDomain: string,
): Cookie[] {
  let fromJson: Cookie[] = [];
  try {
    fromJson = parseJsonCookies(input);
  } catch {
    // Not JSON — fall back to the cookie string format below.
  }

  return fromJson.length > 0
    ? fromJson
    : parseCookieString(input, defaultDomain);
}
/**
 * Read cookies from a file on disk.
 *
 * The file's trimmed text may be either a JSON array or a
 * "name1=value1; name2=value2" string. A file that does not exist yields `[]`.
 *
 * @param filePath - Path of the cookie file.
 * @param defaultDomain - Domain given to cookies parsed from the string format.
 * @returns The cookies found in the file, possibly empty.
 */
export async function loadCookiesFromFile(
  filePath: string,
  defaultDomain: string,
): Promise<Cookie[]> {
  const cookieFile = Bun.file(filePath);
  const isPresent = await cookieFile.exists();

  if (isPresent) {
    const rawText = await cookieFile.text();
    return parseCookiesAuto(rawText.trim(), defaultDomain);
  }

  return [];
}
/**
 * Format cookies array into Cookie header string for HTTP requests.
 *
 * A cookie is included when it applies to `targetDomain` and has not expired:
 *  - A cookie domain with a leading "." matches the bare domain itself and
 *    any subdomain of it. The match is done on a label boundary, so
 *    ".ebay.ca" matches "ebay.ca" and "www.ebay.ca" but NOT "notebay.ca".
 *  - Any other cookie domain is host-only and must equal `targetDomain`.
 *  - Domain comparison is case-insensitive.
 *  - Cookies without a string `domain` are skipped rather than causing a
 *    TypeError (JSON-sourced cookies are only validated for name/value).
 *  - A cookie is expired when `expirationDate` is set (non-zero) and lies
 *    before the current time in Unix seconds.
 *
 * @param cookies - Candidate cookies.
 * @param targetDomain - Host the request is being sent to (e.g. "www.ebay.ca").
 * @returns "name1=value1; name2=value2", or "" when no cookie qualifies.
 */
export function formatCookiesForHeader(
  cookies: Cookie[],
  targetDomain: string,
): string {
  const host = targetDomain.toLowerCase();
  const nowSeconds = Date.now() / 1000;

  return cookies
    .filter((cookie) => {
      if (typeof cookie.domain !== "string") {
        return false;
      }
      const cookieDomain = cookie.domain.toLowerCase();

      if (cookieDomain.startsWith(".")) {
        // Domain cookie: exact bare-domain match, or a subdomain of it.
        // The "." prefix is required on the suffix so that unrelated hosts
        // sharing a textual suffix are not matched.
        const bareDomain = cookieDomain.slice(1);
        return host === bareDomain || host.endsWith(`.${bareDomain}`);
      }

      // Host-only cookie
      return cookieDomain === host;
    })
    .filter(
      (cookie) =>
        !(cookie.expirationDate && cookie.expirationDate < nowSeconds),
    )
    .map((cookie) => `${cookie.name}=${cookie.value}`)
    .join("; ");
}
/**
 * Load cookies with priority: URL param > ENV var > file
 * Supports both JSON array and cookie string formats for all sources
 *
 * Each source is tried in turn and the first one that yields at least one
 * cookie wins. A source that is empty, whitespace-only, yields no valid
 * cookies, or makes the parser throw is logged and skipped so that the
 * lower-priority sources still get a chance.
 *
 * @param config - Marketplace settings (log label, default domain, env var
 *   name and cookie file path).
 * @param cookiesSource - Optional cookie text supplied by the caller
 *   (highest priority).
 * @returns A non-empty array of cookies.
 * @throws Error when none of the three sources yields a valid cookie.
 */
export async function ensureCookies(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  // Priority 1: URL/API parameter (if provided)
  if (cookiesSource?.trim()) {
    try {
      const cookies = parseCookiesAuto(cookiesSource, config.domain);
      if (cookies.length > 0) {
        console.log(
          `Loaded ${cookies.length} ${config.name} cookies from parameter`,
        );
        return cookies;
      }
      console.warn(
        `${config.name} cookies parameter provided but no valid cookies extracted`,
      );
    } catch (e) {
      // A malformed parameter must not prevent the env var / file fallback.
      console.warn(
        `Failed to parse ${config.name} cookies from parameter: ${e}`,
      );
    }
  }

  // Priority 2: Environment variable
  const envValue = process.env[config.envVar];
  if (envValue?.trim()) {
    try {
      const cookies = parseCookiesAuto(envValue, config.domain);
      if (cookies.length > 0) {
        console.log(
          `Loaded ${cookies.length} ${config.name} cookies from ${config.envVar} env var`,
        );
        return cookies;
      }
      console.warn(`${config.envVar} env var contains no valid cookies`);
    } catch (e) {
      // A malformed env var must not prevent the file fallback.
      console.warn(`Failed to parse cookies from ${config.envVar} env var: ${e}`);
    }
  }

  // Priority 3: Cookie file (fallback)
  try {
    const cookies = await loadCookiesFromFile(config.filePath, config.domain);
    if (cookies.length > 0) {
      console.log(
        `Loaded ${cookies.length} ${config.name} cookies from ${config.filePath}`,
      );
      return cookies;
    }
  } catch (e) {
    console.warn(`Could not load cookies from ${config.filePath}: ${e}`);
  }

  // No cookies found from any source
  throw new Error(
    `No valid ${config.name} cookies found. Provide cookies via (in priority order):\n` +
      ` 1. 'cookies' parameter (highest priority), or\n` +
      ` 2. ${config.envVar} environment variable, or\n` +
      ` 3. ${config.filePath} file (lowest priority)\n` +
      'Format: JSON array or cookie string like "name1=value1; name2=value2"',
  );
}
/**
 * Non-throwing variant of `ensureCookies` for scrapers where cookies are
 * optional: any failure to obtain cookies results in an empty array.
 *
 * @param config - Marketplace settings forwarded to `ensureCookies`.
 * @param cookiesSource - Optional cookie text forwarded to `ensureCookies`.
 * @returns The loaded cookies, or `[]` when `ensureCookies` rejects.
 */
export async function loadCookiesOptional(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  let loaded: Cookie[] = [];
  try {
    loaded = await ensureCookies(config, cookiesSource);
  } catch {
    // Deliberate best-effort: no cookies available is not an error here.
  }
  return loaded;
}

View File

@@ -115,6 +115,7 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
params.append("priceMin", args.priceMin.toString());
if (args.priceMax)
params.append("priceMax", args.priceMax.toString());
if (args.cookies) params.append("cookies", args.cookies);
console.log(
`[MCP] Calling Kijiji API: ${API_BASE_URL}/kijiji?${params.toString()}`,

View File

@@ -52,6 +52,11 @@ export const tools = [
type: "number",
description: "Maximum price in cents",
},
cookies: {
type: "string",
description:
"Optional: Kijiji session cookies to bypass bot detection (JSON array or 'name1=value1; name2=value2')",
},
},
required: ["query"],
},