Compare commits
5 Commits
f944d319c2
...
e4ab145d70
| Author | SHA1 | Date | |
|---|---|---|---|
| e4ab145d70 | |||
| 1dce0392e3 | |||
| 251fcbb7d9 | |||
| 9bc57d6b54 | |||
| 4a467c9f02 |
@@ -41,6 +41,7 @@ export async function kijijiRoute(req: Request): Promise<Response> {
|
|||||||
maxPages,
|
maxPages,
|
||||||
priceMin,
|
priceMin,
|
||||||
priceMax,
|
priceMax,
|
||||||
|
cookies: reqUrl.searchParams.get("cookies") || undefined,
|
||||||
};
|
};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|||||||
@@ -36,7 +36,8 @@ export {
|
|||||||
} from "./scrapers/kijiji";
|
} from "./scrapers/kijiji";
|
||||||
// Export shared types
|
// Export shared types
|
||||||
export * from "./types/common";
|
export * from "./types/common";
|
||||||
|
// Export shared utilities
|
||||||
|
export * from "./utils/cookies";
|
||||||
export * from "./utils/delay";
|
export * from "./utils/delay";
|
||||||
export * from "./utils/format";
|
export * from "./utils/format";
|
||||||
// Export shared utilities
|
|
||||||
export * from "./utils/http";
|
export * from "./utils/http";
|
||||||
|
|||||||
@@ -1,6 +1,19 @@
|
|||||||
import { parseHTML } from "linkedom";
|
import { parseHTML } from "linkedom";
|
||||||
|
import {
|
||||||
|
type CookieConfig,
|
||||||
|
formatCookiesForHeader,
|
||||||
|
loadCookiesOptional,
|
||||||
|
} from "../utils/cookies";
|
||||||
import { delay } from "../utils/delay";
|
import { delay } from "../utils/delay";
|
||||||
|
|
||||||
|
// eBay cookie configuration
|
||||||
|
const EBAY_COOKIE_CONFIG: CookieConfig = {
|
||||||
|
name: "eBay",
|
||||||
|
domain: ".ebay.ca",
|
||||||
|
envVar: "EBAY_COOKIE",
|
||||||
|
filePath: "./cookies/ebay.json",
|
||||||
|
};
|
||||||
|
|
||||||
// ----------------------------- Types -----------------------------
|
// ----------------------------- Types -----------------------------
|
||||||
|
|
||||||
export interface EbayListingDetails {
|
export interface EbayListingDetails {
|
||||||
@@ -323,52 +336,26 @@ function parseEbayListings(
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Load eBay cookies with priority: URL param > ENV var > file
|
* Load eBay cookies with priority: URL param > ENV var > file
|
||||||
* @param cookiesSource - Optional cookie string from URL parameter (highest priority)
|
* Uses shared cookie utility for consistent handling across all scrapers
|
||||||
* @param cookiePath - Path to cookie file (default: ./cookies/ebay.json) (lowest priority)
|
|
||||||
* @returns Cookie string for HTTP header or undefined if no cookies found
|
|
||||||
*/
|
*/
|
||||||
async function loadEbayCookies(
|
async function loadEbayCookies(
|
||||||
cookiesSource?: string,
|
cookiesSource?: string,
|
||||||
cookiePath = "./cookies/ebay.json",
|
|
||||||
): Promise<string | undefined> {
|
): Promise<string | undefined> {
|
||||||
// Priority 1: URL parameter (if provided)
|
const cookies = await loadCookiesOptional(EBAY_COOKIE_CONFIG, cookiesSource);
|
||||||
if (cookiesSource?.trim()) {
|
|
||||||
console.log("Loaded eBay cookies from URL parameter");
|
if (cookies.length === 0) {
|
||||||
return cookiesSource.trim();
|
console.warn(
|
||||||
|
"No eBay cookies found. eBay may block requests without valid session cookies.\n" +
|
||||||
|
"Provide cookies via (in priority order):\n" +
|
||||||
|
" 1. 'cookies' URL parameter (highest priority), or\n" +
|
||||||
|
" 2. EBAY_COOKIE environment variable, or\n" +
|
||||||
|
" 3. ./cookies/ebay.json file (lowest priority)\n" +
|
||||||
|
'Format: JSON array or cookie string like "name1=value1; name2=value2"',
|
||||||
|
);
|
||||||
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Priority 2: Environment variable
|
return formatCookiesForHeader(cookies, "www.ebay.ca");
|
||||||
const envCookies = process.env.EBAY_COOKIE;
|
|
||||||
if (envCookies?.trim()) {
|
|
||||||
console.log("Loaded eBay cookies from EBAY_COOKIE env var");
|
|
||||||
return envCookies.trim();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Priority 3: Cookie file (fallback)
|
|
||||||
try {
|
|
||||||
const file = Bun.file(cookiePath);
|
|
||||||
if (await file.exists()) {
|
|
||||||
const content = await file.text();
|
|
||||||
const trimmed = content.trim();
|
|
||||||
if (trimmed) {
|
|
||||||
console.log(`Loaded eBay cookies from ${cookiePath}`);
|
|
||||||
return trimmed;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// No cookies found (eBay cookies are optional, just warn)
|
|
||||||
console.warn(
|
|
||||||
"No eBay cookies found. eBay may block requests without valid session cookies.\n" +
|
|
||||||
"Provide cookies via (in priority order):\n" +
|
|
||||||
" 1. 'cookies' URL parameter (highest priority), or\n" +
|
|
||||||
" 2. EBAY_COOKIE environment variable, or\n" +
|
|
||||||
" 3. ./cookies/ebay.json file (lowest priority)\n" +
|
|
||||||
'Format: Cookie string like "name1=value1; name2=value2"',
|
|
||||||
);
|
|
||||||
return undefined;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ----------------------------- Main -----------------------------
|
// ----------------------------- Main -----------------------------
|
||||||
@@ -384,8 +371,7 @@ export default async function fetchEbayItems(
|
|||||||
keywords?: string[];
|
keywords?: string[];
|
||||||
buyItNowOnly?: boolean;
|
buyItNowOnly?: boolean;
|
||||||
canadaOnly?: boolean;
|
canadaOnly?: boolean;
|
||||||
cookies?: string; // Optional: Cookie string from URL parameter (highest priority)
|
cookies?: string; // Optional: Cookie string or JSON (helps bypass bot detection)
|
||||||
cookiePath?: string; // Optional: Path to cookie file (default: ./cookies/ebay.json)
|
|
||||||
} = {},
|
} = {},
|
||||||
) {
|
) {
|
||||||
const {
|
const {
|
||||||
@@ -397,11 +383,10 @@ export default async function fetchEbayItems(
|
|||||||
buyItNowOnly = true,
|
buyItNowOnly = true,
|
||||||
canadaOnly = true,
|
canadaOnly = true,
|
||||||
cookies: cookiesSource,
|
cookies: cookiesSource,
|
||||||
cookiePath,
|
|
||||||
} = opts;
|
} = opts;
|
||||||
|
|
||||||
// Load eBay cookies with priority: URL param > ENV var > file
|
// Load eBay cookies with priority: URL param > ENV var > file
|
||||||
const cookies = await loadEbayCookies(cookiesSource, cookiePath);
|
const cookies = await loadEbayCookies(cookiesSource);
|
||||||
|
|
||||||
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
|
// Build eBay search URL - use Canadian site, Buy It Now filter, and Canada-only preference
|
||||||
const urlParams = new URLSearchParams({
|
const urlParams = new URLSearchParams({
|
||||||
|
|||||||
@@ -1,6 +1,13 @@
|
|||||||
import cliProgress from "cli-progress";
|
import cliProgress from "cli-progress";
|
||||||
import { parseHTML } from "linkedom";
|
import { parseHTML } from "linkedom";
|
||||||
import type { HTMLString } from "../types/common";
|
import type { HTMLString } from "../types/common";
|
||||||
|
import {
|
||||||
|
type Cookie,
|
||||||
|
type CookieConfig,
|
||||||
|
ensureCookies,
|
||||||
|
formatCookiesForHeader,
|
||||||
|
parseCookieString,
|
||||||
|
} from "../utils/cookies";
|
||||||
import { delay } from "../utils/delay";
|
import { delay } from "../utils/delay";
|
||||||
import { formatCentsToCurrency } from "../utils/format";
|
import { formatCentsToCurrency } from "../utils/format";
|
||||||
import { isRecord } from "../utils/http";
|
import { isRecord } from "../utils/http";
|
||||||
@@ -13,21 +20,13 @@ import { isRecord } from "../utils/http";
|
|||||||
* This is by design to respect Facebook's authentication requirements.
|
* This is by design to respect Facebook's authentication requirements.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// ----------------------------- Types -----------------------------
|
// Facebook cookie configuration
|
||||||
|
const FACEBOOK_COOKIE_CONFIG: CookieConfig = {
|
||||||
interface Cookie {
|
name: "Facebook",
|
||||||
name: string;
|
domain: ".facebook.com",
|
||||||
value: string;
|
envVar: "FACEBOOK_COOKIE",
|
||||||
domain: string;
|
filePath: "./cookies/facebook.json",
|
||||||
path: string;
|
};
|
||||||
secure?: boolean;
|
|
||||||
httpOnly?: boolean;
|
|
||||||
sameSite?: "strict" | "lax" | "none" | "unspecified";
|
|
||||||
session?: boolean;
|
|
||||||
expirationDate?: number;
|
|
||||||
partitionKey?: Record<string, unknown>;
|
|
||||||
storeId?: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface FacebookAdNode {
|
interface FacebookAdNode {
|
||||||
node: {
|
node: {
|
||||||
@@ -203,179 +202,24 @@ export interface FacebookListingDetails {
|
|||||||
|
|
||||||
// ----------------------------- Utilities -----------------------------
|
// ----------------------------- Utilities -----------------------------
|
||||||
|
|
||||||
/**
|
|
||||||
* Load Facebook cookies from file or string
|
|
||||||
*/
|
|
||||||
async function loadFacebookCookies(
|
|
||||||
cookiesSource?: string,
|
|
||||||
cookiePath = "./cookies/facebook.json",
|
|
||||||
): Promise<Cookie[]> {
|
|
||||||
// First try to load from provided string parameter
|
|
||||||
if (cookiesSource) {
|
|
||||||
try {
|
|
||||||
const cookies = JSON.parse(cookiesSource);
|
|
||||||
if (Array.isArray(cookies)) {
|
|
||||||
return cookies.filter(
|
|
||||||
(cookie): cookie is Cookie =>
|
|
||||||
cookie &&
|
|
||||||
typeof cookie.name === "string" &&
|
|
||||||
typeof cookie.value === "string",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
throw new Error(`Invalid cookies JSON provided: ${e}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to load from specified path
|
|
||||||
try {
|
|
||||||
const cookiesPath = cookiePath;
|
|
||||||
const file = Bun.file(cookiesPath);
|
|
||||||
if (await file.exists()) {
|
|
||||||
const content = await file.text();
|
|
||||||
const cookies = JSON.parse(content);
|
|
||||||
if (Array.isArray(cookies)) {
|
|
||||||
return cookies.filter(
|
|
||||||
(cookie): cookie is Cookie =>
|
|
||||||
cookie &&
|
|
||||||
typeof cookie.name === "string" &&
|
|
||||||
typeof cookie.value === "string",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse Facebook cookie string into Cookie array format
|
* Parse Facebook cookie string into Cookie array format
|
||||||
|
* @deprecated Use parseCookieString from utils/cookies instead
|
||||||
*/
|
*/
|
||||||
export function parseFacebookCookieString(cookieString: string): Cookie[] {
|
export function parseFacebookCookieString(cookieString: string): Cookie[] {
|
||||||
if (!cookieString || !cookieString.trim()) {
|
return parseCookieString(cookieString, FACEBOOK_COOKIE_CONFIG.domain);
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
return cookieString
|
|
||||||
.split(";")
|
|
||||||
.map((pair) => pair.trim())
|
|
||||||
.filter((pair) => pair.includes("="))
|
|
||||||
.map((pair) => {
|
|
||||||
const [name, value] = pair.split("=", 2);
|
|
||||||
const trimmedName = name.trim();
|
|
||||||
const trimmedValue = value.trim();
|
|
||||||
|
|
||||||
// Skip empty names or values
|
|
||||||
if (!trimmedName || !trimmedValue) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
name: trimmedName,
|
|
||||||
value: decodeURIComponent(trimmedValue),
|
|
||||||
domain: ".facebook.com",
|
|
||||||
path: "/",
|
|
||||||
secure: true,
|
|
||||||
httpOnly: false,
|
|
||||||
sameSite: "lax" as const,
|
|
||||||
expirationDate: undefined, // Session cookies
|
|
||||||
};
|
|
||||||
})
|
|
||||||
.filter((cookie): cookie is Cookie => cookie !== null);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Load Facebook cookies with priority: URL param > ENV var > file
|
* Load Facebook cookies with priority: URL param > ENV var > file
|
||||||
* @param cookiesSource - Optional cookie JSON string from URL parameter (highest priority)
|
* @param cookiesSource - Optional cookie JSON string from URL parameter (highest priority)
|
||||||
* @param cookiePath - Path to cookie file (default: ./cookies/facebook.json) (lowest priority)
|
* @param _cookiePath - Deprecated, uses default path from config
|
||||||
*/
|
*/
|
||||||
export async function ensureFacebookCookies(
|
export async function ensureFacebookCookies(
|
||||||
cookiesSource?: string,
|
cookiesSource?: string,
|
||||||
cookiePath = "./cookies/facebook.json",
|
_cookiePath?: string,
|
||||||
): Promise<Cookie[]> {
|
): Promise<Cookie[]> {
|
||||||
// Priority 1: URL parameter (if provided)
|
return ensureCookies(FACEBOOK_COOKIE_CONFIG, cookiesSource);
|
||||||
if (cookiesSource) {
|
|
||||||
try {
|
|
||||||
const cookies = await loadFacebookCookies(cookiesSource);
|
|
||||||
if (cookies.length > 0) {
|
|
||||||
console.log(
|
|
||||||
`Loaded ${cookies.length} Facebook cookies from URL parameter`,
|
|
||||||
);
|
|
||||||
return cookies;
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
console.warn(`Failed to parse cookies from URL parameter: ${e}`);
|
|
||||||
// Continue to next priority
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Priority 2: Environment variable
|
|
||||||
const cookieString = process.env.FACEBOOK_COOKIE;
|
|
||||||
if (cookieString?.trim()) {
|
|
||||||
const cookies = parseFacebookCookieString(cookieString);
|
|
||||||
if (cookies.length > 0) {
|
|
||||||
console.log(
|
|
||||||
`Loaded ${cookies.length} Facebook cookies from FACEBOOK_COOKIE env var`,
|
|
||||||
);
|
|
||||||
return cookies;
|
|
||||||
}
|
|
||||||
console.warn("FACEBOOK_COOKIE env var contains no valid cookies");
|
|
||||||
// Continue to next priority
|
|
||||||
}
|
|
||||||
|
|
||||||
// Priority 3: Cookie file (fallback)
|
|
||||||
try {
|
|
||||||
const existing = await loadFacebookCookies(undefined, cookiePath);
|
|
||||||
if (existing.length > 0) {
|
|
||||||
console.log(
|
|
||||||
`Loaded ${existing.length} Facebook cookies from ${cookiePath}`,
|
|
||||||
);
|
|
||||||
return existing;
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
console.warn(`Could not load cookies from ${cookiePath}: ${e}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// No cookies found from any source
|
|
||||||
throw new Error(
|
|
||||||
"No valid Facebook cookies found. Provide cookies via (in priority order):\n" +
|
|
||||||
" 1. 'cookies' URL parameter (highest priority), or\n" +
|
|
||||||
" 2. FACEBOOK_COOKIE environment variable, or\n" +
|
|
||||||
" 3. ./cookies/facebook.json file (lowest priority)\n" +
|
|
||||||
'Format: JSON array or cookie string like "name1=value1; name2=value2"',
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Format cookies array into Cookie header string
|
|
||||||
*/
|
|
||||||
function formatCookiesForHeader(cookies: Cookie[], domain: string): string {
|
|
||||||
const validCookies = cookies
|
|
||||||
.filter((cookie) => {
|
|
||||||
// Check if cookie applies to this domain
|
|
||||||
if (cookie.domain.startsWith(".")) {
|
|
||||||
// Domain cookie (applies to subdomains)
|
|
||||||
return (
|
|
||||||
domain.endsWith(cookie.domain.slice(1)) ||
|
|
||||||
domain === cookie.domain.slice(1)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
// Host-only cookie
|
|
||||||
return cookie.domain === domain;
|
|
||||||
})
|
|
||||||
.filter((cookie) => {
|
|
||||||
// Check expiration
|
|
||||||
if (cookie.expirationDate && cookie.expirationDate < Date.now() / 1000) {
|
|
||||||
return false; // Expired
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
});
|
|
||||||
|
|
||||||
return validCookies
|
|
||||||
.map((cookie) => `${cookie.name}=${cookie.value}`)
|
|
||||||
.join("; ");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
class HttpError extends Error {
|
class HttpError extends Error {
|
||||||
@@ -1066,28 +910,13 @@ export default async function fetchFacebookItems(
|
|||||||
export async function fetchFacebookItem(
|
export async function fetchFacebookItem(
|
||||||
itemId: string,
|
itemId: string,
|
||||||
cookiesSource?: string,
|
cookiesSource?: string,
|
||||||
cookiePath?: string,
|
_cookiePath?: string,
|
||||||
): Promise<FacebookListingDetails | null> {
|
): Promise<FacebookListingDetails | null> {
|
||||||
// Load Facebook cookies - required for Facebook Marketplace access
|
// Load Facebook cookies - required for Facebook Marketplace access
|
||||||
let cookies: Cookie[];
|
const cookies = await ensureFacebookCookies(cookiesSource);
|
||||||
if (cookiesSource) {
|
|
||||||
// Use provided cookie source (backward compatibility)
|
|
||||||
cookies = await loadFacebookCookies(cookiesSource);
|
|
||||||
} else {
|
|
||||||
// Auto-load from file or parse from env var
|
|
||||||
cookies = await ensureFacebookCookies(cookiePath);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cookies.length === 0) {
|
|
||||||
throw new Error(
|
|
||||||
"Facebook cookies are required for marketplace access. " +
|
|
||||||
"Please provide cookies via 'cookies' parameter or create ./cookies/facebook.json file with valid Facebook session cookies.",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Format cookies for HTTP header
|
// Format cookies for HTTP header
|
||||||
const domain = "www.facebook.com";
|
const cookiesHeader = formatCookiesForHeader(cookies, "www.facebook.com");
|
||||||
const cookiesHeader = formatCookiesForHeader(cookies, domain);
|
|
||||||
if (!cookiesHeader) {
|
if (!cookiesHeader) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
|
"No valid Facebook cookies found. Please check that cookies are not expired and apply to facebook.com domain.",
|
||||||
|
|||||||
@@ -2,6 +2,11 @@ import cliProgress from "cli-progress";
|
|||||||
import { parseHTML } from "linkedom";
|
import { parseHTML } from "linkedom";
|
||||||
import unidecode from "unidecode";
|
import unidecode from "unidecode";
|
||||||
import type { HTMLString } from "../types/common";
|
import type { HTMLString } from "../types/common";
|
||||||
|
import {
|
||||||
|
type CookieConfig,
|
||||||
|
formatCookiesForHeader,
|
||||||
|
loadCookiesOptional,
|
||||||
|
} from "../utils/cookies";
|
||||||
import { formatCentsToCurrency } from "../utils/format";
|
import { formatCentsToCurrency } from "../utils/format";
|
||||||
import {
|
import {
|
||||||
fetchHtml,
|
fetchHtml,
|
||||||
@@ -13,6 +18,14 @@ import {
|
|||||||
ValidationError,
|
ValidationError,
|
||||||
} from "../utils/http";
|
} from "../utils/http";
|
||||||
|
|
||||||
|
// Kijiji cookie configuration
|
||||||
|
const KIJIJI_COOKIE_CONFIG: CookieConfig = {
|
||||||
|
name: "Kijiji",
|
||||||
|
domain: ".kijiji.ca",
|
||||||
|
envVar: "KIJIJI_COOKIE",
|
||||||
|
filePath: "./cookies/kijiji.json",
|
||||||
|
};
|
||||||
|
|
||||||
// ----------------------------- Types -----------------------------
|
// ----------------------------- Types -----------------------------
|
||||||
|
|
||||||
type SearchListing = {
|
type SearchListing = {
|
||||||
@@ -110,6 +123,7 @@ export interface SearchOptions {
|
|||||||
maxPages?: number; // Default: 5
|
maxPages?: number; // Default: 5
|
||||||
priceMin?: number;
|
priceMin?: number;
|
||||||
priceMax?: number;
|
priceMax?: number;
|
||||||
|
cookies?: string; // Optional: Cookie string or JSON (helps bypass bot detection)
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ListingFetchOptions {
|
export interface ListingFetchOptions {
|
||||||
@@ -691,6 +705,16 @@ export default async function fetchKijijiItems(
|
|||||||
) {
|
) {
|
||||||
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
|
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
|
||||||
|
|
||||||
|
// Load Kijiji cookies (optional - helps bypass bot detection)
|
||||||
|
const cookies = await loadCookiesOptional(
|
||||||
|
KIJIJI_COOKIE_CONFIG,
|
||||||
|
searchOptions.cookies,
|
||||||
|
);
|
||||||
|
const cookieHeader =
|
||||||
|
cookies.length > 0
|
||||||
|
? formatCookiesForHeader(cookies, "www.kijiji.ca")
|
||||||
|
: undefined;
|
||||||
|
|
||||||
// Set defaults for configuration
|
// Set defaults for configuration
|
||||||
const finalSearchOptions: Required<SearchOptions> = {
|
const finalSearchOptions: Required<SearchOptions> = {
|
||||||
location: searchOptions.location ?? 1700272, // Default to GTA
|
location: searchOptions.location ?? 1700272, // Default to GTA
|
||||||
@@ -701,6 +725,7 @@ export default async function fetchKijijiItems(
|
|||||||
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
|
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
|
||||||
priceMin: searchOptions.priceMin as number,
|
priceMin: searchOptions.priceMin as number,
|
||||||
priceMax: searchOptions.priceMax as number,
|
priceMax: searchOptions.priceMax as number,
|
||||||
|
cookies: searchOptions.cookies ?? "",
|
||||||
};
|
};
|
||||||
|
|
||||||
const finalListingOptions: Required<ListingFetchOptions> = {
|
const finalListingOptions: Required<ListingFetchOptions> = {
|
||||||
@@ -733,6 +758,7 @@ export default async function fetchKijijiItems(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
headers: cookieHeader ? { cookie: cookieHeader } : undefined,
|
||||||
});
|
});
|
||||||
|
|
||||||
const searchResults = parseSearch(searchHtml, BASE_URL);
|
const searchResults = parseSearch(searchHtml, BASE_URL);
|
||||||
@@ -782,6 +808,7 @@ export default async function fetchKijijiItems(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
headers: cookieHeader ? { cookie: cookieHeader } : undefined,
|
||||||
});
|
});
|
||||||
const parsed = await parseDetailedListing(
|
const parsed = await parseDetailedListing(
|
||||||
html,
|
html,
|
||||||
|
|||||||
227
packages/core/src/utils/cookies.ts
Normal file
227
packages/core/src/utils/cookies.ts
Normal file
@@ -0,0 +1,227 @@
|
|||||||
|
/**
|
||||||
|
* Shared cookie handling utilities for marketplace scrapers
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * A single cookie record shared by the marketplace scrapers.
 *
 * When cookies are parsed from a JSON array only `name` and `value` are
 * checked to be strings; every other field is carried along as provided.
 */
export interface Cookie {
  /** Cookie name, emitted as the left-hand side of `name=value`. */
  name: string;
  /** Cookie value, emitted as the right-hand side of `name=value`. */
  value: string;
  /**
   * Domain the cookie belongs to. A leading "." marks a domain cookie that
   * also applies to subdomains; without it the host must match exactly.
   */
  domain: string;
  path: string;
  secure?: boolean;
  httpOnly?: boolean;
  sameSite?: "strict" | "lax" | "none" | "unspecified";
  session?: boolean;
  /** Expiry in seconds since the Unix epoch (compared against `Date.now() / 1000`). */
  expirationDate?: number;
  partitionKey?: Record<string, unknown>;
  storeId?: string;
}
|
||||||
|
|
||||||
|
/**
 * Per-marketplace settings that tell the cookie loaders where to look for
 * cookies and how to label them in log output.
 */
export interface CookieConfig {
  /** Marketplace name used in log and error messages (e.g., "Facebook", "Kijiji"). */
  name: string;
  /** Domain assigned to cookies parsed from a plain cookie string (e.g., ".facebook.com"). */
  domain: string;
  /** Name of the environment variable that may hold cookies (e.g., "FACEBOOK_COOKIE"). */
  envVar: string;
  /** Path of the fallback cookie file (e.g., "./cookies/facebook.json"). */
  filePath: string;
}
|
||||||
|
|
||||||
|
/**
 * Parse a cookie-header style string into Cookie records.
 *
 * Supports the format `"name1=value1; name2=value2"`. Each pair is split on
 * its first "=", so values that themselves contain "=" are preserved. Pairs
 * without "=", or with an empty name or empty value, are skipped.
 *
 * Values are percent-decoded. A value that is not valid percent-encoding
 * (for example `100%`) is kept verbatim instead of aborting the whole parse.
 *
 * @param cookieString - Raw cookie string; empty or whitespace-only yields `[]`.
 * @param domain - Domain assigned to every parsed cookie.
 * @returns Parsed cookies with path "/", `secure: true`, `httpOnly: false`,
 *          `sameSite: "lax"` and no expiration date.
 */
export function parseCookieString(
  cookieString: string,
  domain: string,
): Cookie[] {
  if (!cookieString?.trim()) {
    return [];
  }

  const cookies: Cookie[] = [];

  for (const rawPair of cookieString.split(";")) {
    const pair = rawPair.trim();
    const separator = pair.indexOf("=");
    if (separator === -1) {
      continue;
    }

    const name = pair.slice(0, separator).trim();
    const rawValue = pair.slice(separator + 1).trim();
    if (!name || !rawValue) {
      continue;
    }

    // decodeURIComponent throws URIError on malformed escapes; one bad value
    // must not discard every other cookie in the string.
    let value: string;
    try {
      value = decodeURIComponent(rawValue);
    } catch {
      value = rawValue;
    }

    cookies.push({
      name,
      value,
      domain,
      path: "/",
      secure: true,
      httpOnly: false,
      sameSite: "lax",
      expirationDate: undefined,
    });
  }

  return cookies;
}
|
||||||
|
|
||||||
|
/**
 * Parse a JSON array of cookie objects into Cookie records.
 *
 * Supports the format `[{"name": "foo", "value": "bar", ...}]`.
 *
 * Only `name` and `value` are validated (both must be strings); entries that
 * fail the check are dropped. Other fields such as `domain` and `path` are
 * NOT validated here, so callers should not assume they are present.
 *
 * @param jsonString - JSON text to parse.
 * @returns The valid entries, or `[]` when the JSON is not an array.
 * @throws SyntaxError when `jsonString` is not valid JSON.
 */
export function parseJsonCookies(jsonString: string): Cookie[] {
  const parsed: unknown = JSON.parse(jsonString);
  if (!Array.isArray(parsed)) {
    return [];
  }

  return parsed.filter(
    (entry): entry is Cookie =>
      Boolean(entry) &&
      typeof entry.name === "string" &&
      typeof entry.value === "string",
  );
}
|
||||||
|
|
||||||
|
/**
 * Parse cookies from a string of unknown format.
 *
 * The input is first tried as a JSON array; if that fails to parse or yields
 * no valid cookies, it is parsed as a `"name1=value1; name2=value2"` string.
 *
 * JSON entries are only guaranteed to carry `name` and `value`, so a missing
 * (or empty / non-string) `domain` is filled with `defaultDomain` and a
 * missing `path` with "/". Without this, later domain matching would
 * dereference an undefined `domain`.
 *
 * @param input - Raw cookie text (JSON array or cookie string).
 * @param defaultDomain - Domain used for cookie-string cookies and for JSON
 *                        cookies that do not specify one.
 * @returns Parsed cookies, or `[]` when nothing valid could be extracted.
 */
export function parseCookiesAuto(
  input: string,
  defaultDomain: string,
): Cookie[] {
  let jsonCookies: Cookie[] = [];
  try {
    jsonCookies = parseJsonCookies(input);
  } catch {
    // Not JSON; fall through to the cookie-string format below.
  }

  if (jsonCookies.length > 0) {
    return jsonCookies.map((cookie) => ({
      ...cookie,
      domain:
        typeof cookie.domain === "string" && cookie.domain !== ""
          ? cookie.domain
          : defaultDomain,
      path: typeof cookie.path === "string" ? cookie.path : "/",
    }));
  }

  return parseCookieString(input, defaultDomain);
}
|
||||||
|
|
||||||
|
/**
 * Load cookies from a file on disk.
 *
 * The file content is trimmed and handed to `parseCookiesAuto`, so the file
 * may hold either a JSON array or a `"name1=value1; name2=value2"` string.
 *
 * @param filePath - Path of the cookie file.
 * @param defaultDomain - Domain passed through to `parseCookiesAuto`.
 * @returns Parsed cookies, or `[]` when the file does not exist.
 */
export async function loadCookiesFromFile(
  filePath: string,
  defaultDomain: string,
): Promise<Cookie[]> {
  const file = Bun.file(filePath);
  if (!(await file.exists())) {
    return [];
  }

  const content = await file.text();
  return parseCookiesAuto(content.trim(), defaultDomain);
}
|
||||||
|
|
||||||
|
/**
 * Build the value of an HTTP `Cookie` header for a target host.
 *
 * A cookie is included when it applies to `targetDomain` and has not expired:
 * - A domain starting with "." matches the bare domain itself and any
 *   subdomain of it. The match requires a dot boundary, so ".ebay.ca"
 *   matches "ebay.ca" and "www.ebay.ca" but not "notebay.ca".
 * - Any other domain is host-only and must equal `targetDomain`.
 * - Domains are compared case-insensitively.
 * - Cookies whose `domain` is not a string are skipped rather than throwing,
 *   since JSON-sourced cookies are only validated for `name` and `value`.
 * - A cookie with a truthy `expirationDate` (seconds since the Unix epoch)
 *   earlier than the current time is dropped.
 *
 * @param cookies - Candidate cookies.
 * @param targetDomain - Host the request is sent to (e.g., "www.ebay.ca").
 * @returns `"name1=value1; name2=value2"`, or `""` when no cookie qualifies.
 */
export function formatCookiesForHeader(
  cookies: Cookie[],
  targetDomain: string,
): string {
  const host = targetDomain.toLowerCase();
  const nowSeconds = Date.now() / 1000;

  const appliesToHost = (cookie: Cookie): boolean => {
    if (typeof cookie.domain !== "string") {
      return false;
    }
    const cookieDomain = cookie.domain.toLowerCase();
    if (cookieDomain.startsWith(".")) {
      // cookieDomain still carries its leading dot, which enforces the label
      // boundary for the subdomain case.
      return host === cookieDomain.slice(1) || host.endsWith(cookieDomain);
    }
    return cookieDomain === host;
  };

  const isUnexpired = (cookie: Cookie): boolean =>
    !cookie.expirationDate || cookie.expirationDate >= nowSeconds;

  return cookies
    .filter((cookie) => appliesToHost(cookie) && isUnexpired(cookie))
    .map((cookie) => `${cookie.name}=${cookie.value}`)
    .join("; ");
}
|
||||||
|
|
||||||
|
/**
 * Load cookies for a marketplace, trying each source in priority order:
 * `cookiesSource` parameter > environment variable > cookie file.
 *
 * Every source accepts either a JSON array or a
 * `"name1=value1; name2=value2"` string. A source that is absent, blank,
 * malformed, or yields no valid cookies is reported with a warning where
 * applicable and the next source is tried, so one bad source never prevents
 * a lower-priority one from being used.
 *
 * @param config - Marketplace name, cookie domain, env var name and file path.
 * @param cookiesSource - Optional raw cookie text from the caller (highest priority).
 * @returns The cookies from the first source that yields at least one cookie.
 * @throws Error when no source yields any valid cookie.
 */
export async function ensureCookies(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  // Parses an in-memory source; a parse failure is logged and treated as
  // "no cookies" so the caller falls through to the next priority.
  const parseSource = (raw: string, label: string): Cookie[] => {
    try {
      return parseCookiesAuto(raw, config.domain);
    } catch (e) {
      console.warn(
        `Failed to parse ${config.name} cookies from ${label}: ${e}`,
      );
      return [];
    }
  };

  // Priority 1: URL/API parameter (if provided and not blank)
  if (cookiesSource?.trim()) {
    const cookies = parseSource(cookiesSource, "parameter");
    if (cookies.length > 0) {
      console.log(
        `Loaded ${cookies.length} ${config.name} cookies from parameter`,
      );
      return cookies;
    }
    console.warn(
      `${config.name} cookies parameter provided but no valid cookies extracted`,
    );
  }

  // Priority 2: Environment variable
  const envValue = process.env[config.envVar];
  if (envValue?.trim()) {
    const cookies = parseSource(envValue, `${config.envVar} env var`);
    if (cookies.length > 0) {
      console.log(
        `Loaded ${cookies.length} ${config.name} cookies from ${config.envVar} env var`,
      );
      return cookies;
    }
    console.warn(`${config.envVar} env var contains no valid cookies`);
  }

  // Priority 3: Cookie file (fallback)
  try {
    const cookies = await loadCookiesFromFile(config.filePath, config.domain);
    if (cookies.length > 0) {
      console.log(
        `Loaded ${cookies.length} ${config.name} cookies from ${config.filePath}`,
      );
      return cookies;
    }
  } catch (e) {
    console.warn(`Could not load cookies from ${config.filePath}: ${e}`);
  }

  // No cookies found from any source
  throw new Error(
    `No valid ${config.name} cookies found. Provide cookies via (in priority order):\n` +
      `  1. 'cookies' parameter (highest priority), or\n` +
      `  2. ${config.envVar} environment variable, or\n` +
      `  3. ${config.filePath} file (lowest priority)\n` +
      'Format: JSON array or cookie string like "name1=value1; name2=value2"',
  );
}
|
||||||
|
|
||||||
|
/**
 * Non-throwing variant of `ensureCookies` for scrapers where cookies are
 * optional.
 *
 * Any error raised by `ensureCookies` — including its "no valid cookies
 * found" error — is deliberately swallowed and reported as an empty result.
 *
 * @param config - Marketplace cookie configuration.
 * @param cookiesSource - Optional raw cookie text from the caller.
 * @returns The loaded cookies, or `[]` when none could be loaded.
 */
export async function loadCookiesOptional(
  config: CookieConfig,
  cookiesSource?: string,
): Promise<Cookie[]> {
  try {
    return await ensureCookies(config, cookiesSource);
  } catch {
    // Best-effort by design: callers treat "no cookies" as a normal outcome.
    return [];
  }
}
|
||||||
@@ -115,6 +115,7 @@ export async function handleMcpRequest(req: Request): Promise<Response> {
|
|||||||
params.append("priceMin", args.priceMin.toString());
|
params.append("priceMin", args.priceMin.toString());
|
||||||
if (args.priceMax)
|
if (args.priceMax)
|
||||||
params.append("priceMax", args.priceMax.toString());
|
params.append("priceMax", args.priceMax.toString());
|
||||||
|
if (args.cookies) params.append("cookies", args.cookies);
|
||||||
|
|
||||||
console.log(
|
console.log(
|
||||||
`[MCP] Calling Kijiji API: ${API_BASE_URL}/kijiji?${params.toString()}`,
|
`[MCP] Calling Kijiji API: ${API_BASE_URL}/kijiji?${params.toString()}`,
|
||||||
|
|||||||
@@ -52,6 +52,11 @@ export const tools = [
|
|||||||
type: "number",
|
type: "number",
|
||||||
description: "Maximum price in cents",
|
description: "Maximum price in cents",
|
||||||
},
|
},
|
||||||
|
cookies: {
|
||||||
|
type: "string",
|
||||||
|
description:
|
||||||
|
"Optional: Kijiji session cookies to bypass bot detection (JSON array or 'name1=value1; name2=value2')",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
required: ["query"],
|
required: ["query"],
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user