feat: add cookie support to kijiji scraper
Add an optional `cookies` search option to help bypass bot detection (HTTP 403 errors). Cookies can be supplied via that option, the KIJIJI_COOKIE environment variable, or a cookies/kijiji.json file, and may be given either as a JSON array or as a plain cookie string.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,11 @@ import cliProgress from "cli-progress";
|
||||
import { parseHTML } from "linkedom";
|
||||
import unidecode from "unidecode";
|
||||
import type { HTMLString } from "../types/common";
|
||||
import {
|
||||
type CookieConfig,
|
||||
formatCookiesForHeader,
|
||||
loadCookiesOptional,
|
||||
} from "../utils/cookies";
|
||||
import { formatCentsToCurrency } from "../utils/format";
|
||||
import {
|
||||
fetchHtml,
|
||||
@@ -13,6 +18,14 @@ import {
|
||||
ValidationError,
|
||||
} from "../utils/http";
|
||||
|
||||
// Kijiji cookie configuration, handed to loadCookiesOptional when fetching items.
// `envVar` names the environment variable and `filePath` the JSON file that can
// supply cookies when none are passed explicitly through the search options.
// NOTE(review): the exact lookup order and how `name` / `domain` are used are
// defined by CookieConfig in ../utils/cookies — confirm there.
|
||||
const KIJIJI_COOKIE_CONFIG: CookieConfig = {
|
||||
name: "Kijiji",
|
||||
domain: ".kijiji.ca",
|
||||
envVar: "KIJIJI_COOKIE",
|
||||
filePath: "./cookies/kijiji.json",
|
||||
};
|
||||
|
||||
// ----------------------------- Types -----------------------------
|
||||
|
||||
type SearchListing = {
|
||||
@@ -110,6 +123,7 @@ export interface SearchOptions {
|
||||
maxPages?: number; // Default: 5
|
||||
priceMin?: number;
|
||||
priceMax?: number;
|
||||
cookies?: string; // Optional: Cookie string or JSON (helps bypass bot detection)
|
||||
}
|
||||
|
||||
export interface ListingFetchOptions {
|
||||
@@ -691,6 +705,16 @@ export default async function fetchKijijiItems(
|
||||
) {
|
||||
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
|
||||
|
||||
// Load Kijiji cookies (optional - helps bypass bot detection)
|
||||
const cookies = await loadCookiesOptional(
|
||||
KIJIJI_COOKIE_CONFIG,
|
||||
searchOptions.cookies,
|
||||
);
|
||||
const cookieHeader =
|
||||
cookies.length > 0
|
||||
? formatCookiesForHeader(cookies, "www.kijiji.ca")
|
||||
: undefined;
|
||||
|
||||
// Set defaults for configuration
|
||||
const finalSearchOptions: Required<SearchOptions> = {
|
||||
location: searchOptions.location ?? 1700272, // Default to GTA
|
||||
@@ -701,6 +725,7 @@ export default async function fetchKijijiItems(
|
||||
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
|
||||
priceMin: searchOptions.priceMin as number,
|
||||
priceMax: searchOptions.priceMax as number,
|
||||
cookies: searchOptions.cookies ?? "",
|
||||
};
|
||||
|
||||
const finalListingOptions: Required<ListingFetchOptions> = {
|
||||
@@ -733,6 +758,7 @@ export default async function fetchKijijiItems(
|
||||
);
|
||||
}
|
||||
},
|
||||
headers: cookieHeader ? { cookie: cookieHeader } : undefined,
|
||||
});
|
||||
|
||||
const searchResults = parseSearch(searchHtml, BASE_URL);
|
||||
@@ -782,6 +808,7 @@ export default async function fetchKijijiItems(
|
||||
);
|
||||
}
|
||||
},
|
||||
headers: cookieHeader ? { cookie: cookieHeader } : undefined,
|
||||
});
|
||||
const parsed = await parseDetailedListing(
|
||||
html,
|
||||
|
||||
Reference in New Issue
Block a user