From 1ee41fb34615e08c61723ccfb44972d6b775828f Mon Sep 17 00:00:00 2001 From: Dmytro Stanchiev Date: Wed, 22 Apr 2026 23:23:31 -0400 Subject: [PATCH] feat: add unstable mode to scraper results --- packages/core/src/scrapers/ebay.ts | 15 ++- packages/core/src/scrapers/facebook.ts | 28 +++-- packages/core/src/scrapers/kijiji.ts | 14 ++- packages/core/test/ebay-core.test.ts | 45 ++++++++ packages/core/test/facebook-core.test.ts | 138 +++++++++++++++++++++++ packages/core/test/kijiji-core.test.ts | 136 +++++++++++++++++++++- 6 files changed, 364 insertions(+), 12 deletions(-) diff --git a/packages/core/src/scrapers/ebay.ts b/packages/core/src/scrapers/ebay.ts index b7fae76..aa6cabc 100644 --- a/packages/core/src/scrapers/ebay.ts +++ b/packages/core/src/scrapers/ebay.ts @@ -1,4 +1,6 @@ import { parseHTML } from "linkedom"; +import type { UnstableListingModeOptions } from "../types/common"; +import { classifyUnstableListings } from "../utils/unstable"; import { type CookieConfig, ensureCookies, @@ -362,7 +364,16 @@ export default async function fetchEbayItems( buyItNowOnly?: boolean; canadaOnly?: boolean; } = {}, + unstableMode: UnstableListingModeOptions = {}, ) { + const finalizeResults = (listings: EbayListingDetails[]) => { + if (!unstableMode.hideUnstableResults) { + return listings; + } + + return classifyUnstableListings(listings); + }; + const { minPrice = 0, maxPrice = Number.MAX_SAFE_INTEGER, @@ -452,13 +463,13 @@ export default async function fetchEbayItems( }); console.log(`Parsed ${filteredListings.length} eBay listings.`); - return filteredListings; + return finalizeResults(filteredListings); } catch (err) { if (err instanceof HttpError) { console.error( `Failed to fetch eBay search (${err.status}): ${err.message}`, ); - return []; + return finalizeResults([]); } throw err; } diff --git a/packages/core/src/scrapers/facebook.ts b/packages/core/src/scrapers/facebook.ts index b177dda..187884c 100644 --- a/packages/core/src/scrapers/facebook.ts +++ b/packages/core/src/scrapers/facebook.ts @@ -1,6 +1,7 @@ import cliProgress from "cli-progress"; import { parseHTML } from "linkedom"; -import type { HTMLString } from "../types/common"; +import type { HTMLString, UnstableListingModeOptions } from "../types/common"; +import { classifyUnstableListings } from "../utils/unstable"; import { type Cookie, type CookieConfig, @@ -1065,7 +1066,20 @@ export default async function fetchFacebookItems( REQUESTS_PER_SECOND = 1, LOCATION = "toronto", MAX_ITEMS = 25, + unstableMode: UnstableListingModeOptions = {}, ) { + const finalizeResults = (listings: FacebookListingDetails[]) => { + if (!unstableMode.hideUnstableResults) { + return listings.slice(0, MAX_ITEMS); + } + + const classified = classifyUnstableListings(listings); + return { + results: classified.results.slice(0, MAX_ITEMS), + unstableResults: classified.unstableResults, + }; + }; + const cookies = await ensureFacebookCookies(); // Format cookies for HTTP header @@ -1114,7 +1128,7 @@ export default async function fetchFacebookItems( "This might indicate invalid or expired cookies. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.", ); } - return []; + return finalizeResults([]); } throw err; } @@ -1122,25 +1136,25 @@ export default async function fetchFacebookItems( const classification = classifyFacebookResponse(searchHtml, searchResponseUrl); if (classification.authGated) { console.warn("Facebook marketplace search redirected to login. Cookies may be expired."); - return []; + return finalizeResults([]); } if (classification.unavailable) { console.warn("Facebook marketplace search returned an unavailable route."); - return []; + return finalizeResults([]); } if (classification.kind !== "search") { console.warn( `Facebook marketplace search returned unexpected route kind: ${classification.kind}.`, ); - return []; + return finalizeResults([]); } const ads = extractFacebookMarketplaceData(searchHtml); if (!ads || ads.length === 0) { console.warn("No ads parsed from Facebook marketplace page."); - return []; + return finalizeResults([]); } console.log(`\nFound ${ads.length} raw ads. Processing...`); @@ -1164,7 +1178,7 @@ export default async function fetchFacebookItems( progressBar.stop(); console.log(`\nParsed ${pricedItems.length} Facebook marketplace listings.`); - return pricedItems.slice(0, MAX_ITEMS); // Limit results + return finalizeResults(pricedItems); } /** diff --git a/packages/core/src/scrapers/kijiji.ts b/packages/core/src/scrapers/kijiji.ts index d6d28fd..f0ba0c4 100644 --- a/packages/core/src/scrapers/kijiji.ts +++ b/packages/core/src/scrapers/kijiji.ts @@ -1,7 +1,8 @@ import cliProgress from "cli-progress"; import { parseHTML } from "linkedom"; import unidecode from "unidecode"; -import type { HTMLString } from "../types/common"; +import type { HTMLString, UnstableListingModeOptions } from "../types/common"; +import { classifyUnstableListings } from "../utils/unstable"; import { type CookieConfig, formatCookiesForHeader, @@ -702,7 +703,16 @@ export default async function fetchKijijiItems( BASE_URL = "https://www.kijiji.ca", searchOptions: SearchOptions = {}, listingOptions: ListingFetchOptions = {}, + unstableMode: UnstableListingModeOptions = {}, ) { + const finalizeResults = (listings: DetailedListing[]) => { + if (!unstableMode.hideUnstableResults) { + return listings; + } + + return classifyUnstableListings(listings); + }; + const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND)); // Load Kijiji cookies (optional - helps bypass bot detection) @@ -860,7 +870,7 @@ export default async function fetchKijijiItems( } console.log(`\nParsed ${allListings.length} detailed listings.`); - return allListings; + return finalizeResults(allListings); } // Re-export error classes for convenience diff --git a/packages/core/test/ebay-core.test.ts b/packages/core/test/ebay-core.test.ts index 64c8e6f..f9542fd 100644 --- a/packages/core/test/ebay-core.test.ts +++ b/packages/core/test/ebay-core.test.ts @@ -38,4 +38,49 @@ describe("eBay Scraper Cookie Handling", () => { "No valid eBay cookies found in EBAY_COOKIE. eBay may block requests without a raw Cookie header string.", ); }); + + test("returns results and unstableResults when unstable mode is enabled", async () => { + global.fetch = mock(() => + Promise.resolve({ + ok: true, + text: () => + Promise.resolve(` + +
  • + +

    Stable Laptop Bundle

    + CA $100.00 +
  • +
  • + +

    Another Laptop Bundle

    + CA $110.00 +
  • +
  • + +

    Cheap Laptop Bundle

    + CA $70.00 +
  • + + `), + }), + ) as typeof fetch; + + const results = await fetchEbayItems( + "laptop", + 1000, + {}, + { hideUnstableResults: true }, + ); + + expect(results).toEqual({ + results: [ + expect.objectContaining({ title: "Stable Laptop Bundle" }), + expect.objectContaining({ title: "Another Laptop Bundle" }), + ], + unstableResults: [ + expect.objectContaining({ title: "Cheap Laptop Bundle" }), + ], + }); + }); }); diff --git a/packages/core/test/facebook-core.test.ts b/packages/core/test/facebook-core.test.ts index 11d0f2e..1662054 100644 --- a/packages/core/test/facebook-core.test.ts +++ b/packages/core/test/facebook-core.test.ts @@ -5,6 +5,7 @@ import { extractFacebookBootstrapCandidates, extractFacebookItemData, extractFacebookMarketplaceData, + default as fetchFacebookItems, fetchFacebookItem, parseFacebookAds, parseFacebookCookieString, @@ -367,6 +368,143 @@ describe("Facebook Marketplace Scraper Core Tests", () => { }); }); + describe("fetchFacebookItems", () => { + let previousCookie: string | undefined; + + beforeEach(() => { + previousCookie = process.env.FACEBOOK_COOKIE; + process.env.FACEBOOK_COOKIE = "c_user=12345; xs=abc123"; + }); + + afterEach(() => { + if (previousCookie === undefined) { + delete process.env.FACEBOOK_COOKIE; + } else { + process.env.FACEBOOK_COOKIE = previousCookie; + } + }); + + test("returns an array by default", async () => { + const mockSearchHtml = ``; + + global.fetch = mock(() => + Promise.resolve({ + ok: true, + text: () => Promise.resolve(mockSearchHtml), + url: "https://www.facebook.com/marketplace/toronto/search?query=chair", + headers: { + get: () => null, + }, + }), + ); + + const results = await fetchFacebookItems("chair", 1, "toronto", 25); + + expect(Array.isArray(results)).toBe(true); + expect(results).toHaveLength(1); + }); + + test("returns results and unstableResults when unstable mode is enabled", async () => { + const mockSearchHtml = ``; + + global.fetch = mock(() => + Promise.resolve({ + ok: true, + text: () => Promise.resolve(mockSearchHtml), + url: "https://www.facebook.com/marketplace/toronto/search?query=chair", + headers: { + get: () => null, + }, + }), + ); + + const results = await fetchFacebookItems("chair", 1, "toronto", 1, { + hideUnstableResults: true, + }); + + expect(results).toEqual({ + results: [expect.objectContaining({ title: "Stable Chair Listing" })], + unstableResults: [ + expect.objectContaining({ title: "Suspiciously Cheap Chair" }), + ], + }); + }); + }); + describe("Data Extraction", () => { describe("extractFacebookItemData", () => { test("extracts item details from Comet permalink bootstrap candidates", () => { diff --git a/packages/core/test/kijiji-core.test.ts b/packages/core/test/kijiji-core.test.ts index a48c8f6..760720c 100644 --- a/packages/core/test/kijiji-core.test.ts +++ b/packages/core/test/kijiji-core.test.ts @@ -1,6 +1,7 @@ -import { describe, expect, test } from "bun:test"; +import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; import { buildSearchUrl, + default as fetchKijijiItems, NetworkError, ParseError, RateLimitError, @@ -9,6 +10,18 @@ import { ValidationError, } from "../src/scrapers/kijiji"; +const originalFetch = global.fetch; + +beforeEach(() => { + global.fetch = mock(() => { + throw new Error("fetch should be mocked in individual tests"); + }); +}); + +afterEach(() => { + global.fetch = originalFetch; +}); + describe("Location and Category Resolution", () => { describe("resolveLocationId", () => { test("should return numeric IDs as-is", () => { @@ -155,3 +168,124 @@ describe("Error Classes", () => { expect(error.name).toBe("ValidationError"); }); }); + +describe("fetchKijijiItems", () => { + test("returns results and unstableResults when unstable mode is enabled", async () => { + const searchHtml = ` + + + + `; + + const listingHtml = (title: string, amount: number, slug: string) => ` + + + + `; + + global.fetch = mock((input: string | URL | Request) => { + const url = typeof input === "string" ? input : input.toString(); + + if (url.includes("/b-buy-sell/")) { + return Promise.resolve({ + ok: true, + text: () => Promise.resolve(searchHtml), + headers: { get: () => null }, + url, + }); + } + + if (url.endsWith("/v-stable-one/k0l0")) { + return Promise.resolve({ + ok: true, + text: () => + Promise.resolve( + listingHtml("Stable Listing One", 10000, "v-stable-one/k0l0"), + ), + headers: { get: () => null }, + url, + }); + } + + if (url.endsWith("/v-stable-two/k0l0")) { + return Promise.resolve({ + ok: true, + text: () => + Promise.resolve( + listingHtml("Stable Listing Two", 11000, "v-stable-two/k0l0"), + ), + headers: { get: () => null }, + url, + }); + } + + if (url.endsWith("/v-unstable/k0l0")) { + return Promise.resolve({ + ok: true, + text: () => + Promise.resolve( + listingHtml("Unstable Listing", 7000, "v-unstable/k0l0"), + ), + headers: { get: () => null }, + url, + }); + } + + throw new Error(`Unexpected URL: ${url}`); + }) as typeof fetch; + + const results = await fetchKijijiItems( + "phone", + 1000, + "https://www.kijiji.ca", + { maxPages: 1 }, + {}, + { hideUnstableResults: true }, + ); + + expect(results).toEqual({ + results: [ + expect.objectContaining({ title: "Stable Listing One" }), + expect.objectContaining({ title: "Stable Listing Two" }), + ], + unstableResults: [expect.objectContaining({ title: "Unstable Listing" })], + }); + }); +});