fix: preserve default scraper result contracts

This commit is contained in:
2026-04-22 23:30:17 -04:00
parent 1ee41fb346
commit c7fc8352ac
6 changed files with 206 additions and 13 deletions

View File

@@ -1,5 +1,8 @@
import { parseHTML } from "linkedom";
import type { UnstableListingModeOptions } from "../types/common";
import type {
UnstableListingBuckets,
UnstableListingModeOptions,
} from "../types/common";
import { classifyUnstableListings } from "../utils/unstable";
import {
type CookieConfig,
@@ -352,6 +355,34 @@ async function loadEbayCookies(): Promise<string | undefined> {
// ----------------------------- Main -----------------------------
/**
 * Searches eBay with unstable-result separation requested.
 *
 * Declared before the general signature on purpose: TypeScript resolves a
 * call against overloads in declaration order and takes the first one whose
 * parameters accept the arguments. A literal `{ hideUnstableResults: true }`
 * also satisfies the general options type, so the narrow signature has to
 * come first or such calls would be typed as a plain listing array.
 *
 * @param SEARCH_QUERY - Search text.
 * @param REQUESTS_PER_SECOND - Request rate; pass `undefined` to use the
 *   implementation default.
 * @param opts - Optional search filters; pass `undefined` for none.
 * @param unstableMode - Must be `{ hideUnstableResults: true }` to select
 *   this signature.
 * @returns The listings wrapped in `UnstableListingBuckets`.
 */
export default async function fetchEbayItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND: number | undefined,
  opts:
    | {
        minPrice?: number;
        maxPrice?: number;
        strictMode?: boolean;
        exclusions?: string[];
        keywords?: string[];
        buyItNowOnly?: boolean;
        canadaOnly?: boolean;
      }
    | undefined,
  unstableMode: { hideUnstableResults: true },
): Promise<UnstableListingBuckets<EbayListingDetails>>;
/**
 * Searches eBay using the default result contract (a flat listing array).
 *
 * NOTE(review): a non-literal `unstableMode` value whose
 * `hideUnstableResults` happens to be `true` at runtime is still typed by
 * this signature; pass the literal to get the bucketed return type.
 *
 * @param SEARCH_QUERY - Search text.
 * @param REQUESTS_PER_SECOND - Optional request rate.
 * @param opts - Optional search filters.
 * @param unstableMode - Optional unstable-listing settings.
 * @returns The listings as a plain array.
 */
export default async function fetchEbayItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND?: number,
  opts?: {
    minPrice?: number;
    maxPrice?: number;
    strictMode?: boolean;
    exclusions?: string[];
    keywords?: string[];
    buyItNowOnly?: boolean;
    canadaOnly?: boolean;
  },
  unstableMode?: UnstableListingModeOptions,
): Promise<EbayListingDetails[]>;
export default async function fetchEbayItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND = 1,
@@ -366,7 +397,9 @@ export default async function fetchEbayItems(
} = {},
unstableMode: UnstableListingModeOptions = {},
) {
const finalizeResults = (listings: EbayListingDetails[]) => {
const finalizeResults = (
listings: EbayListingDetails[],
): EbayListingDetails[] | UnstableListingBuckets<EbayListingDetails> => {
if (!unstableMode.hideUnstableResults) {
return listings;
}

View File

@@ -1,6 +1,10 @@
import cliProgress from "cli-progress";
import { parseHTML } from "linkedom";
import type { HTMLString, UnstableListingModeOptions } from "../types/common";
import type {
HTMLString,
UnstableListingBuckets,
UnstableListingModeOptions,
} from "../types/common";
import { classifyUnstableListings } from "../utils/unstable";
import {
type Cookie,
@@ -1061,6 +1065,20 @@ export function parseFacebookItem(
// ----------------------------- Main -----------------------------
/**
 * Searches Facebook Marketplace with unstable-result separation requested.
 *
 * Declared before the general signature on purpose: TypeScript takes the
 * first overload whose parameters accept the call, and a literal
 * `{ hideUnstableResults: true }` also satisfies the general options type.
 * With the narrow signature first, such calls are typed as buckets instead
 * of a plain listing array.
 *
 * @param SEARCH_QUERY - Search text.
 * @param REQUESTS_PER_SECOND - Request rate; pass `undefined` to use the
 *   implementation default.
 * @param LOCATION - Marketplace location; pass `undefined` for the default.
 * @param MAX_ITEMS - Item limit; pass `undefined` for the default.
 * @param unstableMode - Must be `{ hideUnstableResults: true }` to select
 *   this signature.
 * @returns The listings wrapped in `UnstableListingBuckets`.
 */
export default async function fetchFacebookItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND: number | undefined,
  LOCATION: string | undefined,
  MAX_ITEMS: number | undefined,
  unstableMode: { hideUnstableResults: true },
): Promise<UnstableListingBuckets<FacebookListingDetails>>;
/**
 * Searches Facebook Marketplace using the default result contract (a flat
 * listing array).
 *
 * NOTE(review): a non-literal `unstableMode` value whose
 * `hideUnstableResults` happens to be `true` at runtime is still typed by
 * this signature; pass the literal to get the bucketed return type.
 *
 * @param SEARCH_QUERY - Search text.
 * @param REQUESTS_PER_SECOND - Optional request rate.
 * @param LOCATION - Optional marketplace location.
 * @param MAX_ITEMS - Optional item limit.
 * @param unstableMode - Optional unstable-listing settings.
 * @returns The listings as a plain array.
 */
export default async function fetchFacebookItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND?: number,
  LOCATION?: string,
  MAX_ITEMS?: number,
  unstableMode?: UnstableListingModeOptions,
): Promise<FacebookListingDetails[]>;
export default async function fetchFacebookItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND = 1,
@@ -1068,16 +1086,14 @@ export default async function fetchFacebookItems(
MAX_ITEMS = 25,
unstableMode: UnstableListingModeOptions = {},
) {
const finalizeResults = (listings: FacebookListingDetails[]) => {
const finalizeResults = (
listings: FacebookListingDetails[],
): FacebookListingDetails[] | UnstableListingBuckets<FacebookListingDetails> => {
if (!unstableMode.hideUnstableResults) {
return listings.slice(0, MAX_ITEMS);
}
const classified = classifyUnstableListings(listings);
return {
results: classified.results.slice(0, MAX_ITEMS),
unstableResults: classified.unstableResults,
};
return classifyUnstableListings(listings.slice(0, MAX_ITEMS));
};
const cookies = await ensureFacebookCookies();

View File

@@ -1,7 +1,11 @@
import cliProgress from "cli-progress";
import { parseHTML } from "linkedom";
import unidecode from "unidecode";
import type { HTMLString, UnstableListingModeOptions } from "../types/common";
import type {
HTMLString,
UnstableListingBuckets,
UnstableListingModeOptions,
} from "../types/common";
import { classifyUnstableListings } from "../utils/unstable";
import {
type CookieConfig,
@@ -697,6 +701,22 @@ export async function parseDetailedListing(
// ----------------------------- Main -----------------------------
/**
 * Searches Kijiji with unstable-result separation requested.
 *
 * Declared before the general signature on purpose: TypeScript takes the
 * first overload whose parameters accept the call, and a literal
 * `{ hideUnstableResults: true }` also satisfies the general options type.
 * With the narrow signature first, such calls are typed as buckets instead
 * of a plain listing array.
 *
 * @param SEARCH_QUERY - Search text.
 * @param REQUESTS_PER_SECOND - Request rate; pass `undefined` to use the
 *   implementation default.
 * @param BASE_URL - Site base URL; pass `undefined` for the default.
 * @param searchOptions - Search settings; pass `undefined` for the defaults.
 * @param listingOptions - Listing-fetch settings; pass `undefined` for the
 *   defaults.
 * @param unstableMode - Must be `{ hideUnstableResults: true }` to select
 *   this signature.
 * @returns The listings wrapped in `UnstableListingBuckets`.
 */
export default async function fetchKijijiItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND: number | undefined,
  BASE_URL: string | undefined,
  searchOptions: SearchOptions | undefined,
  listingOptions: ListingFetchOptions | undefined,
  unstableMode: { hideUnstableResults: true },
): Promise<UnstableListingBuckets<DetailedListing>>;
/**
 * Searches Kijiji using the default result contract (a flat listing array).
 *
 * NOTE(review): a non-literal `unstableMode` value whose
 * `hideUnstableResults` happens to be `true` at runtime is still typed by
 * this signature; pass the literal to get the bucketed return type.
 *
 * @param SEARCH_QUERY - Search text.
 * @param REQUESTS_PER_SECOND - Optional request rate.
 * @param BASE_URL - Optional site base URL.
 * @param searchOptions - Optional search settings.
 * @param listingOptions - Optional listing-fetch settings.
 * @param unstableMode - Optional unstable-listing settings.
 * @returns The listings as a plain array.
 */
export default async function fetchKijijiItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND?: number,
  BASE_URL?: string,
  searchOptions?: SearchOptions,
  listingOptions?: ListingFetchOptions,
  unstableMode?: UnstableListingModeOptions,
): Promise<DetailedListing[]>;
export default async function fetchKijijiItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND = 1,
@@ -705,7 +725,9 @@ export default async function fetchKijijiItems(
listingOptions: ListingFetchOptions = {},
unstableMode: UnstableListingModeOptions = {},
) {
const finalizeResults = (listings: DetailedListing[]) => {
const finalizeResults = (
listings: DetailedListing[],
): DetailedListing[] | UnstableListingBuckets<DetailedListing> => {
if (!unstableMode.hideUnstableResults) {
return listings;
}

View File

@@ -1,6 +1,20 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import type { EbayListingDetails } from "../src/scrapers/ebay";
import fetchEbayItems from "../src/scrapers/ebay";
// Compile-time assertion helper: the type argument must be the literal type
// `true`, so wrapping a type-level check in Assert<...> turns a failed check
// into a type error.
type Assert<T extends true> = T;
// Type-level equality check. Resolves to `true` only when both nested
// conditional checks (T against U, then U against T) succeed; otherwise it
// resolves to `false`.
type IsExact<T, U> =
(<G>() => G extends T ? 1 : 2) extends <G>() => G extends U ? 1 : 2
? (<G>() => G extends U ? 1 : 2) extends <G>() => G extends T ? 1 : 2
? true
: false
: false;
// Calls the scraper with only the search query. The inferred return type of
// this wrapper is what the assertion below inspects.
const getDefaultEbayItems = async () => fetchEbayItems("laptop");
// Fails type-checking unless the query-only call resolves to exactly
// EbayListingDetails[].
type _EbayDefaultReturn = Assert<
IsExact<Awaited<ReturnType<typeof getDefaultEbayItems>>, EbayListingDetails[]>
>;
// Snapshot the current global fetch and console.warn.
// NOTE(review): presumably so test hooks can restore them after stubbing;
// the restoring code is not visible here, so confirm.
const originalFetch = global.fetch;
const originalWarn = console.warn;

View File

@@ -1,6 +1,7 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import {
classifyFacebookResponse,
type FacebookListingDetails,
ensureFacebookCookies,
extractFacebookBootstrapCandidates,
extractFacebookItemData,
@@ -14,6 +15,19 @@ import {
import { formatCookiesForHeader } from "../src/utils/cookies";
import { formatCentsToCurrency } from "../src/utils/format";
// Compile-time assertion helper: the type argument must be the literal type
// `true`, so wrapping a type-level check in Assert<...> turns a failed check
// into a type error.
type Assert<T extends true> = T;
// Type-level equality check. Resolves to `true` only when both nested
// conditional checks (T against U, then U against T) succeed; otherwise it
// resolves to `false`.
type IsExact<T, U> =
(<G>() => G extends T ? 1 : 2) extends <G>() => G extends U ? 1 : 2
? (<G>() => G extends U ? 1 : 2) extends <G>() => G extends T ? 1 : 2
? true
: false
: false;
// Calls the scraper with only the search query. The inferred return type of
// this wrapper is what the assertion below inspects.
const getDefaultFacebookItems = async () => fetchFacebookItems("chair");
// Fails type-checking unless the query-only call resolves to exactly
// FacebookListingDetails[].
type _FacebookDefaultReturn = Assert<
IsExact<Awaited<ReturnType<typeof getDefaultFacebookItems>>, FacebookListingDetails[]>
>;
// Keep a reference to the current global fetch; tests in this file overwrite
// global.fetch with mocks.
const originalFetch = global.fetch;
@@ -492,17 +506,97 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
}),
);
const results = await fetchFacebookItems("chair", 1, "toronto", 1, {
const results = await fetchFacebookItems("chair", 1, "toronto", 25, {
hideUnstableResults: true,
});
expect(results).toEqual({
results: [expect.objectContaining({ title: "Stable Chair Listing" })],
results: [
expect.objectContaining({ title: "Stable Chair Listing" }),
expect.objectContaining({ title: "Another Stable Chair" }),
],
unstableResults: [
expect.objectContaining({ title: "Suspiciously Cheap Chair" }),
],
});
});
test("unstable mode keeps MAX_ITEMS as the classification boundary", async () => {
  // Builds one search-result edge in the shape used by the fixture payload.
  const toEdge = (
    id: string,
    title: string,
    amount: string,
    formattedAmount: string,
  ) => ({
    node: {
      listing: {
        id,
        marketplace_listing_title: title,
        listing_price: {
          amount,
          formatted_amount: formattedAmount,
          currency: "CAD",
        },
        is_live: true,
      },
    },
  });

  // Three listings are served while the call below passes 2 as MAX_ITEMS, so
  // the third entry sits past the limit.
  const searchPayload = {
    payload: {
      resultGroups: [
        {
          edges: [
            toEdge("1", "Boundary Stable Chair", "100.00", "CA$100"),
            toEdge("2", "Boundary Cheap Chair", "50.00", "CA$50"),
            toEdge("3", "Past Boundary Chair", "110.00", "CA$110"),
          ],
        },
      ],
    },
  };
  const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify(searchPayload)}</script></body></html>`;

  // Every fetch call resolves to the same canned search page.
  global.fetch = mock(async () => ({
    ok: true,
    text: async () => mockSearchHtml,
    url: "https://www.facebook.com/marketplace/toronto/search?query=chair",
    headers: {
      get: () => null,
    },
  }));

  const buckets = await fetchFacebookItems("chair", 1, "toronto", 2, {
    hideUnstableResults: true,
  });

  // Only the first two listings appear, one per bucket; the third is absent.
  const expectedStable = [
    expect.objectContaining({ title: "Boundary Stable Chair" }),
  ];
  const expectedUnstable = [
    expect.objectContaining({ title: "Boundary Cheap Chair" }),
  ];
  expect(buckets).toEqual({
    results: expectedStable,
    unstableResults: expectedUnstable,
  });
});
});
describe("Data Extraction", () => {

View File

@@ -2,6 +2,7 @@ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import {
buildSearchUrl,
default as fetchKijijiItems,
type DetailedListing,
NetworkError,
ParseError,
RateLimitError,
@@ -10,6 +11,19 @@ import {
ValidationError,
} from "../src/scrapers/kijiji";
// Compile-time assertion helper: the type argument must be the literal type
// `true`, so wrapping a type-level check in Assert<...> turns a failed check
// into a type error.
type Assert<T extends true> = T;
// Type-level equality check. Resolves to `true` only when both nested
// conditional checks (T against U, then U against T) succeed; otherwise it
// resolves to `false`.
type IsExact<T, U> =
(<G>() => G extends T ? 1 : 2) extends <G>() => G extends U ? 1 : 2
? (<G>() => G extends U ? 1 : 2) extends <G>() => G extends T ? 1 : 2
? true
: false
: false;
// Calls the scraper with only the search query. The inferred return type of
// this wrapper is what the assertion below inspects.
const getDefaultKijijiItems = async () => fetchKijijiItems("phone");
// Fails type-checking unless the query-only call resolves to exactly
// DetailedListing[].
type _KijijiDefaultReturn = Assert<
IsExact<Awaited<ReturnType<typeof getDefaultKijijiItems>>, DetailedListing[]>
>;
// Snapshot the current global fetch.
// NOTE(review): presumably so test hooks can restore it after stubbing; the
// restoring code is not visible here, so confirm.
const originalFetch = global.fetch;
beforeEach(() => {