fix: preserve default scraper result contracts

This commit is contained in:
2026-04-22 23:30:17 -04:00
parent 1ee41fb346
commit c7fc8352ac
6 changed files with 206 additions and 13 deletions

View File

@@ -1,5 +1,8 @@
import { parseHTML } from "linkedom"; import { parseHTML } from "linkedom";
import type { UnstableListingModeOptions } from "../types/common"; import type {
UnstableListingBuckets,
UnstableListingModeOptions,
} from "../types/common";
import { classifyUnstableListings } from "../utils/unstable"; import { classifyUnstableListings } from "../utils/unstable";
import { import {
type CookieConfig, type CookieConfig,
@@ -352,6 +355,34 @@ async function loadEbayCookies(): Promise<string | undefined> {
// ----------------------------- Main ----------------------------- // ----------------------------- Main -----------------------------
/**
 * Overload: unstable-result hiding explicitly enabled.
 *
 * Declared FIRST on purpose. TypeScript resolves overloads in declaration
 * order and picks the first signature the call is assignable to. Because
 * `{ hideUnstableResults: true }` is also assignable to the general
 * `UnstableListingModeOptions`, placing the general overload first would
 * shadow this one and callers would be told they receive a plain array.
 *
 * @returns Listings split into buckets (`UnstableListingBuckets`).
 */
export default async function fetchEbayItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND: number | undefined,
  opts:
    | {
        minPrice?: number;
        maxPrice?: number;
        strictMode?: boolean;
        exclusions?: string[];
        keywords?: string[];
        buyItNowOnly?: boolean;
        canadaOnly?: boolean;
      }
    | undefined,
  unstableMode: { hideUnstableResults: true },
): Promise<UnstableListingBuckets<EbayListingDetails>>;
/**
 * Overload: default contract. Every argument after the query is optional
 * and the result is a flat array of listings.
 *
 * NOTE(review): a caller passing a value typed as the general
 * `UnstableListingModeOptions` (flag not a literal `true`) lands here and
 * is typed as receiving an array even if the flag is true at runtime —
 * consider a union-returning overload if such callers exist.
 *
 * @returns A flat array of eBay listings.
 */
export default async function fetchEbayItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND?: number,
  opts?: {
    minPrice?: number;
    maxPrice?: number;
    strictMode?: boolean;
    exclusions?: string[];
    keywords?: string[];
    buyItNowOnly?: boolean;
    canadaOnly?: boolean;
  },
  unstableMode?: UnstableListingModeOptions,
): Promise<EbayListingDetails[]>;
export default async function fetchEbayItems( export default async function fetchEbayItems(
SEARCH_QUERY: string, SEARCH_QUERY: string,
REQUESTS_PER_SECOND = 1, REQUESTS_PER_SECOND = 1,
@@ -366,7 +397,9 @@ export default async function fetchEbayItems(
} = {}, } = {},
unstableMode: UnstableListingModeOptions = {}, unstableMode: UnstableListingModeOptions = {},
) { ) {
const finalizeResults = (listings: EbayListingDetails[]) => { const finalizeResults = (
listings: EbayListingDetails[],
): EbayListingDetails[] | UnstableListingBuckets<EbayListingDetails> => {
if (!unstableMode.hideUnstableResults) { if (!unstableMode.hideUnstableResults) {
return listings; return listings;
} }

View File

@@ -1,6 +1,10 @@
import cliProgress from "cli-progress"; import cliProgress from "cli-progress";
import { parseHTML } from "linkedom"; import { parseHTML } from "linkedom";
import type { HTMLString, UnstableListingModeOptions } from "../types/common"; import type {
HTMLString,
UnstableListingBuckets,
UnstableListingModeOptions,
} from "../types/common";
import { classifyUnstableListings } from "../utils/unstable"; import { classifyUnstableListings } from "../utils/unstable";
import { import {
type Cookie, type Cookie,
@@ -1061,6 +1065,20 @@ export function parseFacebookItem(
// ----------------------------- Main ----------------------------- // ----------------------------- Main -----------------------------
/**
 * Overload: unstable-result hiding explicitly enabled.
 *
 * Declared FIRST on purpose. TypeScript resolves overloads in declaration
 * order and picks the first signature the call is assignable to. Because
 * `{ hideUnstableResults: true }` is also assignable to the general
 * `UnstableListingModeOptions`, placing the general overload first would
 * shadow this one and callers would be told they receive a plain array.
 *
 * @returns Listings split into buckets (`UnstableListingBuckets`).
 */
export default async function fetchFacebookItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND: number | undefined,
  LOCATION: string | undefined,
  MAX_ITEMS: number | undefined,
  unstableMode: { hideUnstableResults: true },
): Promise<UnstableListingBuckets<FacebookListingDetails>>;
/**
 * Overload: default contract. Every argument after the query is optional
 * and the result is a flat array of listings.
 *
 * NOTE(review): a caller passing a value typed as the general
 * `UnstableListingModeOptions` (flag not a literal `true`) lands here and
 * is typed as receiving an array even if the flag is true at runtime —
 * consider a union-returning overload if such callers exist.
 *
 * @returns A flat array of Facebook Marketplace listings.
 */
export default async function fetchFacebookItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND?: number,
  LOCATION?: string,
  MAX_ITEMS?: number,
  unstableMode?: UnstableListingModeOptions,
): Promise<FacebookListingDetails[]>;
export default async function fetchFacebookItems( export default async function fetchFacebookItems(
SEARCH_QUERY: string, SEARCH_QUERY: string,
REQUESTS_PER_SECOND = 1, REQUESTS_PER_SECOND = 1,
@@ -1068,16 +1086,14 @@ export default async function fetchFacebookItems(
MAX_ITEMS = 25, MAX_ITEMS = 25,
unstableMode: UnstableListingModeOptions = {}, unstableMode: UnstableListingModeOptions = {},
) { ) {
const finalizeResults = (listings: FacebookListingDetails[]) => { const finalizeResults = (
listings: FacebookListingDetails[],
): FacebookListingDetails[] | UnstableListingBuckets<FacebookListingDetails> => {
if (!unstableMode.hideUnstableResults) { if (!unstableMode.hideUnstableResults) {
return listings.slice(0, MAX_ITEMS); return listings.slice(0, MAX_ITEMS);
} }
const classified = classifyUnstableListings(listings); return classifyUnstableListings(listings.slice(0, MAX_ITEMS));
return {
results: classified.results.slice(0, MAX_ITEMS),
unstableResults: classified.unstableResults,
};
}; };
const cookies = await ensureFacebookCookies(); const cookies = await ensureFacebookCookies();

View File

@@ -1,7 +1,11 @@
import cliProgress from "cli-progress"; import cliProgress from "cli-progress";
import { parseHTML } from "linkedom"; import { parseHTML } from "linkedom";
import unidecode from "unidecode"; import unidecode from "unidecode";
import type { HTMLString, UnstableListingModeOptions } from "../types/common"; import type {
HTMLString,
UnstableListingBuckets,
UnstableListingModeOptions,
} from "../types/common";
import { classifyUnstableListings } from "../utils/unstable"; import { classifyUnstableListings } from "../utils/unstable";
import { import {
type CookieConfig, type CookieConfig,
@@ -697,6 +701,22 @@ export async function parseDetailedListing(
// ----------------------------- Main ----------------------------- // ----------------------------- Main -----------------------------
/**
 * Overload: unstable-result hiding explicitly enabled.
 *
 * Declared FIRST on purpose. TypeScript resolves overloads in declaration
 * order and picks the first signature the call is assignable to. Because
 * `{ hideUnstableResults: true }` is also assignable to the general
 * `UnstableListingModeOptions`, placing the general overload first would
 * shadow this one and callers would be told they receive a plain array.
 *
 * @returns Listings split into buckets (`UnstableListingBuckets`).
 */
export default async function fetchKijijiItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND: number | undefined,
  BASE_URL: string | undefined,
  searchOptions: SearchOptions | undefined,
  listingOptions: ListingFetchOptions | undefined,
  unstableMode: { hideUnstableResults: true },
): Promise<UnstableListingBuckets<DetailedListing>>;
/**
 * Overload: default contract. Every argument after the query is optional
 * and the result is a flat array of listings.
 *
 * NOTE(review): a caller passing a value typed as the general
 * `UnstableListingModeOptions` (flag not a literal `true`) lands here and
 * is typed as receiving an array even if the flag is true at runtime —
 * consider a union-returning overload if such callers exist.
 *
 * @returns A flat array of detailed Kijiji listings.
 */
export default async function fetchKijijiItems(
  SEARCH_QUERY: string,
  REQUESTS_PER_SECOND?: number,
  BASE_URL?: string,
  searchOptions?: SearchOptions,
  listingOptions?: ListingFetchOptions,
  unstableMode?: UnstableListingModeOptions,
): Promise<DetailedListing[]>;
export default async function fetchKijijiItems( export default async function fetchKijijiItems(
SEARCH_QUERY: string, SEARCH_QUERY: string,
REQUESTS_PER_SECOND = 1, REQUESTS_PER_SECOND = 1,
@@ -705,7 +725,9 @@ export default async function fetchKijijiItems(
listingOptions: ListingFetchOptions = {}, listingOptions: ListingFetchOptions = {},
unstableMode: UnstableListingModeOptions = {}, unstableMode: UnstableListingModeOptions = {},
) { ) {
const finalizeResults = (listings: DetailedListing[]) => { const finalizeResults = (
listings: DetailedListing[],
): DetailedListing[] | UnstableListingBuckets<DetailedListing> => {
if (!unstableMode.hideUnstableResults) { if (!unstableMode.hideUnstableResults) {
return listings; return listings;
} }

View File

@@ -1,6 +1,20 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import type { EbayListingDetails } from "../src/scrapers/ebay";
import fetchEbayItems from "../src/scrapers/ebay"; import fetchEbayItems from "../src/scrapers/ebay";
// Compile-time assertion helper: the alias only type-checks when its
// argument is the literal type `true`.
type Assert<T extends true> = T;
// Resolves to `true` only when T and U are mutually assignable through the
// generic-conditional-function comparison below (checked in both
// directions); otherwise resolves to `false`.
type IsExact<T, U> =
(<G>() => G extends T ? 1 : 2) extends <G>() => G extends U ? 1 : 2
? (<G>() => G extends U ? 1 : 2) extends <G>() => G extends T ? 1 : 2
? true
: false
: false;
// Wrapper around a query-only call; in the lines shown here only its
// inferred return type is used, via `typeof` below.
const getDefaultEbayItems = async () => fetchEbayItems("laptop");
// Fails to compile unless a query-only call resolves to exactly
// `EbayListingDetails[]`.
type _EbayDefaultReturn = Assert<
IsExact<Awaited<ReturnType<typeof getDefaultEbayItems>>, EbayListingDetails[]>
>;
const originalFetch = global.fetch; const originalFetch = global.fetch;
const originalWarn = console.warn; const originalWarn = console.warn;

View File

@@ -1,6 +1,7 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import { import {
classifyFacebookResponse, classifyFacebookResponse,
type FacebookListingDetails,
ensureFacebookCookies, ensureFacebookCookies,
extractFacebookBootstrapCandidates, extractFacebookBootstrapCandidates,
extractFacebookItemData, extractFacebookItemData,
@@ -14,6 +15,19 @@ import {
import { formatCookiesForHeader } from "../src/utils/cookies"; import { formatCookiesForHeader } from "../src/utils/cookies";
import { formatCentsToCurrency } from "../src/utils/format"; import { formatCentsToCurrency } from "../src/utils/format";
// Compile-time assertion helper: the alias only type-checks when its
// argument is the literal type `true`.
type Assert<T extends true> = T;
// Resolves to `true` only when T and U are mutually assignable through the
// generic-conditional-function comparison below (checked in both
// directions); otherwise resolves to `false`.
type IsExact<T, U> =
(<G>() => G extends T ? 1 : 2) extends <G>() => G extends U ? 1 : 2
? (<G>() => G extends U ? 1 : 2) extends <G>() => G extends T ? 1 : 2
? true
: false
: false;
// Wrapper around a query-only call; in the lines shown here only its
// inferred return type is used, via `typeof` below.
const getDefaultFacebookItems = async () => fetchFacebookItems("chair");
// Fails to compile unless a query-only call resolves to exactly
// `FacebookListingDetails[]`.
type _FacebookDefaultReturn = Assert<
IsExact<Awaited<ReturnType<typeof getDefaultFacebookItems>>, FacebookListingDetails[]>
>;
// Mock fetch globally // Mock fetch globally
const originalFetch = global.fetch; const originalFetch = global.fetch;
@@ -492,17 +506,97 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
}), }),
); );
const results = await fetchFacebookItems("chair", 1, "toronto", 1, { const results = await fetchFacebookItems("chair", 1, "toronto", 25, {
hideUnstableResults: true, hideUnstableResults: true,
}); });
expect(results).toEqual({ expect(results).toEqual({
results: [expect.objectContaining({ title: "Stable Chair Listing" })], results: [
expect.objectContaining({ title: "Stable Chair Listing" }),
expect.objectContaining({ title: "Another Stable Chair" }),
],
unstableResults: [ unstableResults: [
expect.objectContaining({ title: "Suspiciously Cheap Chair" }), expect.objectContaining({ title: "Suspiciously Cheap Chair" }),
], ],
}); });
}); });
// The mocked search page carries three listings but the call below passes
// MAX_ITEMS = 2. The expectation is that only the first two listings show up
// in the returned buckets and the third ("Past Boundary Chair") appears in
// neither. NOTE(review): which bucket each listing lands in is presumably
// decided by the scraper's unstable-listing classifier — confirm there.
test("unstable mode keeps MAX_ITEMS as the classification boundary", async () => {
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify({
payload: {
resultGroups: [
{
edges: [
{
node: {
listing: {
id: "1",
marketplace_listing_title: "Boundary Stable Chair",
listing_price: {
amount: "100.00",
formatted_amount: "CA$100",
currency: "CAD",
},
is_live: true,
},
},
},
{
node: {
listing: {
id: "2",
marketplace_listing_title: "Boundary Cheap Chair",
listing_price: {
amount: "50.00",
formatted_amount: "CA$50",
currency: "CAD",
},
is_live: true,
},
},
},
{
node: {
listing: {
id: "3",
marketplace_listing_title: "Past Boundary Chair",
listing_price: {
amount: "110.00",
formatted_amount: "CA$110",
currency: "CAD",
},
is_live: true,
},
},
},
],
},
],
},
})}</script></body></html>`;
// Replace the global fetch so every request resolves to the fixture above.
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () => Promise.resolve(mockSearchHtml),
url: "https://www.facebook.com/marketplace/toronto/search?query=chair",
headers: {
get: () => null,
},
}),
);
// MAX_ITEMS (4th argument) is 2, one fewer than the listings in the fixture.
const results = await fetchFacebookItems("chair", 1, "toronto", 2, {
hideUnstableResults: true,
});
// Listing id "3" must be absent from both buckets.
expect(results).toEqual({
results: [expect.objectContaining({ title: "Boundary Stable Chair" })],
unstableResults: [
expect.objectContaining({ title: "Boundary Cheap Chair" }),
],
});
});
}); });
describe("Data Extraction", () => { describe("Data Extraction", () => {

View File

@@ -2,6 +2,7 @@ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import { import {
buildSearchUrl, buildSearchUrl,
default as fetchKijijiItems, default as fetchKijijiItems,
type DetailedListing,
NetworkError, NetworkError,
ParseError, ParseError,
RateLimitError, RateLimitError,
@@ -10,6 +11,19 @@ import {
ValidationError, ValidationError,
} from "../src/scrapers/kijiji"; } from "../src/scrapers/kijiji";
// Compile-time assertion helper: the alias only type-checks when its
// argument is the literal type `true`.
type Assert<T extends true> = T;
// Resolves to `true` only when T and U are mutually assignable through the
// generic-conditional-function comparison below (checked in both
// directions); otherwise resolves to `false`.
type IsExact<T, U> =
(<G>() => G extends T ? 1 : 2) extends <G>() => G extends U ? 1 : 2
? (<G>() => G extends U ? 1 : 2) extends <G>() => G extends T ? 1 : 2
? true
: false
: false;
// Wrapper around a query-only call; in the lines shown here only its
// inferred return type is used, via `typeof` below.
const getDefaultKijijiItems = async () => fetchKijijiItems("phone");
// Fails to compile unless a query-only call resolves to exactly
// `DetailedListing[]`.
type _KijijiDefaultReturn = Assert<
IsExact<Awaited<ReturnType<typeof getDefaultKijijiItems>>, DetailedListing[]>
>;
const originalFetch = global.fetch; const originalFetch = global.fetch;
beforeEach(() => { beforeEach(() => {