fix: preserve default scraper result contracts
This commit is contained in:
@@ -1,5 +1,8 @@
|
||||
import { parseHTML } from "linkedom";
|
||||
import type { UnstableListingModeOptions } from "../types/common";
|
||||
import type {
|
||||
UnstableListingBuckets,
|
||||
UnstableListingModeOptions,
|
||||
} from "../types/common";
|
||||
import { classifyUnstableListings } from "../utils/unstable";
|
||||
import {
|
||||
type CookieConfig,
|
||||
@@ -352,6 +355,34 @@ async function loadEbayCookies(): Promise<string | undefined> {
|
||||
|
||||
// ----------------------------- Main -----------------------------
|
||||
|
||||
/**
 * Overload for the default call shape.
 *
 * Only the search query is required; the rate, the filter options object and
 * the unstable-mode options may all be omitted. Calls resolved against this
 * signature are typed as resolving to a flat `EbayListingDetails[]`.
 */
export default async function fetchEbayItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND?: number,
|
||||
opts?: {
|
||||
minPrice?: number;
|
||||
maxPrice?: number;
|
||||
strictMode?: boolean;
|
||||
exclusions?: string[];
|
||||
keywords?: string[];
|
||||
buyItNowOnly?: boolean;
|
||||
canadaOnly?: boolean;
|
||||
},
|
||||
unstableMode?: UnstableListingModeOptions,
|
||||
): Promise<EbayListingDetails[]>;
|
||||
/**
 * Overload for callers that opt into unstable-result separation.
 *
 * All positional arguments must be supplied (each may be `undefined` except
 * the query) and `unstableMode` must be typed as `{ hideUnstableResults: true }`.
 * Calls resolved against this signature are typed as resolving to
 * `UnstableListingBuckets<EbayListingDetails>` instead of a flat array.
 *
 * NOTE(review): TypeScript resolves overloads in declaration order and this
 * signature is declared after the general one. If `UnstableListingModeOptions`
 * (defined in ../types/common, not visible here) also accepts
 * `{ hideUnstableResults: true }`, such calls would match the array-returning
 * overload first — confirm, and consider declaring this overload first.
 */
export default async function fetchEbayItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND: number | undefined,
|
||||
opts: {
|
||||
minPrice?: number;
|
||||
maxPrice?: number;
|
||||
strictMode?: boolean;
|
||||
exclusions?: string[];
|
||||
keywords?: string[];
|
||||
buyItNowOnly?: boolean;
|
||||
canadaOnly?: boolean;
|
||||
} | undefined,
|
||||
unstableMode: { hideUnstableResults: true },
|
||||
): Promise<UnstableListingBuckets<EbayListingDetails>>;
|
||||
export default async function fetchEbayItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND = 1,
|
||||
@@ -366,7 +397,9 @@ export default async function fetchEbayItems(
|
||||
} = {},
|
||||
unstableMode: UnstableListingModeOptions = {},
|
||||
) {
|
||||
const finalizeResults = (listings: EbayListingDetails[]) => {
|
||||
const finalizeResults = (
|
||||
listings: EbayListingDetails[],
|
||||
): EbayListingDetails[] | UnstableListingBuckets<EbayListingDetails> => {
|
||||
if (!unstableMode.hideUnstableResults) {
|
||||
return listings;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
import cliProgress from "cli-progress";
|
||||
import { parseHTML } from "linkedom";
|
||||
import type { HTMLString, UnstableListingModeOptions } from "../types/common";
|
||||
import type {
|
||||
HTMLString,
|
||||
UnstableListingBuckets,
|
||||
UnstableListingModeOptions,
|
||||
} from "../types/common";
|
||||
import { classifyUnstableListings } from "../utils/unstable";
|
||||
import {
|
||||
type Cookie,
|
||||
@@ -1061,6 +1065,20 @@ export function parseFacebookItem(
|
||||
|
||||
// ----------------------------- Main -----------------------------
|
||||
|
||||
/**
 * Overload for the default call shape.
 *
 * Only the search query is required; rate, location, item cap and
 * unstable-mode options may be omitted. Calls resolved against this signature
 * are typed as resolving to a flat `FacebookListingDetails[]`.
 */
export default async function fetchFacebookItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND?: number,
|
||||
LOCATION?: string,
|
||||
MAX_ITEMS?: number,
|
||||
unstableMode?: UnstableListingModeOptions,
|
||||
): Promise<FacebookListingDetails[]>;
|
||||
/**
 * Overload for callers that opt into unstable-result separation.
 *
 * All five positional arguments must be supplied (each may be `undefined`
 * except the query) and `unstableMode` must be typed as
 * `{ hideUnstableResults: true }`. Calls resolved against this signature are
 * typed as resolving to `UnstableListingBuckets<FacebookListingDetails>`.
 *
 * NOTE(review): TypeScript resolves overloads in declaration order and this
 * signature is declared after the general one. If `UnstableListingModeOptions`
 * (defined in ../types/common, not visible here) also accepts
 * `{ hideUnstableResults: true }`, such calls would match the array-returning
 * overload first — confirm, and consider declaring this overload first.
 */
export default async function fetchFacebookItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND: number | undefined,
|
||||
LOCATION: string | undefined,
|
||||
MAX_ITEMS: number | undefined,
|
||||
unstableMode: { hideUnstableResults: true },
|
||||
): Promise<UnstableListingBuckets<FacebookListingDetails>>;
|
||||
export default async function fetchFacebookItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND = 1,
|
||||
@@ -1068,16 +1086,14 @@ export default async function fetchFacebookItems(
|
||||
MAX_ITEMS = 25,
|
||||
unstableMode: UnstableListingModeOptions = {},
|
||||
) {
|
||||
// Shapes the scraped Facebook listings according to `unstableMode`: either a
// plain array or an object with `results` / `unstableResults` buckets.
//
// NOTE(review): this span reads like a rendered diff in which removed and
// added lines are shown together without +/- markers: there are two
// `finalizeResults` headers and two different return strategies for the
// unstable path. Confirm which lines are current before relying on it.
const finalizeResults = (listings: FacebookListingDetails[]) => {
|
||||
const finalizeResults = (
|
||||
listings: FacebookListingDetails[],
|
||||
): FacebookListingDetails[] | UnstableListingBuckets<FacebookListingDetails> => {
|
||||
// Unstable filtering not requested: return the plain array capped at MAX_ITEMS.
if (!unstableMode.hideUnstableResults) {
|
||||
return listings.slice(0, MAX_ITEMS);
|
||||
}
|
||||
|
||||
// Variant A: classify every listing, then cap only the `results` bucket;
// `unstableResults` is returned uncapped.
const classified = classifyUnstableListings(listings);
|
||||
return {
|
||||
results: classified.results.slice(0, MAX_ITEMS),
|
||||
unstableResults: classified.unstableResults,
|
||||
};
|
||||
// Variant B: cap the input to MAX_ITEMS first, then classify only that slice.
return classifyUnstableListings(listings.slice(0, MAX_ITEMS));
|
||||
};
|
||||
|
||||
const cookies = await ensureFacebookCookies();
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
import cliProgress from "cli-progress";
|
||||
import { parseHTML } from "linkedom";
|
||||
import unidecode from "unidecode";
|
||||
import type { HTMLString, UnstableListingModeOptions } from "../types/common";
|
||||
import type {
|
||||
HTMLString,
|
||||
UnstableListingBuckets,
|
||||
UnstableListingModeOptions,
|
||||
} from "../types/common";
|
||||
import { classifyUnstableListings } from "../utils/unstable";
|
||||
import {
|
||||
type CookieConfig,
|
||||
@@ -697,6 +701,22 @@ export async function parseDetailedListing(
|
||||
|
||||
// ----------------------------- Main -----------------------------
|
||||
|
||||
/**
 * Overload for the default call shape.
 *
 * Only the search query is required; rate, base URL, search options, listing
 * options and unstable-mode options may be omitted. Calls resolved against
 * this signature are typed as resolving to a flat `DetailedListing[]`.
 */
export default async function fetchKijijiItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND?: number,
|
||||
BASE_URL?: string,
|
||||
searchOptions?: SearchOptions,
|
||||
listingOptions?: ListingFetchOptions,
|
||||
unstableMode?: UnstableListingModeOptions,
|
||||
): Promise<DetailedListing[]>;
|
||||
/**
 * Overload for callers that opt into unstable-result separation.
 *
 * All six positional arguments must be supplied (each may be `undefined`
 * except the query) and `unstableMode` must be typed as
 * `{ hideUnstableResults: true }`. Calls resolved against this signature are
 * typed as resolving to `UnstableListingBuckets<DetailedListing>`.
 *
 * NOTE(review): TypeScript resolves overloads in declaration order and this
 * signature is declared after the general one. If `UnstableListingModeOptions`
 * (defined in ../types/common, not visible here) also accepts
 * `{ hideUnstableResults: true }`, such calls would match the array-returning
 * overload first — confirm, and consider declaring this overload first.
 */
export default async function fetchKijijiItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND: number | undefined,
|
||||
BASE_URL: string | undefined,
|
||||
searchOptions: SearchOptions | undefined,
|
||||
listingOptions: ListingFetchOptions | undefined,
|
||||
unstableMode: { hideUnstableResults: true },
|
||||
): Promise<UnstableListingBuckets<DetailedListing>>;
|
||||
export default async function fetchKijijiItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND = 1,
|
||||
@@ -705,7 +725,9 @@ export default async function fetchKijijiItems(
|
||||
listingOptions: ListingFetchOptions = {},
|
||||
unstableMode: UnstableListingModeOptions = {},
|
||||
) {
|
||||
const finalizeResults = (listings: DetailedListing[]) => {
|
||||
const finalizeResults = (
|
||||
listings: DetailedListing[],
|
||||
): DetailedListing[] | UnstableListingBuckets<DetailedListing> => {
|
||||
if (!unstableMode.hideUnstableResults) {
|
||||
return listings;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,20 @@
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||
import type { EbayListingDetails } from "../src/scrapers/ebay";
|
||||
import fetchEbayItems from "../src/scrapers/ebay";
|
||||
|
||||
// Compile-time assertion helper: instantiating it type-checks only when the
// argument is assignable to the literal type `true`.
type Assert<T extends true> = T;
|
||||
// Type-level equality test. Wraps T and U in generic function types whose
// return type is conditional on them and checks assignability in both
// directions; resolves to `true` only when both checks hold, else `false`.
type IsExact<T, U> =
|
||||
(<G>() => G extends T ? 1 : 2) extends <G>() => G extends U ? 1 : 2
|
||||
? (<G>() => G extends U ? 1 : 2) extends <G>() => G extends T ? 1 : 2
|
||||
? true
|
||||
: false
|
||||
: false;
|
||||
|
||||
// Calls the scraper with only a query; its inferred return type is what the
// assertion below inspects.
const getDefaultEbayItems = async () => fetchEbayItems("laptop");
|
||||
// Fails to compile unless the awaited result of the query-only call is
// exactly `EbayListingDetails[]`.
type _EbayDefaultReturn = Assert<
|
||||
IsExact<Awaited<ReturnType<typeof getDefaultEbayItems>>, EbayListingDetails[]>
|
||||
>;
|
||||
|
||||
const originalFetch = global.fetch;
|
||||
const originalWarn = console.warn;
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||
import {
|
||||
classifyFacebookResponse,
|
||||
type FacebookListingDetails,
|
||||
ensureFacebookCookies,
|
||||
extractFacebookBootstrapCandidates,
|
||||
extractFacebookItemData,
|
||||
@@ -14,6 +15,19 @@ import {
|
||||
import { formatCookiesForHeader } from "../src/utils/cookies";
|
||||
import { formatCentsToCurrency } from "../src/utils/format";
|
||||
|
||||
// Compile-time assertion helper: instantiating it type-checks only when the
// argument is assignable to the literal type `true`.
type Assert<T extends true> = T;
|
||||
// Type-level equality test. Wraps T and U in generic function types whose
// return type is conditional on them and checks assignability in both
// directions; resolves to `true` only when both checks hold, else `false`.
type IsExact<T, U> =
|
||||
(<G>() => G extends T ? 1 : 2) extends <G>() => G extends U ? 1 : 2
|
||||
? (<G>() => G extends U ? 1 : 2) extends <G>() => G extends T ? 1 : 2
|
||||
? true
|
||||
: false
|
||||
: false;
|
||||
|
||||
// Calls the scraper with only a query; its inferred return type is what the
// assertion below inspects.
const getDefaultFacebookItems = async () => fetchFacebookItems("chair");
|
||||
// Fails to compile unless the awaited result of the query-only call is
// exactly `FacebookListingDetails[]`.
type _FacebookDefaultReturn = Assert<
|
||||
IsExact<Awaited<ReturnType<typeof getDefaultFacebookItems>>, FacebookListingDetails[]>
|
||||
>;
|
||||
|
||||
// Mock fetch globally
|
||||
const originalFetch = global.fetch;
|
||||
|
||||
@@ -492,17 +506,97 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
}),
|
||||
);
|
||||
|
||||
const results = await fetchFacebookItems("chair", 1, "toronto", 1, {
|
||||
const results = await fetchFacebookItems("chair", 1, "toronto", 25, {
|
||||
hideUnstableResults: true,
|
||||
});
|
||||
|
||||
expect(results).toEqual({
|
||||
results: [expect.objectContaining({ title: "Stable Chair Listing" })],
|
||||
results: [
|
||||
expect.objectContaining({ title: "Stable Chair Listing" }),
|
||||
expect.objectContaining({ title: "Another Stable Chair" }),
|
||||
],
|
||||
unstableResults: [
|
||||
expect.objectContaining({ title: "Suspiciously Cheap Chair" }),
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
// Serves three listings (prices 100, 50, 110) while requesting MAX_ITEMS = 2
// with unstable filtering on. The expected buckets contain only the first two
// listings, so the third must not be returned in either bucket.
test("unstable mode keeps MAX_ITEMS as the classification boundary", async () => {
|
||||
// Fixture page: a marker script followed by a script holding the JSON payload.
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify({
|
||||
payload: {
|
||||
resultGroups: [
|
||||
{
|
||||
edges: [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "1",
|
||||
marketplace_listing_title: "Boundary Stable Chair",
|
||||
listing_price: {
|
||||
amount: "100.00",
|
||||
formatted_amount: "CA$100",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "2",
|
||||
marketplace_listing_title: "Boundary Cheap Chair",
|
||||
listing_price: {
|
||||
amount: "50.00",
|
||||
formatted_amount: "CA$50",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "3",
|
||||
marketplace_listing_title: "Past Boundary Chair",
|
||||
listing_price: {
|
||||
amount: "110.00",
|
||||
formatted_amount: "CA$110",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
})}</script></body></html>`;
|
||||
|
||||
// Replace the global fetch with a mock that always resolves to the fixture HTML.
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () => Promise.resolve(mockSearchHtml),
|
||||
url: "https://www.facebook.com/marketplace/toronto/search?query=chair",
|
||||
headers: {
|
||||
get: () => null,
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
// Fourth argument is MAX_ITEMS = 2; unstable filtering is switched on.
const results = await fetchFacebookItems("chair", 1, "toronto", 2, {
|
||||
hideUnstableResults: true,
|
||||
});
|
||||
|
||||
// Exactly one entry per bucket; "Past Boundary Chair" is expected in neither.
expect(results).toEqual({
|
||||
results: [expect.objectContaining({ title: "Boundary Stable Chair" })],
|
||||
unstableResults: [
|
||||
expect.objectContaining({ title: "Boundary Cheap Chair" }),
|
||||
],
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("Data Extraction", () => {
|
||||
|
||||
@@ -2,6 +2,7 @@ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||
import {
|
||||
buildSearchUrl,
|
||||
default as fetchKijijiItems,
|
||||
type DetailedListing,
|
||||
NetworkError,
|
||||
ParseError,
|
||||
RateLimitError,
|
||||
@@ -10,6 +11,19 @@ import {
|
||||
ValidationError,
|
||||
} from "../src/scrapers/kijiji";
|
||||
|
||||
// Compile-time assertion helper: instantiating it type-checks only when the
// argument is assignable to the literal type `true`.
type Assert<T extends true> = T;
|
||||
// Type-level equality test. Wraps T and U in generic function types whose
// return type is conditional on them and checks assignability in both
// directions; resolves to `true` only when both checks hold, else `false`.
type IsExact<T, U> =
|
||||
(<G>() => G extends T ? 1 : 2) extends <G>() => G extends U ? 1 : 2
|
||||
? (<G>() => G extends U ? 1 : 2) extends <G>() => G extends T ? 1 : 2
|
||||
? true
|
||||
: false
|
||||
: false;
|
||||
|
||||
// Calls the scraper with only a query; its inferred return type is what the
// assertion below inspects.
const getDefaultKijijiItems = async () => fetchKijijiItems("phone");
|
||||
// Fails to compile unless the awaited result of the query-only call is
// exactly `DetailedListing[]`.
type _KijijiDefaultReturn = Assert<
|
||||
IsExact<Awaited<ReturnType<typeof getDefaultKijijiItems>>, DetailedListing[]>
|
||||
>;
|
||||
|
||||
const originalFetch = global.fetch;
|
||||
|
||||
beforeEach(() => {
|
||||
|
||||
Reference in New Issue
Block a user