fix: preserve default scraper result contracts
This commit is contained in:
@@ -1,5 +1,8 @@
|
|||||||
import { parseHTML } from "linkedom";
|
import { parseHTML } from "linkedom";
|
||||||
import type { UnstableListingModeOptions } from "../types/common";
|
import type {
|
||||||
|
UnstableListingBuckets,
|
||||||
|
UnstableListingModeOptions,
|
||||||
|
} from "../types/common";
|
||||||
import { classifyUnstableListings } from "../utils/unstable";
|
import { classifyUnstableListings } from "../utils/unstable";
|
||||||
import {
|
import {
|
||||||
type CookieConfig,
|
type CookieConfig,
|
||||||
@@ -352,6 +355,34 @@ async function loadEbayCookies(): Promise<string | undefined> {
|
|||||||
|
|
||||||
// ----------------------------- Main -----------------------------
|
// ----------------------------- Main -----------------------------
|
||||||
|
|
||||||
|
export default async function fetchEbayItems(
|
||||||
|
SEARCH_QUERY: string,
|
||||||
|
REQUESTS_PER_SECOND?: number,
|
||||||
|
opts?: {
|
||||||
|
minPrice?: number;
|
||||||
|
maxPrice?: number;
|
||||||
|
strictMode?: boolean;
|
||||||
|
exclusions?: string[];
|
||||||
|
keywords?: string[];
|
||||||
|
buyItNowOnly?: boolean;
|
||||||
|
canadaOnly?: boolean;
|
||||||
|
},
|
||||||
|
unstableMode?: UnstableListingModeOptions,
|
||||||
|
): Promise<EbayListingDetails[]>;
|
||||||
|
export default async function fetchEbayItems(
|
||||||
|
SEARCH_QUERY: string,
|
||||||
|
REQUESTS_PER_SECOND: number | undefined,
|
||||||
|
opts: {
|
||||||
|
minPrice?: number;
|
||||||
|
maxPrice?: number;
|
||||||
|
strictMode?: boolean;
|
||||||
|
exclusions?: string[];
|
||||||
|
keywords?: string[];
|
||||||
|
buyItNowOnly?: boolean;
|
||||||
|
canadaOnly?: boolean;
|
||||||
|
} | undefined,
|
||||||
|
unstableMode: { hideUnstableResults: true },
|
||||||
|
): Promise<UnstableListingBuckets<EbayListingDetails>>;
|
||||||
export default async function fetchEbayItems(
|
export default async function fetchEbayItems(
|
||||||
SEARCH_QUERY: string,
|
SEARCH_QUERY: string,
|
||||||
REQUESTS_PER_SECOND = 1,
|
REQUESTS_PER_SECOND = 1,
|
||||||
@@ -366,7 +397,9 @@ export default async function fetchEbayItems(
|
|||||||
} = {},
|
} = {},
|
||||||
unstableMode: UnstableListingModeOptions = {},
|
unstableMode: UnstableListingModeOptions = {},
|
||||||
) {
|
) {
|
||||||
const finalizeResults = (listings: EbayListingDetails[]) => {
|
const finalizeResults = (
|
||||||
|
listings: EbayListingDetails[],
|
||||||
|
): EbayListingDetails[] | UnstableListingBuckets<EbayListingDetails> => {
|
||||||
if (!unstableMode.hideUnstableResults) {
|
if (!unstableMode.hideUnstableResults) {
|
||||||
return listings;
|
return listings;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,10 @@
|
|||||||
import cliProgress from "cli-progress";
|
import cliProgress from "cli-progress";
|
||||||
import { parseHTML } from "linkedom";
|
import { parseHTML } from "linkedom";
|
||||||
import type { HTMLString, UnstableListingModeOptions } from "../types/common";
|
import type {
|
||||||
|
HTMLString,
|
||||||
|
UnstableListingBuckets,
|
||||||
|
UnstableListingModeOptions,
|
||||||
|
} from "../types/common";
|
||||||
import { classifyUnstableListings } from "../utils/unstable";
|
import { classifyUnstableListings } from "../utils/unstable";
|
||||||
import {
|
import {
|
||||||
type Cookie,
|
type Cookie,
|
||||||
@@ -1061,6 +1065,20 @@ export function parseFacebookItem(
|
|||||||
|
|
||||||
// ----------------------------- Main -----------------------------
|
// ----------------------------- Main -----------------------------
|
||||||
|
|
||||||
|
export default async function fetchFacebookItems(
|
||||||
|
SEARCH_QUERY: string,
|
||||||
|
REQUESTS_PER_SECOND?: number,
|
||||||
|
LOCATION?: string,
|
||||||
|
MAX_ITEMS?: number,
|
||||||
|
unstableMode?: UnstableListingModeOptions,
|
||||||
|
): Promise<FacebookListingDetails[]>;
|
||||||
|
export default async function fetchFacebookItems(
|
||||||
|
SEARCH_QUERY: string,
|
||||||
|
REQUESTS_PER_SECOND: number | undefined,
|
||||||
|
LOCATION: string | undefined,
|
||||||
|
MAX_ITEMS: number | undefined,
|
||||||
|
unstableMode: { hideUnstableResults: true },
|
||||||
|
): Promise<UnstableListingBuckets<FacebookListingDetails>>;
|
||||||
export default async function fetchFacebookItems(
|
export default async function fetchFacebookItems(
|
||||||
SEARCH_QUERY: string,
|
SEARCH_QUERY: string,
|
||||||
REQUESTS_PER_SECOND = 1,
|
REQUESTS_PER_SECOND = 1,
|
||||||
@@ -1068,16 +1086,14 @@ export default async function fetchFacebookItems(
|
|||||||
MAX_ITEMS = 25,
|
MAX_ITEMS = 25,
|
||||||
unstableMode: UnstableListingModeOptions = {},
|
unstableMode: UnstableListingModeOptions = {},
|
||||||
) {
|
) {
|
||||||
const finalizeResults = (listings: FacebookListingDetails[]) => {
|
const finalizeResults = (
|
||||||
|
listings: FacebookListingDetails[],
|
||||||
|
): FacebookListingDetails[] | UnstableListingBuckets<FacebookListingDetails> => {
|
||||||
if (!unstableMode.hideUnstableResults) {
|
if (!unstableMode.hideUnstableResults) {
|
||||||
return listings.slice(0, MAX_ITEMS);
|
return listings.slice(0, MAX_ITEMS);
|
||||||
}
|
}
|
||||||
|
|
||||||
const classified = classifyUnstableListings(listings);
|
return classifyUnstableListings(listings.slice(0, MAX_ITEMS));
|
||||||
return {
|
|
||||||
results: classified.results.slice(0, MAX_ITEMS),
|
|
||||||
unstableResults: classified.unstableResults,
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const cookies = await ensureFacebookCookies();
|
const cookies = await ensureFacebookCookies();
|
||||||
|
|||||||
@@ -1,7 +1,11 @@
|
|||||||
import cliProgress from "cli-progress";
|
import cliProgress from "cli-progress";
|
||||||
import { parseHTML } from "linkedom";
|
import { parseHTML } from "linkedom";
|
||||||
import unidecode from "unidecode";
|
import unidecode from "unidecode";
|
||||||
import type { HTMLString, UnstableListingModeOptions } from "../types/common";
|
import type {
|
||||||
|
HTMLString,
|
||||||
|
UnstableListingBuckets,
|
||||||
|
UnstableListingModeOptions,
|
||||||
|
} from "../types/common";
|
||||||
import { classifyUnstableListings } from "../utils/unstable";
|
import { classifyUnstableListings } from "../utils/unstable";
|
||||||
import {
|
import {
|
||||||
type CookieConfig,
|
type CookieConfig,
|
||||||
@@ -697,6 +701,22 @@ export async function parseDetailedListing(
|
|||||||
|
|
||||||
// ----------------------------- Main -----------------------------
|
// ----------------------------- Main -----------------------------
|
||||||
|
|
||||||
|
export default async function fetchKijijiItems(
|
||||||
|
SEARCH_QUERY: string,
|
||||||
|
REQUESTS_PER_SECOND?: number,
|
||||||
|
BASE_URL?: string,
|
||||||
|
searchOptions?: SearchOptions,
|
||||||
|
listingOptions?: ListingFetchOptions,
|
||||||
|
unstableMode?: UnstableListingModeOptions,
|
||||||
|
): Promise<DetailedListing[]>;
|
||||||
|
export default async function fetchKijijiItems(
|
||||||
|
SEARCH_QUERY: string,
|
||||||
|
REQUESTS_PER_SECOND: number | undefined,
|
||||||
|
BASE_URL: string | undefined,
|
||||||
|
searchOptions: SearchOptions | undefined,
|
||||||
|
listingOptions: ListingFetchOptions | undefined,
|
||||||
|
unstableMode: { hideUnstableResults: true },
|
||||||
|
): Promise<UnstableListingBuckets<DetailedListing>>;
|
||||||
export default async function fetchKijijiItems(
|
export default async function fetchKijijiItems(
|
||||||
SEARCH_QUERY: string,
|
SEARCH_QUERY: string,
|
||||||
REQUESTS_PER_SECOND = 1,
|
REQUESTS_PER_SECOND = 1,
|
||||||
@@ -705,7 +725,9 @@ export default async function fetchKijijiItems(
|
|||||||
listingOptions: ListingFetchOptions = {},
|
listingOptions: ListingFetchOptions = {},
|
||||||
unstableMode: UnstableListingModeOptions = {},
|
unstableMode: UnstableListingModeOptions = {},
|
||||||
) {
|
) {
|
||||||
const finalizeResults = (listings: DetailedListing[]) => {
|
const finalizeResults = (
|
||||||
|
listings: DetailedListing[],
|
||||||
|
): DetailedListing[] | UnstableListingBuckets<DetailedListing> => {
|
||||||
if (!unstableMode.hideUnstableResults) {
|
if (!unstableMode.hideUnstableResults) {
|
||||||
return listings;
|
return listings;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,20 @@
|
|||||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||||
|
import type { EbayListingDetails } from "../src/scrapers/ebay";
|
||||||
import fetchEbayItems from "../src/scrapers/ebay";
|
import fetchEbayItems from "../src/scrapers/ebay";
|
||||||
|
|
||||||
|
type Assert<T extends true> = T;
|
||||||
|
type IsExact<T, U> =
|
||||||
|
(<G>() => G extends T ? 1 : 2) extends <G>() => G extends U ? 1 : 2
|
||||||
|
? (<G>() => G extends U ? 1 : 2) extends <G>() => G extends T ? 1 : 2
|
||||||
|
? true
|
||||||
|
: false
|
||||||
|
: false;
|
||||||
|
|
||||||
|
const getDefaultEbayItems = async () => fetchEbayItems("laptop");
|
||||||
|
type _EbayDefaultReturn = Assert<
|
||||||
|
IsExact<Awaited<ReturnType<typeof getDefaultEbayItems>>, EbayListingDetails[]>
|
||||||
|
>;
|
||||||
|
|
||||||
const originalFetch = global.fetch;
|
const originalFetch = global.fetch;
|
||||||
const originalWarn = console.warn;
|
const originalWarn = console.warn;
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||||
import {
|
import {
|
||||||
classifyFacebookResponse,
|
classifyFacebookResponse,
|
||||||
|
type FacebookListingDetails,
|
||||||
ensureFacebookCookies,
|
ensureFacebookCookies,
|
||||||
extractFacebookBootstrapCandidates,
|
extractFacebookBootstrapCandidates,
|
||||||
extractFacebookItemData,
|
extractFacebookItemData,
|
||||||
@@ -14,6 +15,19 @@ import {
|
|||||||
import { formatCookiesForHeader } from "../src/utils/cookies";
|
import { formatCookiesForHeader } from "../src/utils/cookies";
|
||||||
import { formatCentsToCurrency } from "../src/utils/format";
|
import { formatCentsToCurrency } from "../src/utils/format";
|
||||||
|
|
||||||
|
type Assert<T extends true> = T;
|
||||||
|
type IsExact<T, U> =
|
||||||
|
(<G>() => G extends T ? 1 : 2) extends <G>() => G extends U ? 1 : 2
|
||||||
|
? (<G>() => G extends U ? 1 : 2) extends <G>() => G extends T ? 1 : 2
|
||||||
|
? true
|
||||||
|
: false
|
||||||
|
: false;
|
||||||
|
|
||||||
|
const getDefaultFacebookItems = async () => fetchFacebookItems("chair");
|
||||||
|
type _FacebookDefaultReturn = Assert<
|
||||||
|
IsExact<Awaited<ReturnType<typeof getDefaultFacebookItems>>, FacebookListingDetails[]>
|
||||||
|
>;
|
||||||
|
|
||||||
// Mock fetch globally
|
// Mock fetch globally
|
||||||
const originalFetch = global.fetch;
|
const originalFetch = global.fetch;
|
||||||
|
|
||||||
@@ -492,17 +506,97 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
|||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
|
||||||
const results = await fetchFacebookItems("chair", 1, "toronto", 1, {
|
const results = await fetchFacebookItems("chair", 1, "toronto", 25, {
|
||||||
hideUnstableResults: true,
|
hideUnstableResults: true,
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(results).toEqual({
|
expect(results).toEqual({
|
||||||
results: [expect.objectContaining({ title: "Stable Chair Listing" })],
|
results: [
|
||||||
|
expect.objectContaining({ title: "Stable Chair Listing" }),
|
||||||
|
expect.objectContaining({ title: "Another Stable Chair" }),
|
||||||
|
],
|
||||||
unstableResults: [
|
unstableResults: [
|
||||||
expect.objectContaining({ title: "Suspiciously Cheap Chair" }),
|
expect.objectContaining({ title: "Suspiciously Cheap Chair" }),
|
||||||
],
|
],
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("unstable mode keeps MAX_ITEMS as the classification boundary", async () => {
|
||||||
|
const mockSearchHtml = `<html><body><script>"XCometMarketplaceSearchController"</script><script>${JSON.stringify({
|
||||||
|
payload: {
|
||||||
|
resultGroups: [
|
||||||
|
{
|
||||||
|
edges: [
|
||||||
|
{
|
||||||
|
node: {
|
||||||
|
listing: {
|
||||||
|
id: "1",
|
||||||
|
marketplace_listing_title: "Boundary Stable Chair",
|
||||||
|
listing_price: {
|
||||||
|
amount: "100.00",
|
||||||
|
formatted_amount: "CA$100",
|
||||||
|
currency: "CAD",
|
||||||
|
},
|
||||||
|
is_live: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
node: {
|
||||||
|
listing: {
|
||||||
|
id: "2",
|
||||||
|
marketplace_listing_title: "Boundary Cheap Chair",
|
||||||
|
listing_price: {
|
||||||
|
amount: "50.00",
|
||||||
|
formatted_amount: "CA$50",
|
||||||
|
currency: "CAD",
|
||||||
|
},
|
||||||
|
is_live: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
node: {
|
||||||
|
listing: {
|
||||||
|
id: "3",
|
||||||
|
marketplace_listing_title: "Past Boundary Chair",
|
||||||
|
listing_price: {
|
||||||
|
amount: "110.00",
|
||||||
|
formatted_amount: "CA$110",
|
||||||
|
currency: "CAD",
|
||||||
|
},
|
||||||
|
is_live: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
})}</script></body></html>`;
|
||||||
|
|
||||||
|
global.fetch = mock(() =>
|
||||||
|
Promise.resolve({
|
||||||
|
ok: true,
|
||||||
|
text: () => Promise.resolve(mockSearchHtml),
|
||||||
|
url: "https://www.facebook.com/marketplace/toronto/search?query=chair",
|
||||||
|
headers: {
|
||||||
|
get: () => null,
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
const results = await fetchFacebookItems("chair", 1, "toronto", 2, {
|
||||||
|
hideUnstableResults: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(results).toEqual({
|
||||||
|
results: [expect.objectContaining({ title: "Boundary Stable Chair" })],
|
||||||
|
unstableResults: [
|
||||||
|
expect.objectContaining({ title: "Boundary Cheap Chair" }),
|
||||||
|
],
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("Data Extraction", () => {
|
describe("Data Extraction", () => {
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
|||||||
import {
|
import {
|
||||||
buildSearchUrl,
|
buildSearchUrl,
|
||||||
default as fetchKijijiItems,
|
default as fetchKijijiItems,
|
||||||
|
type DetailedListing,
|
||||||
NetworkError,
|
NetworkError,
|
||||||
ParseError,
|
ParseError,
|
||||||
RateLimitError,
|
RateLimitError,
|
||||||
@@ -10,6 +11,19 @@ import {
|
|||||||
ValidationError,
|
ValidationError,
|
||||||
} from "../src/scrapers/kijiji";
|
} from "../src/scrapers/kijiji";
|
||||||
|
|
||||||
|
type Assert<T extends true> = T;
|
||||||
|
type IsExact<T, U> =
|
||||||
|
(<G>() => G extends T ? 1 : 2) extends <G>() => G extends U ? 1 : 2
|
||||||
|
? (<G>() => G extends U ? 1 : 2) extends <G>() => G extends T ? 1 : 2
|
||||||
|
? true
|
||||||
|
: false
|
||||||
|
: false;
|
||||||
|
|
||||||
|
const getDefaultKijijiItems = async () => fetchKijijiItems("phone");
|
||||||
|
type _KijijiDefaultReturn = Assert<
|
||||||
|
IsExact<Awaited<ReturnType<typeof getDefaultKijijiItems>>, DetailedListing[]>
|
||||||
|
>;
|
||||||
|
|
||||||
const originalFetch = global.fetch;
|
const originalFetch = global.fetch;
|
||||||
|
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
|
|||||||
Reference in New Issue
Block a user