fix: tighten scraper type contracts

This commit is contained in:
2026-04-23 05:28:46 -04:00
parent 08d59ab497
commit 13c0fec305
6 changed files with 171 additions and 36 deletions

View File

@@ -82,7 +82,10 @@ function parseEbayPrice(
cleaned.includes("C $") cleaned.includes("C $")
) { ) {
currency = "CAD"; currency = "CAD";
} else if (cleaned.toUpperCase().includes("USD")) { } else if (
cleaned.toUpperCase().includes("USD") ||
cleaned.toUpperCase().includes("US $")
) {
currency = "USD"; currency = "USD";
} }
@@ -372,20 +375,6 @@ async function loadEbayCookies(): Promise<string | undefined> {
// ----------------------------- Main ----------------------------- // ----------------------------- Main -----------------------------
export default async function fetchEbayItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND?: number,
opts?: {
minPrice?: number;
maxPrice?: number;
strictMode?: boolean;
exclusions?: string[];
keywords?: string[];
buyItNowOnly?: boolean;
canadaOnly?: boolean;
},
unstableMode?: UnstableListingModeOptions,
): Promise<EbayListingDetails[]>;
export default async function fetchEbayItems( export default async function fetchEbayItems(
SEARCH_QUERY: string, SEARCH_QUERY: string,
REQUESTS_PER_SECOND: number | undefined, REQUESTS_PER_SECOND: number | undefined,
@@ -400,6 +389,20 @@ export default async function fetchEbayItems(
} | undefined, } | undefined,
unstableMode: { hideUnstableResults: true }, unstableMode: { hideUnstableResults: true },
): Promise<UnstableListingBuckets<EbayListingDetails>>; ): Promise<UnstableListingBuckets<EbayListingDetails>>;
export default async function fetchEbayItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND?: number,
opts?: {
minPrice?: number;
maxPrice?: number;
strictMode?: boolean;
exclusions?: string[];
keywords?: string[];
buyItNowOnly?: boolean;
canadaOnly?: boolean;
},
unstableMode?: UnstableListingModeOptions,
): Promise<EbayListingDetails[]>;
export default async function fetchEbayItems( export default async function fetchEbayItems(
SEARCH_QUERY: string, SEARCH_QUERY: string,
REQUESTS_PER_SECOND = 1, REQUESTS_PER_SECOND = 1,

View File

@@ -918,7 +918,7 @@ export function parseFacebookAds(
continue; // No price available continue; // No price available
} }
if (!Number.isFinite(cents) || cents <= 0) continue; if (!Number.isFinite(cents) || cents < 0) continue;
// Extract address from location data if available // Extract address from location data if available
const cityName = const cityName =
@@ -1077,13 +1077,6 @@ export function parseFacebookItem(
// ----------------------------- Main ----------------------------- // ----------------------------- Main -----------------------------
export default async function fetchFacebookItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND?: number,
LOCATION?: string,
MAX_ITEMS?: number,
unstableMode?: UnstableListingModeOptions,
): Promise<FacebookListingDetails[]>;
export default async function fetchFacebookItems( export default async function fetchFacebookItems(
SEARCH_QUERY: string, SEARCH_QUERY: string,
REQUESTS_PER_SECOND: number | undefined, REQUESTS_PER_SECOND: number | undefined,
@@ -1091,6 +1084,13 @@ export default async function fetchFacebookItems(
MAX_ITEMS: number | undefined, MAX_ITEMS: number | undefined,
unstableMode: { hideUnstableResults: true }, unstableMode: { hideUnstableResults: true },
): Promise<UnstableListingBuckets<FacebookListingDetails>>; ): Promise<UnstableListingBuckets<FacebookListingDetails>>;
export default async function fetchFacebookItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND?: number,
LOCATION?: string,
MAX_ITEMS?: number,
unstableMode?: UnstableListingModeOptions,
): Promise<FacebookListingDetails[]>;
export default async function fetchFacebookItems( export default async function fetchFacebookItems(
SEARCH_QUERY: string, SEARCH_QUERY: string,
REQUESTS_PER_SECOND = 1, REQUESTS_PER_SECOND = 1,

View File

@@ -520,8 +520,7 @@ export function parseSearch(
const results: SearchListing[] = []; const results: SearchListing[] = [];
for (const [key, value] of Object.entries(apolloState)) { for (const [key, value] of Object.entries(apolloState)) {
// Heuristic: Kijiji listing keys usually contain "Listing" if (!key.startsWith("Listing:")) continue;
if (!key.includes("Listing")) continue;
if (!isRecord(value)) continue; if (!isRecord(value)) continue;
const item = value as ApolloSearchItem; const item = value as ApolloSearchItem;
@@ -762,14 +761,6 @@ export async function parseDetailedListing(
// ----------------------------- Main ----------------------------- // ----------------------------- Main -----------------------------
export default async function fetchKijijiItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND?: number,
BASE_URL?: string,
searchOptions?: SearchOptions,
listingOptions?: ListingFetchOptions,
unstableMode?: UnstableListingModeOptions,
): Promise<DetailedListing[]>;
export default async function fetchKijijiItems( export default async function fetchKijijiItems(
SEARCH_QUERY: string, SEARCH_QUERY: string,
REQUESTS_PER_SECOND: number | undefined, REQUESTS_PER_SECOND: number | undefined,
@@ -778,6 +769,14 @@ export default async function fetchKijijiItems(
listingOptions: ListingFetchOptions | undefined, listingOptions: ListingFetchOptions | undefined,
unstableMode: { hideUnstableResults: true }, unstableMode: { hideUnstableResults: true },
): Promise<UnstableListingBuckets<DetailedListing>>; ): Promise<UnstableListingBuckets<DetailedListing>>;
export default async function fetchKijijiItems(
SEARCH_QUERY: string,
REQUESTS_PER_SECOND?: number,
BASE_URL?: string,
searchOptions?: SearchOptions,
listingOptions?: ListingFetchOptions,
unstableMode?: UnstableListingModeOptions,
): Promise<DetailedListing[]>;
export default async function fetchKijijiItems( export default async function fetchKijijiItems(
SEARCH_QUERY: string, SEARCH_QUERY: string,
REQUESTS_PER_SECOND = 1, REQUESTS_PER_SECOND = 1,
@@ -811,15 +810,18 @@ export default async function fetchKijijiItems(
: undefined; : undefined;
// Set defaults for configuration // Set defaults for configuration
const finalSearchOptions: Required<SearchOptions> = { const finalSearchOptions: Omit<Required<SearchOptions>, "priceMin" | "priceMax"> & {
priceMin?: number;
priceMax?: number;
} = {
location: searchOptions.location ?? 1700272, // Default to GTA location: searchOptions.location ?? 1700272, // Default to GTA
category: searchOptions.category ?? 0, // Default to all categories category: searchOptions.category ?? 0, // Default to all categories
keywords: searchOptions.keywords ?? SEARCH_QUERY, keywords: searchOptions.keywords ?? SEARCH_QUERY,
sortBy: searchOptions.sortBy ?? "relevancy", sortBy: searchOptions.sortBy ?? "relevancy",
sortOrder: searchOptions.sortOrder ?? "desc", sortOrder: searchOptions.sortOrder ?? "desc",
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
priceMin: searchOptions.priceMin as number, priceMin: searchOptions.priceMin,
priceMax: searchOptions.priceMax as number, priceMax: searchOptions.priceMax,
cookies: searchOptions.cookies ?? "", cookies: searchOptions.cookies ?? "",
}; };

View File

@@ -1,6 +1,7 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
import type { EbayListingDetails } from "../src/scrapers/ebay"; import type { EbayListingDetails } from "../src/scrapers/ebay";
import fetchEbayItems from "../src/scrapers/ebay"; import fetchEbayItems from "../src/scrapers/ebay";
import type { UnstableListingBuckets } from "../src/types/common";
type Assert<T extends true> = T; type Assert<T extends true> = T;
type IsExact<T, U> = type IsExact<T, U> =
@@ -11,9 +12,18 @@ type IsExact<T, U> =
: false; : false;
const getDefaultEbayItems = async () => fetchEbayItems("laptop"); const getDefaultEbayItems = async () => fetchEbayItems("laptop");
const getUnstableEbayItems = async (): Promise<
UnstableListingBuckets<EbayListingDetails>
> => fetchEbayItems("laptop", 1000, {}, { hideUnstableResults: true });
type _EbayDefaultReturn = Assert< type _EbayDefaultReturn = Assert<
IsExact<Awaited<ReturnType<typeof getDefaultEbayItems>>, EbayListingDetails[]> IsExact<Awaited<ReturnType<typeof getDefaultEbayItems>>, EbayListingDetails[]>
>; >;
type _EbayUnstableReturn = Assert<
IsExact<
Awaited<ReturnType<typeof getUnstableEbayItems>>,
UnstableListingBuckets<EbayListingDetails>
>
>;
const originalFetch = global.fetch; const originalFetch = global.fetch;
const originalWarn = console.warn; const originalWarn = console.warn;
@@ -199,6 +209,32 @@ describe("eBay Scraper Cookie Handling", () => {
]); ]);
}); });
test("treats US dollar prices as USD", async () => {
global.fetch = mock(() =>
Promise.resolve({
ok: true,
text: () =>
Promise.resolve(`
<html><body>
<li class="s-item">
<a href="/itm/123"></a>
<h3>Stable Laptop Bundle</h3>
<span class="s-item__price">US $123.45</span>
</li>
</body></html>
`),
}),
) as typeof fetch;
const results = await fetchEbayItems("laptop", 1000);
expect(results).toEqual([
expect.objectContaining({
listingPrice: expect.objectContaining({ currency: "USD", cents: 12345 }),
}),
]);
});
test("prefers the discounted Canadian-formatted price", async () => { test("prefers the discounted Canadian-formatted price", async () => {
global.fetch = mock(() => global.fetch = mock(() =>
Promise.resolve({ Promise.resolve({

View File

@@ -12,6 +12,7 @@ import {
parseFacebookCookieString, parseFacebookCookieString,
parseFacebookItem, parseFacebookItem,
} from "../src/scrapers/facebook"; } from "../src/scrapers/facebook";
import type { UnstableListingBuckets } from "../src/types/common";
import { formatCookiesForHeader } from "../src/utils/cookies"; import { formatCookiesForHeader } from "../src/utils/cookies";
import { formatCentsToCurrency } from "../src/utils/format"; import { formatCentsToCurrency } from "../src/utils/format";
@@ -24,9 +25,18 @@ type IsExact<T, U> =
: false; : false;
const getDefaultFacebookItems = async () => fetchFacebookItems("chair"); const getDefaultFacebookItems = async () => fetchFacebookItems("chair");
const getUnstableFacebookItems = async (): Promise<
UnstableListingBuckets<FacebookListingDetails>
> => fetchFacebookItems("chair", 1, "toronto", 25, { hideUnstableResults: true });
type _FacebookDefaultReturn = Assert< type _FacebookDefaultReturn = Assert<
IsExact<Awaited<ReturnType<typeof getDefaultFacebookItems>>, FacebookListingDetails[]> IsExact<Awaited<ReturnType<typeof getDefaultFacebookItems>>, FacebookListingDetails[]>
>; >;
type _FacebookUnstableReturn = Assert<
IsExact<
Awaited<ReturnType<typeof getUnstableFacebookItems>>,
UnstableListingBuckets<FacebookListingDetails>
>
>;
// Mock fetch globally // Mock fetch globally
const originalFetch = global.fetch; const originalFetch = global.fetch;
@@ -1606,6 +1616,37 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
}), }),
]); ]);
}); });
test("keeps valid free search listings", () => {
const ads = [
{
node: {
listing: {
id: "free-item",
marketplace_listing_title: "Free Chair",
listing_price: {
amount: "0.00",
formatted_amount: "FREE",
currency: "CAD",
},
is_live: true,
},
},
},
];
const results = parseFacebookAds(ads);
expect(results).toEqual([
expect.objectContaining({
title: "Free Chair",
listingPrice: expect.objectContaining({
cents: 0,
amountFormatted: "FREE",
}),
}),
]);
});
}); });
}); });

View File

@@ -4,6 +4,7 @@ import {
default as fetchKijijiItems, default as fetchKijijiItems,
type DetailedListing, type DetailedListing,
NetworkError, NetworkError,
parseSearch,
parseDetailedListing, parseDetailedListing,
ParseError, ParseError,
RateLimitError, RateLimitError,
@@ -11,6 +12,7 @@ import {
resolveLocationId, resolveLocationId,
ValidationError, ValidationError,
} from "../src/scrapers/kijiji"; } from "../src/scrapers/kijiji";
import type { UnstableListingBuckets } from "../src/types/common";
type Assert<T extends true> = T; type Assert<T extends true> = T;
type IsExact<T, U> = type IsExact<T, U> =
@@ -21,9 +23,26 @@ type IsExact<T, U> =
: false; : false;
const getDefaultKijijiItems = async () => fetchKijijiItems("phone"); const getDefaultKijijiItems = async () => fetchKijijiItems("phone");
const getUnstableKijijiItems = async (): Promise<
UnstableListingBuckets<DetailedListing>
> =>
fetchKijijiItems(
"phone",
1000,
"https://www.kijiji.ca",
{},
{},
{ hideUnstableResults: true },
);
type _KijijiDefaultReturn = Assert< type _KijijiDefaultReturn = Assert<
IsExact<Awaited<ReturnType<typeof getDefaultKijijiItems>>, DetailedListing[]> IsExact<Awaited<ReturnType<typeof getDefaultKijijiItems>>, DetailedListing[]>
>; >;
type _KijijiUnstableReturn = Assert<
IsExact<
Awaited<ReturnType<typeof getUnstableKijijiItems>>,
UnstableListingBuckets<DetailedListing>
>
>;
const originalFetch = global.fetch; const originalFetch = global.fetch;
@@ -667,3 +686,37 @@ describe("fetchKijijiItems", () => {
}); });
}); });
}); });
describe("parseSearch", () => {
test("ignores SearchListingCard noise keys", () => {
const html = `
<html>
<script id="__NEXT_DATA__" type="application/json">
${JSON.stringify({
props: {
pageProps: {
__APOLLO_STATE__: {
"SearchListingCard:1": {
url: "/v-card-noise/k0l0",
title: "Card Noise",
},
"Listing:1": {
url: "/v-real-result/k0l0",
title: "Real Result",
},
},
},
},
})}
</script>
</html>
`;
expect(parseSearch(html, "https://www.kijiji.ca")).toEqual([
{
listingLink: "https://www.kijiji.ca/v-real-result/k0l0",
name: "Real Result",
},
]);
});
});