fix: tighten scraper type contracts
This commit is contained in:
@@ -82,7 +82,10 @@ function parseEbayPrice(
|
||||
cleaned.includes("C $")
|
||||
) {
|
||||
currency = "CAD";
|
||||
} else if (cleaned.toUpperCase().includes("USD")) {
|
||||
} else if (
|
||||
cleaned.toUpperCase().includes("USD") ||
|
||||
cleaned.toUpperCase().includes("US $")
|
||||
) {
|
||||
currency = "USD";
|
||||
}
|
||||
|
||||
@@ -372,20 +375,6 @@ async function loadEbayCookies(): Promise<string | undefined> {
|
||||
|
||||
// ----------------------------- Main -----------------------------
|
||||
|
||||
export default async function fetchEbayItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND?: number,
|
||||
opts?: {
|
||||
minPrice?: number;
|
||||
maxPrice?: number;
|
||||
strictMode?: boolean;
|
||||
exclusions?: string[];
|
||||
keywords?: string[];
|
||||
buyItNowOnly?: boolean;
|
||||
canadaOnly?: boolean;
|
||||
},
|
||||
unstableMode?: UnstableListingModeOptions,
|
||||
): Promise<EbayListingDetails[]>;
|
||||
export default async function fetchEbayItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND: number | undefined,
|
||||
@@ -400,6 +389,20 @@ export default async function fetchEbayItems(
|
||||
} | undefined,
|
||||
unstableMode: { hideUnstableResults: true },
|
||||
): Promise<UnstableListingBuckets<EbayListingDetails>>;
|
||||
export default async function fetchEbayItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND?: number,
|
||||
opts?: {
|
||||
minPrice?: number;
|
||||
maxPrice?: number;
|
||||
strictMode?: boolean;
|
||||
exclusions?: string[];
|
||||
keywords?: string[];
|
||||
buyItNowOnly?: boolean;
|
||||
canadaOnly?: boolean;
|
||||
},
|
||||
unstableMode?: UnstableListingModeOptions,
|
||||
): Promise<EbayListingDetails[]>;
|
||||
export default async function fetchEbayItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND = 1,
|
||||
|
||||
@@ -918,7 +918,7 @@ export function parseFacebookAds(
|
||||
continue; // No price available
|
||||
}
|
||||
|
||||
if (!Number.isFinite(cents) || cents <= 0) continue;
|
||||
if (!Number.isFinite(cents) || cents < 0) continue;
|
||||
|
||||
// Extract address from location data if available
|
||||
const cityName =
|
||||
@@ -1077,13 +1077,6 @@ export function parseFacebookItem(
|
||||
|
||||
// ----------------------------- Main -----------------------------
|
||||
|
||||
export default async function fetchFacebookItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND?: number,
|
||||
LOCATION?: string,
|
||||
MAX_ITEMS?: number,
|
||||
unstableMode?: UnstableListingModeOptions,
|
||||
): Promise<FacebookListingDetails[]>;
|
||||
export default async function fetchFacebookItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND: number | undefined,
|
||||
@@ -1091,6 +1084,13 @@ export default async function fetchFacebookItems(
|
||||
MAX_ITEMS: number | undefined,
|
||||
unstableMode: { hideUnstableResults: true },
|
||||
): Promise<UnstableListingBuckets<FacebookListingDetails>>;
|
||||
export default async function fetchFacebookItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND?: number,
|
||||
LOCATION?: string,
|
||||
MAX_ITEMS?: number,
|
||||
unstableMode?: UnstableListingModeOptions,
|
||||
): Promise<FacebookListingDetails[]>;
|
||||
export default async function fetchFacebookItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND = 1,
|
||||
|
||||
@@ -520,8 +520,7 @@ export function parseSearch(
|
||||
|
||||
const results: SearchListing[] = [];
|
||||
for (const [key, value] of Object.entries(apolloState)) {
|
||||
// Heuristic: Kijiji listing keys usually contain "Listing"
|
||||
if (!key.includes("Listing")) continue;
|
||||
if (!key.startsWith("Listing:")) continue;
|
||||
if (!isRecord(value)) continue;
|
||||
|
||||
const item = value as ApolloSearchItem;
|
||||
@@ -762,14 +761,6 @@ export async function parseDetailedListing(
|
||||
|
||||
// ----------------------------- Main -----------------------------
|
||||
|
||||
export default async function fetchKijijiItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND?: number,
|
||||
BASE_URL?: string,
|
||||
searchOptions?: SearchOptions,
|
||||
listingOptions?: ListingFetchOptions,
|
||||
unstableMode?: UnstableListingModeOptions,
|
||||
): Promise<DetailedListing[]>;
|
||||
export default async function fetchKijijiItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND: number | undefined,
|
||||
@@ -778,6 +769,14 @@ export default async function fetchKijijiItems(
|
||||
listingOptions: ListingFetchOptions | undefined,
|
||||
unstableMode: { hideUnstableResults: true },
|
||||
): Promise<UnstableListingBuckets<DetailedListing>>;
|
||||
export default async function fetchKijijiItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND?: number,
|
||||
BASE_URL?: string,
|
||||
searchOptions?: SearchOptions,
|
||||
listingOptions?: ListingFetchOptions,
|
||||
unstableMode?: UnstableListingModeOptions,
|
||||
): Promise<DetailedListing[]>;
|
||||
export default async function fetchKijijiItems(
|
||||
SEARCH_QUERY: string,
|
||||
REQUESTS_PER_SECOND = 1,
|
||||
@@ -811,15 +810,18 @@ export default async function fetchKijijiItems(
|
||||
: undefined;
|
||||
|
||||
// Set defaults for configuration
|
||||
const finalSearchOptions: Required<SearchOptions> = {
|
||||
const finalSearchOptions: Omit<Required<SearchOptions>, "priceMin" | "priceMax"> & {
|
||||
priceMin?: number;
|
||||
priceMax?: number;
|
||||
} = {
|
||||
location: searchOptions.location ?? 1700272, // Default to GTA
|
||||
category: searchOptions.category ?? 0, // Default to all categories
|
||||
keywords: searchOptions.keywords ?? SEARCH_QUERY,
|
||||
sortBy: searchOptions.sortBy ?? "relevancy",
|
||||
sortOrder: searchOptions.sortOrder ?? "desc",
|
||||
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
|
||||
priceMin: searchOptions.priceMin as number,
|
||||
priceMax: searchOptions.priceMax as number,
|
||||
priceMin: searchOptions.priceMin,
|
||||
priceMax: searchOptions.priceMax,
|
||||
cookies: searchOptions.cookies ?? "",
|
||||
};
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||
import type { EbayListingDetails } from "../src/scrapers/ebay";
|
||||
import fetchEbayItems from "../src/scrapers/ebay";
|
||||
import type { UnstableListingBuckets } from "../src/types/common";
|
||||
|
||||
type Assert<T extends true> = T;
|
||||
type IsExact<T, U> =
|
||||
@@ -11,9 +12,18 @@ type IsExact<T, U> =
|
||||
: false;
|
||||
|
||||
const getDefaultEbayItems = async () => fetchEbayItems("laptop");
|
||||
const getUnstableEbayItems = async (): Promise<
|
||||
UnstableListingBuckets<EbayListingDetails>
|
||||
> => fetchEbayItems("laptop", 1000, {}, { hideUnstableResults: true });
|
||||
type _EbayDefaultReturn = Assert<
|
||||
IsExact<Awaited<ReturnType<typeof getDefaultEbayItems>>, EbayListingDetails[]>
|
||||
>;
|
||||
type _EbayUnstableReturn = Assert<
|
||||
IsExact<
|
||||
Awaited<ReturnType<typeof getUnstableEbayItems>>,
|
||||
UnstableListingBuckets<EbayListingDetails>
|
||||
>
|
||||
>;
|
||||
|
||||
const originalFetch = global.fetch;
|
||||
const originalWarn = console.warn;
|
||||
@@ -199,6 +209,32 @@ describe("eBay Scraper Cookie Handling", () => {
|
||||
]);
|
||||
});
|
||||
|
||||
test("treats US dollar prices as USD", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
ok: true,
|
||||
text: () =>
|
||||
Promise.resolve(`
|
||||
<html><body>
|
||||
<li class="s-item">
|
||||
<a href="/itm/123"></a>
|
||||
<h3>Stable Laptop Bundle</h3>
|
||||
<span class="s-item__price">US $123.45</span>
|
||||
</li>
|
||||
</body></html>
|
||||
`),
|
||||
}),
|
||||
) as typeof fetch;
|
||||
|
||||
const results = await fetchEbayItems("laptop", 1000);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
listingPrice: expect.objectContaining({ currency: "USD", cents: 12345 }),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("prefers the discounted Canadian-formatted price", async () => {
|
||||
global.fetch = mock(() =>
|
||||
Promise.resolve({
|
||||
|
||||
@@ -12,6 +12,7 @@ import {
|
||||
parseFacebookCookieString,
|
||||
parseFacebookItem,
|
||||
} from "../src/scrapers/facebook";
|
||||
import type { UnstableListingBuckets } from "../src/types/common";
|
||||
import { formatCookiesForHeader } from "../src/utils/cookies";
|
||||
import { formatCentsToCurrency } from "../src/utils/format";
|
||||
|
||||
@@ -24,9 +25,18 @@ type IsExact<T, U> =
|
||||
: false;
|
||||
|
||||
const getDefaultFacebookItems = async () => fetchFacebookItems("chair");
|
||||
const getUnstableFacebookItems = async (): Promise<
|
||||
UnstableListingBuckets<FacebookListingDetails>
|
||||
> => fetchFacebookItems("chair", 1, "toronto", 25, { hideUnstableResults: true });
|
||||
type _FacebookDefaultReturn = Assert<
|
||||
IsExact<Awaited<ReturnType<typeof getDefaultFacebookItems>>, FacebookListingDetails[]>
|
||||
>;
|
||||
type _FacebookUnstableReturn = Assert<
|
||||
IsExact<
|
||||
Awaited<ReturnType<typeof getUnstableFacebookItems>>,
|
||||
UnstableListingBuckets<FacebookListingDetails>
|
||||
>
|
||||
>;
|
||||
|
||||
// Mock fetch globally
|
||||
const originalFetch = global.fetch;
|
||||
@@ -1606,6 +1616,37 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
test("keeps valid free search listings", () => {
|
||||
const ads = [
|
||||
{
|
||||
node: {
|
||||
listing: {
|
||||
id: "free-item",
|
||||
marketplace_listing_title: "Free Chair",
|
||||
listing_price: {
|
||||
amount: "0.00",
|
||||
formatted_amount: "FREE",
|
||||
currency: "CAD",
|
||||
},
|
||||
is_live: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const results = parseFacebookAds(ads);
|
||||
|
||||
expect(results).toEqual([
|
||||
expect.objectContaining({
|
||||
title: "Free Chair",
|
||||
listingPrice: expect.objectContaining({
|
||||
cents: 0,
|
||||
amountFormatted: "FREE",
|
||||
}),
|
||||
}),
|
||||
]);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ import {
|
||||
default as fetchKijijiItems,
|
||||
type DetailedListing,
|
||||
NetworkError,
|
||||
parseSearch,
|
||||
parseDetailedListing,
|
||||
ParseError,
|
||||
RateLimitError,
|
||||
@@ -11,6 +12,7 @@ import {
|
||||
resolveLocationId,
|
||||
ValidationError,
|
||||
} from "../src/scrapers/kijiji";
|
||||
import type { UnstableListingBuckets } from "../src/types/common";
|
||||
|
||||
type Assert<T extends true> = T;
|
||||
type IsExact<T, U> =
|
||||
@@ -21,9 +23,26 @@ type IsExact<T, U> =
|
||||
: false;
|
||||
|
||||
const getDefaultKijijiItems = async () => fetchKijijiItems("phone");
|
||||
const getUnstableKijijiItems = async (): Promise<
|
||||
UnstableListingBuckets<DetailedListing>
|
||||
> =>
|
||||
fetchKijijiItems(
|
||||
"phone",
|
||||
1000,
|
||||
"https://www.kijiji.ca",
|
||||
{},
|
||||
{},
|
||||
{ hideUnstableResults: true },
|
||||
);
|
||||
type _KijijiDefaultReturn = Assert<
|
||||
IsExact<Awaited<ReturnType<typeof getDefaultKijijiItems>>, DetailedListing[]>
|
||||
>;
|
||||
type _KijijiUnstableReturn = Assert<
|
||||
IsExact<
|
||||
Awaited<ReturnType<typeof getUnstableKijijiItems>>,
|
||||
UnstableListingBuckets<DetailedListing>
|
||||
>
|
||||
>;
|
||||
|
||||
const originalFetch = global.fetch;
|
||||
|
||||
@@ -667,3 +686,37 @@ describe("fetchKijijiItems", () => {
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("parseSearch", () => {
|
||||
test("ignores SearchListingCard noise keys", () => {
|
||||
const html = `
|
||||
<html>
|
||||
<script id="__NEXT_DATA__" type="application/json">
|
||||
${JSON.stringify({
|
||||
props: {
|
||||
pageProps: {
|
||||
__APOLLO_STATE__: {
|
||||
"SearchListingCard:1": {
|
||||
url: "/v-card-noise/k0l0",
|
||||
title: "Card Noise",
|
||||
},
|
||||
"Listing:1": {
|
||||
url: "/v-real-result/k0l0",
|
||||
title: "Real Result",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})}
|
||||
</script>
|
||||
</html>
|
||||
`;
|
||||
|
||||
expect(parseSearch(html, "https://www.kijiji.ca")).toEqual([
|
||||
{
|
||||
listingLink: "https://www.kijiji.ca/v-real-result/k0l0",
|
||||
name: "Real Result",
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user