fix: tighten scraper type contracts
This commit is contained in:
@@ -82,7 +82,10 @@ function parseEbayPrice(
|
|||||||
cleaned.includes("C $")
|
cleaned.includes("C $")
|
||||||
) {
|
) {
|
||||||
currency = "CAD";
|
currency = "CAD";
|
||||||
} else if (cleaned.toUpperCase().includes("USD")) {
|
} else if (
|
||||||
|
cleaned.toUpperCase().includes("USD") ||
|
||||||
|
cleaned.toUpperCase().includes("US $")
|
||||||
|
) {
|
||||||
currency = "USD";
|
currency = "USD";
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -372,20 +375,6 @@ async function loadEbayCookies(): Promise<string | undefined> {
|
|||||||
|
|
||||||
// ----------------------------- Main -----------------------------
|
// ----------------------------- Main -----------------------------
|
||||||
|
|
||||||
export default async function fetchEbayItems(
|
|
||||||
SEARCH_QUERY: string,
|
|
||||||
REQUESTS_PER_SECOND?: number,
|
|
||||||
opts?: {
|
|
||||||
minPrice?: number;
|
|
||||||
maxPrice?: number;
|
|
||||||
strictMode?: boolean;
|
|
||||||
exclusions?: string[];
|
|
||||||
keywords?: string[];
|
|
||||||
buyItNowOnly?: boolean;
|
|
||||||
canadaOnly?: boolean;
|
|
||||||
},
|
|
||||||
unstableMode?: UnstableListingModeOptions,
|
|
||||||
): Promise<EbayListingDetails[]>;
|
|
||||||
export default async function fetchEbayItems(
|
export default async function fetchEbayItems(
|
||||||
SEARCH_QUERY: string,
|
SEARCH_QUERY: string,
|
||||||
REQUESTS_PER_SECOND: number | undefined,
|
REQUESTS_PER_SECOND: number | undefined,
|
||||||
@@ -400,6 +389,20 @@ export default async function fetchEbayItems(
|
|||||||
} | undefined,
|
} | undefined,
|
||||||
unstableMode: { hideUnstableResults: true },
|
unstableMode: { hideUnstableResults: true },
|
||||||
): Promise<UnstableListingBuckets<EbayListingDetails>>;
|
): Promise<UnstableListingBuckets<EbayListingDetails>>;
|
||||||
|
export default async function fetchEbayItems(
|
||||||
|
SEARCH_QUERY: string,
|
||||||
|
REQUESTS_PER_SECOND?: number,
|
||||||
|
opts?: {
|
||||||
|
minPrice?: number;
|
||||||
|
maxPrice?: number;
|
||||||
|
strictMode?: boolean;
|
||||||
|
exclusions?: string[];
|
||||||
|
keywords?: string[];
|
||||||
|
buyItNowOnly?: boolean;
|
||||||
|
canadaOnly?: boolean;
|
||||||
|
},
|
||||||
|
unstableMode?: UnstableListingModeOptions,
|
||||||
|
): Promise<EbayListingDetails[]>;
|
||||||
export default async function fetchEbayItems(
|
export default async function fetchEbayItems(
|
||||||
SEARCH_QUERY: string,
|
SEARCH_QUERY: string,
|
||||||
REQUESTS_PER_SECOND = 1,
|
REQUESTS_PER_SECOND = 1,
|
||||||
|
|||||||
@@ -918,7 +918,7 @@ export function parseFacebookAds(
|
|||||||
continue; // No price available
|
continue; // No price available
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!Number.isFinite(cents) || cents <= 0) continue;
|
if (!Number.isFinite(cents) || cents < 0) continue;
|
||||||
|
|
||||||
// Extract address from location data if available
|
// Extract address from location data if available
|
||||||
const cityName =
|
const cityName =
|
||||||
@@ -1077,13 +1077,6 @@ export function parseFacebookItem(
|
|||||||
|
|
||||||
// ----------------------------- Main -----------------------------
|
// ----------------------------- Main -----------------------------
|
||||||
|
|
||||||
export default async function fetchFacebookItems(
|
|
||||||
SEARCH_QUERY: string,
|
|
||||||
REQUESTS_PER_SECOND?: number,
|
|
||||||
LOCATION?: string,
|
|
||||||
MAX_ITEMS?: number,
|
|
||||||
unstableMode?: UnstableListingModeOptions,
|
|
||||||
): Promise<FacebookListingDetails[]>;
|
|
||||||
export default async function fetchFacebookItems(
|
export default async function fetchFacebookItems(
|
||||||
SEARCH_QUERY: string,
|
SEARCH_QUERY: string,
|
||||||
REQUESTS_PER_SECOND: number | undefined,
|
REQUESTS_PER_SECOND: number | undefined,
|
||||||
@@ -1091,6 +1084,13 @@ export default async function fetchFacebookItems(
|
|||||||
MAX_ITEMS: number | undefined,
|
MAX_ITEMS: number | undefined,
|
||||||
unstableMode: { hideUnstableResults: true },
|
unstableMode: { hideUnstableResults: true },
|
||||||
): Promise<UnstableListingBuckets<FacebookListingDetails>>;
|
): Promise<UnstableListingBuckets<FacebookListingDetails>>;
|
||||||
|
export default async function fetchFacebookItems(
|
||||||
|
SEARCH_QUERY: string,
|
||||||
|
REQUESTS_PER_SECOND?: number,
|
||||||
|
LOCATION?: string,
|
||||||
|
MAX_ITEMS?: number,
|
||||||
|
unstableMode?: UnstableListingModeOptions,
|
||||||
|
): Promise<FacebookListingDetails[]>;
|
||||||
export default async function fetchFacebookItems(
|
export default async function fetchFacebookItems(
|
||||||
SEARCH_QUERY: string,
|
SEARCH_QUERY: string,
|
||||||
REQUESTS_PER_SECOND = 1,
|
REQUESTS_PER_SECOND = 1,
|
||||||
|
|||||||
@@ -520,8 +520,7 @@ export function parseSearch(
|
|||||||
|
|
||||||
const results: SearchListing[] = [];
|
const results: SearchListing[] = [];
|
||||||
for (const [key, value] of Object.entries(apolloState)) {
|
for (const [key, value] of Object.entries(apolloState)) {
|
||||||
// Heuristic: Kijiji listing keys usually contain "Listing"
|
if (!key.startsWith("Listing:")) continue;
|
||||||
if (!key.includes("Listing")) continue;
|
|
||||||
if (!isRecord(value)) continue;
|
if (!isRecord(value)) continue;
|
||||||
|
|
||||||
const item = value as ApolloSearchItem;
|
const item = value as ApolloSearchItem;
|
||||||
@@ -762,14 +761,6 @@ export async function parseDetailedListing(
|
|||||||
|
|
||||||
// ----------------------------- Main -----------------------------
|
// ----------------------------- Main -----------------------------
|
||||||
|
|
||||||
export default async function fetchKijijiItems(
|
|
||||||
SEARCH_QUERY: string,
|
|
||||||
REQUESTS_PER_SECOND?: number,
|
|
||||||
BASE_URL?: string,
|
|
||||||
searchOptions?: SearchOptions,
|
|
||||||
listingOptions?: ListingFetchOptions,
|
|
||||||
unstableMode?: UnstableListingModeOptions,
|
|
||||||
): Promise<DetailedListing[]>;
|
|
||||||
export default async function fetchKijijiItems(
|
export default async function fetchKijijiItems(
|
||||||
SEARCH_QUERY: string,
|
SEARCH_QUERY: string,
|
||||||
REQUESTS_PER_SECOND: number | undefined,
|
REQUESTS_PER_SECOND: number | undefined,
|
||||||
@@ -778,6 +769,14 @@ export default async function fetchKijijiItems(
|
|||||||
listingOptions: ListingFetchOptions | undefined,
|
listingOptions: ListingFetchOptions | undefined,
|
||||||
unstableMode: { hideUnstableResults: true },
|
unstableMode: { hideUnstableResults: true },
|
||||||
): Promise<UnstableListingBuckets<DetailedListing>>;
|
): Promise<UnstableListingBuckets<DetailedListing>>;
|
||||||
|
export default async function fetchKijijiItems(
|
||||||
|
SEARCH_QUERY: string,
|
||||||
|
REQUESTS_PER_SECOND?: number,
|
||||||
|
BASE_URL?: string,
|
||||||
|
searchOptions?: SearchOptions,
|
||||||
|
listingOptions?: ListingFetchOptions,
|
||||||
|
unstableMode?: UnstableListingModeOptions,
|
||||||
|
): Promise<DetailedListing[]>;
|
||||||
export default async function fetchKijijiItems(
|
export default async function fetchKijijiItems(
|
||||||
SEARCH_QUERY: string,
|
SEARCH_QUERY: string,
|
||||||
REQUESTS_PER_SECOND = 1,
|
REQUESTS_PER_SECOND = 1,
|
||||||
@@ -811,15 +810,18 @@ export default async function fetchKijijiItems(
|
|||||||
: undefined;
|
: undefined;
|
||||||
|
|
||||||
// Set defaults for configuration
|
// Set defaults for configuration
|
||||||
const finalSearchOptions: Required<SearchOptions> = {
|
const finalSearchOptions: Omit<Required<SearchOptions>, "priceMin" | "priceMax"> & {
|
||||||
|
priceMin?: number;
|
||||||
|
priceMax?: number;
|
||||||
|
} = {
|
||||||
location: searchOptions.location ?? 1700272, // Default to GTA
|
location: searchOptions.location ?? 1700272, // Default to GTA
|
||||||
category: searchOptions.category ?? 0, // Default to all categories
|
category: searchOptions.category ?? 0, // Default to all categories
|
||||||
keywords: searchOptions.keywords ?? SEARCH_QUERY,
|
keywords: searchOptions.keywords ?? SEARCH_QUERY,
|
||||||
sortBy: searchOptions.sortBy ?? "relevancy",
|
sortBy: searchOptions.sortBy ?? "relevancy",
|
||||||
sortOrder: searchOptions.sortOrder ?? "desc",
|
sortOrder: searchOptions.sortOrder ?? "desc",
|
||||||
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
|
maxPages: searchOptions.maxPages ?? 5, // Default to 5 pages
|
||||||
priceMin: searchOptions.priceMin as number,
|
priceMin: searchOptions.priceMin,
|
||||||
priceMax: searchOptions.priceMax as number,
|
priceMax: searchOptions.priceMax,
|
||||||
cookies: searchOptions.cookies ?? "",
|
cookies: searchOptions.cookies ?? "",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";
|
||||||
import type { EbayListingDetails } from "../src/scrapers/ebay";
|
import type { EbayListingDetails } from "../src/scrapers/ebay";
|
||||||
import fetchEbayItems from "../src/scrapers/ebay";
|
import fetchEbayItems from "../src/scrapers/ebay";
|
||||||
|
import type { UnstableListingBuckets } from "../src/types/common";
|
||||||
|
|
||||||
type Assert<T extends true> = T;
|
type Assert<T extends true> = T;
|
||||||
type IsExact<T, U> =
|
type IsExact<T, U> =
|
||||||
@@ -11,9 +12,18 @@ type IsExact<T, U> =
|
|||||||
: false;
|
: false;
|
||||||
|
|
||||||
const getDefaultEbayItems = async () => fetchEbayItems("laptop");
|
const getDefaultEbayItems = async () => fetchEbayItems("laptop");
|
||||||
|
const getUnstableEbayItems = async (): Promise<
|
||||||
|
UnstableListingBuckets<EbayListingDetails>
|
||||||
|
> => fetchEbayItems("laptop", 1000, {}, { hideUnstableResults: true });
|
||||||
type _EbayDefaultReturn = Assert<
|
type _EbayDefaultReturn = Assert<
|
||||||
IsExact<Awaited<ReturnType<typeof getDefaultEbayItems>>, EbayListingDetails[]>
|
IsExact<Awaited<ReturnType<typeof getDefaultEbayItems>>, EbayListingDetails[]>
|
||||||
>;
|
>;
|
||||||
|
type _EbayUnstableReturn = Assert<
|
||||||
|
IsExact<
|
||||||
|
Awaited<ReturnType<typeof getUnstableEbayItems>>,
|
||||||
|
UnstableListingBuckets<EbayListingDetails>
|
||||||
|
>
|
||||||
|
>;
|
||||||
|
|
||||||
const originalFetch = global.fetch;
|
const originalFetch = global.fetch;
|
||||||
const originalWarn = console.warn;
|
const originalWarn = console.warn;
|
||||||
@@ -199,6 +209,32 @@ describe("eBay Scraper Cookie Handling", () => {
|
|||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("treats US dollar prices as USD", async () => {
|
||||||
|
global.fetch = mock(() =>
|
||||||
|
Promise.resolve({
|
||||||
|
ok: true,
|
||||||
|
text: () =>
|
||||||
|
Promise.resolve(`
|
||||||
|
<html><body>
|
||||||
|
<li class="s-item">
|
||||||
|
<a href="/itm/123"></a>
|
||||||
|
<h3>Stable Laptop Bundle</h3>
|
||||||
|
<span class="s-item__price">US $123.45</span>
|
||||||
|
</li>
|
||||||
|
</body></html>
|
||||||
|
`),
|
||||||
|
}),
|
||||||
|
) as typeof fetch;
|
||||||
|
|
||||||
|
const results = await fetchEbayItems("laptop", 1000);
|
||||||
|
|
||||||
|
expect(results).toEqual([
|
||||||
|
expect.objectContaining({
|
||||||
|
listingPrice: expect.objectContaining({ currency: "USD", cents: 12345 }),
|
||||||
|
}),
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
test("prefers the discounted Canadian-formatted price", async () => {
|
test("prefers the discounted Canadian-formatted price", async () => {
|
||||||
global.fetch = mock(() =>
|
global.fetch = mock(() =>
|
||||||
Promise.resolve({
|
Promise.resolve({
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import {
|
|||||||
parseFacebookCookieString,
|
parseFacebookCookieString,
|
||||||
parseFacebookItem,
|
parseFacebookItem,
|
||||||
} from "../src/scrapers/facebook";
|
} from "../src/scrapers/facebook";
|
||||||
|
import type { UnstableListingBuckets } from "../src/types/common";
|
||||||
import { formatCookiesForHeader } from "../src/utils/cookies";
|
import { formatCookiesForHeader } from "../src/utils/cookies";
|
||||||
import { formatCentsToCurrency } from "../src/utils/format";
|
import { formatCentsToCurrency } from "../src/utils/format";
|
||||||
|
|
||||||
@@ -24,9 +25,18 @@ type IsExact<T, U> =
|
|||||||
: false;
|
: false;
|
||||||
|
|
||||||
const getDefaultFacebookItems = async () => fetchFacebookItems("chair");
|
const getDefaultFacebookItems = async () => fetchFacebookItems("chair");
|
||||||
|
const getUnstableFacebookItems = async (): Promise<
|
||||||
|
UnstableListingBuckets<FacebookListingDetails>
|
||||||
|
> => fetchFacebookItems("chair", 1, "toronto", 25, { hideUnstableResults: true });
|
||||||
type _FacebookDefaultReturn = Assert<
|
type _FacebookDefaultReturn = Assert<
|
||||||
IsExact<Awaited<ReturnType<typeof getDefaultFacebookItems>>, FacebookListingDetails[]>
|
IsExact<Awaited<ReturnType<typeof getDefaultFacebookItems>>, FacebookListingDetails[]>
|
||||||
>;
|
>;
|
||||||
|
type _FacebookUnstableReturn = Assert<
|
||||||
|
IsExact<
|
||||||
|
Awaited<ReturnType<typeof getUnstableFacebookItems>>,
|
||||||
|
UnstableListingBuckets<FacebookListingDetails>
|
||||||
|
>
|
||||||
|
>;
|
||||||
|
|
||||||
// Mock fetch globally
|
// Mock fetch globally
|
||||||
const originalFetch = global.fetch;
|
const originalFetch = global.fetch;
|
||||||
@@ -1606,6 +1616,37 @@ describe("Facebook Marketplace Scraper Core Tests", () => {
|
|||||||
}),
|
}),
|
||||||
]);
|
]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("keeps valid free search listings", () => {
|
||||||
|
const ads = [
|
||||||
|
{
|
||||||
|
node: {
|
||||||
|
listing: {
|
||||||
|
id: "free-item",
|
||||||
|
marketplace_listing_title: "Free Chair",
|
||||||
|
listing_price: {
|
||||||
|
amount: "0.00",
|
||||||
|
formatted_amount: "FREE",
|
||||||
|
currency: "CAD",
|
||||||
|
},
|
||||||
|
is_live: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
const results = parseFacebookAds(ads);
|
||||||
|
|
||||||
|
expect(results).toEqual([
|
||||||
|
expect.objectContaining({
|
||||||
|
title: "Free Chair",
|
||||||
|
listingPrice: expect.objectContaining({
|
||||||
|
cents: 0,
|
||||||
|
amountFormatted: "FREE",
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
]);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import {
|
|||||||
default as fetchKijijiItems,
|
default as fetchKijijiItems,
|
||||||
type DetailedListing,
|
type DetailedListing,
|
||||||
NetworkError,
|
NetworkError,
|
||||||
|
parseSearch,
|
||||||
parseDetailedListing,
|
parseDetailedListing,
|
||||||
ParseError,
|
ParseError,
|
||||||
RateLimitError,
|
RateLimitError,
|
||||||
@@ -11,6 +12,7 @@ import {
|
|||||||
resolveLocationId,
|
resolveLocationId,
|
||||||
ValidationError,
|
ValidationError,
|
||||||
} from "../src/scrapers/kijiji";
|
} from "../src/scrapers/kijiji";
|
||||||
|
import type { UnstableListingBuckets } from "../src/types/common";
|
||||||
|
|
||||||
type Assert<T extends true> = T;
|
type Assert<T extends true> = T;
|
||||||
type IsExact<T, U> =
|
type IsExact<T, U> =
|
||||||
@@ -21,9 +23,26 @@ type IsExact<T, U> =
|
|||||||
: false;
|
: false;
|
||||||
|
|
||||||
const getDefaultKijijiItems = async () => fetchKijijiItems("phone");
|
const getDefaultKijijiItems = async () => fetchKijijiItems("phone");
|
||||||
|
const getUnstableKijijiItems = async (): Promise<
|
||||||
|
UnstableListingBuckets<DetailedListing>
|
||||||
|
> =>
|
||||||
|
fetchKijijiItems(
|
||||||
|
"phone",
|
||||||
|
1000,
|
||||||
|
"https://www.kijiji.ca",
|
||||||
|
{},
|
||||||
|
{},
|
||||||
|
{ hideUnstableResults: true },
|
||||||
|
);
|
||||||
type _KijijiDefaultReturn = Assert<
|
type _KijijiDefaultReturn = Assert<
|
||||||
IsExact<Awaited<ReturnType<typeof getDefaultKijijiItems>>, DetailedListing[]>
|
IsExact<Awaited<ReturnType<typeof getDefaultKijijiItems>>, DetailedListing[]>
|
||||||
>;
|
>;
|
||||||
|
type _KijijiUnstableReturn = Assert<
|
||||||
|
IsExact<
|
||||||
|
Awaited<ReturnType<typeof getUnstableKijijiItems>>,
|
||||||
|
UnstableListingBuckets<DetailedListing>
|
||||||
|
>
|
||||||
|
>;
|
||||||
|
|
||||||
const originalFetch = global.fetch;
|
const originalFetch = global.fetch;
|
||||||
|
|
||||||
@@ -667,3 +686,37 @@ describe("fetchKijijiItems", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("parseSearch", () => {
|
||||||
|
test("ignores SearchListingCard noise keys", () => {
|
||||||
|
const html = `
|
||||||
|
<html>
|
||||||
|
<script id="__NEXT_DATA__" type="application/json">
|
||||||
|
${JSON.stringify({
|
||||||
|
props: {
|
||||||
|
pageProps: {
|
||||||
|
__APOLLO_STATE__: {
|
||||||
|
"SearchListingCard:1": {
|
||||||
|
url: "/v-card-noise/k0l0",
|
||||||
|
title: "Card Noise",
|
||||||
|
},
|
||||||
|
"Listing:1": {
|
||||||
|
url: "/v-real-result/k0l0",
|
||||||
|
title: "Real Result",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})}
|
||||||
|
</script>
|
||||||
|
</html>
|
||||||
|
`;
|
||||||
|
|
||||||
|
expect(parseSearch(html, "https://www.kijiji.ca")).toEqual([
|
||||||
|
{
|
||||||
|
listingLink: "https://www.kijiji.ca/v-real-result/k0l0",
|
||||||
|
name: "Real Result",
|
||||||
|
},
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user