feat: add unstable mode to scraper results
This commit is contained in:
@@ -1,4 +1,6 @@
|
||||
import { parseHTML } from "linkedom";
|
||||
import type { UnstableListingModeOptions } from "../types/common";
|
||||
import { classifyUnstableListings } from "../utils/unstable";
|
||||
import {
|
||||
type CookieConfig,
|
||||
ensureCookies,
|
||||
@@ -362,7 +364,16 @@ export default async function fetchEbayItems(
|
||||
buyItNowOnly?: boolean;
|
||||
canadaOnly?: boolean;
|
||||
} = {},
|
||||
unstableMode: UnstableListingModeOptions = {},
|
||||
) {
|
||||
// Shapes the value handed back to the caller according to `unstableMode`.
// Note the two branches return different things: the plain listings array
// when `hideUnstableResults` is falsy (which includes the default `{}`), and
// whatever `classifyUnstableListings` returns otherwise, so callers must
// handle both shapes.
const finalizeResults = (listings: EbayListingDetails[]) => {
|
||||
if (!unstableMode.hideUnstableResults) {
|
||||
// Unstable mode not requested: pass the listings through untouched.
return listings;
|
||||
}
|
||||
|
||||
// Unstable mode requested: delegate the split to the shared classifier.
return classifyUnstableListings(listings);
|
||||
};
|
||||
|
||||
const {
|
||||
minPrice = 0,
|
||||
maxPrice = Number.MAX_SAFE_INTEGER,
|
||||
@@ -452,13 +463,13 @@ export default async function fetchEbayItems(
|
||||
});
|
||||
|
||||
console.log(`Parsed ${filteredListings.length} eBay listings.`);
|
||||
return filteredListings;
|
||||
return finalizeResults(filteredListings);
|
||||
} catch (err) {
|
||||
if (err instanceof HttpError) {
|
||||
console.error(
|
||||
`Failed to fetch eBay search (${err.status}): ${err.message}`,
|
||||
);
|
||||
return [];
|
||||
return finalizeResults([]);
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import cliProgress from "cli-progress";
|
||||
import { parseHTML } from "linkedom";
|
||||
import type { HTMLString } from "../types/common";
|
||||
import type { HTMLString, UnstableListingModeOptions } from "../types/common";
|
||||
import { classifyUnstableListings } from "../utils/unstable";
|
||||
import {
|
||||
type Cookie,
|
||||
type CookieConfig,
|
||||
@@ -1065,7 +1066,20 @@ export default async function fetchFacebookItems(
|
||||
REQUESTS_PER_SECOND = 1,
|
||||
LOCATION = "toronto",
|
||||
MAX_ITEMS = 25,
|
||||
unstableMode: UnstableListingModeOptions = {},
|
||||
) {
|
||||
// Shapes the value handed back to the caller according to `unstableMode`,
// applying the `MAX_ITEMS` cap in both branches.
// Returns a plain array when `hideUnstableResults` is falsy (which includes
// the default `{}`), and a `{ results, unstableResults }` object otherwise,
// so callers must handle both shapes.
const finalizeResults = (listings: FacebookListingDetails[]) => {
|
||||
if (!unstableMode.hideUnstableResults) {
|
||||
// Unstable mode not requested: just truncate to the first MAX_ITEMS listings.
return listings.slice(0, MAX_ITEMS);
|
||||
}
|
||||
|
||||
// Classify the full, untruncated list first; the cap is applied afterwards.
const classified = classifyUnstableListings(listings);
|
||||
return {
|
||||
// Only the `results` bucket is limited to MAX_ITEMS.
results: classified.results.slice(0, MAX_ITEMS),
|
||||
// `unstableResults` is returned as-is, with no MAX_ITEMS cap.
unstableResults: classified.unstableResults,
|
||||
};
|
||||
};
|
||||
|
||||
const cookies = await ensureFacebookCookies();
|
||||
|
||||
// Format cookies for HTTP header
|
||||
@@ -1114,7 +1128,7 @@ export default async function fetchFacebookItems(
|
||||
"This might indicate invalid or expired cookies. Update FACEBOOK_COOKIE with a fresh raw Cookie header string.",
|
||||
);
|
||||
}
|
||||
return [];
|
||||
return finalizeResults([]);
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
@@ -1122,25 +1136,25 @@ export default async function fetchFacebookItems(
|
||||
const classification = classifyFacebookResponse(searchHtml, searchResponseUrl);
|
||||
if (classification.authGated) {
|
||||
console.warn("Facebook marketplace search redirected to login. Cookies may be expired.");
|
||||
return [];
|
||||
return finalizeResults([]);
|
||||
}
|
||||
|
||||
if (classification.unavailable) {
|
||||
console.warn("Facebook marketplace search returned an unavailable route.");
|
||||
return [];
|
||||
return finalizeResults([]);
|
||||
}
|
||||
|
||||
if (classification.kind !== "search") {
|
||||
console.warn(
|
||||
`Facebook marketplace search returned unexpected route kind: ${classification.kind}.`,
|
||||
);
|
||||
return [];
|
||||
return finalizeResults([]);
|
||||
}
|
||||
|
||||
const ads = extractFacebookMarketplaceData(searchHtml);
|
||||
if (!ads || ads.length === 0) {
|
||||
console.warn("No ads parsed from Facebook marketplace page.");
|
||||
return [];
|
||||
return finalizeResults([]);
|
||||
}
|
||||
|
||||
console.log(`\nFound ${ads.length} raw ads. Processing...`);
|
||||
@@ -1164,7 +1178,7 @@ export default async function fetchFacebookItems(
|
||||
progressBar.stop();
|
||||
|
||||
console.log(`\nParsed ${pricedItems.length} Facebook marketplace listings.`);
|
||||
return pricedItems.slice(0, MAX_ITEMS); // Limit results
|
||||
return finalizeResults(pricedItems);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import cliProgress from "cli-progress";
|
||||
import { parseHTML } from "linkedom";
|
||||
import unidecode from "unidecode";
|
||||
import type { HTMLString } from "../types/common";
|
||||
import type { HTMLString, UnstableListingModeOptions } from "../types/common";
|
||||
import { classifyUnstableListings } from "../utils/unstable";
|
||||
import {
|
||||
type CookieConfig,
|
||||
formatCookiesForHeader,
|
||||
@@ -702,7 +703,16 @@ export default async function fetchKijijiItems(
|
||||
BASE_URL = "https://www.kijiji.ca",
|
||||
searchOptions: SearchOptions = {},
|
||||
listingOptions: ListingFetchOptions = {},
|
||||
unstableMode: UnstableListingModeOptions = {},
|
||||
) {
|
||||
// Shapes the value handed back to the caller according to `unstableMode`.
// Note the two branches return different things: the plain listings array
// when `hideUnstableResults` is falsy (which includes the default `{}`), and
// whatever `classifyUnstableListings` returns otherwise, so callers must
// handle both shapes.
const finalizeResults = (listings: DetailedListing[]) => {
|
||||
if (!unstableMode.hideUnstableResults) {
|
||||
// Unstable mode not requested: pass the listings through untouched.
return listings;
|
||||
}
|
||||
|
||||
// Unstable mode requested: delegate the split to the shared classifier.
return classifyUnstableListings(listings);
|
||||
};
|
||||
|
||||
const DELAY_MS = Math.max(1, Math.floor(1000 / REQUESTS_PER_SECOND));
|
||||
|
||||
// Load Kijiji cookies (optional - helps bypass bot detection)
|
||||
@@ -860,7 +870,7 @@ export default async function fetchKijijiItems(
|
||||
}
|
||||
|
||||
console.log(`\nParsed ${allListings.length} detailed listings.`);
|
||||
return allListings;
|
||||
return finalizeResults(allListings);
|
||||
}
|
||||
|
||||
// Re-export error classes for convenience
|
||||
|
||||
Reference in New Issue
Block a user