-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexample.ts
74 lines (72 loc) · 1.97 KB
/
example.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import { Config, Crawler } from "./mod.ts";
/**
 * Pathname of the ingredient category index, which is also the crawl's start
 * page. It needs special handling because its shape ("/a/b/c/") is the same
 * as a recipe detail path when judged by segment count alone.
 */
const INGREDIENT_INDEX_PATH = "/recipes/category/ingredient/";

/**
 * Example crawl configuration for the recipe section of jamieoliver.com.
 *
 * - `crawl.linkExtractors` selects anchors matching `.recipe-block a` and uses
 *   `shouldExtract` to decide, from the page URL, whether that selector applies.
 * - `crawl.detailExtractor` maps record field names to CSS selectors and uses
 *   `shouldExtract` to decide which page URLs are treated as recipe pages.
 * - `crawler.launchConfig` is passed through to the crawler; `headless: false`
 *   is presumably a browser launch option (confirm against `./mod.ts`).
 *
 * Both predicates classify URLs by `pathname.split("/").length`. With a leading
 * and trailing slash, "/x/y/" splits into 4 parts and "/x/y/z/" into 5.
 */
const config: Config = {
  crawl: {
    startUrl: "https://www.jamieoliver.com/recipes/category/ingredient/",
    linkExtractors: [
      {
        linkExtraction: ".recipe-block a",
        shouldExtract: (url) => {
          const { pathname } = new URL(url);
          // The index page splits into 5 parts, so it must be matched by name.
          if (pathname === INGREDIENT_INDEX_PATH) {
            return true;
          }
          // Two-segment paths with a trailing slash, e.g. "/recipes/<category>/".
          return pathname.split("/").length === 4;
        },
      },
    ],
    detailExtractor: {
      details: {
        headline: "h1",
        subHeadline: "p.subheading",
        serves: ".recipe-detail.serves",
        time: ".recipe-detail.time",
        difficulty: ".recipe-detail.difficulty",
        calories: "li[title=Calories] .top",
        fat: "li[title=Fat] .top",
        saturates: "li[title=Saturates] .top",
        sugars: "li[title=Sugars] .top",
        salt: "li[title=Salt] .top",
        protein: "li[title=Protein] .top",
        carbohydrate: "li[title=Carbs] .top",
        fibre: "li[title=Fibre] .top",
        ingredients: ".ingred-list",
        method: "div.method-p > div",
      },
      shouldExtract: (url) => {
        const { pathname } = new URL(url);
        // The category index also splits into 5 parts; without this guard the
        // listing page itself would be treated as a recipe detail page.
        if (pathname === INGREDIENT_INDEX_PATH) {
          return false;
        }
        // Three-segment paths with a trailing slash, e.g.
        // "/recipes/<category>/<recipe>/".
        return pathname.split("/").length === 5;
      },
    },
  },
  crawler: {
    launchConfig: {
      headless: false,
    },
  },
};
// Script entry point: build a crawler from `config`, attach a console logger
// to each event it emits, then run the crawl to completion.
void (async () => {
  const crawler = new Crawler(config);

  // Lifecycle events: logged as objects so the field names appear in the output.
  const onStart = (startUrl, date) => {
    console.log({ startUrl, date });
  };
  const onFinish = (urlsCrawled, date) => {
    console.log({ urlsCrawled, date });
  };

  // Per-page progress and diagnostics.
  const onCrawled = (url) => {
    console.log("crawled:", url);
  };
  const onError = (error) => {
    console.log("Error", error);
  };
  const onInfo = (tag, info) => {
    console.log("info", tag, info);
  };

  // Extracted records.
  const onData = (record) => {
    console.log({ record });
  };

  crawler.on("start", onStart);
  crawler.on("finish", onFinish);
  crawler.on("crawled", onCrawled);
  crawler.on("error", onError);
  crawler.on("info", onInfo);
  crawler.on("data", onData);

  await crawler.crawl();
})();