-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.sh
More file actions
97 lines (79 loc) · 2.18 KB
/
main.sh
File metadata and controls
97 lines (79 loc) · 2.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/bin/bash
# ANSI colour sequences used for status messages. They are stored as literal
# backslash text (not raw escape bytes), so they only turn into colours when
# printed through something that interprets backslash escapes.
NC="\033[0m"
RED="\033[0;31m"
GREEN="\033[0;32m"
BLUE="\033[0;34m"
# Model name handed to `tokuin --model`; a non-empty value already present in
# the environment wins, otherwise fall back to gpt-4.
: "${TOKEN_MODEL:=gpt-4}"
#######################################
# Fetch web pages and print each one on stdout as Markdown, preceded by a
# "# [title](url)" heading. Progress and diagnostics go to stderr.
#
# Extraction backends:
#   auto (default)  try shot-scraper + trafilatura, then fall back to Jina
#   --shot-scraper  only shot-scraper + trafilatura
#   --jina          only Jina Reader (https://r.jina.ai/<url> via curl)
# A backend that yields no content counts as a failure, so "auto" falls back
# to Jina instead of silently printing nothing.
#
# The page title is always requested through shot-scraper (even with --jina);
# when that yields nothing, the URL itself is used as the link text.
#
# Globals:
#   RED, GREEN, BLUE, NC (read) - colour sequences, interpreted via printf %b
#   TOKEN_MODEL (read)          - model name passed to `tokuin --model`
# Arguments:
#   [--shot-scraper | --jina] [URL...]
#   Any argument that is not one of the two flags is treated as a URL.
#   With no URL arguments and a non-terminal stdin, URLs are read from stdin,
#   one per line.
# Outputs:
#   stdout: heading + Markdown for every URL that produced content
#   stderr: per-URL progress, failures and the token count
# Returns:
#   0 if every URL produced content; 1 if no URL was supplied or any URL failed.
#######################################
bread() {
  local -a urls=()
  local mode="auto"
  local exit_code=0
  local line url title html_content jina_content final_markdown token_count

  while (( $# > 0 )); do
    case "$1" in
      --shot-scraper) mode="shot-scraper" ;;
      --jina) mode="jina" ;;
      *) urls+=("$1") ;;
    esac
    shift
  done

  # No URL arguments: take them from stdin when it is a pipe or file.
  if (( ${#urls[@]} == 0 )) && [[ ! -t 0 ]]; then
    # `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline.
    # IFS is deliberately left at its default so surrounding whitespace is
    # trimmed from each URL; blank lines are skipped.
    while read -r line || [[ -n "$line" ]]; do
      if [[ -n "$line" ]]; then
        urls+=("$line")
      fi
    done
  fi

  if (( ${#urls[@]} == 0 )); then
    echo "Usage: bread [--shot-scraper | --jina] [URL] or echo [URL] | bread [--shot-scraper | --jina]" >&2
    return 1
  fi

  for url in "${urls[@]}"; do
    # %b expands the escapes stored in the colour variables only; the URL and
    # other page-derived data go through %s so they are printed verbatim.
    printf '%b%s%b\n' "$BLUE" "$url" "$NC" >&2

    # Best effort: an empty title falls back to the URL in the heading below.
    title=$(shot-scraper javascript "$url" "document.title" --raw 2>/dev/null)
    final_markdown=""

    if [[ "$mode" == "auto" || "$mode" == "shot-scraper" ]]; then
      printf '%b[Info]%b Trying shot-scraper\n' "$GREEN" "$NC" >&2
      if html_content=$(shot-scraper html "$url" 2>/dev/null); then
        final_markdown=$(printf '%s\n' "$html_content" | trafilatura --markdown)
        if [[ -z "$final_markdown" ]]; then
          printf '%b[Fatal]%b trafilatura produced no content\n' "$RED" "$NC" >&2
        fi
      else
        printf '%b[Fatal]%b shot-scraper failed\n' "$RED" "$NC" >&2
      fi
    fi

    # Still nothing to show: use Jina Reader if the mode allows it.
    if [[ -z "$final_markdown" ]] && [[ "$mode" == "auto" || "$mode" == "jina" ]]; then
      printf '%b[Info]%b Trying Jina Reader\n' "$GREEN" "$NC" >&2
      if jina_content=$(curl -fsSL "https://r.jina.ai/$url" 2>/dev/null) \
        && [[ -n "$jina_content" ]]; then
        final_markdown=$jina_content
      else
        printf '%b[Fatal]%b Jina Reader failed\n' "$RED" "$NC" >&2
      fi
    fi

    # Remember the failure but keep processing the remaining URLs.
    if [[ -z "$final_markdown" ]]; then
      exit_code=1
      continue
    fi

    printf '# [%s](%s)\n' "${title:-$url}" "$url"
    printf '%s\n' "$final_markdown"

    # tokuin's output is reduced to the last number it prints; if tokuin is
    # unavailable the count is simply left blank.
    token_count=$(printf '%s\n' "$final_markdown" \
      | tokuin --model "$TOKEN_MODEL" 2>/dev/null \
      | grep -oE '[0-9]+' \
      | tail -n 1)
    printf '%b[Tokens]%b %s\n' "$GREEN" "$NC" "$token_count" >&2
    printf '\n' >&2
  done

  return "$exit_code"
}
# Entry point: forward the script's command-line arguments, unchanged, to bread.
bread "$@"