forked from desmondhume/newspaper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
119 lines (97 loc) · 2.97 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
package main
import (
"flag"
"fmt"
"html"
"io/ioutil"
"net/url"
"os"
"os/exec"
"regexp"
"strings"
"time"
"github.com/go-shiori/go-readability"
"github.com/logrusorgru/aurora"
"github.com/lunny/html2md"
"github.com/mitchellh/go-wordwrap"
)
// main fetches the article at the URL given as the first positional
// command-line argument, converts its content to wrapped, readable text and
// either hands it to a terminal pager or saves it to "<title>.md".
//
// Flags (must precede the URL, as required by the flag package):
//
//	-no-links      replace markdown links with their text
//	-plaintext     do not emit ANSI styling
//	-save-to-file  write the result to a file instead of paging it
//
// The process exits with status 2 when no URL is given and with status 1 when
// the URL is invalid, the article cannot be fetched, the file cannot be
// written, or the pager fails.
func main() {
	var (
		nolinks    = flag.Bool("no-links", false, "Remove links")
		plaintext  = flag.Bool("plaintext", false, "Disable ANSI (plain-text output)")
		saveToFile = flag.Bool("save-to-file", false, "Save output to file")
	)
	flag.Parse()

	// ANSI styling is suppressed both for explicit plain-text output and when
	// the result is written to a file.
	styled := !*plaintext && !*saveToFile
	bold := func(arg interface{}) aurora.Value {
		if !styled {
			return aurora.Reset(arg)
		}
		return aurora.Bold(arg)
	}
	red := func(arg interface{}) aurora.Value {
		if !styled {
			return aurora.Reset(arg)
		}
		return aurora.Red(arg)
	}

	// Take the URL from the positional arguments rather than from the raw
	// os.Args, so that a flag or the program name is never mistaken for it.
	if flag.NArg() == 0 {
		fmt.Fprintf(os.Stderr, "Usage: %s [options] <url>\n", os.Args[0])
		flag.PrintDefaults()
		os.Exit(2)
	}
	articleURL := flag.Arg(0)

	// Fetch article from given url
	parsedURL, err := url.Parse(articleURL)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Invalid URL %s: %v\n", articleURL, err)
		os.Exit(1)
	}
	article, err := readability.FromURL(parsedURL.String(), 5*time.Second)
	if err != nil {
		// Without an article there is nothing meaningful to render.
		fmt.Fprintf(os.Stderr, "Unable to request data from URL %s: %v\n", articleURL, err)
		os.Exit(1)
	}

	// Convert html to readable markdown
	output := html.UnescapeString(html2md.Convert(article.Content))

	// Squash multiple lines blocks into single blank lines
	blankRuns := regexp.MustCompile(`(\s*\n){2,}`)
	output = blankRuns.ReplaceAllString(output, "\n\n")

	// Remove leading whitespace from every line. The pattern is anchored to
	// the line start so that the preceding newline is kept and runs of spaces
	// inside a line are left alone.
	leadingSpace := regexp.MustCompile(`(?m)^[ \t]+`)
	output = leadingSpace.ReplaceAllString(output, "")

	// Remove links if -no-links is passed
	if *nolinks {
		// Keep only the link text. The character classes stop at the first
		// closing bracket/parenthesis so several links on one line are
		// handled independently; links with empty text (like js-driven
		// anchors) disappear entirely.
		link := regexp.MustCompile(`\[([^\]]*)\]\(([^)]*)\)`)
		output = link.ReplaceAllString(output, "$1")
	}

	if !*plaintext {
		// Convert markdown bold markers to ANSI codes. The lazy quantifier
		// keeps separate bold spans on the same line separate.
		strong := regexp.MustCompile(`\*\*(.*?)\*\*`)
		output = strong.ReplaceAllString(output, fmt.Sprintf("%s", bold("$1")))

		// Convert markdown headings to ANSI codes (to enhance subtitles)
		heading := regexp.MustCompile("## (.*)")
		output = heading.ReplaceAllString(output, fmt.Sprintf("%s", bold("$1")))
	}

	// Wrap text to 80 columns to make the content more readable
	output = wordwrap.WrapString(output, 80)

	// Format article output with title and content
	output = fmt.Sprintf("%s\n%s", bold(red(article.Title)), output)

	if *saveToFile {
		// The title comes from the fetched page; strip path separators so it
		// cannot redirect the write to another directory, and fall back to a
		// fixed name when the title is empty.
		name := strings.NewReplacer("/", "-", `\`, "-").Replace(article.Title)
		name = strings.TrimSpace(name)
		if name == "" {
			name = "article"
		}
		filename := fmt.Sprintf("%s.md", name)
		if err := ioutil.WriteFile(filename, []byte(output), 0644); err != nil {
			fmt.Fprintf(os.Stderr, "Unable to save output to the file: %v\n", err)
			os.Exit(1)
		}
		return
	}

	// Feed the rendered article to the pager program configured elsewhere in
	// this package and wait until the user closes it.
	cmd := exec.Command(PathToTerminalPagerProgram, ParamsForTerminalPagerProgram)
	cmd.Stdin = strings.NewReader(output)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	if err := cmd.Run(); err != nil {
		fmt.Fprintf(os.Stderr, "Unable to run pager: %v\n", err)
		os.Exit(1)
	}
}