diff --git a/main.go b/main.go
index 2414672..2594803 100644
--- a/main.go
+++ b/main.go
@@ -1,25 +1,10 @@
package main
import (
- "crypto/tls"
- "encoding/json"
- "encoding/xml"
- "fmt"
- "sort"
-
- "html/template"
- "io"
"log"
- "net/http"
"os"
- "reflect"
- "strings"
- "time"
- "github.com/boltdb/bolt"
- tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api/v5"
"github.com/jessevdk/go-flags"
- "golang.org/x/net/html"
"gopkg.in/yaml.v2"
)
@@ -66,262 +51,6 @@ type Options struct {
var ConfigPath = "./config.yml"
-type Rss2 struct {
- XMLName xml.Name `xml:"rss"`
- Version string `xml:"version,attr"`
- // Required
- Title string `xml:"channel>title"`
- Link string `xml:"channel>link"`
- Description string `xml:"channel>description"`
- // Optional
- PubDate string `xml:"channel>pubDate"`
- ItemList []Item `xml:"channel>item"`
-}
-
-type Item struct {
- // Required
- Title string `xml:"title"`
- Link string `xml:"link"`
- Description template.HTML `xml:"description"`
- // Optional
- Content template.HTML `xml:"encoded"`
- PubDate string `xml:"pubDate"`
- Comments string `xml:"comments"`
-}
-
-type SendItems struct {
- ItemList []Item
-}
-
-type ByPubDate []Item
-
-func (a ByPubDate) Len() int { return len(a) }
-func (a ByPubDate) Less(i, j int) bool {
- timeone, _ := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", a[i].PubDate)
- timetwo, _ := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", a[j].PubDate)
- return timeone.Unix() > timetwo.Unix()
-}
-func (a ByPubDate) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
-
-func GetRSS(name string, url string) (*Rss2, error) {
- rss := &Rss2{}
-
- var netClient = &http.Client{}
-
- customTransport := &(*http.DefaultTransport.(*http.Transport)) // make shallow copy
- timeout := time.Duration(240 * time.Second)
- customTransport = &http.Transport{
- IdleConnTimeout: timeout,
- ResponseHeaderTimeout: timeout,
- DisableKeepAlives: false,
- DisableCompression: false,
- ForceAttemptHTTP2: true,
- TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
- TLSHandshakeTimeout: timeout,
- MaxIdleConns: 20,
- MaxIdleConnsPerHost: 100,
- MaxConnsPerHost: 100,
- }
- netClient = &http.Client{Transport: customTransport, Timeout: timeout}
- request, err := http.NewRequest("GET", url, nil)
- if err != nil {
- return nil, err
- }
- request.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:97.0) Gecko/20100101 Firefox/97.0")
-
- resp, err := netClient.Do(request)
- if err != nil {
- return nil, err
- }
-
- defer resp.Body.Close()
-
- body, err := io.ReadAll(resp.Body)
- if err != nil {
- return nil, err
- }
-
- // Start RSS decoding from file
- if err := xml.Unmarshal(body, rss); err != nil {
- return nil, err
- }
-
- return rss, nil
-}
-
-func ProcessRss(rss Rss2, dbpath string, rssname string) (*SendItems, error) {
- var si SendItems
- db, err := bolt.Open(dbpath, 0600, nil)
- if err != nil {
- log.Fatal(err)
- }
- defer db.Close()
- db.Update(func(tx *bolt.Tx) error {
- _, err := tx.CreateBucketIfNotExists([]byte(rssname))
- if err != nil {
- return err
- }
- return nil
- })
-
- for _, v := range rss.ItemList {
- db.View(func(tx *bolt.Tx) error {
- // Assume bucket exists and has keys
- b := tx.Bucket([]byte(rssname))
- c := b.Cursor()
- flag := false
- for key, _ := c.First(); key != nil; key, _ = c.Next() {
- if v.Link == string(key) {
- flag = true
- break
- }
- }
- if !flag {
- si.ItemList = append(si.ItemList, v)
- }
- return nil
- })
- }
-
- sort.Sort(ByPubDate(si.ItemList))
- if len(si.ItemList) > 0 {
- return &si, nil
- } else {
- return nil, nil
- }
-}
-
-func in_array(val interface{}, array interface{}) (exists bool) {
- exists = false
-
- switch reflect.TypeOf(array).Kind() {
- case reflect.Slice:
- s := reflect.ValueOf(array)
-
- for i := 0; i < s.Len(); i++ {
- if reflect.DeepEqual(val, s.Index(i).Interface()) == true {
- exists = true
- return
- }
- }
- }
-
- return
-}
-
-func NormalizeHTMLforTelegram(s string) (out string) {
-
- tags := []string{"br", "img", "b", "strong", "i", "em", "code", "s", "strike", "del", "u", "pre"}
-
- domDocTest := html.NewTokenizer(strings.NewReader(s))
- previousStartTokenTest := domDocTest.Token()
- flagendtag := false
- for {
- tt := domDocTest.Next()
- if len(out) > 2500 {
- if e := in_array(previousStartTokenTest.Data, tags) && previousStartTokenTest.Data != "img" && previousStartTokenTest.Data != "br" && !flagendtag; e {
- out += fmt.Sprintf("%s> ...", previousStartTokenTest.Data)
- } else {
- out += " ..."
- }
- return
- }
- switch {
- case tt == html.ErrorToken:
- return
- case tt == html.StartTagToken:
- previousStartTokenTest = domDocTest.Token()
- if e := in_array(previousStartTokenTest.Data, tags); e {
- switch {
- case previousStartTokenTest.Data == "br":
- out += "\n"
- case previousStartTokenTest.Data == "img" && previousStartTokenTest.Attr[0].Key == "src":
- out += fmt.Sprintf("%s ", previousStartTokenTest.Attr[0].Val)
- // case previousStartTokenTest.Data == "a" && previousStartTokenTest.Attr[0].Key == "href":
- // out += fmt.Sprintf(" %s ", previousStartTokenTest.Attr[0].Val)
- default:
- out += fmt.Sprintf(" <%s>", previousStartTokenTest.Data)
- }
- flagendtag = false
- }
- case tt == html.EndTagToken:
- t := domDocTest.Token()
- if e := in_array(t.Data, tags); e {
- // switch {
- // case t.Data == "a":
- // out += " "
- // default:
- out += fmt.Sprintf("%s> ", t.Data)
- // }
- flagendtag = true
- }
- case tt == html.SelfClosingTagToken:
- t := domDocTest.Token()
- if e := in_array(t.Data, tags); e {
- if t.Data == "br" {
- out += "\n"
- }
- }
- case tt == html.TextToken:
- if previousStartTokenTest.Data == "script" {
- continue
- }
- TxtContent := strings.TrimSpace(html.UnescapeString(string(domDocTest.Text())))
- if len(TxtContent) > 0 {
- out += TxtContent
- }
- }
- }
-}
-
-func SendAndWriteToDB(send SendItems, dbpath string, rssname string, token string, chatid int64, debug bool) error {
-
- db, err := bolt.Open(dbpath, 0600, nil)
- if err != nil {
- log.Fatal(err)
- }
- defer db.Close()
-
- for i := len(send.ItemList) - 1; i >= 0; i-- {
- v := send.ItemList[i]
- log.Printf("Send to telegram post: %s", v.Title)
- bot, err := tgbotapi.NewBotAPI(token)
- if err != nil {
- log.Panic(err)
- }
- bot.Debug = debug
-
- s := "" + rssname + "\n\n" + "" + string(v.Title) + "\n\n" + NormalizeHTMLforTelegram(html.UnescapeString(string(v.Description))) +
- "\n\n" + v.Link
- msg := tgbotapi.NewMessage(chatid, s)
- msg.ParseMode = "Html"
- _, err = bot.Send(msg)
- if err != nil {
- log.Panic(err)
-
- }
- duration := time.Duration(10) * time.Second
- time.Sleep(duration)
-
- db.Update(func(tx *bolt.Tx) error {
- b, err := tx.CreateBucketIfNotExists([]byte(rssname))
- if err != nil {
- return err
- }
- encoded, err := json.Marshal(v)
- if err != nil {
- return err
- }
- err = b.Put([]byte(v.Link), encoded)
- if err != nil {
- return err
- }
- return nil
- })
- }
- return nil
-}
-
func main() {
var options Options
var parser = flags.NewParser(&options, flags.Default)
diff --git a/process_rss.go b/process_rss.go
new file mode 100644
index 0000000..3284cb4
--- /dev/null
+++ b/process_rss.go
@@ -0,0 +1,139 @@
+package main
+
+import (
+ "crypto/tls"
+ "encoding/xml"
+ "html/template"
+ "io"
+ "log"
+ "net/http"
+ "sort"
+ "time"
+
+ "github.com/boltdb/bolt"
+)
+
+// Rss2 is the XML decoding target for a feed: the <rss> root element and fields of its <channel> child.
+type Rss2 struct {
+ XMLName xml.Name `xml:"rss"`
+ Version string `xml:"version,attr"`
+ // Required
+ Title string `xml:"channel>title"`
+ Link string `xml:"channel>link"`
+ Description string `xml:"channel>description"`
+ PubDate string `xml:"channel>pubDate"` // optional
+ ItemList []Item `xml:"channel>item"` // optional; one entry per <item> element
+}
+
+// Item holds the fields decoded from one <item> element of a channel.
+type Item struct {
+ // Required
+ Title string `xml:"title"`
+ Link string `xml:"link"` // compared against stored bucket keys by ProcessRss
+ Description template.HTML `xml:"description"`
+ Content template.HTML `xml:"encoded"` // optional
+ PubDate string `xml:"pubDate"` // optional; parsed by ByPubDate.Less
+ Comments string `xml:"comments"` // optional
+}
+
+type SendItems struct {
+ ItemList []Item // items ProcessRss found no database key for; consumed by SendAndWriteToDB
+}
+
+// ByPubDate sorts items newest-first; an unparsable PubDate becomes the zero time and sorts last.
+type ByPubDate []Item
+func (a ByPubDate) Len() int      { return len(a) }
+func (a ByPubDate) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+func (a ByPubDate) Less(i, j int) bool {
+	ti, _ := time.Parse(time.RFC1123Z, a[i].PubDate)
+	tj, _ := time.Parse(time.RFC1123Z, a[j].PubDate)
+	return ti.After(tj)
+}
+
+// GetRSS fetches url with an HTTP GET and decodes the response body into an Rss2.
+// The name argument is currently unused. A request failure, a non-2xx status, a
+// body read failure or malformed XML is returned as the error with a nil feed.
+func GetRSS(name string, url string) (*Rss2, error) {
+	timeout := 240 * time.Second
+	transport := &http.Transport{
+		IdleConnTimeout:       timeout,
+		ResponseHeaderTimeout: timeout,
+		ForceAttemptHTTP2:     true,
+		// NOTE(review): certificate verification is disabled (as before); confirm this is intended.
+		TLSClientConfig:     &tls.Config{InsecureSkipVerify: true},
+		TLSHandshakeTimeout: timeout,
+		MaxIdleConns:        20,
+		MaxIdleConnsPerHost: 100,
+		MaxConnsPerHost:     100,
+	}
+	// The transport is private to this call, so drop its pooled connections on return.
+	defer transport.CloseIdleConnections()
+	netClient := &http.Client{Transport: transport, Timeout: timeout}
+
+	request, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		return nil, err
+	}
+	request.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:97.0) Gecko/20100101 Firefox/97.0")
+
+	resp, err := netClient.Do(request)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode < 200 || resp.StatusCode > 299 {
+		// An error page is not a feed; report the status rather than an XML syntax error.
+		return nil, xml.UnmarshalError("unexpected HTTP status " + resp.Status + " from " + url)
+	}
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, err
+	}
+	rss := &Rss2{}
+	if err := xml.Unmarshal(body, rss); err != nil {
+		return nil, err
+	}
+	return rss, nil
+}
+
+// ProcessRss returns the items of rss whose Link is not yet a key in the rssname bucket
+// of the bolt database at dbpath, sorted newest first; the bucket is created if missing.
+// It returns (nil, nil) when there is nothing new, and an error if a transaction fails.
+// NOTE(review): failing to open the database still exits via log.Fatal, as before.
+func ProcessRss(rss Rss2, dbpath string, rssname string) (*SendItems, error) {
+	db, err := bolt.Open(dbpath, 0600, nil)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer db.Close()
+	bucket := []byte(rssname)
+	if err := db.Update(func(tx *bolt.Tx) error {
+		_, err := tx.CreateBucketIfNotExists(bucket)
+		return err
+	}); err != nil {
+		return nil, err
+	}
+
+	var si SendItems
+	err = db.View(func(tx *bolt.Tx) error {
+		b := tx.Bucket(bucket)
+		if b == nil {
+			return bolt.ErrBucketNotFound
+		}
+		for _, v := range rss.ItemList {
+			// Keys are item links, so a direct lookup replaces scanning every key per item.
+			if b.Get([]byte(v.Link)) == nil {
+				si.ItemList = append(si.ItemList, v)
+			}
+		}
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+	if len(si.ItemList) == 0 {
+		return nil, nil
+	}
+	sort.Sort(ByPubDate(si.ItemList))
+	return &si, nil
+}
diff --git a/send.go b/send.go
new file mode 100644
index 0000000..a510c51
--- /dev/null
+++ b/send.go
@@ -0,0 +1,145 @@
+package main
+
+import (
+ "encoding/json"
+ "fmt"
+ "log"
+ "reflect"
+ "strings"
+ "time"
+
+ "github.com/boltdb/bolt"
+ tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api/v5"
+ "golang.org/x/net/html"
+)
+
+// in_array reports whether val is reflect.DeepEqual to some element of array, which
+// must be a slice or array. A nil or other-kind array yields false instead of panicking.
+func in_array(val interface{}, array interface{}) (exists bool) {
+	if array == nil {
+		return false // reflect.TypeOf(nil) is a nil Type; calling Kind on it panics
+	}
+	s := reflect.ValueOf(array)
+	if k := s.Kind(); k != reflect.Slice && k != reflect.Array {
+		return false
+	}
+	for i := 0; i < s.Len(); i++ {
+		if reflect.DeepEqual(val, s.Index(i).Interface()) {
+			return true
+		}
+	}
+	return false
+}
+
+// NormalizeHTMLforTelegram reduces an HTML fragment to markup Telegram's HTML parse mode
+// accepts: <br> becomes "\n", <img> becomes its src URL, script/style content is dropped,
+// other unsupported tags are removed (text kept), text is re-escaped, and tags left open
+// are closed. Output is cut once it passes 2500 bytes and then ends in " ...".
+func NormalizeHTMLforTelegram(s string) (out string) {
+	const limit = 2500
+	keep := []string{"b", "strong", "i", "em", "code", "s", "strike", "del", "u", "pre"}
+	var b strings.Builder
+	var open []string // kept tags emitted but not yet closed, innermost last
+	skip := false     // true while inside <script> or <style>
+	// finish closes every open tag, innermost first, and returns the result plus tail.
+	finish := func(tail string) string {
+		for i := len(open) - 1; i >= 0; i-- {
+			b.WriteString(fmt.Sprintf("</%s>", open[i]))
+		}
+		return b.String() + tail
+	}
+	z := html.NewTokenizer(strings.NewReader(s))
+	for {
+		tt := z.Next()
+		if b.Len() > limit {
+			return finish(" ...")
+		}
+		t := z.Token()
+		switch tt {
+		case html.ErrorToken:
+			return finish("")
+		case html.StartTagToken, html.SelfClosingTagToken:
+			switch {
+			case t.Data == "br":
+				b.WriteString("\n")
+			case t.Data == "img":
+				// src may be any attribute, or absent; never index Attr blindly.
+				for _, a := range t.Attr {
+					if a.Key == "src" {
+						b.WriteString(html.EscapeString(a.Val) + " ")
+						break
+					}
+				}
+			case t.Data == "script" || t.Data == "style":
+				skip = tt == html.StartTagToken
+			case tt == html.StartTagToken && in_array(t.Data, keep):
+				open = append(open, t.Data)
+				b.WriteString(fmt.Sprintf(" <%s>", t.Data))
+			}
+		case html.EndTagToken:
+			if t.Data == "script" || t.Data == "style" {
+				skip = false
+			} else if n := len(open); n > 0 && open[n-1] == t.Data {
+				open = open[:n-1]
+				b.WriteString(fmt.Sprintf("</%s> ", t.Data))
+			}
+		case html.TextToken:
+			txt := strings.TrimSpace(html.UnescapeString(t.Data))
+			if room := limit + 1 - b.Len(); !skip && len(txt) > room {
+				// One oversized text run must not blow past the limit: cut it here.
+				b.WriteString(html.EscapeString(strings.ToValidUTF8(txt[:room], "")))
+				return finish(" ...")
+			} else if !skip {
+				b.WriteString(html.EscapeString(txt))
+			}
+		}
+	}
+}
+
+// SendAndWriteToDB posts the items of send to Telegram chat chatid, walking ItemList from
+// last to first, and after each delivery stores the item as JSON under its Link in the
+// rssname bucket of the bolt database at dbpath. It returns the first error encountered;
+// items delivered before that stay recorded. debug is copied to the bot client's Debug field.
+func SendAndWriteToDB(send SendItems, dbpath string, rssname string, token string, chatid int64, debug bool) error {
+	db, err := bolt.Open(dbpath, 0600, nil)
+	if err != nil {
+		return fmt.Errorf("open %s: %w", dbpath, err)
+	}
+	defer db.Close()
+	// One client for the whole batch rather than a new one per item.
+	bot, err := tgbotapi.NewBotAPI(token)
+	if err != nil {
+		return fmt.Errorf("create telegram client: %w", err)
+	}
+	bot.Debug = debug
+	for i := len(send.ItemList) - 1; i >= 0; i-- {
+		v := send.ItemList[i]
+		log.Printf("Send to telegram post: %s", v.Title)
+		// Title and link come from the feed; escape them so a stray '<' or '&' cannot break HTML parsing.
+		s := rssname + "\n\n" + html.EscapeString(v.Title) + "\n\n" +
+			NormalizeHTMLforTelegram(html.UnescapeString(string(v.Description))) +
+			"\n\n" + html.EscapeString(v.Link)
+		msg := tgbotapi.NewMessage(chatid, s)
+		msg.ParseMode = "Html"
+		if _, err := bot.Send(msg); err != nil {
+			return fmt.Errorf("send %q: %w", v.Link, err)
+		}
+		// Record the item before pausing so an interrupted run does not resend it.
+		err = db.Update(func(tx *bolt.Tx) error {
+			b, err := tx.CreateBucketIfNotExists([]byte(rssname))
+			if err != nil {
+				return err
+			}
+			encoded, err := json.Marshal(v)
+			if err != nil {
+				return err
+			}
+			return b.Put([]byte(v.Link), encoded)
+		})
+		if err != nil {
+			return fmt.Errorf("record %q: %w", v.Link, err)
+		}
+		time.Sleep(10 * time.Second) // same pause between messages as before
+	}
+	return nil
+}