From 9855a89fbe783f9659eb0129223af57170d028ab Mon Sep 17 00:00:00 2001 From: "Andrey Ivanov (NetMoose)" Date: Sun, 10 Apr 2022 14:44:43 +0500 Subject: [PATCH] Split project --- main.go | 271 ------------------------------------------------- process_rss.go | 139 +++++++++++++++++++++++++ send.go | 145 ++++++++++++++++++++++++++ 3 files changed, 284 insertions(+), 271 deletions(-) create mode 100644 process_rss.go create mode 100644 send.go diff --git a/main.go b/main.go index 2414672..2594803 100644 --- a/main.go +++ b/main.go @@ -1,25 +1,10 @@ package main import ( - "crypto/tls" - "encoding/json" - "encoding/xml" - "fmt" - "sort" - - "html/template" - "io" "log" - "net/http" "os" - "reflect" - "strings" - "time" - "github.com/boltdb/bolt" - tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api/v5" "github.com/jessevdk/go-flags" - "golang.org/x/net/html" "gopkg.in/yaml.v2" ) @@ -66,262 +51,6 @@ type Options struct { var ConfigPath = "./config.yml" -type Rss2 struct { - XMLName xml.Name `xml:"rss"` - Version string `xml:"version,attr"` - // Required - Title string `xml:"channel>title"` - Link string `xml:"channel>link"` - Description string `xml:"channel>description"` - // Optional - PubDate string `xml:"channel>pubDate"` - ItemList []Item `xml:"channel>item"` -} - -type Item struct { - // Required - Title string `xml:"title"` - Link string `xml:"link"` - Description template.HTML `xml:"description"` - // Optional - Content template.HTML `xml:"encoded"` - PubDate string `xml:"pubDate"` - Comments string `xml:"comments"` -} - -type SendItems struct { - ItemList []Item -} - -type ByPubDate []Item - -func (a ByPubDate) Len() int { return len(a) } -func (a ByPubDate) Less(i, j int) bool { - timeone, _ := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", a[i].PubDate) - timetwo, _ := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", a[j].PubDate) - return timeone.Unix() > timetwo.Unix() -} -func (a ByPubDate) Swap(i, j int) { a[i], a[j] = a[j], a[i] } - -func GetRSS(name string, url string) (*Rss2, error) { - rss := &Rss2{} - - var netClient = &http.Client{} - - customTransport := &(*http.DefaultTransport.(*http.Transport)) // make shallow copy - timeout := time.Duration(240 * time.Second) - customTransport = &http.Transport{ - IdleConnTimeout: timeout, - ResponseHeaderTimeout: timeout, - DisableKeepAlives: false, - DisableCompression: false, - ForceAttemptHTTP2: true, - TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, - TLSHandshakeTimeout: timeout, - MaxIdleConns: 20, - MaxIdleConnsPerHost: 100, - MaxConnsPerHost: 100, - } - netClient = &http.Client{Transport: customTransport, Timeout: timeout} - request, err := http.NewRequest("GET", url, nil) - if err != nil { - return nil, err - } - request.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:97.0) Gecko/20100101 Firefox/97.0") - - resp, err := netClient.Do(request) - if err != nil { - return nil, err - } - - defer resp.Body.Close() - - body, err := io.ReadAll(resp.Body) - if err != nil { - return nil, err - } - - // Start RSS decoding from file - if err := xml.Unmarshal(body, rss); err != nil { - return nil, err - } - - return rss, nil -} - -func ProcessRss(rss Rss2, dbpath string, rssname string) (*SendItems, error) { - var si SendItems - db, err := bolt.Open(dbpath, 0600, nil) - if err != nil { - log.Fatal(err) - } - defer db.Close() - db.Update(func(tx *bolt.Tx) error { - _, err := tx.CreateBucketIfNotExists([]byte(rssname)) - if err != nil { - return err - } - return nil - }) - - for _, v := range rss.ItemList { - db.View(func(tx *bolt.Tx) error { - // Assume bucket exists and has keys - b := tx.Bucket([]byte(rssname)) - c := b.Cursor() - flag := false - for key, _ := c.First(); key != nil; key, _ = c.Next() { - if v.Link == string(key) { - flag = true - break - } - } - if !flag { - si.ItemList = append(si.ItemList, v) - } - return nil - }) - } - - sort.Sort(ByPubDate(si.ItemList)) - if len(si.ItemList) > 0 { - return &si, nil - } else { - return nil, nil - } -} - -func in_array(val interface{}, array interface{}) (exists bool) { - exists = false - - switch reflect.TypeOf(array).Kind() { - case reflect.Slice: - s := reflect.ValueOf(array) - - for i := 0; i < s.Len(); i++ { - if reflect.DeepEqual(val, s.Index(i).Interface()) == true { - exists = true - return - } - } - } - - return -} - -func NormalizeHTMLforTelegram(s string) (out string) { - - tags := []string{"br", "img", "b", "strong", "i", "em", "code", "s", "strike", "del", "u", "pre"} - - domDocTest := html.NewTokenizer(strings.NewReader(s)) - previousStartTokenTest := domDocTest.Token() - flagendtag := false - for { - tt := domDocTest.Next() - if len(out) > 2500 { - if e := in_array(previousStartTokenTest.Data, tags) && previousStartTokenTest.Data != "img" && previousStartTokenTest.Data != "br" && !flagendtag; e { - out += fmt.Sprintf(" ...", previousStartTokenTest.Data) - } else { - out += " ..." - } - return - } - switch { - case tt == html.ErrorToken: - return - case tt == html.StartTagToken: - previousStartTokenTest = domDocTest.Token() - if e := in_array(previousStartTokenTest.Data, tags); e { - switch { - case previousStartTokenTest.Data == "br": - out += "\n" - case previousStartTokenTest.Data == "img" && previousStartTokenTest.Attr[0].Key == "src": - out += fmt.Sprintf("%s ", previousStartTokenTest.Attr[0].Val) - // case previousStartTokenTest.Data == "a" && previousStartTokenTest.Attr[0].Key == "href": - // out += fmt.Sprintf(" %s ", previousStartTokenTest.Attr[0].Val) - default: - out += fmt.Sprintf(" <%s>", previousStartTokenTest.Data) - } - flagendtag = false - } - case tt == html.EndTagToken: - t := domDocTest.Token() - if e := in_array(t.Data, tags); e { - // switch { - // case t.Data == "a": - // out += " " - // default: - out += fmt.Sprintf(" ", t.Data) - // } - flagendtag = true - } - case tt == html.SelfClosingTagToken: - t := domDocTest.Token() - if e := in_array(t.Data, tags); e { - if t.Data == "br" { - out += "\n" - } - } - case tt == html.TextToken: - if previousStartTokenTest.Data == "script" { - continue - } - TxtContent := strings.TrimSpace(html.UnescapeString(string(domDocTest.Text()))) - if len(TxtContent) > 0 { - out += TxtContent - } - } - } -} - -func SendAndWriteToDB(send SendItems, dbpath string, rssname string, token string, chatid int64, debug bool) error { - - db, err := bolt.Open(dbpath, 0600, nil) - if err != nil { - log.Fatal(err) - } - defer db.Close() - - for i := len(send.ItemList) - 1; i >= 0; i-- { - v := send.ItemList[i] - log.Printf("Send to telegram post: %s", v.Title) - bot, err := tgbotapi.NewBotAPI(token) - if err != nil { - log.Panic(err) - } - bot.Debug = debug - - s := "" + rssname + "\n\n" + "" + string(v.Title) + "\n\n" + NormalizeHTMLforTelegram(html.UnescapeString(string(v.Description))) + - "\n\n" + v.Link - msg := tgbotapi.NewMessage(chatid, s) - msg.ParseMode = "Html" - _, err = bot.Send(msg) - if err != nil { - log.Panic(err) - - } - duration := time.Duration(10) * time.Second - time.Sleep(duration) - - db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucketIfNotExists([]byte(rssname)) - if err != nil { - return err - } - encoded, err := json.Marshal(v) - if err != nil { - return err - } - err = b.Put([]byte(v.Link), encoded) - if err != nil { - return err - } - return nil - }) - } - return nil -} - func main() { var options Options var parser = flags.NewParser(&options, flags.Default) diff --git a/process_rss.go b/process_rss.go new file mode 100644 index 0000000..3284cb4 --- /dev/null +++ b/process_rss.go @@ -0,0 +1,139 @@ +package main + +import ( + "crypto/tls" + "encoding/xml" + "html/template" + "io" + "log" + "net/http" + "sort" + "time" + + "github.com/boltdb/bolt" +) + +type Rss2 struct { + XMLName xml.Name `xml:"rss"` + Version string `xml:"version,attr"` + // Required + Title string `xml:"channel>title"` + Link string `xml:"channel>link"` + Description string `xml:"channel>description"` + // Optional + PubDate string `xml:"channel>pubDate"` + ItemList []Item `xml:"channel>item"` +} + +type Item struct { + // Required + Title string `xml:"title"` + Link string `xml:"link"` + Description template.HTML `xml:"description"` + // Optional + Content template.HTML `xml:"encoded"` + PubDate string `xml:"pubDate"` + Comments string `xml:"comments"` +} + +type SendItems struct { + ItemList []Item +} + +type ByPubDate []Item + +func (a ByPubDate) Len() int { return len(a) } +func (a ByPubDate) Less(i, j int) bool { + timeone, _ := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", a[i].PubDate) + timetwo, _ := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", a[j].PubDate) + return timeone.Unix() > timetwo.Unix() +} +func (a ByPubDate) Swap(i, j int) { a[i], a[j] = a[j], a[i] } + +func GetRSS(name string, url string) (*Rss2, error) { + rss := &Rss2{} + + var netClient = &http.Client{} + + customTransport := &(*http.DefaultTransport.(*http.Transport)) // make shallow copy + timeout := time.Duration(240 * time.Second) + customTransport = &http.Transport{ + IdleConnTimeout: timeout, + ResponseHeaderTimeout: timeout, + DisableKeepAlives: false, + DisableCompression: false, + ForceAttemptHTTP2: true, + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + TLSHandshakeTimeout: timeout, + MaxIdleConns: 20, + MaxIdleConnsPerHost: 100, + MaxConnsPerHost: 100, + } + netClient = &http.Client{Transport: customTransport, Timeout: timeout} + request, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, err + } + request.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:97.0) Gecko/20100101 Firefox/97.0") + + resp, err := netClient.Do(request) + if err != nil { + return nil, err + } + + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + // Start RSS decoding from file + if err := xml.Unmarshal(body, rss); err != nil { + return nil, err + } + + return rss, nil +} + +func ProcessRss(rss Rss2, dbpath string, rssname string) (*SendItems, error) { + var si SendItems + db, err := bolt.Open(dbpath, 0600, nil) + if err != nil { + log.Fatal(err) + } + defer db.Close() + db.Update(func(tx *bolt.Tx) error { + _, err := tx.CreateBucketIfNotExists([]byte(rssname)) + if err != nil { + return err + } + return nil + }) + + for _, v := range rss.ItemList { + db.View(func(tx *bolt.Tx) error { + // Assume bucket exists and has keys + b := tx.Bucket([]byte(rssname)) + c := b.Cursor() + flag := false + for key, _ := c.First(); key != nil; key, _ = c.Next() { + if v.Link == string(key) { + flag = true + break + } + } + if !flag { + si.ItemList = append(si.ItemList, v) + } + return nil + }) + } + + sort.Sort(ByPubDate(si.ItemList)) + if len(si.ItemList) > 0 { + return &si, nil + } else { + return nil, nil + } +} diff --git a/send.go b/send.go new file mode 100644 index 0000000..a510c51 --- /dev/null +++ b/send.go @@ -0,0 +1,145 @@ +package main + +import ( + "encoding/json" + "fmt" + "log" + "reflect" + "strings" + "time" + + "github.com/boltdb/bolt" + tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api/v5" + "golang.org/x/net/html" +) + +func in_array(val interface{}, array interface{}) (exists bool) { + exists = false + + switch reflect.TypeOf(array).Kind() { + case reflect.Slice: + s := reflect.ValueOf(array) + + for i := 0; i < s.Len(); i++ { + if reflect.DeepEqual(val, s.Index(i).Interface()) == true { + exists = true + return + } + } + } + + return +} + +func NormalizeHTMLforTelegram(s string) (out string) { + + tags := []string{"br", "img", "b", "strong", "i", "em", "code", "s", "strike", "del", "u", "pre"} + + domDocTest := html.NewTokenizer(strings.NewReader(s)) + previousStartTokenTest := domDocTest.Token() + flagendtag := false + for { + tt := domDocTest.Next() + if len(out) > 2500 { + if e := in_array(previousStartTokenTest.Data, tags) && previousStartTokenTest.Data != "img" && previousStartTokenTest.Data != "br" && !flagendtag; e { + out += fmt.Sprintf(" ...", previousStartTokenTest.Data) + } else { + out += " ..." + } + return + } + switch { + case tt == html.ErrorToken: + return + case tt == html.StartTagToken: + previousStartTokenTest = domDocTest.Token() + if e := in_array(previousStartTokenTest.Data, tags); e { + switch { + case previousStartTokenTest.Data == "br": + out += "\n" + case previousStartTokenTest.Data == "img" && previousStartTokenTest.Attr[0].Key == "src": + out += fmt.Sprintf("%s ", previousStartTokenTest.Attr[0].Val) + // case previousStartTokenTest.Data == "a" && previousStartTokenTest.Attr[0].Key == "href": + // out += fmt.Sprintf(" %s ", previousStartTokenTest.Attr[0].Val) + default: + out += fmt.Sprintf(" <%s>", previousStartTokenTest.Data) + } + flagendtag = false + } + case tt == html.EndTagToken: + t := domDocTest.Token() + if e := in_array(t.Data, tags); e { + // switch { + // case t.Data == "a": + // out += " " + // default: + out += fmt.Sprintf(" ", t.Data) + // } + flagendtag = true + } + case tt == html.SelfClosingTagToken: + t := domDocTest.Token() + if e := in_array(t.Data, tags); e { + if t.Data == "br" { + out += "\n" + } + } + case tt == html.TextToken: + if previousStartTokenTest.Data == "script" { + continue + } + TxtContent := strings.TrimSpace(html.UnescapeString(string(domDocTest.Text()))) + if len(TxtContent) > 0 { + out += TxtContent + } + } + } +} + +func SendAndWriteToDB(send SendItems, dbpath string, rssname string, token string, chatid int64, debug bool) error { + + db, err := bolt.Open(dbpath, 0600, nil) + if err != nil { + log.Fatal(err) + } + defer db.Close() + + for i := len(send.ItemList) - 1; i >= 0; i-- { + v := send.ItemList[i] + log.Printf("Send to telegram post: %s", v.Title) + bot, err := tgbotapi.NewBotAPI(token) + if err != nil { + log.Panic(err) + } + bot.Debug = debug + + s := "" + rssname + "\n\n" + "" + string(v.Title) + "\n\n" + NormalizeHTMLforTelegram(html.UnescapeString(string(v.Description))) + + "\n\n" + v.Link + msg := tgbotapi.NewMessage(chatid, s) + msg.ParseMode = "Html" + _, err = bot.Send(msg) + if err != nil { + log.Panic(err) + + } + duration := time.Duration(10) * time.Second + time.Sleep(duration) + + db.Update(func(tx *bolt.Tx) error { + b, err := tx.CreateBucketIfNotExists([]byte(rssname)) + if err != nil { + return err + } + encoded, err := json.Marshal(v) + if err != nil { + return err + } + err = b.Put([]byte(v.Link), encoded) + if err != nil { + return err + } + return nil + }) + } + return nil +}