From 1fd61d20872b31107c6df9f2ce5c2150ddaf823e Mon Sep 17 00:00:00 2001 From: "Andrey Ivanov (NetMoose)" Date: Sat, 12 Mar 2022 21:10:23 +0500 Subject: [PATCH] Init commit --- .gitignore | 18 +++ LICENSE | 21 +++ README.md | 1 + go.mod | 12 ++ go.sum | 15 +++ main.go | 368 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 435 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 go.mod create mode 100644 go.sum create mode 100644 main.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5d6cff4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,18 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# Dependency directories (remove the comment below to include it) +# vendor/ +*.yml +*.db* +rsscollector \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ee8f6fa --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022 NetMoose + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..7136f02 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# RSSCollector \ No newline at end of file diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..c09cc5d --- /dev/null +++ b/go.mod @@ -0,0 +1,12 @@ +module mod + +go 1.17 + +require ( + github.com/boltdb/bolt v1.3.1 // indirect + github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1 // indirect + github.com/jessevdk/go-flags v1.5.0 // indirect + golang.org/x/net v0.0.0-20220225172249-27dd8689420f // indirect + golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..815cb61 --- /dev/null +++ b/go.sum @@ -0,0 +1,15 @@ +github.com/boltdb/bolt v1.3.1 h1:JQmyP4ZBrce+ZQu0dY660FMfatumYDLun9hBCUVIkF4= +github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps= +github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1 h1:wG8n/XJQ07TmjbITcGiUaOtXxdrINDz1b0J1w0SzqDc= +github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1/go.mod h1:A2S0CWkNylc2phvKXWBBdD3K0iGnDBGbzRpISP2zBl8= +github.com/jessevdk/go-flags v1.5.0 h1:1jKYvbxEjfUl0fmqTCOfonvskHHXMjBySTLW4y9LFvc= +github.com/jessevdk/go-flags v1.5.0/go.mod h1:Fw0T6WPc1dYxT4mKEZRfG5kJhaTDP9pj1c2EWnYs/m4= +golang.org/x/net v0.0.0-20220225172249-27dd8689420f h1:oA4XRj0qtSt8Yo1Zms0CUlsT3KG69V2UGQWPBxujDmc= +golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= +golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4 h1:EZ2mChiOa8udjfp6rRmswTbtZN/QzUQp4ptM4rnjHvc= +golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e h1:fLOSk5Q00efkSvAm+4xcoXD+RRmLmmulPn5I3Y9F2EM=
+golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..493eb76
--- /dev/null
+++ b/main.go
@@ -0,0 +1,429 @@
+package main
+
+import (
+	"crypto/tls"
+	"encoding/json"
+	"encoding/xml"
+	"fmt"
+	"sort"
+
+	"html/template"
+	"io"
+	"log"
+	"net/http"
+	"os"
+	"reflect"
+	"strings"
+	"time"
+
+	"github.com/boltdb/bolt"
+	tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api/v5"
+	"github.com/jessevdk/go-flags"
+	"golang.org/x/net/html"
+	"gopkg.in/yaml.v2"
+)
+
+// RSSItem is one feed entry of the "rsslist" section of the config file.
+type RSSItem struct {
+	Name string `yaml:"name"`
+	URL  string `yaml:"url"`
+}
+
+// Config mirrors the YAML configuration file.
+type Config struct {
+	Dbpath   string `yaml:"dbpath"`
+	Telegram struct {
+		SendDebug bool   `yaml:"senddebug"`
+		ChatId    int64  `yaml:"chatid"`
+		Token     string `yaml:"token"`
+	} `yaml:"telegram"`
+	RssList []RSSItem `yaml:"rsslist"`
+}
+
+// NewConfig reads the YAML file at configPath and decodes it into a Config.
+func NewConfig(configPath string) (*Config, error) {
+	file, err := os.Open(configPath)
+	if err != nil {
+		return nil, err
+	}
+	defer file.Close()
+
+	config := &Config{}
+	// config is already a pointer, so it is handed to Decode as-is.
+	if err := yaml.NewDecoder(file).Decode(config); err != nil {
+		return nil, err
+	}
+
+	return config, nil
+}
+
+// Options holds the command-line flags.
+type Options struct {
+	ConfigPath string `short:"c" long:"configpath" description:"Config file path"`
+}
+
+// ConfigPath is the config file location used when -c/--configpath is not given.
+var ConfigPath = "./config.yml"
+
+// Rss2 is the subset of an RSS 2.0 document decoded by GetRSS.
+type Rss2 struct {
+	XMLName xml.Name `xml:"rss"`
+	Version string   `xml:"version,attr"`
+	// Required
+	Title       string `xml:"channel>title"`
+	Link        string `xml:"channel>link"`
+	Description string `xml:"channel>description"`
+	// Optional
+	PubDate  string `xml:"channel>pubDate"`
+	ItemList []Item `xml:"channel>item"`
+}
+
+// Item is a single <item> of an RSS channel.
+type Item struct {
+	// Required
+	Title       string        `xml:"title"`
+	Link        string        `xml:"link"`
+	Description template.HTML `xml:"description"`
+	// Optional
+	Content  template.HTML `xml:"encoded"`
+	PubDate  string        `xml:"pubDate"`
+	Comments string        `xml:"comments"`
+}
+
+// SendItems is the list of feed items that still have to be delivered.
+type SendItems struct {
+	ItemList []Item
+}
+
+// ByPubDate sorts items newest first by their RFC 1123 (numeric zone) pubDate.
+type ByPubDate []Item
+
+func (a ByPubDate) Len() int { return len(a) }
+func (a ByPubDate) Less(i, j int) bool {
+	// Parse errors are ignored on purpose: an unparsable date yields the zero
+	// time, so such items sort as the oldest.
+	ti, _ := time.Parse(time.RFC1123Z, a[i].PubDate)
+	tj, _ := time.Parse(time.RFC1123Z, a[j].PubDate)
+	return ti.After(tj)
+}
+func (a ByPubDate) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+
+// GetRSS downloads the feed at url and decodes it as RSS 2.0. name is only
+// used to label errors.
+func GetRSS(name string, url string) (*Rss2, error) {
+	timeout := 240 * time.Second
+	// NOTE(review): InsecureSkipVerify disables TLS certificate verification for
+	// every feed, which allows man-in-the-middle tampering. It is left as the
+	// author configured it, but should be reconsidered.
+	transport := &http.Transport{
+		IdleConnTimeout:       timeout,
+		ResponseHeaderTimeout: timeout,
+		ForceAttemptHTTP2:     true,
+		TLSClientConfig:       &tls.Config{InsecureSkipVerify: true},
+		TLSHandshakeTimeout:   timeout,
+		MaxIdleConns:          20,
+		MaxIdleConnsPerHost:   100,
+		MaxConnsPerHost:       100,
+	}
+	// The transport is private to this call, so release its pooled connections
+	// instead of leaving them open until the idle timeout.
+	defer transport.CloseIdleConnections()
+	client := &http.Client{Transport: transport, Timeout: timeout}
+
+	request, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		return nil, fmt.Errorf("feed %q: building request: %w", name, err)
+	}
+	request.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:97.0) Gecko/20100101 Firefox/97.0")
+
+	resp, err := client.Do(request)
+	if err != nil {
+		return nil, fmt.Errorf("feed %q: %w", name, err)
+	}
+	defer resp.Body.Close()
+
+	// An error page would otherwise surface as a confusing XML syntax error.
+	if resp.StatusCode < 200 || resp.StatusCode > 299 {
+		return nil, fmt.Errorf("feed %q: unexpected HTTP status %s", name, resp.Status)
+	}
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("feed %q: reading body: %w", name, err)
+	}
+
+	rss := &Rss2{}
+	if err := xml.Unmarshal(body, rss); err != nil {
+		return nil, fmt.Errorf("feed %q: decoding XML: %w", name, err)
+	}
+
+	return rss, nil
+}
+
+// ProcessRss returns the items of rss whose link is not yet stored in the
+// rssname bucket of the bolt database at dbpath, sorted newest first. It
+// returns (nil, nil) when there is nothing new.
+func ProcessRss(rss Rss2, dbpath string, rssname string) (*SendItems, error) {
+	db, err := bolt.Open(dbpath, 0600, nil)
+	if err != nil {
+		return nil, fmt.Errorf("opening database %q: %w", dbpath, err)
+	}
+	defer db.Close()
+
+	var si SendItems
+	err = db.Update(func(tx *bolt.Tx) error {
+		b, err := tx.CreateBucketIfNotExists([]byte(rssname))
+		if err != nil {
+			return err
+		}
+		for _, item := range rss.ItemList {
+			if item.Link == "" {
+				// The link is the database key; without one the item could
+				// never be recorded and would be re-sent on every run.
+				log.Printf("Skip item without link: %s", item.Title)
+				continue
+			}
+			// A direct key lookup replaces scanning the whole bucket per item.
+			if b.Get([]byte(item.Link)) == nil {
+				si.ItemList = append(si.ItemList, item)
+			}
+		}
+		return nil
+	})
+	if err != nil {
+		return nil, fmt.Errorf("feed %q: checking stored items: %w", rssname, err)
+	}
+
+	if len(si.ItemList) == 0 {
+		return nil, nil
+	}
+	sort.Sort(ByPubDate(si.ItemList))
+	return &si, nil
+}
+
+// in_array reports whether array is a slice containing an element deeply
+// equal to val. Anything that is not a slice (including nil) yields false.
+func in_array(val interface{}, array interface{}) (exists bool) {
+	t := reflect.TypeOf(array)
+	if t == nil || t.Kind() != reflect.Slice {
+		return false
+	}
+
+	s := reflect.ValueOf(array)
+	for i := 0; i < s.Len(); i++ {
+		if reflect.DeepEqual(val, s.Index(i).Interface()) {
+			return true
+		}
+	}
+	return false
+}
+
+// telegramTags lists the HTML elements NormalizeHTMLforTelegram reacts to:
+// "br" and "img" are turned into text, the others are passed through.
+var telegramTags = []string{"br", "img", "b", "strong", "i", "em", "code", "s", "strike", "del", "u", "pre"}
+
+// maxTelegramBody is the output length in bytes after which
+// NormalizeHTMLforTelegram stops and appends "...".
+const maxTelegramBody = 2500
+
+// attrValue returns the value of the first attribute of tok named key, or "".
+func attrValue(tok html.Token, key string) string {
+	for _, a := range tok.Attr {
+		if a.Key == key {
+			return a.Val
+		}
+	}
+	return ""
+}
+
+// NormalizeHTMLforTelegram reduces the HTML fragment s to text suitable for a
+// Telegram message sent with the HTML parse mode: <br> becomes a newline,
+// <img> becomes its src URL, the formatting tags in telegramTags are kept,
+// every other tag is dropped and text is HTML-escaped. Output is truncated
+// with "..." once it exceeds maxTelegramBody bytes. Emitted tags are always
+// closed, so the result is well-nested even for truncated or sloppy input.
+func NormalizeHTMLforTelegram(s string) (out string) {
+	var (
+		b        strings.Builder
+		open     []string // formatting tags emitted and not yet closed
+		inScript bool     // true between <script> and </script>
+	)
+	// closeFrom closes open[idx:], innermost first.
+	closeFrom := func(idx int) {
+		for i := len(open) - 1; i >= idx; i-- {
+			fmt.Fprintf(&b, "</%s> ", open[i])
+		}
+		open = open[:idx]
+	}
+
+	z := html.NewTokenizer(strings.NewReader(s))
+	for {
+		tt := z.Next()
+		if b.Len() > maxTelegramBody {
+			if len(open) == 0 {
+				b.WriteString(" ")
+			}
+			closeFrom(0)
+			b.WriteString("...")
+			return b.String()
+		}
+		switch tt {
+		case html.ErrorToken:
+			// End of input (or a tokenizer error): close what is still open.
+			closeFrom(0)
+			return b.String()
+		case html.StartTagToken, html.SelfClosingTagToken:
+			tok := z.Token()
+			if tok.Data == "script" {
+				inScript = tt == html.StartTagToken
+				continue
+			}
+			if !in_array(tok.Data, telegramTags) {
+				continue
+			}
+			switch tok.Data {
+			case "br":
+				b.WriteString("\n")
+			case "img":
+				// Look the attribute up by name; indexing Attr[0] panics on an
+				// <img> without attributes and misses src in any other position.
+				if src := attrValue(tok, "src"); src != "" {
+					b.WriteString(html.EscapeString(src) + " ")
+				}
+			default:
+				if tt == html.StartTagToken {
+					fmt.Fprintf(&b, " <%s>", tok.Data)
+					open = append(open, tok.Data)
+				}
+			}
+		case html.EndTagToken:
+			tok := z.Token()
+			if tok.Data == "script" {
+				inScript = false
+				continue
+			}
+			// Only close tags that were really opened; closing the match also
+			// closes anything left open inside it. Stray end tags are dropped.
+			for i := len(open) - 1; i >= 0; i-- {
+				if open[i] == tok.Data {
+					closeFrom(i)
+					break
+				}
+			}
+		case html.TextToken:
+			if inScript {
+				continue
+			}
+			text := strings.TrimSpace(html.UnescapeString(string(z.Text())))
+			// Telegram's HTML mode requires <, > and & in text to be escaped.
+			b.WriteString(html.EscapeString(text))
+		}
+	}
+}
+
+// SendAndWriteToDB posts every item of send to the Telegram chat chatid, in
+// reverse slice order (oldest first for a list sorted newest first), and
+// records each delivered item under its link in the rssname bucket of the bolt
+// database at dbpath. It stops at the first failure and returns it.
+func SendAndWriteToDB(send SendItems, dbpath string, rssname string, token string, chatid int64, debug bool) error {
+	if len(send.ItemList) == 0 {
+		return nil
+	}
+
+	db, err := bolt.Open(dbpath, 0600, nil)
+	if err != nil {
+		return fmt.Errorf("opening database %q: %w", dbpath, err)
+	}
+	defer db.Close()
+
+	// One bot is enough for the whole batch; no need to rebuild it per item.
+	bot, err := tgbotapi.NewBotAPI(token)
+	if err != nil {
+		return fmt.Errorf("creating Telegram bot: %w", err)
+	}
+	bot.Debug = debug
+
+	for i := len(send.ItemList) - 1; i >= 0; i-- {
+		v := send.ItemList[i]
+		log.Printf("Send to telegram post: %s", v.Title)
+
+		// The message is parsed as HTML, so plain-text fields must be escaped.
+		s := html.EscapeString(rssname) + "\n\n" + html.EscapeString(v.Title) + "\n\n" +
+			NormalizeHTMLforTelegram(html.UnescapeString(string(v.Description))) +
+			"\n\n" + html.EscapeString(v.Link)
+		msg := tgbotapi.NewMessage(chatid, s)
+		msg.ParseMode = "Html"
+		if _, err := bot.Send(msg); err != nil {
+			return fmt.Errorf("sending %q: %w", v.Link, err)
+		}
+
+		// Record the item right after a successful send, before the pause, to
+		// shrink the window in which an interruption leads to a duplicate.
+		err = db.Update(func(tx *bolt.Tx) error {
+			b, err := tx.CreateBucketIfNotExists([]byte(rssname))
+			if err != nil {
+				return err
+			}
+			encoded, err := json.Marshal(v)
+			if err != nil {
+				return err
+			}
+			return b.Put([]byte(v.Link), encoded)
+		})
+		if err != nil {
+			return fmt.Errorf("storing %q: %w", v.Link, err)
+		}
+
+		// Pause between messages, presumably to respect Telegram's rate limits.
+		time.Sleep(10 * time.Second)
+	}
+	return nil
+}
+
+// main loads the configuration and, for every configured feed, sends the
+// items that are not yet recorded in the database.
+func main() {
+	var options Options
+	parser := flags.NewParser(&options, flags.Default)
+	if _, err := parser.Parse(); err != nil {
+		// Parse reports failures as *flags.Error (never a bare ErrorType);
+		// a help request is the only one that is not a real error.
+		if flagsErr, ok := err.(*flags.Error); ok && flagsErr.Type == flags.ErrHelp {
+			os.Exit(0)
+		}
+		os.Exit(1)
+	}
+	log.Println("Flags processed")
+
+	if options.ConfigPath != "" {
+		ConfigPath = options.ConfigPath
+	}
+	// Get config
+	log.Printf("Config file: %s\n", ConfigPath)
+	cfg, err := NewConfig(ConfigPath)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	log.Println("Start to send")
+	for _, v := range cfg.RssList {
+		log.Printf("Feed: %s, URL: %s\n", v.Name, v.URL)
+		rss, err := GetRSS(v.Name, v.URL)
+		if err != nil {
+			log.Fatalln(err)
+		}
+		send, err := ProcessRss(*rss, cfg.Dbpath, v.Name)
+		if err != nil {
+			log.Fatal(err)
+		}
+		if send == nil || len(send.ItemList) == 0 {
+			log.Println("Nothing to send")
+			continue
+		}
+		err = SendAndWriteToDB(*send, cfg.Dbpath, v.Name, cfg.Telegram.Token, cfg.Telegram.ChatId, cfg.Telegram.SendDebug)
+		if err != nil {
+			log.Fatal(err)
+		}
+	}
+	log.Println("Stop to send")
+}