Browse Source

Init commit

pull/1/head
commit
1fd61d2087
  1. 18
      .gitignore
  2. 21
      LICENSE
  3. 1
      README.md
  4. 12
      go.mod
  5. 15
      go.sum
  6. 368
      main.go

18
.gitignore vendored

@ -0,0 +1,18 @@ @@ -0,0 +1,18 @@
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
# Test binary, built with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out
# Dependency directories (remove the comment below to include it)
# vendor/
*.yml
*.db*
rsscollector

21
LICENSE

@ -0,0 +1,21 @@ @@ -0,0 +1,21 @@
MIT License
Copyright (c) 2022 NetMoose
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

1
README.md

@ -0,0 +1 @@ @@ -0,0 +1 @@
# RSSCollector

12
go.mod

@ -0,0 +1,12 @@ @@ -0,0 +1,12 @@
module mod
go 1.17
require (
github.com/boltdb/bolt v1.3.1 // indirect
github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1 // indirect
github.com/jessevdk/go-flags v1.5.0 // indirect
golang.org/x/net v0.0.0-20220225172249-27dd8689420f // indirect
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
)

15
go.sum

@ -0,0 +1,15 @@ @@ -0,0 +1,15 @@
github.com/boltdb/bolt v1.3.1 h1:JQmyP4ZBrce+ZQu0dY660FMfatumYDLun9hBCUVIkF4=
github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps=
github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1 h1:wG8n/XJQ07TmjbITcGiUaOtXxdrINDz1b0J1w0SzqDc=
github.com/go-telegram-bot-api/telegram-bot-api/v5 v5.5.1/go.mod h1:A2S0CWkNylc2phvKXWBBdD3K0iGnDBGbzRpISP2zBl8=
github.com/jessevdk/go-flags v1.5.0 h1:1jKYvbxEjfUl0fmqTCOfonvskHHXMjBySTLW4y9LFvc=
github.com/jessevdk/go-flags v1.5.0/go.mod h1:Fw0T6WPc1dYxT4mKEZRfG5kJhaTDP9pj1c2EWnYs/m4=
golang.org/x/net v0.0.0-20220225172249-27dd8689420f h1:oA4XRj0qtSt8Yo1Zms0CUlsT3KG69V2UGQWPBxujDmc=
golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4 h1:EZ2mChiOa8udjfp6rRmswTbtZN/QzUQp4ptM4rnjHvc=
golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e h1:fLOSk5Q00efkSvAm+4xcoXD+RRmLmmulPn5I3Y9F2EM=
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=

368
main.go

@ -0,0 +1,368 @@ @@ -0,0 +1,368 @@
package main
import (
"crypto/tls"
"encoding/json"
"encoding/xml"
"fmt"
"sort"
"html/template"
"io"
"log"
"net/http"
"os"
"reflect"
"strings"
"time"
"github.com/boltdb/bolt"
tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api/v5"
"github.com/jessevdk/go-flags"
"golang.org/x/net/html"
"gopkg.in/yaml.v2"
)
type RSSItem struct {
Name string `yaml:"name"`
URL string `yaml:"url"`
}
type Config struct {
Dbpath string `yaml:"dbpath"`
Telegram struct {
SendDebug bool `yaml:"senddebug"`
ChatId int64 `yaml:"chatid"`
Token string `yaml:"token"`
} `yaml:"telegram"`
RssList []RSSItem `yaml:"rsslist"`
}
func NewConfig(configPath string) (*Config, error) {
// Create config structure
config := &Config{}
// Open config file
file, err := os.Open(configPath)
if err != nil {
return nil, err
}
defer file.Close()
// Init new YAML decode
d := yaml.NewDecoder(file)
// Start YAML decoding from file
if err := d.Decode(&config); err != nil {
return nil, err
}
return config, nil
}
type Options struct {
ConfigPath string `short:"c" long:"configpath" description:"Config file path"`
}
var ConfigPath = "./config.yml"
type Rss2 struct {
XMLName xml.Name `xml:"rss"`
Version string `xml:"version,attr"`
// Required
Title string `xml:"channel>title"`
Link string `xml:"channel>link"`
Description string `xml:"channel>description"`
// Optional
PubDate string `xml:"channel>pubDate"`
ItemList []Item `xml:"channel>item"`
}
type Item struct {
// Required
Title string `xml:"title"`
Link string `xml:"link"`
Description template.HTML `xml:"description"`
// Optional
Content template.HTML `xml:"encoded"`
PubDate string `xml:"pubDate"`
Comments string `xml:"comments"`
}
type SendItems struct {
ItemList []Item
}
type ByPubDate []Item
func (a ByPubDate) Len() int { return len(a) }
func (a ByPubDate) Less(i, j int) bool {
timeone, _ := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", a[i].PubDate)
timetwo, _ := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", a[j].PubDate)
return timeone.Unix() > timetwo.Unix()
}
func (a ByPubDate) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func GetRSS(name string, url string) (*Rss2, error) {
rss := &Rss2{}
var netClient = &http.Client{}
customTransport := &(*http.DefaultTransport.(*http.Transport)) // make shallow copy
timeout := time.Duration(240 * time.Second)
customTransport = &http.Transport{
IdleConnTimeout: timeout,
ResponseHeaderTimeout: timeout,
DisableKeepAlives: false,
DisableCompression: false,
ForceAttemptHTTP2: true,
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
TLSHandshakeTimeout: timeout,
MaxIdleConns: 20,
MaxIdleConnsPerHost: 100,
MaxConnsPerHost: 100,
}
netClient = &http.Client{Transport: customTransport, Timeout: timeout}
request, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, err
}
request.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:97.0) Gecko/20100101 Firefox/97.0")
resp, err := netClient.Do(request)
if err != nil {
return nil, err
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, err
}
// Start RSS decoding from file
if err := xml.Unmarshal(body, rss); err != nil {
return nil, err
}
return rss, nil
}
func ProcessRss(rss Rss2, dbpath string, rssname string) (*SendItems, error) {
var si SendItems
db, err := bolt.Open(dbpath, 0600, nil)
if err != nil {
log.Fatal(err)
}
defer db.Close()
db.Update(func(tx *bolt.Tx) error {
_, err := tx.CreateBucketIfNotExists([]byte(rssname))
if err != nil {
return err
}
return nil
})
for _, v := range rss.ItemList {
db.View(func(tx *bolt.Tx) error {
// Assume bucket exists and has keys
b := tx.Bucket([]byte(rssname))
c := b.Cursor()
flag := false
for key, _ := c.First(); key != nil; key, _ = c.Next() {
if v.Link == string(key) {
flag = true
break
}
}
if !flag {
si.ItemList = append(si.ItemList, v)
}
return nil
})
}
sort.Sort(ByPubDate(si.ItemList))
if len(si.ItemList) > 0 {
return &si, nil
} else {
return nil, nil
}
}
func in_array(val interface{}, array interface{}) (exists bool) {
exists = false
switch reflect.TypeOf(array).Kind() {
case reflect.Slice:
s := reflect.ValueOf(array)
for i := 0; i < s.Len(); i++ {
if reflect.DeepEqual(val, s.Index(i).Interface()) == true {
exists = true
return
}
}
}
return
}
func NormalizeHTMLforTelegram(s string) (out string) {
tags := []string{"br", "img", "b", "strong", "i", "em", "code", "s", "strike", "del", "u", "pre"}
domDocTest := html.NewTokenizer(strings.NewReader(s))
previousStartTokenTest := domDocTest.Token()
for {
tt := domDocTest.Next()
if len(out) > 2500 {
if e := in_array(previousStartTokenTest.Data, tags) && previousStartTokenTest.Data != "img" && previousStartTokenTest.Data != "br"; e {
out += fmt.Sprintf("</%s> ...", previousStartTokenTest.Data)
} else {
out += " ..."
}
return
}
switch {
case tt == html.ErrorToken:
return
case tt == html.StartTagToken:
previousStartTokenTest = domDocTest.Token()
if e := in_array(previousStartTokenTest.Data, tags); e {
switch {
case previousStartTokenTest.Data == "br":
out += "\n"
case previousStartTokenTest.Data == "img" && previousStartTokenTest.Attr[0].Key == "src":
out += fmt.Sprintf("%s ", previousStartTokenTest.Attr[0].Val)
// case previousStartTokenTest.Data == "a" && previousStartTokenTest.Attr[0].Key == "href":
// out += fmt.Sprintf(" %s ", previousStartTokenTest.Attr[0].Val)
default:
out += fmt.Sprintf(" <%s>", previousStartTokenTest.Data)
}
}
case tt == html.EndTagToken:
t := domDocTest.Token()
if e := in_array(t.Data, tags); e {
// switch {
// case t.Data == "a":
// out += " "
// default:
out += fmt.Sprintf("</%s> ", t.Data)
// }
}
case tt == html.SelfClosingTagToken:
t := domDocTest.Token()
if e := in_array(t.Data, tags); e {
if t.Data == "br" {
out += "\n"
}
}
case tt == html.TextToken:
if previousStartTokenTest.Data == "script" {
continue
}
TxtContent := strings.TrimSpace(html.UnescapeString(string(domDocTest.Text())))
if len(TxtContent) > 0 {
out += TxtContent
}
}
}
}
func SendAndWriteToDB(send SendItems, dbpath string, rssname string, token string, chatid int64, debug bool) error {
db, err := bolt.Open(dbpath, 0600, nil)
if err != nil {
log.Fatal(err)
}
defer db.Close()
for i := len(send.ItemList) - 1; i >= 0; i-- {
v := send.ItemList[i]
log.Printf("Send to telegram post: %s", v.Title)
bot, err := tgbotapi.NewBotAPI(token)
if err != nil {
log.Panic(err)
}
bot.Debug = debug
s := "<i>" + rssname + "</i>\n\n" + "<b>" + string(v.Title) + "</b>\n\n" + NormalizeHTMLforTelegram(html.UnescapeString(string(v.Description))) +
"\n\n" + v.Link
msg := tgbotapi.NewMessage(chatid, s)
msg.ParseMode = "Html"
_, err = bot.Send(msg)
if err != nil {
log.Panic(err)
}
duration := time.Duration(10) * time.Second
time.Sleep(duration)
db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucketIfNotExists([]byte(rssname))
if err != nil {
return err
}
encoded, err := json.Marshal(v)
if err != nil {
return err
}
err = b.Put([]byte(v.Link), encoded)
if err != nil {
return err
}
return nil
})
}
return nil
}
func main() {
var options Options
var parser = flags.NewParser(&options, flags.Default)
if _, err := parser.Parse(); err != nil {
switch flagsErr := err.(type) {
case flags.ErrorType:
if flagsErr == flags.ErrHelp {
os.Exit(0)
}
os.Exit(1)
default:
os.Exit(1)
}
}
log.Println("Flags processed")
if options.ConfigPath != "" {
ConfigPath = options.ConfigPath
}
// Get config
log.Printf("Config file: %s\n", ConfigPath)
cfg, err := NewConfig(ConfigPath)
if err != nil {
log.Fatal(err)
}
log.Println("Start to send")
for _, v := range cfg.RssList {
log.Printf("Feed: %s, URL: %s\n", v.Name, v.URL)
rss, err := GetRSS(v.Name, v.URL)
if err != nil {
log.Fatalln(err)
}
send, err := ProcessRss(*rss, cfg.Dbpath, v.Name)
if err != nil {
log.Fatal(err)
}
if nil != send && len(send.ItemList) > 0 {
if err := SendAndWriteToDB(*send, cfg.Dbpath, v.Name, cfg.Telegram.Token, cfg.Telegram.ChatId, cfg.Telegram.SendDebug); err != nil {
log.Fatal(err)
}
} else {
log.Println("Nothing to send")
}
}
log.Println("Stop to send")
}
Loading…
Cancel
Save