2015-06-01 11:57:58 +00:00
|
|
|
// Package rss provides a simple RSS parser, tested with various feeds.
|
2012-03-24 20:11:38 +00:00
|
|
|
package rss
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/xml"
|
|
|
|
"net/http"
|
|
|
|
"time"
|
2018-07-29 21:41:17 +00:00
|
|
|
"crypto/tls"
|
2013-12-17 16:44:03 +00:00
|
|
|
|
2015-11-10 18:15:02 +00:00
|
|
|
"github.com/paulrosania/go-charset/charset"
|
|
|
|
_ "github.com/paulrosania/go-charset/data" //initialize only
|
2012-03-24 20:11:38 +00:00
|
|
|
)
|
|
|
|
|
2014-02-28 00:40:59 +00:00
|
|
|
// wordpressDateFormat is the first layout Date.Parse tries. The layout string
// is identical to time.RFC1123Z: RFC 1123 with a numeric zone offset.
const wordpressDateFormat = "Mon, 02 Jan 2006 15:04:05 -0700"
|
|
|
|
|
2015-06-01 11:57:58 +00:00
|
|
|
// Fetcher is the one-method HTTP client abstraction accepted by
// ReadWithClient. *http.Client satisfies it (Read passes http.DefaultClient),
// and any other type with a matching Get method can be substituted.
type Fetcher interface {
	// Get retrieves url and returns the response or an error.
	// ReadWithClient closes the body of the response it receives.
	Get(url string) (*http.Response, error)
}
|
|
|
|
|
2015-06-01 11:57:58 +00:00
|
|
|
//Channel struct for RSS
|
2012-03-28 15:35:35 +00:00
|
|
|
type Channel struct {
|
|
|
|
Title string `xml:"title"`
|
|
|
|
Link string `xml:"link"`
|
|
|
|
Description string `xml:"description"`
|
|
|
|
Language string `xml:"language"`
|
|
|
|
LastBuildDate Date `xml:"lastBuildDate"`
|
|
|
|
Item []Item `xml:"item"`
|
|
|
|
}
|
|
|
|
|
2015-06-01 11:57:58 +00:00
|
|
|
// ItemEnclosure holds the attributes decoded from an item's <enclosure>
// element.
type ItemEnclosure struct {
	URL  string `xml:"url,attr"`  // value of the url attribute
	Type string `xml:"type,attr"` // value of the type attribute
}
|
|
|
|
|
2015-06-01 11:57:58 +00:00
|
|
|
//Item struct for each Item in the Channel
|
2012-03-24 20:11:38 +00:00
|
|
|
type Item struct {
|
2015-06-01 10:03:38 +00:00
|
|
|
Title string `xml:"title"`
|
|
|
|
Link string `xml:"link"`
|
|
|
|
Comments string `xml:"comments"`
|
|
|
|
PubDate Date `xml:"pubDate"`
|
|
|
|
GUID string `xml:"guid"`
|
|
|
|
Category []string `xml:"category"`
|
|
|
|
Enclosure []ItemEnclosure `xml:"enclosure"`
|
|
|
|
Description string `xml:"description"`
|
2019-03-10 01:32:51 +00:00
|
|
|
Author string `xml:"author"`
|
2015-06-01 10:03:38 +00:00
|
|
|
Content string `xml:"content"`
|
2017-04-27 00:31:58 +00:00
|
|
|
FullText string `xml:"full-text"`
|
2012-03-24 20:11:38 +00:00
|
|
|
}
|
|
|
|
|
2015-06-01 11:57:58 +00:00
|
|
|
//Date type
|
2012-03-28 15:35:35 +00:00
|
|
|
type Date string
|
|
|
|
|
2015-06-01 11:57:58 +00:00
|
|
|
//Parse (Date function) and returns Time, error
|
|
|
|
func (d Date) Parse() (time.Time, error) {
|
|
|
|
t, err := d.ParseWithFormat(wordpressDateFormat)
|
2012-03-24 20:11:38 +00:00
|
|
|
if err != nil {
|
2015-06-01 11:57:58 +00:00
|
|
|
t, err = d.ParseWithFormat(time.RFC822) // RSS 2.0 spec
|
2019-03-10 01:32:42 +00:00
|
|
|
if err != nil {
|
|
|
|
t, err = d.ParseWithFormat(time.RFC3339) // Atom
|
|
|
|
}
|
2012-03-24 20:11:38 +00:00
|
|
|
}
|
|
|
|
return t, err
|
|
|
|
}
|
|
|
|
|
2015-06-01 11:57:58 +00:00
|
|
|
//ParseWithFormat (Date function), takes a string and returns Time, error
|
|
|
|
func (d Date) ParseWithFormat(format string) (time.Time, error) {
|
|
|
|
return time.Parse(format, string(d))
|
2014-02-28 00:40:59 +00:00
|
|
|
}
|
|
|
|
|
2015-06-01 11:57:58 +00:00
|
|
|
//Format (Date function), takes a string and returns string, error
|
|
|
|
func (d Date) Format(format string) (string, error) {
|
|
|
|
t, err := d.Parse()
|
2012-03-24 20:11:38 +00:00
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
return t.Format(format), nil
|
|
|
|
}
|
|
|
|
|
2015-06-01 11:57:58 +00:00
|
|
|
//MustFormat (Date function), take a string and returns string
|
|
|
|
func (d Date) MustFormat(format string) string {
|
|
|
|
s, err := d.Format(format)
|
2012-03-24 20:11:38 +00:00
|
|
|
if err != nil {
|
|
|
|
return err.Error()
|
|
|
|
}
|
|
|
|
return s
|
|
|
|
}
|
|
|
|
|
2015-06-01 11:57:58 +00:00
|
|
|
//Read a string url and returns a Channel struct, error
|
2012-03-28 15:35:35 +00:00
|
|
|
func Read(url string) (*Channel, error) {
|
2014-02-27 00:57:03 +00:00
|
|
|
return ReadWithClient(url, http.DefaultClient)
|
|
|
|
}
|
|
|
|
|
2019-03-14 02:47:11 +00:00
|
|
|
//InsecureRead reads without certificate check
|
2018-07-29 21:41:17 +00:00
|
|
|
func InsecureRead(url string) (*Channel, error) {
|
|
|
|
tr := &http.Transport{
|
|
|
|
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
|
|
|
}
|
|
|
|
client := &http.Client{Transport: tr}
|
|
|
|
|
|
|
|
return ReadWithClient(url, client)
|
|
|
|
}
|
|
|
|
|
2015-06-01 11:57:58 +00:00
|
|
|
//ReadWithClient a string url and custom client that must match the Fetcher interface
|
|
|
|
//returns a Channel struct, error
|
2014-02-27 23:15:37 +00:00
|
|
|
func ReadWithClient(url string, client Fetcher) (*Channel, error) {
|
2014-02-27 00:57:03 +00:00
|
|
|
response, err := client.Get(url)
|
2012-03-24 20:11:38 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
defer response.Body.Close()
|
2013-03-14 23:25:13 +00:00
|
|
|
xmlDecoder := xml.NewDecoder(response.Body)
|
|
|
|
xmlDecoder.CharsetReader = charset.NewReader
|
2012-03-28 15:35:35 +00:00
|
|
|
|
2012-03-24 20:11:38 +00:00
|
|
|
var rss struct {
|
2012-03-28 15:35:35 +00:00
|
|
|
Channel Channel `xml:"channel"`
|
2012-03-24 20:11:38 +00:00
|
|
|
}
|
2014-02-27 00:57:03 +00:00
|
|
|
if err = xmlDecoder.Decode(&rss); err != nil {
|
2012-03-24 20:11:38 +00:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return &rss.Channel, nil
|
|
|
|
}
|