From da95a71843c79aa4df1f883e1b410af7ccd38f2f Mon Sep 17 00:00:00 2001 From: Marcin Wyszynski Date: Fri, 28 Feb 2014 00:40:59 +0000 Subject: [PATCH] Tests and fixes - added two feeds for testing purposes; - added tests for parsing these (and future) feeds; - removed AtomLink which does not work due to a known bug in Go (https://codereview.appspot.com/6868044); - added an option to parse date with arbitrary time; --- rss.go | 26 +- rss_test.go | 48 +++ testdata/techcrunch.rss | 796 ++++++++++++++++++++++++++++++++++++++++ testdata/wordpress.rss | 699 +++++++++++++++++++++++++++++++++++ 4 files changed, 1556 insertions(+), 13 deletions(-) create mode 100644 rss_test.go create mode 100644 testdata/techcrunch.rss create mode 100644 testdata/wordpress.rss diff --git a/rss.go b/rss.go index 83ac8b4..6c3c9d7 100644 --- a/rss.go +++ b/rss.go @@ -1,6 +1,6 @@ -/* -Simple RSS parser, tested with Wordpress feeds. -*/ +/** + * Simple RSS parser, tested with various feeds. + */ package rss import ( @@ -12,6 +12,10 @@ import ( _ "code.google.com/p/go-charset/data" ) +const ( + wordpressDateFormat = "Mon, 02 Jan 2006 15:04:05 -0700" +) + type Fetcher interface { Get(url string) (resp *http.Response, err error) } @@ -30,16 +34,9 @@ type ItemEnclosure struct { Type string `xml:"type,attr"` } -type AtomLink struct { - Href string `xml:"href,attr"` - Rel string `xml:"rel,attr"` - Type string `xml:"type,attr"` -} - type Item struct { Title string `xml:"title"` Link string `xml:"link"` - AtomLink AtomLink `xml:"http://www.w3.org/2005/Atom/ link"` Comments string `xml:"comments"` PubDate Date `xml:"pubDate"` GUID string `xml:"guid"` @@ -52,14 +49,17 @@ type Item struct { type Date string func (self Date) Parse() (time.Time, error) { - // Wordpress format - t, err := time.Parse("Mon, 02 Jan 2006 15:04:05 -0700", string(self)) + t, err := self.ParseWithFormat(wordpressDateFormat) if err != nil { - t, err = time.Parse(time.RFC822, string(self)) // RSS 2.0 spec + t, err = 
self.ParseWithFormat(time.RFC822) // RSS 2.0 spec } return t, err } +func (self Date) ParseWithFormat(format string) (time.Time, error) { + return time.Parse(format, string(self)) +} + func (self Date) Format(format string) (string, error) { t, err := self.Parse() if err != nil { diff --git a/rss_test.go b/rss_test.go new file mode 100644 index 0000000..f8f046f --- /dev/null +++ b/rss_test.go @@ -0,0 +1,48 @@ +package rss + +import ( + "io/ioutil" + "net/http" + "os" + "path/filepath" + "strings" + "testing" +) + +const ( + testDataDir = "testdata" + testFileSuffix = ".rss" +) + +// testFetcher is an implementation of the Fetcher interface which reads the +// content from a local file. +type testFetcher struct{} + +// Get takes a 'url' which is really just a name of a file in the 'testdata' +// directory and returns a fake http.Response with the file content as its body. +// It returns an error iff the file cannot be opened. +func (f *testFetcher) Get(url string) (resp *http.Response, err error) { + file, err := os.Open(filepath.Join(testDataDir, url)) + if err != nil { + return nil, err + } + return &http.Response{Body: file}, nil +} + +// A trivial test making sure that all feeds parse - it *does not* check +// for correctness or completeness thereof. 
+func TestAllFeedsParse(t *testing.T) { + fileInfos, err := ioutil.ReadDir(testDataDir) + if err != nil { + t.Fatalf("ioutil.ReadDir(%q) err = %v, expected nil", testDataDir, err) + } + for _, fileInfo := range fileInfos { + fileName := fileInfo.Name() + if !strings.HasSuffix(fileName, testFileSuffix) { + continue + } + if _, err := ReadWithClient(fileName, new(testFetcher)); err != nil { + t.Fatalf("ReadWithClient(%q) err = %v, expected nil", fileName, err) + } + } +} diff --git a/testdata/techcrunch.rss b/testdata/techcrunch.rss new file mode 100644 index 0000000..009e67c --- /dev/null +++ b/testdata/techcrunch.rss @@ -0,0 +1,796 @@ + + + + + TechCrunch » Europe + + http://techcrunch.com + Startup and Technology News + Thu, 27 Feb 2014 23:38:19 +0000 + en + hourly + 1 + http://wordpress.com/ + + + http://1.gravatar.com/blavatar/d9ea925a71f82f06a1e6224298f7fe80?s=96&d=http%3A%2F%2Fs2.wp.com%2Fi%2Fbuttonw-com.png + » Europe + http://techcrunch.com + + + diff --git a/testdata/wordpress.rss b/testdata/wordpress.rss new file mode 100644 index 0000000..23e1b01 --- /dev/null +++ b/testdata/wordpress.rss @@ -0,0 +1,699 @@ + + + + + + + http://rottenindenmark.wordpress.com + Is this gentleman bothering you? + Thu, 27 Feb 2014 17:42:47 +0000 + en + hourly + 1 + http://wordpress.com/ + + + http://0.gravatar.com/blavatar/4ec3d17c0a8cdcc1402298b47dbee1f5?s=96&d=http%3A%2F%2Fs2.wp.com%2Fi%2Fbuttonw-com.png + + http://rottenindenmark.wordpress.com + + +