package main

import (
	"database/sql"
	"log"
	"time"

	rss "github.com/ungerik/go-rss"
)

// GenericRSSFetcher downloads a single RSS feed and upserts its items into
// the "notas" table through DB.
type GenericRSSFetcher struct {
	// MedioName is the name stored in the database ("medio" column).
	MedioName string
	// FeedURL is the address of the RSS feed to download.
	FeedURL string
	// DB is the handle used to run the upsert statements.
	DB *sql.DB
	// SubsetName names a specific feed of the outlet; it is only used in
	// log output.
	SubsetName string
	// PubDateFormat is the time layout used to parse each item's publication
	// date. When empty, the rss library's default date parsing is used.
	PubDateFormat string
}

// DefaultDateFormat is the layout used to store publication dates.
// It corresponds to SQLiteTimestampFormats[0]
// (NOTE(review): presumably the go-sqlite3 driver's list — confirm).
const DefaultDateFormat = "2006-01-02 15:04:05.999999999-07:00"

// Fetch downloads the feed at FeedURL and inserts every item into the
// "notas" table, updating title and content when the link already exists.
//
// Failures are logged instead of panicking so that one broken feed or one
// malformed item cannot bring down the polling loop that calls Fetch:
//   - if the feed cannot be downloaded or decoded, Fetch logs and returns;
//   - if an item's publication date cannot be parsed, that item is skipped;
//   - if the database write fails, the error is logged and the loop goes on.
func (fetcher GenericRSSFetcher) Fetch() {
	resp, err := rss.Read(fetcher.FeedURL, false)
	if err != nil {
		log.Printf("[%s/%s] Error when reading feed %s: %v",
			fetcher.MedioName, fetcher.SubsetName, fetcher.FeedURL, err)
		return
	}

	feed, err := rss.Regular(resp)
	if err != nil {
		log.Printf("[%s/%s] Error when decoding feed %s: %v",
			fetcher.MedioName, fetcher.SubsetName, fetcher.FeedURL, err)
		return
	}

	for _, entry := range feed.Item {
		var parsedDate time.Time
		if len(fetcher.PubDateFormat) != 0 {
			parsedDate, err = entry.PubDate.ParseWithFormat(fetcher.PubDateFormat)
		} else {
			parsedDate, err = entry.PubDate.Parse()
		}
		if err != nil {
			// A single bad date must not discard the rest of the feed.
			log.Printf("[%s/%s] Error when parsing publication date of %s: %v",
				fetcher.MedioName, fetcher.SubsetName, entry.Link, err)
			continue
		}

		formattedPubDate := parsedDate.Format(DefaultDateFormat)

		// Upsert keyed on link: re-fetching an already stored item refreshes
		// its title and content but keeps the original row.
		_, err = fetcher.DB.Exec(`
			INSERT INTO notas(medio, title, content, link, publication_date)
			VALUES(?, ?, ?, ?, ?)
			ON CONFLICT(link) DO UPDATE SET title=excluded.title, content=excluded.content;
		`, fetcher.MedioName, entry.Title, entry.ContentEncoded, entry.Link, formattedPubDate)
		if err != nil {
			log.Println("Error when saving nota", err)
		}
	}

	log.Printf("[%s/%s] Procesé %d notas", fetcher.MedioName, fetcher.SubsetName, len(feed.Item))
}

// Pagina12 returns a fetcher for one Página/12 feed. subsetName labels the
// feed in log output and url is the feed address. Publication dates are
// parsed with the time.RFC1123 layout.
func Pagina12(db *sql.DB, subsetName string, url string) GenericRSSFetcher {
	return GenericRSSFetcher{
		MedioName:     "pagina_12_rss",
		SubsetName:    subsetName,
		FeedURL:       url,
		DB:            db,
		PubDateFormat: time.RFC1123,
	}
}

// cronologicalFetcher fetches every configured feed in sequence, sleeps for
// two minutes and repeats forever. It never returns.
func cronologicalFetcher(db *sql.DB) {
	// The fetchers are immutable values, so they are built once and reused
	// on every polling round.
	fetchers := []GenericRSSFetcher{
		{
			MedioName: "la_nacion_rss",
			FeedURL:   "https://www.lanacion.com.ar/arcio/rss/",
			DB:        db,
		},
		{
			MedioName: "infobae",
			FeedURL:   "https://www.infobae.com/feeds/rss/",
			DB:        db,
		},
		Pagina12(db, "portada", "https://www.pagina12.com.ar/rss/portada"),
		Pagina12(db, "edicion-impresa", "https://www.pagina12.com.ar/rss/edicion-impresa"),
		Pagina12(db, "secciones/el-pais", "https://www.pagina12.com.ar/rss/secciones/el-pais/notas"),
		Pagina12(db, "secciones/sociedad", "https://www.pagina12.com.ar/rss/secciones/sociedad/notas"),
	}

	for {
		for _, f := range fetchers {
			f.Fetch()
		}
		time.Sleep(time.Minute * 2)
	}
}