This commit is contained in:
Cat /dev/Nulo 2022-10-12 12:25:42 -03:00
parent 49415ec226
commit ce90f0be32
6 changed files with 202 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
test.db

90
fetcher.go Normal file
View File

@ -0,0 +1,90 @@
package main
import (
"database/sql"
"log"
"time"
rss "github.com/ungerik/go-rss"
)
// GenericRSSFetcher describes one RSS feed to poll and the database its
// entries are stored in.
type GenericRSSFetcher struct {
// MedioName is the outlet name written to the "medio" column of the database.
MedioName string
// FeedURL is the address the feed is read from.
FeedURL string
// DB is the handle the fetched entries are inserted through.
DB *sql.DB
// SubsetName names a specific feed of the outlet; it is only used for debugging output.
SubsetName string
// PubDateFormat is the layout used to parse each entry's publication date.
// When it is empty, the RSS library's own date parsing is used instead.
PubDateFormat string
}
// DefaultDateFormat is the layout publication dates are formatted with before
// being stored. Per the original note it mirrors SQLiteTimestampFormats[0]
// (presumably the go-sqlite3 driver's preferred timestamp layout — TODO confirm
// against the driver version pinned in go.mod).
const DefaultDateFormat = "2006-01-02 15:04:05.999999999-07:00"
// Fetch downloads the feed at FeedURL and upserts every entry into the notas
// table: a new link inserts a row, an already known link only gets its title
// refreshed. Publication dates are parsed with PubDateFormat when it is set
// (otherwise with the RSS library's default parsing) and stored using
// DefaultDateFormat.
//
// Failures are logged instead of panicking. Fetch is called from the
// background polling goroutine, where an unrecovered panic would terminate the
// whole process, HTTP server included. A feed that cannot be read or parsed is
// abandoned until the next polling round; an entry whose date cannot be parsed
// or that cannot be saved is skipped on its own.
func (fetcher GenericRSSFetcher) Fetch() {
	resp, err := rss.Read(fetcher.FeedURL, false)
	if err != nil {
		log.Printf("[%s/%s] Error when reading feed %s: %v", fetcher.MedioName, fetcher.SubsetName, fetcher.FeedURL, err)
		return
	}
	feed, err := rss.Regular(resp)
	if err != nil {
		log.Printf("[%s/%s] Error when parsing feed %s: %v", fetcher.MedioName, fetcher.SubsetName, fetcher.FeedURL, err)
		return
	}

	for _, entry := range feed.Item {
		var parsedDate time.Time
		if fetcher.PubDateFormat != "" {
			parsedDate, err = entry.PubDate.ParseWithFormat(fetcher.PubDateFormat)
		} else {
			parsedDate, err = entry.PubDate.Parse()
		}
		if err != nil {
			// One malformed date must not discard the rest of the feed.
			log.Printf("[%s/%s] Error when parsing date of %s: %v", fetcher.MedioName, fetcher.SubsetName, entry.Link, err)
			continue
		}

		pubDate := parsedDate.Format(DefaultDateFormat)
		_, err = fetcher.DB.Exec(`
			INSERT INTO notas(medio, title, link, publication_date)
			VALUES(?, ?, ?, ?)
			ON CONFLICT(link) DO UPDATE SET
			title=excluded.title;
		`, fetcher.MedioName, entry.Title, entry.Link, pubDate)
		if err != nil {
			log.Println("Error when saving nota", err)
		}
	}
	log.Printf("[%s/%s] Procesé %d notas", fetcher.MedioName, fetcher.SubsetName, len(feed.Item))
}
// Pagina12 builds the fetcher for a single Página/12 RSS feed.
//
// db is the database the entries are stored in, subsetName labels the feed in
// log output and url is the feed address. Every fetcher built here stores its
// entries under the "pagina_12_rss" outlet name and parses publication dates
// with the RFC 1123 layout.
func Pagina12(db *sql.DB, subsetName string, url string) GenericRSSFetcher {
	var fetcher GenericRSSFetcher
	fetcher.DB = db
	fetcher.FeedURL = url
	fetcher.MedioName = "pagina_12_rss"
	fetcher.SubsetName = subsetName
	fetcher.PubDateFormat = time.RFC1123
	return fetcher
}
// cronologicalFetcher polls every configured feed forever: it fetches the
// La Nación feed and the four Página/12 feeds one after another, sleeps for
// two minutes and starts over. It never returns, so it is meant to run in its
// own goroutine.
func cronologicalFetcher(db *sql.DB) {
	const pause = 2 * time.Minute

	// The fetchers are plain values, so they can be built once and reused on
	// every round.
	feeds := []GenericRSSFetcher{
		{
			MedioName: "la_nacion_rss",
			FeedURL:   "https://www.lanacion.com.ar/arcio/rss/",
			DB:        db,
		},
		Pagina12(db, "portada", "https://www.pagina12.com.ar/rss/portada"),
		Pagina12(db, "edicion-impresa", "https://www.pagina12.com.ar/rss/edicion-impresa"),
		Pagina12(db, "secciones/el-pais", "https://www.pagina12.com.ar/rss/secciones/el-pais/notas"),
		Pagina12(db, "secciones/sociedad", "https://www.pagina12.com.ar/rss/secciones/sociedad/notas"),
	}

	for {
		for _, feed := range feeds {
			feed.Fetch()
		}
		time.Sleep(pause)
	}
}

9
go.mod Normal file
View File

@ -0,0 +1,9 @@
module medios.nulo.in/web
go 1.19
require (
github.com/mattn/go-sqlite3 v1.14.15
github.com/paulrosania/go-charset v0.0.0-20190326053356-55c9d7a5834c // indirect
github.com/ungerik/go-rss v0.0.0-20200405130036-81ac15598626
)

6
go.sum Normal file
View File

@ -0,0 +1,6 @@
github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI=
github.com/mattn/go-sqlite3 v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
github.com/paulrosania/go-charset v0.0.0-20190326053356-55c9d7a5834c h1:P6XGcuPTigoHf4TSu+3D/7QOQ1MbL6alNwrGhcW7sKw=
github.com/paulrosania/go-charset v0.0.0-20190326053356-55c9d7a5834c/go.mod h1:YnNlZP7l4MhyGQ4CBRwv6ohZTPrUJJZtEv4ZgADkbs4=
github.com/ungerik/go-rss v0.0.0-20200405130036-81ac15598626 h1:kCZvptlArqbfrFWL0V0YWwV9iXEvMKaUMjJvXQ/wsE4=
github.com/ungerik/go-rss v0.0.0-20200405130036-81ac15598626/go.mod h1:UeGmbG1AM6Cx0FfruZOzOC02lDvXbUWV+06tAxgCoGA=

88
main.go Normal file
View File

@ -0,0 +1,88 @@
package main
import (
"database/sql"
"fmt"
"html"
"log"
"net/http"
_ "github.com/mattn/go-sqlite3"
)
// type Nota struct {
// Medio string `json:"medio"`
// Title string `json:"title"`
// Link string `json:"link"`
// PublicationDate string `json:"publication_date" db:"publication_date"`
// CreatedAt string `json:"created_at" db:"created_at"`
// }
// fooHandler serves an HTML listing of the stored articles whose titles match
// a fixed set of keywords.
type fooHandler struct {
	// DB is the database the articles are read from.
	DB *sql.DB
}

// ServeHTTP answers with an HTML list of up to 50 articles whose title
// contains "colegio", "escuela", "Acosta" or "tomas", ordered by ascending
// publication date. Every value taken from the database is HTML-escaped before
// it is written.
//
// The rows are read completely before any output is produced. A database
// failure therefore yields a clean 500 response (and a log line) instead of a
// half-written page, and never terminates the server process. The request r
// is not inspected.
func (foo fooHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	// String literals are single-quoted: SQLite treats double-quoted text as
	// an identifier first and only falls back to a string as a legacy quirk.
	rows, err := foo.DB.Query(`
		select medio, title, link, publication_date
		from notas
		where
			title like '%colegio%' or
			title like '%escuela%' or
			title like '%Acosta%' or
			title like '%tomas%'
		order by publication_date asc
		limit 50
	`)
	if err != nil {
		log.Println("Error when querying notas", err)
		http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
		return
	}
	defer rows.Close()

	type nota struct {
		medio, title, link, publicationDate string
	}
	// The query is capped at 50 rows, so buffering them is cheap.
	var notas []nota
	for rows.Next() {
		var n nota
		if err := rows.Scan(&n.medio, &n.title, &n.link, &n.publicationDate); err != nil {
			log.Println("Error when reading nota", err)
			http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
			return
		}
		notas = append(notas, n)
	}
	if err := rows.Err(); err != nil {
		log.Println("Error when iterating notas", err)
		http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "text/html; charset=utf-8")
	fmt.Fprint(w, `<!doctype html>
<meta charset="utf-8">
<title>Observatorio de medios</title>
<ul>
`)
	for _, n := range notas {
		fmt.Fprintf(w, "<li>[%s] <a href=\"%s\">%s</a> %s\n",
			html.EscapeString(n.medio),
			html.EscapeString(n.link),
			html.EscapeString(n.title),
			html.EscapeString(n.publicationDate),
		)
	}
	fmt.Fprint(w, `</ul>`)
}
// main wires the program together: it opens the SQLite database file, mounts
// the article listing on /foo, starts the feed poller in the background and
// serves HTTP on port 8080 until the server fails.
func main() {
	db, openErr := sql.Open("sqlite3", "./test.db?cache=shared")
	if openErr != nil {
		log.Panic(openErr)
	}
	defer db.Close()

	// Limit the pool to a single connection (presumably so the handler and
	// the poller never hit the SQLite file concurrently — TODO confirm).
	db.SetMaxOpenConns(1)

	listing := fooHandler{db}
	http.Handle("/foo", listing)

	go cronologicalFetcher(db)

	log.Println("Listening in :8080")
	serveErr := http.ListenAndServe(":8080", nil)
	log.Fatal(serveErr)
}

View File

@ -0,0 +1,8 @@
-- notas holds one row per article collected from the RSS feeds.
CREATE TABLE notas (
-- Name of the outlet/feed the article was read from (e.g. "la_nacion_rss").
medio TEXT NOT NULL,
-- Article headline; refreshed by the fetcher when a known link is seen again.
title TEXT NOT NULL,
-- Article URL; it identifies the article (see the UNIQUE constraint below).
link TEXT NOT NULL,
-- Publication date reported by the feed, written by the fetcher as a formatted timestamp string.
publication_date DATETIME NOT NULL,
-- Time the row was inserted; filled in by the database when not supplied.
created_at DATETIME NOT NULL DEFAULT current_timestamp,
-- Required by the fetcher's "ON CONFLICT(link)" upsert.
UNIQUE(link)
);