Implement the news builder

This commit is contained in:
idk
2022-12-11 00:48:43 +00:00
parent 1fd140742f
commit cda7a3c7f5
6 changed files with 302 additions and 48 deletions

View File

@ -18,7 +18,7 @@ Usage
Use these options to configure the software
#### Server Options(use with `serve`
#### Server Options(use with `serve`)
- `-newsdir`: directory to serve newsfeed from
- `-statsfile`: file to store the stats in, in json format
@ -26,10 +26,18 @@ Use these options to configure the software
- `-port`: port to serve news files on
- `-i2p`: serve news files directly to I2P using SAMv3
#### Builder Options(use with `build`
Not implemented yet
#### Signer Options(use with `sign`
#### Builder Options(use with `build`)
- `-newsfile`: entries to pass to news generator. If passed a directory, all `entries.html` files in the directory will be processed
- `-blocklist`: block list file to pass to news generator
- `-releasejson`: json file describing an update to pass to news generator
- `-feedtitle`: title to use for the RSS feed to pass to news generator
- `-feedsubtitle`: subtitle to use for the RSS feed to pass to news generator
- `-feedsite`: site for the RSS feed to pass to news generator
- `-feedmain`: Primary newsfeed for updates to pass to news generator
- `-feedbackup`: Backup newsfeed for updates to pass to news generator
- `-feeduid`: UUID to use for the RSS feed to pass to news generator
#### Signer Options(use with `sign`)
Not implemented yet

View File

@ -1,14 +1,119 @@
package newsbuilder
import (
"encoding/json"
"fmt"
"io/ioutil"
"time"
"github.com/google/uuid"
"github.com/yosssi/gohtml"
newsfeed "i2pgit.org/idk/newsgo/builder/feed"
)
// NewsBuilder collects the input files and feed metadata that Build uses to
// assemble a news feed document.
type NewsBuilder struct {
// Nodes is assigned by LoadFeed from newsfeed.XMLData(File).
Nodes []newsfeed.Node
// File is the value LoadFeed hands to newsfeed.XMLData.
File string
// Feed is the entries source: Build calls Feed.LoadHTML and then renders
// every article in Feed.ArticlesSet.
Feed newsfeed.Feed
// ReleasesJson is the path of the JSON release description read by JSONtoXML.
ReleasesJson string
// BlocklistXML is the path of a file whose contents Build copies verbatim
// into the feed.
BlocklistXML string
// URNID is written into the feed <id> element after the "urn:uuid:" prefix.
URNID string
// TITLE is written into the feed <title> element.
TITLE string
// SITEURL is the href of the feed's first <link> element.
SITEURL string
// MAINFEED is the href of the <link rel="self"> element.
MAINFEED string
// BACKUPFEED is the href of the <link rel="alternate"> element; Build omits
// that link when this is empty.
BACKUPFEED string
// SUBTITLE is written into the feed <subtitle> element.
SUBTITLE string
}
func (n *NewsBuilder) LoadFeed() {
n.Nodes = newsfeed.XMLData(n.File)
// JSONtoXML reads the JSON file named by nb.ReleasesJson and renders the first
// release it describes as an <i2p:release> element.
//
// The file must contain a JSON array whose first element provides the string
// fields "date", "version", "minVersion" and "minJavaVersion", together with
// "updates" -> "su3" -> "torrent" (string) and "url" (array of strings).
// Values are inserted verbatim; no XML escaping is applied here.
//
// It returns an error when the file cannot be read, is not valid JSON, holds
// no release, or does not have the shape described above.
func (nb *NewsBuilder) JSONtoXML() (string, error) {
	content, err := ioutil.ReadFile(nb.ReleasesJson)
	if err != nil {
		return "", err
	}
	// Unmarshal the data into `payload`.
	var payload []map[string]interface{}
	if err := json.Unmarshal(content, &payload); err != nil {
		return "", err
	}
	return releaseXML(payload)
}

// releaseXML renders payload[0] as an <i2p:release> element. Every lookup is
// checked so that malformed input yields an error instead of a panic.
func releaseXML(payload []map[string]interface{}) (string, error) {
	if len(payload) == 0 {
		return "", fmt.Errorf("JSONtoXML: no releases in payload")
	}
	release := payload[0]
	/*
		<i2p:release date="2022-11-21" minVersion="0.9.9" minJavaVersion="1.8">
			<i2p:version>2.0.0</i2p:version>
			<i2p:update type="su3">
				<i2p:torrent href="magnet:?xt=urn:btih:a50f8479a39896f00431d7b500447fe303d2b6b5&amp;dn=i2pupdate-2.0.0.su3&amp;tr=http://tracker2.postman.i2p/announce.php"/>
				<i2p:url href="http://stats.i2p/i2p/2.0.0/i2pupdate.su3"/>
				<i2p:url href="http://mgp6yzdxeoqds3wucnbhfrdgpjjyqbiqjdwcfezpul3or7bzm4ga.b32.i2p/releases/2.0.0/i2pupdate.su3"/>
			</i2p:update>
		</i2p:release>
	*/
	releasedate, err := releaseString(release, "date")
	if err != nil {
		return "", err
	}
	version, err := releaseString(release, "version")
	if err != nil {
		return "", err
	}
	minVersion, err := releaseString(release, "minVersion")
	if err != nil {
		return "", err
	}
	minJavaVersion, err := releaseString(release, "minJavaVersion")
	if err != nil {
		return "", err
	}
	updates, ok := release["updates"].(map[string]interface{})
	if !ok {
		return "", fmt.Errorf("JSONtoXML: %q is missing or not an object", "updates")
	}
	su3, ok := updates["su3"].(map[string]interface{})
	if !ok {
		return "", fmt.Errorf("JSONtoXML: %q is missing or not an object", "su3")
	}
	magnet, err := releaseString(su3, "torrent")
	if err != nil {
		return "", err
	}
	urls, ok := su3["url"].([]interface{})
	if !ok {
		return "", fmt.Errorf("JSONtoXML: %q is missing or not an array", "url")
	}

	// Attribute values must be quoted for the element to be well-formed XML.
	str := "<i2p:release date=\"" + releasedate + "\" minVersion=\"" + minVersion + "\" minJavaVersion=\"" + minJavaVersion + "\">\n"
	str += "<i2p:version>" + version + "</i2p:version>"
	str += "<i2p:update type=\"su3\">"
	str += "<i2p:torrent href=\"" + magnet + "\"/>"
	for i, u := range urls {
		href, ok := u.(string)
		if !ok {
			return "", fmt.Errorf("JSONtoXML: url entry %d is not a string", i)
		}
		str += "<i2p:url href=\"" + href + "\"/>"
	}
	str += "</i2p:update>"
	str += "</i2p:release>"
	return str, nil
}

// releaseString returns m[key] as a string, or an error when the key is absent
// or holds a non-string value.
func releaseString(m map[string]interface{}, key string) (string, error) {
	s, ok := m[key].(string)
	if !ok {
		return "", fmt.Errorf("JSONtoXML: %q is missing or not a string", key)
	}
	return s, nil
}
// Build assembles the complete feed document and returns it after formatting
// it with gohtml.Format.
//
// The document consists of the feed header built from the NewsBuilder
// metadata fields, the verbatim contents of the file at nb.BlocklistXML, the
// release element produced by JSONtoXML, and one entry per article loaded by
// nb.Feed.LoadHTML.
//
// An error is returned when the entries HTML, the blocklist file, or the
// releases JSON cannot be loaded.
func (nb *NewsBuilder) Build() (string, error) {
	if err := nb.Feed.LoadHTML(); err != nil {
		return "", fmt.Errorf("Build: error %w", err)
	}
	// The timestamp is labelled "+00:00", so it has to be taken in UTC. The
	// layout yields two fractional-second digits and no trailing newline.
	updated := time.Now().UTC().Format("2006-01-02T15:04:05.00") + "+00:00"

	str := "<?xml version='1.0' encoding='UTF-8'?>"
	str += "<feed xmlns:i2p=\"http://geti2p.net/en/docs/spec/updates\" xmlns=\"http://www.w3.org/2005/Atom\" xml:lang=\"en\">"
	str += "<id>" + "urn:uuid:" + nb.URNID + "</id>"
	str += "<title>" + nb.TITLE + "</title>"
	str += "<updated>" + updated + "</updated>"
	str += "<link href=\"" + nb.SITEURL + "\"/>"
	str += "<link href=\"" + nb.MAINFEED + "\" rel=\"self\"/>"
	// The backup feed is optional; skip the link entirely when it is unset.
	if nb.BACKUPFEED != "" {
		str += "<link href=\"" + nb.BACKUPFEED + "\" rel=\"alternate\"/>"
	}
	str += "<generator uri=\"http://idk.i2p/newsgo\" version=\"0.1.0\">newsgo</generator>"
	str += "<subtitle>" + nb.SUBTITLE + "</subtitle>"

	// The blocklist file is embedded as-is.
	blocklistBytes, err := ioutil.ReadFile(nb.BlocklistXML)
	if err != nil {
		return "", err
	}
	str += string(blocklistBytes)

	jsonxml, err := nb.JSONtoXML()
	if err != nil {
		return "", err
	}
	str += jsonxml

	for index := range nb.Feed.ArticlesSet {
		art := nb.Feed.Article(index)
		str += art.Entry()
	}
	str += "</feed>"
	return gohtml.Format(str), nil
}
// Builder returns a NewsBuilder that reads entries from newsFile, release data
// from releasesJson and the blocklist from blocklistXML. The feed metadata is
// pre-filled with default values and a freshly generated UUID; callers may
// overwrite any of those fields before calling Build.
func Builder(newsFile, releasesJson, blocklistXML string) *NewsBuilder {
	var created NewsBuilder

	// Input locations supplied by the caller.
	created.Feed.EntriesHTMLPath = newsFile
	created.ReleasesJson = releasesJson
	created.BlocklistXML = blocklistXML

	// Default feed metadata.
	created.URNID = uuid.New().String()
	created.TITLE = "I2P News"
	created.SITEURL = "http://i2p-projekt.i2p"
	created.MAINFEED = "http://tc73n4kivdroccekirco7rhgxdg5f3cjvbaapabupeyzrqwv5guq.b32.i2p/news.atom.xml"
	created.BACKUPFEED = "http://dn3tvalnjz432qkqsvpfdqrwpqkw3ye4n4i2uyfr4jexvo3sp5ka.b32.i2p/news/news.atom.xml"
	created.SUBTITLE = "News feed, and router updates"

	return &created
}

View File

@ -1,45 +1,85 @@
package newsfeed
import (
"bytes"
"encoding/xml"
"fmt"
"io/ioutil"
"github.com/anaskhan96/soup"
)
type Node struct {
XMLName xml.Name
Attrs []xml.Attr `xml:",any,attr"`
Content []byte `xml:",innerxml"`
Nodes []Node `xml:",any"`
// Feed holds an entries HTML document and the pieces LoadHTML extracts from it.
type Feed struct {
// HeaderTitle is the full text of the document's <header> element, set by
// LoadHTML.
HeaderTitle string
// ArticlesSet holds the HTML of each <article> element found by LoadHTML.
ArticlesSet []string
// EntriesHTMLPath is the path of the HTML file that LoadHTML reads.
EntriesHTMLPath string
// doc is the parsed document produced by LoadHTML.
doc soup.Root
}
// UnmarshalXML records the start element's attributes on n and then decodes
// the element into n through a locally declared type based on Node. That type
// has no UnmarshalXML method, so the decoder does not call back into this one.
func (n *Node) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
	type plainNode Node

	n.Attrs = start.Attr
	target := (*plainNode)(n)
	return d.DecodeElement(target, &start)
}
func XMLData(data []byte) (m []Node) {
buf := bytes.NewBuffer(data)
dec := xml.NewDecoder(buf)
var n Node
err := dec.Decode(&n)
// LoadHTML reads the file at f.EntriesHTMLPath, parses it as HTML and fills in
// f.HeaderTitle (the text of the <header> element, or "" when there is none)
// and f.ArticlesSet (the HTML of every <article> element).
//
// ArticlesSet is rebuilt on every call, so calling LoadHTML repeatedly does not
// accumulate duplicate articles. An error is returned when the file cannot be
// read or the document cannot be parsed.
func (f *Feed) LoadHTML() error {
	data, err := ioutil.ReadFile(f.EntriesHTMLPath)
	if err != nil {
		return fmt.Errorf("LoadHTML: error %w", err)
	}
	f.doc = soup.HTMLParse(string(data))
	if f.doc.Error != nil {
		return fmt.Errorf("LoadHTML: error %w", f.doc.Error)
	}

	// Find reports a missing element through Root.Error; only read the text
	// when a <header> was actually found.
	f.HeaderTitle = ""
	if header := f.doc.Find("header"); header.Error == nil {
		f.HeaderTitle = header.FullText()
	}

	articles := f.doc.FindAll("article")
	f.ArticlesSet = make([]string, 0, len(articles))
	for _, article := range articles {
		f.ArticlesSet = append(f.ArticlesSet, article.HTML())
	}
	return nil
}
walk([]Node{n}, func(n Node) bool {
m = append(m, n)
return true
})
return m
// Length reports how many article fragments are currently held in
// f.ArticlesSet.
func (f *Feed) Length() int {
	count := len(f.ArticlesSet)
	return count
}
func walk(nodes []Node, f func(Node) bool) {
for _, n := range nodes {
if f(n) {
walk(n.Nodes, f)
// Article parses the HTML fragment stored at f.ArticlesSet[index] and returns
// an Article built from it.
//
// The metadata fields come from the attributes of the <article> element (id,
// title, href, author, published, updated). Summary is the text of the
// <summary> inside <details>, or "" when either element is missing. The
// parsed fragment's HTML is kept as the article content.
//
// index must be a valid index into f.ArticlesSet; an out-of-range index panics.
func (f *Feed) Article(index int) *Article {
	html := soup.HTMLParse(f.ArticlesSet[index])

	// Find signals a missing element through Root.Error. Calling further
	// methods on such a result dereferences a nil node, so check each step.
	var articleData map[string]string
	if article := html.Find("article"); article.Error == nil {
		articleData = article.Attrs()
	}
	articleSummary := ""
	if details := html.Find("details"); details.Error == nil {
		if summary := details.Find("summary"); summary.Error == nil {
			articleSummary = summary.FullText()
		}
	}

	// Reading a missing key (or a nil map) yields "", which leaves the
	// corresponding field empty.
	return &Article{
		UID:           articleData["id"],
		Title:         articleData["title"],
		Link:          articleData["href"],
		Author:        articleData["author"],
		PublishedDate: articleData["published"],
		UpdatedDate:   articleData["updated"],
		Summary:       articleSummary,
		content:       html.HTML(),
	}
}
// Article is a single news entry extracted from an <article> element by
// Feed.Article and rendered by Entry.
type Article struct {
// UID is the article's "id" attribute.
UID string
// Title is the article's "title" attribute.
Title string
// Link is the article's "href" attribute.
Link string
// Author is the article's "author" attribute.
Author string
// PublishedDate is the article's "published" attribute.
PublishedDate string
// UpdatedDate is the article's "updated" attribute.
UpdatedDate string
// Summary is the text of the <summary> element inside <details>.
Summary string
// TODO: you have to collect this from the HTML itself and you have to take away the article and summary parts
// content is the HTML of the parsed article fragment; Content derives the
// entry body from it.
content string
}
// Content returns the body of the article as HTML.
//
// It re-parses the stored fragment, collects the nodes returned by
// FindAll(""), and concatenates the HTML of every node after the first five.
// NOTE(review): the first five nodes are presumably the wrapper and summary
// nodes mentioned in the TODO on Article.content; confirm against real input.
// When fewer than five nodes are found the result is "" rather than a panic.
func (a *Article) Content() string {
	doc := soup.HTMLParse(a.content)
	articleBody := doc.FindAll("")
	// Slicing past the end would panic, so bail out on short documents.
	if len(articleBody) < 5 {
		return ""
	}
	str := ""
	for _, v := range articleBody[5:] {
		str += v.HTML()
	}
	return str
}
// Entry renders the article as an <entry> element: id, title, updated date,
// author name, alternate link, published date, summary, and the result of
// Content wrapped in an XHTML <div>. Field values are inserted as they are.
func (a *Article) Entry() string {
	const entryTemplate = "<entry>\n\t<id>%s</id>\n\t<title>%s</title>\n\t<updated>%s</updated>\n\t<author><name>%s</name></author>\n\t<link href=\"%s\" rel=\"alternate\"/>\n\t<published>%s</published>\n\t<summary>%s</summary>\n\t<content type=\"xhtml\">\n\t\t<div xmlns=\"http://www.w3.org/1999/xhtml\">\n\t\t%s\n\t\t</div>\n\t</content>\n</entry>"

	body := a.Content()
	rendered := fmt.Sprintf(entryTemplate,
		a.UID, a.Title, a.UpdatedDate, a.Author,
		a.Link, a.PublishedDate, a.Summary, body)
	return rendered
}

3
go.mod
View File

@ -3,8 +3,11 @@ module i2pgit.org/idk/newsgo
go 1.19
require (
github.com/anaskhan96/soup v1.2.5
github.com/eyedeekay/onramp v0.0.0-20220829050101-64cb1842d0f0
github.com/google/uuid v1.3.0
github.com/wcharczuk/go-chart/v2 v2.1.0
github.com/yosssi/gohtml v0.0.0-20201013000340-ee4748c638f4
gitlab.com/golang-commonmark/markdown v0.0.0-20211110145824-bf3e522c626a
)

8
go.sum
View File

@ -1,4 +1,6 @@
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/anaskhan96/soup v1.2.5 h1:V/FHiusdTrPrdF4iA1YkVxsOpdNcgvqT1hG+YtcZ5hM=
github.com/anaskhan96/soup v1.2.5/go.mod h1:6YnEp9A2yywlYdM4EgDz9NEHclocMepEtku7wg6Cq3s=
github.com/cretz/bine v0.2.0 h1:8GiDRGlTgz+o8H9DSnsl+5MeBK4HsExxgl6WgzOCuZo=
github.com/cretz/bine v0.2.0/go.mod h1:WU4o9QR9wWp8AVKtTM1XD5vUHkEqnf2vVSo6dBqbetI=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
@ -26,6 +28,8 @@ github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF0
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/renameio v1.0.0/go.mod h1:t/HQoYBZSsWSNK35C6CO/TpPLDVWvxOHboWUAweKUpk=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
@ -40,10 +44,13 @@ github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/wcharczuk/go-chart/v2 v2.1.0 h1:tY2slqVQ6bN+yHSnDYwZebLQFkphK4WNrVwnt7CJZ2I=
github.com/wcharczuk/go-chart/v2 v2.1.0/go.mod h1:yx7MvAVNcP/kN9lKXM/NTce4au4DFN99j6i1OwDclNA=
github.com/yosssi/gohtml v0.0.0-20201013000340-ee4748c638f4 h1:0sw0nJM544SpsihWx1bkXdYLQDlzRflMgFJQ4Yih9ts=
github.com/yosssi/gohtml v0.0.0-20201013000340-ee4748c638f4/go.mod h1:+ccdNT0xMY1dtc5XBxumbYfOUhmduiGudqaDgD2rVRE=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
gitlab.com/golang-commonmark/html v0.0.0-20191124015941-a22733972181 h1:K+bMSIx9A7mLES1rtG+qKduLIXq40DAzYHtb0XuCukA=
@ -70,6 +77,7 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=

102
main.go
View File

@ -3,26 +3,42 @@ package main
import (
"flag"
"fmt"
"io/ioutil"
"log"
"net"
"net/http"
"os"
"os/signal"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/eyedeekay/onramp"
"github.com/google/uuid"
builder "i2pgit.org/idk/newsgo/builder"
server "i2pgit.org/idk/newsgo/server"
)
// Command-line flags. The default values are computed when the package is
// initialised, i.e. before flag.Parse runs: isSamAround probes
// 127.0.0.1:7656, DefaultFeedURL may open a listener through onramp, and
// uuid.New generates a fresh UUID on every start.
var (
serve = flag.String("command", "help", "command to run(may be `serve`,`build`,`sign`")
serve = flag.String("command", "help", "command to run(may be `serve`,`build`,`sign`, or `help`(default)")
dir = flag.String("newsdir", "build", "directory to serve news from")
statsfile = flag.String("statsfile", "build/stats.json", "file to store stats in")
host = flag.String("host", "127.0.0.1", "host to serve on")
port = flag.String("port", "9696", "port to serve on")
i2p = flag.Bool("i2p", true, "automatically co-host on an I2P service using SAMv3")
i2p = flag.Bool("i2p", isSamAround(), "automatically co-host on an I2P service using SAMv3")
tcp = flag.Bool("http", true, "host on an HTTP service at host:port")
//newsfile = flag.String("newsfile", "data/entries.html", "entries to pass to news generator. If passed a directory, all `entries.html` files in the directory will be processed")
newsfile = flag.String("newsfile", "data", "entries to pass to news generator. If passed a directory, all `entries.html` files in the directory will be processed")
// Note: the variable is spelled bloclist, while the flag itself is "blocklist".
bloclist = flag.String("blocklist", "data/blocklist.xml", "block list file to pass to news generator")
releasejson = flag.String("releasejson", "data/releases.json", "json file describing an update to pass to news generator")
title = flag.String("feedtitle", "I2P News", "title to use for the RSS feed to pass to news generator")
subtitle = flag.String("feedsubtitle", "News feed, and router updates", "subtitle to use for the RSS feed to pass to news generator")
site = flag.String("feedsite", "http://i2p-projekt.i2p", "site for the RSS feed to pass to news generator")
mainurl = flag.String("feedmain", DefaultFeedURL(), "Primary newsfeed for updates to pass to news generator")
backupurl = flag.String("feedbackup", "http://dn3tvalnjz432qkqsvpfdqrwpqkw3ye4n4i2uyfr4jexvo3sp5ka.b32.i2p/news/news.atom.xml", "Backup newsfeed for updates to pass to news generator")
urn = flag.String("feeduid", uuid.New().String(), "UUID to use for the RSS feed to pass to news generator")
// builddir is the root directory under which Build writes generated feeds.
builddir = flag.String("builddir", "build", "Build directory to output feeds to.")
)
func validatePort(s *string) {
@ -67,7 +83,7 @@ func Help() {
fmt.Println("")
fmt.Println("Use these options to configure the software")
fmt.Println("")
fmt.Println("#### Server Options(use with `serve`")
fmt.Println("#### Server Options(use with `serve`)")
fmt.Println("")
fmt.Println(" - `-newsdir`: directory to serve newsfeed from")
fmt.Println(" - `-statsfile`: file to store the stats in, in json format")
@ -75,11 +91,19 @@ func Help() {
fmt.Println(" - `-port`: port to serve news files on")
fmt.Println(" - `-i2p`: serve news files directly to I2P using SAMv3")
fmt.Println("")
fmt.Println("#### Builder Options(use with `build`")
fmt.Println("#### Builder Options(use with `build`)")
fmt.Println("")
fmt.Println("Not implemented yet")
fmt.Println(" - `-newsfile`: entries to pass to news generator. If passed a directory, all `entries.html` files in the directory will be processed")
fmt.Println(" - `-blockfile`: block list file to pass to news generator")
fmt.Println(" - `-releasejson`: json file describing an update to pass to news generator")
fmt.Println(" - `-feedtitle`: title to use for the RSS feed to pass to news generator")
fmt.Println(" - `-feedsubtitle`: subtitle to use for the RSS feed to pass to news generator")
fmt.Println(" - `-feedsite`: site for the RSS feed to pass to news generator")
fmt.Println(" - `-feedmain`: Primary newsfeed for updates to pass to news generator")
fmt.Println(" - `-feedbackup`: Backup newsfeed for updates to pass to news generator")
fmt.Println(" - `-feeduri`: UUID to use for the RSS feed to pass to news generator")
fmt.Println("")
fmt.Println("#### Signer Options(use with `sign`")
fmt.Println("#### Signer Options(use with `sign`)")
fmt.Println("")
fmt.Println("Not implemented yet")
}
@ -103,6 +127,50 @@ func ServeI2P(s *server.NewsServer) error {
return http.Serve(ln, s)
}
// isSamAround reports whether 127.0.0.1:7656 is already taken. It tries to
// listen on that address: any failure is read as "something (presumably the
// SAM bridge) is there" and yields true, while a successful listen is closed
// again immediately and yields false.
func isSamAround() bool {
	probe, listenErr := net.Listen("tcp", "127.0.0.1:7656")
	if listenErr == nil {
		// The port was free; release it right away.
		probe.Close()
		return false
	}
	return true
}
// DefaultFeedURL returns the default value for the primary feed URL.
//
// When isSamAround reports false, or when opening a listener through onramp
// fails, it returns a fixed .b32.i2p feed address. Otherwise it returns an
// http URL built from the listener's address followed by /news.atom.xml. The
// listener and the Garlic instance are both closed before the function returns.
func DefaultFeedURL() string {
	const fallback = "http://tc73n4kivdroccekirco7rhgxdg5f3cjvbaapabupeyzrqwv5guq.b32.i2p/news.atom.xml"

	if samPresent := isSamAround(); !samPresent {
		return fallback
	}

	garlic := &onramp.Garlic{}
	defer garlic.Close()

	listener, listenErr := garlic.Listen()
	if listenErr != nil {
		return fallback
	}
	defer listener.Close()

	address := listener.Addr().String()
	return "http://" + address + "/news.atom.xml"
}
// Build generates the feed for a single entries file and writes it below the
// build directory.
//
// The builder is configured from the command-line flags. A build failure is
// logged and the function returns without writing anything. The output name is
// derived from newsFile by replacing ".html" with ".atom.xml", "entries" with
// "news", and removing "translations". Failing to create the output directory
// or to write the file panics.
func Build(newsFile string) {
	news := builder.Builder(newsFile, *releasejson, *bloclist)
	news.TITLE, news.SUBTITLE = *title, *subtitle
	news.SITEURL = *site
	news.MAINFEED, news.BACKUPFEED = *mainurl, *backupurl
	news.URNID = *urn

	feed, buildErr := news.Build()
	if buildErr != nil {
		log.Printf("Build error: %s", buildErr)
		return
	}

	// Apply the three substitutions one after another, in this exact order.
	filename := strings.Replace(newsFile, ".html", ".atom.xml", -1)
	filename = strings.Replace(filename, "entries", "news", -1)
	filename = strings.Replace(filename, "translations", "", -1)

	outputDir := filepath.Join(*builddir, filepath.Dir(filename))
	if mkErr := os.MkdirAll(outputDir, 0755); mkErr != nil {
		panic(mkErr)
	}
	outputPath := filepath.Join(*builddir, filename)
	if writeErr := ioutil.WriteFile(outputPath, []byte(feed), 0644); writeErr != nil {
		panic(writeErr)
	}
}
func main() {
flag.Parse()
command := validateCommand(serve)
@ -140,6 +208,28 @@ func main() {
i++
}
case "build":
f, e := os.Stat(*newsfile)
if e != nil {
panic(e)
}
if f.IsDir() {
err := filepath.Walk(*newsfile,
func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
ext := filepath.Ext(path)
if ext == ".html" {
Build(path)
}
return nil
})
if err != nil {
log.Println(err)
}
} else {
Build(*newsfile)
}
case "sign":
case "help":
Help()