Implements link walking

This commit is contained in:
Mikhail Klementyev 2016-07-22 15:43:23 +03:00
parent fac61961f5
commit d1d2f73b4f
2 changed files with 130 additions and 8 deletions

81
main.go
View File

@ -9,27 +9,32 @@
package main package main
import ( import (
"bytes"
"database/sql"
"strings"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"log" "log"
"net/http" "net/http"
"./storage"
"github.com/jaytaylor/html2text" "github.com/jaytaylor/html2text"
"golang.org/x/net/html"
"golang.org/x/net/html/charset" "golang.org/x/net/html/charset"
kingpin "gopkg.in/alecthomas/kingpin.v2" kingpin "gopkg.in/alecthomas/kingpin.v2"
) )
var ( var (
url = kingpin.Flag("url", "Url").String() arg_url = kingpin.Flag("url", "Url").String()
arg_link = kingpin.Flag("link", "Link").Int()
) )
func main() { func cmd_url(db *sql.DB, url string) {
kingpin.Parse()
client := &http.Client{} client := &http.Client{}
req, err := http.NewRequest("GET", *url, nil) req, err := http.NewRequest("GET", url, nil)
if err != nil { if err != nil {
log.Fatalln(err) log.Fatalln(err)
} }
@ -55,10 +60,72 @@ func main() {
return return
} }
text, err := html2text.FromString(string(body)) htmlPage := string(body)
z := html.NewTokenizer(bytes.NewReader(body))
for {
tt := z.Next()
if tt == html.ErrorToken {
break
}
for {
key, value, moreAttr := z.TagAttr()
if string(key) == "href" {
url, err := req.URL.Parse(string(value))
if err != nil { if err != nil {
panic(err) panic(err)
} }
linkNo, err := storage.AddLink(db, url.String())
if err != nil {
panic(err)
}
for _, s := range []string{string(value), html.EscapeString(string(value))} {
htmlPage = strings.Replace(htmlPage, "\""+s+"\"",
"\""+fmt.Sprintf("%d", linkNo)+"\"", -1)
}
}
if !moreAttr {
break
}
}
}
text, err := html2text.FromString(htmlPage)
if err != nil {
panic(err)
}
text += ""
fmt.Println(text) fmt.Println(text)
} }
// cmd_link resolves the stored link numbered linkID to its URL through
// storage.GetLink and hands that URL to cmd_url. It panics when the lookup
// returns an error.
func cmd_link(db *sql.DB, linkID int) {
	target, lookupErr := storage.GetLink(db, linkID)
	if lookupErr != nil {
		panic(lookupErr)
	}

	cmd_url(db, target)
}
func main() {
db, err := storage.OpenDB("/tmp/wi.db")
if err != nil {
panic(err)
}
kingpin.Parse()
if *arg_url != "" {
cmd_url(db, *arg_url)
} else if *arg_link != 0 {
cmd_link(db, *arg_link)
}
}

55
storage/storage.go Normal file
View File

@ -0,0 +1,55 @@
/**
* @file storage.go
* @author Mikhail Klementyev jollheef<AT>riseup.net
* @license GNU GPLv3
* @date July, 2016
*/
package storage
import (
"database/sql"
_ "github.com/mattn/go-sqlite3"
)
// OpenDB opens the sqlite3 database at path and makes sure the `links`
// table (an auto-incremented integer `id` plus a text `url`) exists.
//
// On success it returns a ready-to-use handle that the caller must close.
// On any failure it returns a nil handle and the error; a handle that was
// already opened is closed first so it does not leak.
func OpenDB(path string) (db *sql.DB, err error) {
	db, err = sql.Open("sqlite3", path)
	if err != nil {
		return nil, err
	}

	_, err = db.Exec("CREATE TABLE IF NOT EXISTS `links` " +
		"( `id` INTEGER PRIMARY KEY AUTOINCREMENT, `url` TEXT );")
	if err != nil {
		// The schema error is the one worth reporting; a secondary error
		// from Close would only obscure it, so it is dropped on purpose.
		_ = db.Close()
		return nil, err
	}

	return db, nil
}
// AddLink inserts url as a new row of the `links` table and returns the
// identifier reported by the insert result's LastInsertId. Any error from
// preparing or executing the statement is returned to the caller.
func AddLink(db *sql.DB, url string) (linkNo int64, err error) {
	insert, err := db.Prepare("INSERT INTO `links` (`url`) VALUES ($1);")
	if err != nil {
		return linkNo, err
	}
	defer insert.Close()

	res, err := insert.Exec(url)
	if err != nil {
		return linkNo, err
	}

	return res.LastInsertId()
}
// GetLink looks up the row of the `links` table whose id equals linkID and
// returns its `url` column. Errors from preparing the statement or from
// scanning the row are returned to the caller.
func GetLink(db *sql.DB, linkID int) (url string, err error) {
	lookup, err := db.Prepare("SELECT `url` FROM `links` WHERE id=$1;")
	if err != nil {
		return "", err
	}
	defer lookup.Close()

	row := lookup.QueryRow(linkID)
	err = row.Scan(&url)
	return url, err
}