From d1d2f73b4fb125362266168a9710e3b1e4700e87 Mon Sep 17 00:00:00 2001 From: Mikhail Klementyev Date: Fri, 22 Jul 2016 15:43:23 +0300 Subject: [PATCH] Implements link walking --- main.go | 83 +++++++++++++++++++++++++++++++++++++++++----- storage/storage.go | 55 ++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 8 deletions(-) create mode 100644 storage/storage.go diff --git a/main.go b/main.go index 5464c2b..d1f2db6 100644 --- a/main.go +++ b/main.go @@ -9,27 +9,32 @@ package main import ( + "bytes" + "database/sql" + "strings" + "fmt" "io/ioutil" "log" "net/http" + "./storage" + "github.com/jaytaylor/html2text" + "golang.org/x/net/html" "golang.org/x/net/html/charset" kingpin "gopkg.in/alecthomas/kingpin.v2" ) var ( - url = kingpin.Flag("url", "Url").String() + arg_url = kingpin.Flag("url", "Url").String() + arg_link = kingpin.Flag("link", "Link").Int() ) -func main() { - - kingpin.Parse() - +func cmd_url(db *sql.DB, url string) { client := &http.Client{} - req, err := http.NewRequest("GET", *url, nil) + req, err := http.NewRequest("GET", url, nil) if err != nil { log.Fatalln(err) } @@ -55,10 +60,72 @@ func main() { return } - text, err := html2text.FromString(string(body)) + htmlPage := string(body) + + z := html.NewTokenizer(bytes.NewReader(body)) + + for { + tt := z.Next() + if tt == html.ErrorToken { + break + } + + for { + key, value, moreAttr := z.TagAttr() + + if string(key) == "href" { + + url, err := req.URL.Parse(string(value)) + if err != nil { + panic(err) + } + + linkNo, err := storage.AddLink(db, url.String()) + if err != nil { + panic(err) + } + + for _, s := range []string{string(value), html.EscapeString(string(value))} { + htmlPage = strings.Replace(htmlPage, "\""+s+"\"", + "\""+fmt.Sprintf("%d", linkNo)+"\"", -1) + } + } + + if !moreAttr { + break + } + } + } + + text, err := html2text.FromString(htmlPage) + if err != nil { + panic(err) + } + text += "" + + fmt.Println(text) +} + +func cmd_link(db *sql.DB, linkID int) { + url, err := storage.GetLink(db, linkID) if err != nil { panic(err) } - fmt.Println(text) + cmd_url(db, url) +} + +func main() { + db, err := storage.OpenDB("/tmp/wi.db") + if err != nil { + panic(err) + } + + kingpin.Parse() + + if *arg_url != "" { + cmd_url(db, *arg_url) + } else if *arg_link != 0 { + cmd_link(db, *arg_link) + } } diff --git a/storage/storage.go b/storage/storage.go new file mode 100644 index 0000000..b8819df --- /dev/null +++ b/storage/storage.go @@ -0,0 +1,55 @@ +/** + * @file storage.go + * @author Mikhail Klementyev jollheefriseup.net + * @license GNU GPLv3 + * @date July, 2016 + */ + +package storage + +import ( + "database/sql" + + _ "github.com/mattn/go-sqlite3" +) + +func OpenDB(path string) (db *sql.DB, err error) { + db, err = sql.Open("sqlite3", path) + if err != nil { + return + } + + _, err = db.Exec("CREATE TABLE IF NOT EXISTS `links` " + + "( `id` INTEGER PRIMARY KEY AUTOINCREMENT, `url` TEXT );") + + return +} + +func AddLink(db *sql.DB, url string) (linkNo int64, err error) { + stmt, err := db.Prepare("INSERT INTO `links` (`url`) VALUES ($1);") + if err != nil { + return + } + defer stmt.Close() + + r, err := stmt.Exec(url) + if err != nil { + return + } + + linkNo, err = r.LastInsertId() + + return +} + +func GetLink(db *sql.DB, linkID int) (url string, err error) { + stmt, err := db.Prepare("SELECT `url` FROM `links` WHERE id=$1;") + if err != nil { + return + } + defer stmt.Close() + + err = stmt.QueryRow(linkID).Scan(&url) + + return +}