More link functions
This commit is contained in:
parent
68f5cce136
commit
0bdf6ac5ea
@ -1,25 +1,30 @@
|
|||||||
(ns com-jakewindle-git.scraper
|
(ns com-jakewindle-git.scraper
|
||||||
(:gen-class)
|
|
||||||
(:require [clj-http.client :as client]
|
(:require [clj-http.client :as client]
|
||||||
[hickory.core :as hick]))
|
[hickory.core :as hick]
|
||||||
|
[hickory.select :as s]))
|
||||||
|
|
||||||
;; Models
|
;; Models
|
||||||
|
(def stat-urls ["https://www.espn.com/nfl/stats/team/_/season/2011/seasontype/2"])
|
||||||
|
|
||||||
;; Page
|
(defn not-nil [v]
|
||||||
(defn page [html]
|
(not (nil? v)))
|
||||||
{:html html :parsed nil})
|
|
||||||
|
|
||||||
(defn get-src [uri]
|
|
||||||
(:body (client/get uri)))
|
|
||||||
|
|
||||||
(defn parse-src [src]
|
|
||||||
(hick/parse src))
|
|
||||||
|
|
||||||
(defn new-page [uri]
|
(defn new-page [uri]
|
||||||
(-> uri
|
(-> uri
|
||||||
(get-src)
|
(client/get)
|
||||||
(parse-src)
|
:body
|
||||||
(page)))
|
hick/parse
|
||||||
|
hick/as-hickory))
|
||||||
|
|
||||||
|
(defn get-links [parsed]
|
||||||
|
(-> (s/select (s/child (s/tag "a"))
|
||||||
|
parsed)))
|
||||||
|
|
||||||
|
(defn links-to-text [links]
|
||||||
|
(filter not-nil (map #(-> % :attrs :href) links)))
|
||||||
|
|
||||||
|
(defn init-crawler []
|
||||||
|
(map #(new-page %) stat-urls))
|
||||||
|
|
||||||
;; Entities
|
;; Entities
|
||||||
(defn greet
|
(defn greet
|
||||||
|
Loading…
Reference in New Issue
Block a user