More link functions

This commit is contained in:
windlejacob12 2022-09-03 21:33:48 -04:00
parent 68f5cce136
commit 0bdf6ac5ea

View File

@ -1,25 +1,30 @@
(ns com-jakewindle-git.scraper (ns com-jakewindle-git.scraper
(:gen-class)
(:require [clj-http.client :as client] (:require [clj-http.client :as client]
[hickory.core :as hick])) [hickory.core :as hick]
[hickory.select :as s]))
;; Models ;; Models
;; Seed URLs; `init-crawler` maps `new-page` over this vector.
(def stat-urls ["https://www.espn.com/nfl/stats/team/_/season/2011/seasontype/2"])
;; Page (defn not-nil [v]
(defn page [html] (not (nil? v)))
{:html html :parsed nil})
(defn get-src
  "Issues `client/get` against `uri` and returns the `:body` of the response."
  [uri]
  (let [response (client/get uri)]
    (:body response)))
(defn parse-src
  "Passes `src` to `hick/parse` and returns whatever that call produces."
  [src]
  (-> src
      hick/parse))
(defn new-page [uri] (defn new-page [uri]
(-> uri (-> uri
(get-src) (client/get)
(parse-src) :body
(page))) hick/parse
hick/as-hickory))
(defn get-links
  "Runs `s/select` over `parsed` with a selector built from `s/tag` for the
  `a` tag (wrapped in `s/child`) and returns the selection result."
  [parsed]
  (let [anchor-selector (s/child (s/tag "a"))]
    (s/select anchor-selector parsed)))
(defn links-to-text
  "Returns a lazy sequence of the `:href` values found under `:attrs` in each
  element of `links`, skipping elements where that value is nil.

  Note: despite the name, this yields href attribute values, not link text."
  [links]
  ;; `keep` maps and drops nil results in one pass, so no separate
  ;; nil-filtering helper is required.
  (keep #(get-in % [:attrs :href]) links))
(defn init-crawler
  "Maps `new-page` over `stat-urls` and returns the resulting sequence.

  `map` is lazy, so `new-page` is only invoked as the result is consumed."
  []
  (map new-page stat-urls))
;; Entities ;; Entities
(defn greet (defn greet