Bringing in URL for host extraction
This commit is contained in:
parent
0bdf6ac5ea
commit
e414debf33
@ -1,14 +1,19 @@
|
|||||||
(ns com-jakewindle-git.scraper
|
(ns com-jakewindle-git.scraper
|
||||||
(:require [clj-http.client :as client]
|
(:require [clj-http.client :as client]
|
||||||
[hickory.core :as hick]
|
[hickory.core :as hick]
|
||||||
[hickory.select :as s]))
|
[hickory.select :as s])
|
||||||
|
(:import (java.net URL)))
|
||||||
|
|
||||||
;; Models
|
;; Models
|
||||||
(def stat-urls ["https://www.espn.com/nfl/stats/team/_/season/2011/seasontype/2"])
|
(def stat-urls ["https://www.espn.com/nfl/stats/team/_/season/2011/seasontype/2"
|
||||||
|
"https://stats.premierlacrosseleague.com/games/2022/whipsnakes-chaos-2022-6-04"])
|
||||||
|
|
||||||
(defn not-nil [v]
|
(defn not-nil [v]
|
||||||
(not (nil? v)))
|
(not (nil? v)))
|
||||||
|
|
||||||
|
(defn get-host
  "Returns the host component of `url`, a URL string.

  Example: \"https://www.espn.com/nfl/stats\" -> \"www.espn.com\".

  Throws java.net.MalformedURLException when `url` cannot be parsed
  as a URL (for example, when it has no recognised protocol)."
  [url]
  ;; java.net.URL has no zero-argument constructor; the string to parse
  ;; must be handed to it, otherwise `url` is never used at all.
  (.getHost (URL. url)))
|
||||||
|
|
||||||
(defn new-page [uri]
|
(defn new-page [uri]
|
||||||
(-> uri
|
(-> uri
|
||||||
(client/get)
|
(client/get)
|
||||||
@ -26,6 +31,13 @@
|
|||||||
(defn init-crawler []
|
(defn init-crawler []
|
||||||
(map #(new-page %) stat-urls))
|
(map #(new-page %) stat-urls))
|
||||||
|
|
||||||
|
(defn parse-pll
  "PLL parser function.

  Placeholder: accepts `parsed` and returns nil; no parsing is
  implemented yet."
  [parsed]
  ;; The docstring now precedes the argument vector so it is stored as
  ;; :doc metadata. Placed after the arguments it was the function body,
  ;; which made every call return the description string itself.
  ;; TODO: implement the actual parsing of `parsed`
  ;; (presumably a parsed page for the PLL stats site; confirm with caller).
  nil)
|
||||||
|
|
||||||
|
;; Map from a site-name string to the parser function for that site.
;; NOTE(review): the PLL entry in stat-urls has the host
;; "stats.premierlacrosseleague.com", which is not equal to the key below.
;; If lookups use the exact host string, this entry will not be found;
;; confirm how callers derive the lookup key.
(def parse-funcs
|
||||||
|
{"premierlacrosseleague.com" parse-pll})
|
||||||
|
|
||||||
;; Entities
|
;; Entities
|
||||||
(defn greet
|
(defn greet
|
||||||
"Callable entry point to the application."
|
"Callable entry point to the application."
|
||||||
|
Loading…
Reference in New Issue
Block a user