From e414debf3300b1adf55fdcc2c21524b646b3b576 Mon Sep 17 00:00:00 2001
From: windlejacob12
Date: Sat, 3 Sep 2022 23:27:04 -0400
Subject: [PATCH] Bringing in URL for host extraction

---
Review notes (ignored by `git am`):
  * get-host: pass `url` to the java.net.URL constructor. URL has no
    zero-argument constructor, so `(new URL)` fails to compile.
  * parse-pll: move the docstring ahead of the parameter vector so it is
    attached as documentation instead of being the function's return value.
  * parse-funcs is keyed by "premierlacrosseleague.com" while get-host on the
    new stat URL yields "stats.premierlacrosseleague.com"; confirm the lookup
    accounts for subdomains.
  * The blob id on the `index` line predates these fixes.

 src/com_jakewindle_git/scraper.clj | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/com_jakewindle_git/scraper.clj b/src/com_jakewindle_git/scraper.clj
index 9279242..a877939 100644
--- a/src/com_jakewindle_git/scraper.clj
+++ b/src/com_jakewindle_git/scraper.clj
@@ -1,14 +1,19 @@
 (ns com-jakewindle-git.scraper
   (:require [clj-http.client :as client]
             [hickory.core :as hick]
-            [hickory.select :as s]))
+            [hickory.select :as s])
+  (:import (java.net URL)))
 
 ;; Models
-(def stat-urls ["https://www.espn.com/nfl/stats/team/_/season/2011/seasontype/2"])
+(def stat-urls ["https://www.espn.com/nfl/stats/team/_/season/2011/seasontype/2"
+                "https://stats.premierlacrosseleague.com/games/2022/whipsnakes-chaos-2022-6-04"])
 
 (defn not-nil [v]
   (not (nil? v)))
 
+(defn get-host [url]
+  (.getHost (new URL url)))
+
 (defn new-page [uri]
   (-> uri
       (client/get)
@@ -26,6 +31,13 @@
 (defn init-crawler []
   (map #(new-page %) stat-urls))
 
+(defn parse-pll
+  "PLL parser function"
+  [parsed])
+
+(def parse-funcs
+  {"premierlacrosseleague.com" parse-pll})
+
 ;; Entities
 (defn greet
   "Callable entry point to the application."