diff --git a/src/com_jakewindle_git/scraper.clj b/src/com_jakewindle_git/scraper.clj
index 87fc0f7..de6beb2 100644
--- a/src/com_jakewindle_git/scraper.clj
+++ b/src/com_jakewindle_git/scraper.clj
@@ -1,16 +1,20 @@
 (ns com-jakewindle-git.scraper
   (:require [clj-http.client :as client]
             [hickory.core :as hick]
-            [hickory.select :as s])
+            [hickory.select :as s]
+            [re-graph.core :as re-graph])
   (:import (java.net URL)))
 
 ;; Models
 (def stat-urls ["https://www.espn.com/nfl/stats/team/_/season/2011/seasontype/2"
-                "https://stats.premierlacrosseleague.com/games/2022/whipsnakes-chaos-2022-6-04"])
+                "https://stats.premierlacrosseleague.com/games/2022/whipsnakes-chaos-2022-6-04"
+                "https://espn.com/nhl/team/stats/_/name/fla/split/1"])
 
 ;; NOTE - the PLL GraphQL API can be found here: https://api.stats.premierlacrosseleague.com/graphql
 ;; use this as your query endpoint
 
+;; #fittPageContainer > div.StickyContainer > div.page-container.cf > div > div > section > div > div:nth-child(4) > div.flex > div > div.Table__Scroller > table > thead > tr.Table__sub-header.Table__TR.Table__even > th:nth-child(1)
+
 (defn not-nil [v]
   (not (nil? v)))
 
@@ -24,6 +28,78 @@
       hick/parse
       hick/as-hickory))
 
+;; GraphQL query for fetching all players from the PLL GraphQL backend
+(def pll-graphql-query "query($season: Int, $statType: StatType!) {
+  allPlayers(season: $season, statType: $statType) {
+    officialId
+    clubTeam
+    college
+    collegeYear
+    country
+    countryCode
+    dob
+    age
+    firstName
+    lastName
+    lastNameSuffix
+    handedness
+    height
+    highSchool
+    hometown
+    instagramUrl
+    injuryDescription
+    injuryStatus
+    isCaptain
+    nickname
+    bio
+    profileUrl
+    scratch
+    twitterUrl
+    weight
+    experience
+    expFromYear
+    allYears
+    slug
+    currentTeam {
+      officialId
+      fullName
+      league
+      position
+      positionName
+      jerseyNum
+    }
+    allTeams {
+      officialId
+      league
+      position
+      positionName
+      jerseyNum
+      year
+      fullName
+    }
+  }
+}")
+
+(defn on-thing
+  "Callback handed to `re-graph/query`: stores the whole response map in the
+  `pll-response` var so it can be inspected afterwards."
+  [{:keys [data errors] :as response}]
+  ;; NOTE(review): `def` inside a function rebinds a namespace-level var on
+  ;; every call; prefer an atom once other code starts reading it.
+  (def pll-response response))
+
+(defn query-pll-stats
+  "Sends `pll-graphql-query` through re-graph with season 2022 and the
+  regular stat type; the response is delivered to `on-thing`.
+  NOTE(review): presumably `re-graph/init` must have run first - confirm."
+  []
+  (re-graph/query {:query     pll-graphql-query
+                   ;; Keys must match the `$season` / `$statType` variables
+                   ;; declared in the query: GraphQL names are case-sensitive,
+                   ;; so `:stattype` would leave the required `$statType` unset.
+                   :variables {:season 2022 :statType "regular"}
+                   :callback  on-thing}))
+
 (defn get-links [parsed]
   (-> (s/select (s/child (s/tag "a")) parsed)))
 
@@ -34,13 +110,6 @@
 (defn init-crawler []
   (map #(new-page %) stat-urls))
 
-(defn parse-pll [parsed]
-  "PLL parser function"
-  )
-
-(def parse-funcs
-  {"premierlacrosseleague.com" parse-pll})
-
 ;; Entities
 (defn greet
   "Callable entry point to the application."