From 68f5cce136346e172993dc89ed4f5de3105f812d Mon Sep 17 00:00:00 2001
From: windlejacob12
Date: Sat, 3 Sep 2022 20:27:28 -0400
Subject: [PATCH] New scraping routines

---
 src/com_jakewindle_git/scraper.clj | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/src/com_jakewindle_git/scraper.clj b/src/com_jakewindle_git/scraper.clj
index 3da23a2..6728b5a 100644
--- a/src/com_jakewindle_git/scraper.clj
+++ b/src/com_jakewindle_git/scraper.clj
@@ -1,6 +1,36 @@
 (ns com-jakewindle-git.scraper
-  (:require [clj-http.client :as client]))
+  (:gen-class)
+  (:require [clj-http.client :as client]
+            [hickory.core :as hick]))
 
+;; Models
+
+;; Page
+(defn page
+  "Builds a page map holding the raw markup under :html; :parsed starts out
+  as nil until a parsed representation is attached."
+  [html]
+  {:html html :parsed nil})
+
+(defn get-src
+  "Issues an HTTP GET for uri via clj-http and returns the response :body."
+  [uri]
+  (:body (client/get uri)))
+
+(defn parse-src
+  "Parses the markup string src with hickory's parse."
+  [src]
+  (hick/parse src))
+
+(defn new-page
+  "Fetches uri and returns a page map with the raw source under :html and
+  its parsed form under :parsed."
+  [uri]
+  ;; Bind the source once: it is stored as :html and is also the parser input.
+  (let [src (get-src uri)]
+    (assoc (page src) :parsed (parse-src src))))
+
+;; Entities
 (defn greet
   "Callable entry point to the application."
   [data]