summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKyle K <kylek389@gmail.com>2017-03-27 03:14:49 -0500
committerKyle K <kylek389@gmail.com>2017-03-27 03:14:49 -0500
commit6c4a69c8fc0635140ed2cf589cb87397cd9e8e87 (patch)
treeb22c1abd08159b8f8e5b89e88c6eca1615214f6d
parentb407981d418943f737a9eb76d3566dc6f913ad37 (diff)
downloadCLscrap-6c4a69c8fc0635140ed2cf589cb87397cd9e8e87.tar.gz
CLscrap-6c4a69c8fc0635140ed2cf589cb87397cd9e8e87.tar.bz2
CLscrap-6c4a69c8fc0635140ed2cf589cb87397cd9e8e87.zip
use templating to render HTML page on /app
-rw-r--r--README.md27
-rw-r--r--cl.py8
-rw-r--r--django_clscrap/settings.py1
-rw-r--r--myapp/templates/index.html37
-rw-r--r--myapp/views.py4
-rw-r--r--requirements.txt1
-rw-r--r--screenshot.jpgbin0 -> 110832 bytes
7 files changed, 71 insertions, 7 deletions
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1eb0fff
--- /dev/null
+++ b/README.md
@@ -0,0 +1,27 @@
+# CLscrap
+
+CLscrap is a Craigslist scraper that exposes a RESTful API at /api/cl, which returns the results as JSON.
+
+ - uses Django and templating
+ - renders results at /app
+
+### Installation
+
+CLscrap depends on the following modules:
+ - beautifulsoup4==4.5.3
+ - bs4==0.0.1
+ - Django==1.10.6
+ - requests==2.13.0
+
+Install
+```sh
+$ pip install -r requirements.txt
+$ python manage.py migrate
+$ python manage.py createsuperuser
+$ python manage.py runserver
+$ curl http://localhost:8000/api/cl
+```
+
+### Screenshot
+
+![clscrap](https://raw.githubusercontent.com/fatalhalt/CLscrap/master/screenshot.jpg?raw=true)
diff --git a/cl.py b/cl.py
index fafd65b..a908a99 100644
--- a/cl.py
+++ b/cl.py
@@ -6,8 +6,8 @@ from bs4 import BeautifulSoup
def query_craigslist(baseurl=None, keyword='wrx|sti'):
if baseurl is None:
- baseurl = 'https://chicago.craigslist.org/'
- response = requests.get(baseurl + 'search/pta', params={'query': keyword, 'srchType': 'T'})
+ baseurl = 'https://chicago.craigslist.org'
+ response = requests.get(baseurl + '/search/pta', params={'query': keyword, 'srchType': 'T'})
soup = BeautifulSoup(response.content, "html.parser")
results = soup.find_all('li', {'class': 'result-row'}) # at max 120 results per 1 page
@@ -27,12 +27,12 @@ def query_craigslist(baseurl=None, keyword='wrx|sti'):
except AttributeError:
pass # ignore empty fields
- return items
+ return items, baseurl, keyword
def main():
parser = argparse.ArgumentParser(description="craigslist WRX and STi parts finder", parents=())
- parser.add_argument("-b", "--baseurl", help='baseurl, e.g. https://chicago.craigslist.org/')
+ parser.add_argument("-b", "--baseurl", help='baseurl, e.g. https://chicago.craigslist.org')
parser.add_argument("-k", "--keyword", default='wrx|sti', help='keyword to search')
args, extra_args = parser.parse_known_args()
diff --git a/django_clscrap/settings.py b/django_clscrap/settings.py
index 39a1021..5c8092f 100644
--- a/django_clscrap/settings.py
+++ b/django_clscrap/settings.py
@@ -37,7 +37,6 @@ INSTALLED_APPS = [
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
- 'rest_framework',
'myapp.apps.MyappConfig',
]
diff --git a/myapp/templates/index.html b/myapp/templates/index.html
new file mode 100644
index 0000000..1a1f618
--- /dev/null
+++ b/myapp/templates/index.html
@@ -0,0 +1,37 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+ <meta charset="UTF-8">
+ <title>Craigslist Scraper</title>
+ <style type="text/css">
+ #item-list { position: relative; }
+ .item { margin-bottom: 10px; width: 340px; height: 380px; float: left; margin-left: 10px; }
+ .item-title { display: block; font-size: 1.3em }
+ .item-price { font-weight: bold; padding-left: 20px;}
+ .item img { display: block; }
+ </style>
+</head>
+<body>
+<h2>Craigslist Scraper</h2>
+<span>Get a list of current WRX STi parts for sale in the Chicago area</span>
+<pre>
+ url: {{ baseurl }}
+ query: {{ keyword }}
+
+ author: fatalhalt
+ https://github.com/fatalhalt/CLscrap
+
+</pre>
+<div id="item-list">
+{% for i in data %}
+ <div class="item">
+ <a href="{{ baseurl }}{{ i.link }}" class="item-title">{{ i.title }}</a>
+ <span class="item-date">{{ i.date }}</span>
+ <span class="item-price">{{ i.price }}</span>
+ <span class="item-hood">{{ i.hood }}</span>
+ <img src="{{ i.img }}" alt="img" />
+ </div>
+{% endfor %}
+</div>
+</body>
+</html> \ No newline at end of file
diff --git a/myapp/views.py b/myapp/views.py
index 6b014ac..600a061 100644
--- a/myapp/views.py
+++ b/myapp/views.py
@@ -2,11 +2,13 @@ from django.shortcuts import render
# Create your views here.
from django.http import HttpResponse, JsonResponse
+from django.shortcuts import render
import cl
def app(request):
- return HttpResponse("hello, app!")
+ data, baseurl, keyword = cl.query_craigslist()
+ return render(request, 'index.html', {'data': data, 'baseurl': baseurl, 'keyword': keyword})
def page(request):
diff --git a/requirements.txt b/requirements.txt
index d6448fc..df760c2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,4 @@
beautifulsoup4==4.5.3
bs4==0.0.1
Django==1.10.6
-djangorestframework==3.6.2
requests==2.13.0
diff --git a/screenshot.jpg b/screenshot.jpg
new file mode 100644
index 0000000..9f36f13
--- /dev/null
+++ b/screenshot.jpg
Binary files differ