diff options
| -rw-r--r-- | README.md | 27 | ||||
| -rw-r--r-- | cl.py | 8 | ||||
| -rw-r--r-- | django_clscrap/settings.py | 1 | ||||
| -rw-r--r-- | myapp/templates/index.html | 37 | ||||
| -rw-r--r-- | myapp/views.py | 4 | ||||
| -rw-r--r-- | requirements.txt | 1 | ||||
| -rw-r--r-- | screenshot.jpg | bin | 0 -> 110832 bytes | 
7 files changed, 71 insertions, 7 deletions
| diff --git a/README.md b/README.md new file mode 100644 index 0000000..1eb0fff --- /dev/null +++ b/README.md @@ -0,0 +1,27 @@ +# CLscrap + +CLscrap is a craigslist scraper that provides a RESTful API at /api/cl that returns JSON data + +  - uses Django and templating +  - renders results at /app + +### Installation + +CLscrap depends on following modules + - beautifulsoup4==4.5.3 + - bs4==0.0.1 + - Django==1.10.6 + - requests==2.13.0 + +Install +```sh +$ pip install -r requirements.txt +$ python manage.py migrate +$ python manage.py createsuperuser +$ python manage.py runserver +$ curl http://localhost:8000/api/cl +``` + +### Screenshot + + @@ -6,8 +6,8 @@ from bs4 import BeautifulSoup  def query_craigslist(baseurl=None, keyword='wrx|sti'):      if baseurl is None: -        baseurl = 'https://chicago.craigslist.org/' -    response = requests.get(baseurl + 'search/pta', params={'query': keyword, 'srchType': 'T'}) +        baseurl = 'https://chicago.craigslist.org' +    response = requests.get(baseurl + '/search/pta', params={'query': keyword, 'srchType': 'T'})      soup = BeautifulSoup(response.content, "html.parser")      results = soup.find_all('li', {'class': 'result-row'})  # at max 120 results per 1 page @@ -27,12 +27,12 @@ def query_craigslist(baseurl=None, keyword='wrx|sti'):          except AttributeError:              pass  # ignore empty fields -    return items +    return items, baseurl, keyword  def main():      parser = argparse.ArgumentParser(description="craigslist WRX and STi parts finder", parents=()) -    parser.add_argument("-b", "--baseurl", help='baseurl, e.g. https://chicago.craigslist.org/') +    parser.add_argument("-b", "--baseurl", help='baseurl, e.g. 
https://chicago.craigslist.org')      parser.add_argument("-k", "--keyword", default='wrx|sti', help='keyword to search')      args, extra_args = parser.parse_known_args() diff --git a/django_clscrap/settings.py b/django_clscrap/settings.py index 39a1021..5c8092f 100644 --- a/django_clscrap/settings.py +++ b/django_clscrap/settings.py @@ -37,7 +37,6 @@ INSTALLED_APPS = [      'django.contrib.sessions',      'django.contrib.messages',      'django.contrib.staticfiles', -    'rest_framework',      'myapp.apps.MyappConfig',  ] diff --git a/myapp/templates/index.html b/myapp/templates/index.html new file mode 100644 index 0000000..1a1f618 --- /dev/null +++ b/myapp/templates/index.html @@ -0,0 +1,37 @@ +<!DOCTYPE html> +<html lang="en"> +<head> +    <meta charset="UTF-8"> +    <title>Craigslist Scraper</title> +    <style type="text/css"> +        #item-list { position: relative; } +        .item { margin-bottom: 10px; width: 340px; height: 380px; float: left; margin-left: 10px; } +        .item-title { display: block; font-size: 1.3em } +        .item-price { font-weight: bold; padding-left: 20px;} +        .item img { display: block; } +    </style> +</head> +<body> +<h2>Craigslist Scrapper</h2> +<span>get list of current WRX STi parts for sale in Chicago area</span> +<pre> +    url: {{ baseurl }} +    query: {{ keyword }} + +    author: fatalhalt +    https://github.com/fatalhalt/CLscrap + +</pre> +<div id="item-list"> +{% for i in data %} +    <div class="item"> +        <a href="{{ baseurl }}{{ i.link }}" class="item-title">{{ i.title }}</a> +        <span class="item-date">{{ i.date }}</span> +        <span class="item-price">{{ i.price }}</span> +        <span class="item-hood">{{ i.hood }}</span> +        <img src="{{ i.img }}" alt="img" /> +    </div> +{% endfor %} +</div> +</body> +</html>
\ No newline at end of file diff --git a/myapp/views.py b/myapp/views.py index 6b014ac..600a061 100644 --- a/myapp/views.py +++ b/myapp/views.py @@ -2,11 +2,13 @@ from django.shortcuts import render  # Create your views here.  from django.http import HttpResponse, JsonResponse +from django.shortcuts import render  import cl  def app(request): -    return HttpResponse("hello, app!") +    data, baseurl, keyword = cl.query_craigslist() +    return render(request, 'index.html', {'data': data, 'baseurl': baseurl, 'keyword': keyword})  def page(request): diff --git a/requirements.txt b/requirements.txt index d6448fc..df760c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@  beautifulsoup4==4.5.3  bs4==0.0.1  Django==1.10.6 -djangorestframework==3.6.2  requests==2.13.0 diff --git a/screenshot.jpg b/screenshot.jpg Binary files differ new file mode 100644 index 0000000..9f36f13 --- /dev/null +++ b/screenshot.jpg | 
