From e7a8b308273e74279f04ca6635c1d4aa00a842b1 Mon Sep 17 00:00:00 2001 From: Kyle K Date: Tue, 18 Sep 2012 20:09:55 -0500 Subject: add a regexp file --- regexp.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 regexp.txt diff --git a/regexp.txt b/regexp.txt new file mode 100644 index 0000000..7cef9cb --- /dev/null +++ b/regexp.txt @@ -0,0 +1,15 @@ +[regexp] +- difference between ([^c])+ and ([^c]+) heh :p? the first captures only the last matched letter occurrence, the latter captures the whole matched string + +- grep a cl page +# grep -E "^\\s*\\s*([^<])+\\s*$" cl.html + +- same but using sed, note how the forward-slash in '</a>' got escaped +$ sed -n -r "/^\\s*\\s*([^<])+<\/a>\\s*$/p" cl.html + +- now sed with 2 column output, link mapping to desc, note that +'s were moved into () +$ sed -r -n "s/^\\s*\\s*([^<]+)<\/a>\\s*$/\1 \2/p" cl.html + +- full cl search +$ curl -s -i 'http://chicago.craigslist.org/search/pta?query=wrx+|+sti+|+impreza+|+subaru&srchType=T' | sed -r -n "s/^\\s*\\s*([^<]+)<\/a>\\s*$/\1 \2/p" + -- cgit v1.2.3