summaryrefslogtreecommitdiffstats
path: root/multi-dim-dict.py
diff options
context:
space:
mode:
Diffstat (limited to 'multi-dim-dict.py')
-rw-r--r--multi-dim-dict.py42
1 files changed, 42 insertions, 0 deletions
diff --git a/multi-dim-dict.py b/multi-dim-dict.py
new file mode 100644
index 0000000..6b9e08f
--- /dev/null
+++ b/multi-dim-dict.py
@@ -0,0 +1,42 @@
+import sys
+import re
+
+# Given the following CSV input, sum the log size (last field) per exchange per day.
+
+csv_input = """date,process,host,log,bytes
+20140206,cme_trader_2,cme0001,0345-cme_trader_2.log.gz,15400000
+20140206,phlx_trader_1,phlx0001,0651-phlx_trader_1.log.gz,14100000
+20140206,phlx_trader_2,phlx0001,0645-phlx_trader_2.log.gz,13800000
+20140207,cme_trader_2,cme0001,0345-cme_trader_2.log.gz,15800000
+20140207,cme_trader_3,cme0001,0345-cme_trader_3.log.gz,14200000
+20140207,phlx_trader_1,phlx0001,0651-phlx_trader_1.log.gz,24100000"""
+
+# You need to read and update the running size total of each exchange.
+# Leverage a multi-dimensional (nested) dict that maps each CSV line straight to its total, so no CSV-like structure is kept in memory.
+# Sum log sizes into a two-level dict: exchange_logs[date][exchange] -> total bytes.
+header = ''
+exchange_logs = {}
+# The exchange is the text before "_trader_<n>"; \d+ also accepts multi-digit trader numbers.
+trader_re = re.compile(r"^(.*)_trader_\d+$")
+for line in csv_input.splitlines():
+    if not line.strip():
+        continue  # tolerate blank lines instead of failing on the field lookups below
+    fields = line.split(',')
+    match = trader_re.search(fields[1])
+    if match is None:
+        # Rows without a "<exch>_trader_<n>" process are not data; keep only the
+        # first one as the header so a later stray row cannot overwrite it.
+        if not header:
+            header = line
+        continue
+    log_date, log_exch, log_size = fields[0], match.group(1), int(fields[4])
+
+    # setdefault creates the per-date dict on first sight; get() starts a new sum at 0.
+    per_exch = exchange_logs.setdefault(log_date, {})
+    per_exch[log_exch] = per_exch.get(log_exch, 0) + log_size
+
+# NOTE(review): this echoes the 5-column input header, but the rows below have 3 columns.
+print(header)
+for k, v in sorted(exchange_logs.items()):
+    for k2, v2 in sorted(v.items()):
+        print("{0},{1},{2}".format(k, k2, v2))
\ No newline at end of file