From cd625caa43644a964336280c1079b295010bee2b Mon Sep 17 00:00:00 2001
From: Kyle K <kylek389@gmail.com>
Date: Sat, 26 Sep 2020 19:59:14 -0500
Subject: examples: working with multi-dim dict, AWS VM provisioning with boto3
 SDK, calc num of days between 2 dates

---
 aws-provision-ec2.py         | 120 +++++++++++++++++++++++++++++++++++++++++++
 difference_between_2dates.py |  20 ++++++++
 multi-dim-dict.py            |  42 +++++++++++++++
 scratch.py                   |   3 ++
 4 files changed, 185 insertions(+)
 create mode 100644 aws-provision-ec2.py
 create mode 100644 difference_between_2dates.py
 create mode 100644 multi-dim-dict.py
 create mode 100644 scratch.py

diff --git a/aws-provision-ec2.py b/aws-provision-ec2.py
new file mode 100644
index 0000000..9eaf3e8
--- /dev/null
+++ b/aws-provision-ec2.py
@@ -0,0 +1,120 @@
+import boto3
+import os
+import time
+
+# my 'boto3-ec2-user' IAM user with Programmatic access; keys are read from the environment, never hard-coded
+settings = {
+    'aws_access_key_id': os.environ.get('AWS_ACCESS_KEY_ID'),
+    'aws_secret_access_key': os.environ.get('AWS_SECRET_ACCESS_KEY'),
+    'myregion': 'us-east-2'
+}
+
+# create session and get ec2 resource; a key that is None makes boto3 use its default credential chain
+session = boto3.Session(
+    aws_access_key_id=settings['aws_access_key_id'],
+    aws_secret_access_key=settings['aws_secret_access_key'],
+    region_name=settings['myregion']
+)
+ec2 = session.resource('ec2')
+ec2client = session.client('ec2') # also create session for the low-level Clients API, I needed it for describe_instances()
+
+
+#check if privkey was already saved to the disk
+if os.path.isfile('boto3-ec2-keypair.pem'):
+    with open('boto3-ec2-keypair.pem', 'r') as f:
+        keypair_str = f.read()
+else:
+    # create a keypair that will allow us to access instance/vm after it is provisioned
+    keypair = ec2.create_key_pair(KeyName='boto3-ec2-keypair')
+    keypair_str = str(keypair.key_material)
+    print('generated keypair:\n{0}'.format(keypair_str))
+    # save privkey so it can be used with 'ssh -i'; it is created with mode 0600 because
+    # ssh rejects private keys that other users can read, and 'with' always closes the file
+    fd = os.open('boto3-ec2-keypair.pem', os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+    with os.fdopen(fd, 'w') as outfile:
+        outfile.write(keypair_str)
+
+yaml_datadisk_dict = {
+    'yaml_datadisk_device': "/dev/xvdb",
+    'yaml_datadisk_size': "+5G",  # NOTE(review): not referenced by the script template below
+    'yaml_datadisk_fs': "xfs",
+    'yaml_datadisk_mountpoint': "/data",
+}
+
+# prepare EC2 User Data post install script that will partition the /dev/xvdb disk, create fs, and mount it
+# the text must begin with '#!' or it is not run as a shell script; it already runs as root, so no sudo
+myuserdata = '''#!/bin/bash
+echo -e "o\\nY\\nn\\n1\\n\\n\\n\\nw\\nY\\n" | gdisk {yaml_datadisk_device}
+mkfs.{yaml_datadisk_fs} {yaml_datadisk_device}1
+mkdir -p {yaml_datadisk_mountpoint}
+echo "{yaml_datadisk_device}1 {yaml_datadisk_mountpoint} {yaml_datadisk_fs} defaults 0 0" >> /etc/fstab
+mount -a
+'''.format(**yaml_datadisk_dict)
+
+
+# create a new micro EC2 instance/vm with Amazon Linux 2 AMI image
+# create_instances() returns a list of Instance resources, which is why later code uses instance[0]
+instance = ec2.create_instances(
+    BlockDeviceMappings=[
+        {
+            'DeviceName': '/dev/xvda',
+            'Ebs': {'VolumeSize': 9},  # size in GiB
+        },
+        {
+            # data volume: take the device name from the same dict the User Data script is built from
+            'DeviceName': yaml_datadisk_dict['yaml_datadisk_device'],
+            'Ebs': {'VolumeSize': 9},
+        },
+    ],
+    ImageId='ami-0603cbe34fd08cb81',
+    MinCount=1,
+    MaxCount=1,
+    InstanceType='t2.micro',
+    KeyName='boto3-ec2-keypair',
+    UserData=myuserdata,
+    TagSpecifications=[
+        {
+            'ResourceType': 'instance',
+            'Tags': [
+                # the Name tag is what the describe_instances() filter matches on
+                {'Key': 'Name', 'Value': 'myBotoEC2micro'},
+            ],
+        }
+    ],
+)
+# wait until instance's state changes to 'running'; the waiter lives on the Instance resource, not the client
+instance[0].wait_until_running()
+# attributes are cached at launch: 'state' is a dict such as {'Code': 0, 'Name': 'pending'},
+# never a plain string, and boto3 refreshes it with reload()
+while instance[0].state['Name'] == 'pending':
+    time.sleep(3)
+    instance[0].reload()
+print("instance id: {0}".format(instance[0].id))
+
+# aws ec2 describe-instances --filters Name=tag:Name,Values=myBotoEC2micro Name=instance-state-name,Values=running --query 'Reservations[*].Instances[*].InstanceId' --output text
+res = ec2client.describe_instances(
+    Filters=[
+        {'Name': 'tag:Name', 'Values': ['myBotoEC2micro']},
+        # terminated instances from earlier runs keep their tags for a while; without this
+        # filter Reservations[0] can be one of them, with no public DNS name
+        {'Name': 'instance-state-name', 'Values': ['running']},
+    ]
+)
+instanceID = res['Reservations'][0]['Instances'][0]['InstanceId']
+print("again instance id is: {0}".format(instanceID))
+
+# boto3 API can only check if the volume is attached to an instance, but not if it is mounted
+# our volume that will hold /data was already attached in ec2.create_instances() step
+# we will use EC2 User Data to format the /dev/xvdb volume with xfs filesystem & mount it at /data
+
+# the catch is that EC2 User Data can be only used once during creation/launch of new instance/vm,
+# so you need to feed it to ec2.create_instances()
+
+# get domain name of newly launched instance ('ec2-user' is the login user on Amazon Linux)
+publicDNS = res['Reservations'][0]['Instances'][0]['PublicDnsName']
+print("Login to new instance using:\n"
+    "ssh -i {0} ec2-user@{1}".format('boto3-ec2-keypair.pem', publicDNS))
+
+
+#for i in ec2.instances.all():
+#    print(i.id)
diff --git a/difference_between_2dates.py b/difference_between_2dates.py
new file mode 100644
index 0000000..529bebb
--- /dev/null
+++ b/difference_between_2dates.py
@@ -0,0 +1,20 @@
+import sys
+
+def NumDaysBetween(y1,m1,d1,y2,m2,d2):
+    """Return the days from y1-m1-d1 to y2-m2-d2 (negative if the dates are reversed)."""
+    if (y2, m2, d2) < (y1, m1, d1):
+        return -NumDaysBetween(y2, m2, d2, y1, m1, d1)
+    # add every month from (y1, m1) up to, but excluding, (y2, m2), then the day offset
+    days, y, m = d2 - d1, y1, m1
+    while (y, m) < (y2, m2):
+        days += NumDaysInMonth(y, m)
+        y, m = (y, m + 1) if m < 12 else (y + 1, 1)  # December rolls over into next January
+    return days
+
+def NumDaysInMonth(y, m):
+    """Return the number of days in month m (1-12) of year y, leap years included."""
+    import calendar  # local import keeps this fix self-contained
+    return calendar.monthrange(y, m)[1]
+
+print(NumDaysBetween(2010,5,1,2011,5,1))
+print(NumDaysBetween(2010,5,1,2011,8,5))
diff --git a/multi-dim-dict.py b/multi-dim-dict.py
new file mode 100644
index 0000000..6b9e08f
--- /dev/null
+++ b/multi-dim-dict.py
@@ -0,0 +1,42 @@
+import sys
+import re
+
+# given following csv input, sum up the log size (last field) per each exchange per day
+
+csv_input = """date,process,host,log,bytes
+20140206,cme_trader_2,cme0001,0345-cme_trader_2.log.gz,15400000
+20140206,phlx_trader_1,phlx0001,0651-phlx_trader_1.log.gz,14100000
+20140206,phlx_trader_2,phlx0001,0645-phlx_trader_2.log.gz,13800000
+20140207,cme_trader_2,cme0001,0345-cme_trader_2.log.gz,15800000
+20140207,cme_trader_3,cme0001,0345-cme_trader_3.log.gz,14200000
+20140207,phlx_trader_1,phlx0001,0651-phlx_trader_1.log.gz,24100000"""
+
+# you need to access and update the numsize of each exch
+# you want to leverage a multi-dim dict (date -> exchange -> total bytes) for mapping each
+# line of csv onto a running sum, to avoid keeping a csv like structure in memory
+header = ''
+exchange_logs = {}
+
+# \d+ rather than \d: trader ids of 10 and above must not be mistaken for the header row
+process_re = re.compile(r"^(.*)_trader_\d+$")
+for line in csv_input.splitlines():
+    fields = line.split(',')
+    match = process_re.search(fields[1])
+    if match is None:
+        # only the column-name row lacks a '<exch>_trader_<n>' process; remember the first
+        # such row as the header so that a later malformed row cannot overwrite it
+        header = header or line
+        continue
+    log_exch = match.group(1)
+    log_date = fields[0]
+    log_size = int(fields[4])
+
+    # setdefault() creates the per-date dict on first sight, get(..., 0) starts a new
+    # exchange at zero, so one statement covers both the initial assignment and the increment
+    per_exchange = exchange_logs.setdefault(log_date, {})
+    per_exchange[log_exch] = per_exchange.get(log_exch, 0) + log_size
+
+print(header)
+for log_date, per_exchange in sorted(exchange_logs.items()):  # dates in ascending order
+    for exchange, total_bytes in sorted(per_exchange.items()):  # then exchanges by name
+        print("{0},{1},{2}".format(log_date, exchange, total_bytes))
\ No newline at end of file
diff --git a/scratch.py b/scratch.py
new file mode 100644
index 0000000..cb03373
--- /dev/null
+++ b/scratch.py
@@ -0,0 +1,3 @@
+import sys
+
+sys.stdout.write("hello, {0}".format('world!') + "\n")  # same bytes as print(); uses the sys import
\ No newline at end of file
-- 
cgit v1.2.3