# ---- aws-provision-ec2.py ----
# From commit cd625caa ("examples: working with multi-dim dict, AWS VM
# provisioning with boto3 SDK, calc num of days between 2 dates").
"""Provision a t2.micro Amazon Linux 2 EC2 instance with the boto3 SDK.

Steps performed when the script runs:

1. Open a boto3 session (resource API + low-level client API).
2. Reuse the private key saved on disk, or create a new EC2 key pair and
   save its private key for later use with ``ssh -i``.
3. Launch one instance with a root volume and a second data volume, passing
   a User Data shell script that partitions, formats and mounts the data
   volume on first boot.
4. Wait until the instance is running and its status checks pass, then print
   the instance id and the ssh command needed to log in.
"""
import os
import time

import boto3

KEY_NAME = 'boto3-ec2-keypair'
KEY_FILE = KEY_NAME + '.pem'
INSTANCE_NAME = 'myBotoEC2micro'

# Credentials of the 'boto3-ec2-user' IAM user with Programmatic access.
# They are read from the environment instead of being committed to the source
# file; when a variable is unset the value is None and boto3 falls back to its
# default credential chain (~/.aws/credentials, instance role, ...).
settings = {
    'aws_access_key_id': os.environ.get('AWS_ACCESS_KEY_ID'),
    'aws_secret_access_key': os.environ.get('AWS_SECRET_ACCESS_KEY'),
    'myregion': 'us-east-2',
}

# create session and get ec2 resource
session = boto3.Session(
    aws_access_key_id=settings['aws_access_key_id'],
    aws_secret_access_key=settings['aws_secret_access_key'],
    region_name=settings['myregion'],
)
ec2 = session.resource('ec2')
# low-level Clients API: needed for describe_instances() and for waiters
ec2client = session.client('ec2')

# check if privkey was already saved to the disk
if os.path.isfile(KEY_FILE):
    with open(KEY_FILE, 'r') as f:
        keypair_str = f.read()
else:
    # create a keypair that will allow us to access instance/vm after it is provisioned
    keypair = ec2.create_key_pair(KeyName=KEY_NAME)

    # capture the key and store it in a file
    keypair_str = str(keypair.key_material)
    # NOTE(review): this echoes the private key to stdout, as the original did.
    print('generated keypair:\n{0}'.format(keypair_str))

    # Create the file owner-read/write only: ssh refuses to use a private key
    # file that other users can read.
    fd = os.open(KEY_FILE, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, 'w') as outfile:
        outfile.write(keypair_str)  # save privkey so it can be used with 'ssh -i'

yaml_datadisk_dict = {
    'yaml_datadisk_device': "/dev/xvdb",
    'yaml_datadisk_size': "+5G",  # not referenced by the script template below
    'yaml_datadisk_fs': "xfs",
    'yaml_datadisk_mountpoint': "/data",
}

# Prepare the EC2 User Data post install script that will partition the
# /dev/xvdb disk, create the fs, and mount it.
# The '#!' line must be the very first line, otherwise cloud-init does not
# treat the user data as a shell script. User data scripts already run as
# root, so no 'sudo su' is needed.
myuserdata = '''#!/bin/bash
echo -e "o\\nY\\nn\\n1\\n\\n\\n\\nw\\nY\\n" | gdisk {yaml_datadisk_device}
mkfs.{yaml_datadisk_fs} {yaml_datadisk_device}1
mkdir -p {yaml_datadisk_mountpoint}
echo "{yaml_datadisk_device}1 {yaml_datadisk_mountpoint} {yaml_datadisk_fs} defaults 0 0" >> /etc/fstab
mount -a
'''.format(**yaml_datadisk_dict)

# create a new micro EC2 instance/vm with Amazon Linux 2 AMI image
instance = ec2.create_instances(
    BlockDeviceMappings=[
        {
            'DeviceName': '/dev/xvda',
            'Ebs': {'VolumeSize': 9},
        },
        {
            'DeviceName': '/dev/xvdb',
            'Ebs': {'VolumeSize': 9},
        },
    ],
    ImageId='ami-0603cbe34fd08cb81',
    MinCount=1,
    MaxCount=1,
    InstanceType='t2.micro',
    KeyName=KEY_NAME,
    UserData=myuserdata,
    TagSpecifications=[
        {
            'ResourceType': 'instance',
            'Tags': [
                {
                    'Key': 'Name',
                    'Value': INSTANCE_NAME,
                },
            ],
        },
    ],
)
vm = instance[0]  # create_instances() returns a list; MaxCount=1 gives one item

# Wait until the instance's state changes to 'running'. wait_until_running()
# is an action of the Instance resource, not of the low-level client.
vm.wait_until_running()

# Wait until the OS has booted, using the client-side waiter that polls the
# instance status checks.
ec2client.get_waiter('instance_status_ok').wait(InstanceIds=[vm.id])

# refresh the cached attributes now that the instance is up
vm.reload()
print("instance id: {0}".format(vm.id))

# aws ec2 describe-instances --filters Name=tag:Name,Values=myBotoEC2micro --query 'Reservations[*].Instances[*].InstanceId' --output text
# InstanceIds pins the lookup to the instance launched above; the tag filter
# alone could also match older or terminated instances with the same Name.
res = ec2client.describe_instances(
    InstanceIds=[vm.id],
    Filters=[
        {
            'Name': 'tag:Name',
            'Values': [INSTANCE_NAME],
        },
    ],
)
instanceID = res['Reservations'][0]['Instances'][0]['InstanceId']
print("again instance id is: {0}".format(instanceID))

# boto3 API can only check if the volume is attached to an instance, but not if it is mounted.
# Our volume that will hold /data was already attached in the ec2.create_instances() step;
# we use EC2 User Data to format the /dev/xvdb volume with xfs filesystem & mount it at /data.
#
# The catch is that EC2 User Data can only be used once, during creation/launch
# of a new instance/vm, so you need to feed it to ec2.create_instances().

# get domain name of newly launched instance
publicDNS = res['Reservations'][0]['Instances'][0]['PublicDnsName']
print("Login to new instance using:\n"
      "ssh -i {0} ec2-user@{1}".format(KEY_FILE, publicDNS))

# To list the id of every instance in the region:
#     for i in ec2.instances.all():
#         print(i.id)
# ---- difference_between_2dates.py ----
"""Calculate the number of days between two calendar dates."""
import calendar
import sys
from datetime import date


def NumDaysBetween(y1, m1, d1, y2, m2, d2):
    """Return the number of days from date 1 to date 2.

    Args:
        y1, m1, d1: year, month (1-12) and day of the start date.
        y2, m2, d2: year, month (1-12) and day of the end date.

    Returns:
        The signed day count ``date2 - date1``: positive when date 2 is
        later, negative when it is earlier, 0 when both dates are equal.
        Real month lengths and leap years are taken into account.

    Raises:
        ValueError: if either triple is not a valid calendar date.
    """
    # date subtraction yields a timedelta; .days is the exact calendar
    # distance, so no manual month/year walking is needed.
    return (date(y2, m2, d2) - date(y1, m1, d1)).days


def NumDaysInMonth(y, m):
    """Return the number of days in month ``m`` (1-12) of year ``y``.

    Raises:
        ValueError: if ``m`` is outside 1-12.
    """
    # monthrange() returns (weekday of the 1st, number of days in the month)
    return calendar.monthrange(y, m)[1]


if __name__ == "__main__":
    print(NumDaysBetween(2010, 5, 1, 2011, 5, 1))  # 365
    print(NumDaysBetween(2010, 5, 1, 2011, 8, 5))  # 461
# ---- multi-dim-dict.py ----
"""Sum up the log size (last csv field) per exchange per day.

A two-level dict ``{date: {exchange: total_bytes}}`` is updated while the
csv lines are read, so the csv-like structure never has to be kept in memory.
"""
import re
import sys

# given following csv input, sum up the log size (last field) per each exchange per day
csv_input = """date,process,host,log,bytes
20140206,cme_trader_2,cme0001,0345-cme_trader_2.log.gz,15400000
20140206,phlx_trader_1,phlx0001,0651-phlx_trader_1.log.gz,14100000
20140206,phlx_trader_2,phlx0001,0645-phlx_trader_2.log.gz,13800000
20140207,cme_trader_2,cme0001,0345-cme_trader_2.log.gz,15800000
20140207,cme_trader_3,cme0001,0345-cme_trader_3.log.gz,14200000
20140207,phlx_trader_1,phlx0001,0651-phlx_trader_1.log.gz,24100000"""

# "<exchange>_trader_<number>"; \d+ so that trader numbers above 9 also match
_PROCESS_RE = re.compile(r"^(.*)_trader_\d+$")


def sum_log_sizes(csv_text):
    """Aggregate log sizes per date and exchange.

    Args:
        csv_text: csv text with the columns date, process, host, log, bytes.
            The exchange name is the part of the process field that precedes
            ``_trader_<number>``.

    Returns:
        A ``(header, exchange_logs)`` tuple. ``header`` is the first line
        when it is not a data row (``''`` if there is no such line);
        ``exchange_logs`` maps ``date -> exchange -> summed bytes``.

    Raises:
        ValueError: if the bytes field of a data row is not an integer.
    """
    header = ''
    exchange_logs = {}
    for line_no, line in enumerate(csv_text.splitlines(), start=1):
        if not line.strip():
            continue  # tolerate blank lines
        fields = line.split(',')
        match = _PROCESS_RE.search(fields[1]) if len(fields) >= 5 else None
        if match is None:
            if not header and not exchange_logs:
                # a non-data line before any data row is the column header
                header = line
            else:
                # report instead of silently overwriting the header with it
                print("skipping malformed line {0}: {1!r}".format(line_no, line),
                      file=sys.stderr)
            continue

        log_date = fields[0]
        log_exch = match.group(1)
        log_size = int(fields[4])

        per_exchange = exchange_logs.setdefault(log_date, {})
        # initial assignment, or increment of the already present log size
        per_exchange[log_exch] = per_exchange.get(log_exch, 0) + log_size
    return header, exchange_logs


def main():
    """Print the header followed by one 'date,exchange,bytes' line per total."""
    header, exchange_logs = sum_log_sizes(csv_input)
    print(header)
    for log_date, per_exchange in sorted(exchange_logs.items()):
        for log_exch, total in sorted(per_exchange.items()):
            print("{0},{1},{2}".format(log_date, log_exch, total))


if __name__ == "__main__":
    main()

# ---- scratch.py ----
print("hello, {0}".format('world!'))