Untitled

From Bulky Pheasant, 11 Months ago, written in Plain Text, viewed 217 times.
URL http://codebin.org/view/5746e7a0 Embed
Download Paste or View Raw
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import sys
  4. import getopt
  5. from datetime import datetime
  6. import pandas as pd
  7. from sqlalchemy import create_engine
  8.  
  9. if __name__ == '__main__':
  10.  
  11.     # задаём входные параметры
  12.     unixOptions = 'sdt:edt:'
  13.     gnuOptions = ['start_dt=', 'end_dt=']
  14.  
  15.     fullCmdArguments = sys.argv
  16.     argumentList = fullCmdArguments[1:]  # excluding script name
  17.  
  18.     try:
  19.         arguments, values = getopt.getopt(
  20.             argumentList, unixOptions, gnuOptions
  21.         )
  22.     except getopt.error as err:
  23.         # output error, and return with an error code
  24.         print(str(err))
  25.         sys.exit(2)
  26.  
  27.     start_dt = '1981-01-01'
  28.     end_dt = '1998-01-01'
  29.     for currentArgument, currentValue in arguments:
  30.         if currentArgument in ('-sdt', '--start_dt'):
  31.             start_dt = currentValue
  32.         elif currentArgument in ('-edt', '--end_dt'):
  33.             end_dt = currentValue
  34.  
  35.     db_config = {
  36.         'user': 'my_user',
  37.         'pwd': 'my_user_password',
  38.         'host': 'localhost',
  39.         'port': 5432,
  40.         'db': 'games',
  41.     }
  42.     connection_string = 'postgresql://{}:{}@{}:{}/{}'.format(
  43.         db_config['user'],
  44.         db_config['pwd'],
  45.         db_config['host'],
  46.         db_config['port'],
  47.         db_config['db'],
  48.     )
  49.  
  50.     engine = create_engine(connection_string)
  51.  
  52.     query = ''' SELECT *
  53.                 FROM data_raw
  54.                 WHERE year_of_release::TIMESTAMP BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
  55.             '''.format(
  56.         start_dt, end_dt
  57.     )
  58.  
  59.     data_raw = pd.io.sql.read_sql(query, con=engine, index_col='game_id')
  60.  
  61.     columns_str = ['name', 'platform', 'genre', 'rating']
  62.     columns_numeric = [
  63.         'na_players',
  64.         'eu_players',
  65.         'jp_players',
  66.         'other_players',
  67.         'critic_score',
  68.         'user_score',
  69.     ]
  70.     columns_datetime = ['year_of_release']
  71.  
  72.     for column in columns_str:
  73.         data_raw[column] = data_raw[column].astype(str)
  74.     for column in columns_numeric:
  75.         data_raw[column] = pd.to_numeric(data_raw[column], errors='coerce')
  76.     for column in columns_datetime:
  77.         data_raw[column] = pd.to_datetime(data_raw[column])
  78.     data_raw['total_copies_sold'] = data_raw[
  79.         ['na_players', 'eu_players', 'jp_players', 'other_players']
  80.     ].sum(axis=1)
  81.  
  82.     agg_games_year_genre_platform = data_raw.groupby(
  83.         ['year_of_release', 'genre', 'platform']
  84.     ).agg({'name': 'count', 'total_copies_sold': 'sum'})
  85.     agg_games_year_score = data_raw.groupby(
  86.         ['year_of_release', 'genre', 'platform']
  87.     ).agg({'critic_score': 'mean', 'user_score': 'mean'})
  88.  
  89.     agg_games_year_genre_platform = agg_games_year_genre_platform.rename(
  90.         columns={'name': 'games'}
  91.     )  # напишите код
  92.     agg_games_year_score = agg_games_year_score.rename(
  93.         columns={'critic_score': 'avg_critic_score', 'user_score': 'avg_user_score'}
  94.     )  # напишите код
  95.  
  96.     agg_games_year_genre_platform = agg_games_year_genre_platform.fillna(
  97.         0
  98.     ).reset_index()
  99.     agg_games_year_score = agg_games_year_score.fillna(0).reset_index()
  100.  
  101.     tables = {
  102.         'agg_games_year_genre_platform': agg_games_year_genre_platform,  # ваш код
  103.         'agg_games_year_score': agg_games_year_score,
  104.     }  # ваш код
  105.  
  106.     for table_name, table_data in tables.items():
  107.  
  108.         query = '''
  109.                   DELETE FROM {} WHERE year_of_release BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
  110.                 '''.format(
  111.             table_name, start_dt, end_dt
  112.         )  # напишите код
  113.         engine.execute(query)
  114.  
  115.         table_data.to_sql(
  116.             name=table_name, con=engine, if_exists='append', index=False
  117.         )  # напишите код
  118.  
  119.     print('All done.')
  120.  

Reply to "Untitled"

Here you can reply to the paste above