Untitled

From Scanty Ibis, 1 Year ago, written in Plain Text, viewed 166 times.
URL http://codebin.org/view/50681255 Embed
Download Paste or View Raw
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import sys
  4.  
  5. import getopt
  6. from datetime import datetime
  7.  
  8. import pandas as pd
  9.  
  10. from sqlalchemy import create_engine
  11.  
  12. if __name__ == '__main__':
  13.  
  14.     # задаём входные параметры
  15.     unixOptions = 'sdt:edt:'
  16.     gnuOptions = ['start_dt=', 'end_dt=']
  17.  
  18.     fullCmdArguments = sys.argv
  19.     argumentList = fullCmdArguments[1:]  # excluding script name
  20.  
  21.     try:
  22.         arguments, values = getopt.getopt(
  23.             argumentList, unixOptions, gnuOptions
  24.         )
  25.     except getopt.error as err:
  26.         # output error, and return with an error code
  27.         print(str(err))
  28.         sys.exit(2)
  29.  
  30.     start_dt = '1981-01-01'
  31.     end_dt = '1998-01-01'
  32.     for currentArgument, currentValue in arguments:
  33.         if currentArgument in ('-sdt', '--start_dt'):
  34.             start_dt = currentValue
  35.         elif currentArgument in ('-edt', '--end_dt'):
  36.             end_dt = currentValue
  37.  
  38.     db_config = {
  39.         'user': 'my_user',
  40.         'pwd': 'my_user_password',
  41.         'host': 'localhost',
  42.         'port': 5432,
  43.         'db': 'games',
  44.     }
  45.     connection_string = 'postgresql://{}:{}@{}:{}/{}'.format(
  46.         db_config['user'],
  47.         db_config['pwd'],
  48.         db_config['host'],
  49.         db_config['port'],
  50.         db_config['db'],
  51.     )
  52.  
  53.     engine = create_engine(connection_string)
  54.  
  55.     query = ''' SELECT *
  56.                 FROM data_raw
  57.                 WHERE year_of_release::TIMESTAMP BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
  58.             '''.format(
  59.         start_dt, end_dt
  60.     )
  61.  
  62.     data_raw = pd.io.sql.read_sql(query, con=engine, index_col='game_id')
  63.  
  64.     columns_str = ['name', 'platform', 'genre', 'rating']
  65.     columns_numeric = [
  66.         'na_players',
  67.         'eu_players',
  68.         'jp_players',
  69.         'other_players',
  70.         'critic_score',
  71.         'user_score',
  72.     ]
  73.     columns_datetime = ['year_of_release']
  74.  
  75.     for column in columns_str:
  76.         data_raw[column] = data_raw[column].astype(str)
  77.     for column in columns_numeric:
  78.         data_raw[column] = pd.to_numeric(data_raw[column], errors='coerce')
  79.     for column in columns_datetime:
  80.         data_raw[column] = pd.to_datetime(data_raw[column])
  81.     data_raw['total_copies_sold'] = data_raw[
  82.         ['na_players', 'eu_players', 'jp_players', 'other_players']
  83.     ].sum(axis=1)
  84.  
  85.     agg_games_year_genre_platform = data_raw.groupby(
  86.         ['year_of_release', 'genre', 'platform']
  87.     ).agg({'name': 'count', 'total_copies_sold': 'sum'})
  88.     agg_games_year_score = data_raw.groupby(
  89.         ['year_of_release', 'genre', 'platform']
  90.     ).agg({'critic_score': 'mean', 'user_score': 'mean'})
  91.  
  92.     agg_games_year_genre_platform = agg_games_year_genre_platform.rename(
  93.         columns={'name': '...'}
  94.     )  # напишите код
  95.     agg_games_year_score = agg_games_year_score.rename(
  96.         columns={'critic_score': '...', 'user_score': '...'}
  97.     )  # напишите код
  98.  
  99.     agg_games_year_genre_platform = agg_games_year_genre_platform.fillna(
  100.         0
  101.     ).reset_index()
  102.     agg_games_year_score = agg_games_year_score.fillna(0).reset_index()
  103.  
  104.     tables = {
  105.         'agg_games_year_genre_platform': ...,  # ваш код
  106.         'agg_games_year_score': ...,
  107.     }  # ваш код
  108.  
  109.     for table_name, table_data in tables.items():
  110.  
  111.         query = '''
  112.                   DELETE FROM {} WHERE year_of_release BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
  113.                 '''.format(
  114.             ..., start_dt, end_dt
  115.         )  # напишите код
  116.         engine.execute(query)
  117.  
  118.         ....to_sql(
  119.             name=..., con=engine, if_exists='append', index=False
  120.         )  # напишите код
  121.  
  122.     print('All done.')

Reply to "Untitled"

Here you can reply to the paste above