- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- import sys
- import getopt
- from datetime import datetime
- import pandas as pd
- from sqlalchemy import create_engine
- if __name__ == '__main__':
- # задаём входные параметры
- unixOptions = 'sdt:edt:'
- gnuOptions = ['start_dt=', 'end_dt=']
- fullCmdArguments = sys.argv
- argumentList = fullCmdArguments[1:] # excluding script name
- try:
- arguments, values = getopt.getopt(
- argumentList, unixOptions, gnuOptions
- )
- except getopt.error as err:
- # output error, and return with an error code
- print(str(err))
- sys.exit(2)
- start_dt = '1981-01-01'
- end_dt = '1998-01-01'
- for currentArgument, currentValue in arguments:
- if currentArgument in ('-sdt', '--start_dt'):
- start_dt = currentValue
- elif currentArgument in ('-edt', '--end_dt'):
- end_dt = currentValue
- db_config = {
- 'user': 'my_user',
- 'pwd': 'my_user_password',
- 'host': 'localhost',
- 'port': 5432,
- 'db': 'games',
- }
- connection_string = 'postgresql://{}:{}@{}:{}/{}'.format(
- db_config['user'],
- db_config['pwd'],
- db_config['host'],
- db_config['port'],
- db_config['db'],
- )
- engine = create_engine(connection_string)
- query = ''' SELECT *
- FROM data_raw
- WHERE year_of_release::TIMESTAMP BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
- '''.format(
- start_dt, end_dt
- )
- data_raw = pd.io.sql.read_sql(query, con=engine, index_col='game_id')
- columns_str = ['name', 'platform', 'genre', 'rating']
- columns_numeric = [
- 'na_players',
- 'eu_players',
- 'jp_players',
- 'other_players',
- 'critic_score',
- 'user_score',
- ]
- columns_datetime = ['year_of_release']
- for column in columns_str:
- data_raw[column] = data_raw[column].astype(str)
- for column in columns_numeric:
- data_raw[column] = pd.to_numeric(data_raw[column], errors='coerce')
- for column in columns_datetime:
- data_raw[column] = pd.to_datetime(data_raw[column])
- data_raw['total_copies_sold'] = data_raw[
- ['na_players', 'eu_players', 'jp_players', 'other_players']
- ].sum(axis=1)
- agg_games_year_genre_platform = data_raw.groupby(
- ['year_of_release', 'genre', 'platform']
- ).agg({'name': 'count', 'total_copies_sold': 'sum'})
- agg_games_year_score = data_raw.groupby(
- ['year_of_release', 'genre', 'platform']
- ).agg({'critic_score': 'mean', 'user_score': 'mean'})
- agg_games_year_genre_platform = agg_games_year_genre_platform.rename(
- columns={'name': 'name'}
- ) # напишите код
- agg_games_year_score = agg_games_year_score.rename(
- columns={'critic_score': 'mean_critic_score', 'user_score': 'mean_user_score'}
- ) # напишите код
- agg_games_year_genre_platform = agg_games_year_genre_platform.fillna(
- 0
- ).reset_index()
- agg_games_year_score = agg_games_year_score.fillna(0).reset_index()
- tables = {
- 'agg_games_year_genre_platform': agg_games_year_genre_platform, # ваш код
- 'agg_games_year_score': agg_games_year_score,
- } # ваш код
- for table_name, table_data in tables.items():
- query = '''
- DELETE FROM {} WHERE year_of_release BETWEEN '{}'::TIMESTAMP AND '{}'::TIMESTAMP
- '''.format(
- table_name, start_dt, end_dt
- ) # напишите код
- engine.execute(query)
- table_data.to_sql(
- name=table_name, con=engine, if_exists='append', index=False
- ) # напишите код
- print('All done.')