
Commit

Merge pull request #12 from kevinsunny1996/task/fix_platform_start_year_column

Added type enforcement on the platform_year_start column
kevinsunny1996 authored Apr 30, 2024
2 parents ecca380 + 2a3fb9b commit f800973
Showing 2 changed files with 48 additions and 47 deletions.
94 changes: 47 additions & 47 deletions dags/rawg_api_extractor_dag.py
@@ -370,53 +370,53 @@ def get_game_id_related_data(api_key: str, game_ids_list: list, page_number: int


# Load contents from GCS onto BigQuery for that run
load_rawg_api_ratings_data_to_bq = GCSToBigQueryOperator(
task_id=f'load_ratings_to_bq',
bucket=rawg_landing_gcs_bucket, # Set your GCS bucket name to pick the file from.
source_objects=[f'ratings_{rawg_page_number}.parquet'], # Set the name of the Parquet file in GCS
source_format='PARQUET',
destination_project_dataset_table=f'{rawg_api_bq_dataset}.ratings', # Set your BigQuery table name to load the data to.
gcp_conn_id='gcp', # Set your GCP connection ID.
allow_quoted_newlines=True,
ignore_unknown_values=True,
schema_fields=schema_ratings,
create_disposition='CREATE_IF_NEEDED',
autodetect=False,
write_disposition='WRITE_APPEND', # If the table already exists, BigQuery appends the data to the table.
skip_leading_rows=1 # Skip the header row (CSV only; ignored for Parquet input).
)

load_rawg_api_games_data_to_bq = GCSToBigQueryOperator(
task_id=f'load_games_to_bq',
bucket=rawg_landing_gcs_bucket, # Set your GCS bucket name to pick the file from.
source_objects=[f'games_{rawg_page_number}.parquet'], # Set the name of the Parquet file in GCS
source_format='PARQUET',
allow_quoted_newlines=True,
ignore_unknown_values=True,
destination_project_dataset_table=f'{rawg_api_bq_dataset}.games', # Set your BigQuery table name to load the data to.
gcp_conn_id='gcp', # Set your GCP connection ID.
create_disposition='CREATE_IF_NEEDED',
schema_fields=schema_games,
autodetect=False,
write_disposition='WRITE_APPEND', # If the table already exists, BigQuery appends the data to the table.
skip_leading_rows=1 # Skip the header row (CSV only; ignored for Parquet input).
)

load_rawg_api_genres_data_to_bq = GCSToBigQueryOperator(
task_id=f'load_genres_to_bq',
bucket=rawg_landing_gcs_bucket, # Set your GCS bucket name to pick the file from.
source_objects=[f'genres_{rawg_page_number}.parquet'], # Set the name of the Parquet file in GCS
source_format='PARQUET',
allow_quoted_newlines=True,
ignore_unknown_values=True,
destination_project_dataset_table=f'{rawg_api_bq_dataset}.genres', # Set your BigQuery table name to load the data to.
gcp_conn_id='gcp', # Set your GCP connection ID.
create_disposition='CREATE_IF_NEEDED',
write_disposition='WRITE_APPEND', # If the table already exists, BigQuery appends the data to the table.
schema_fields=schema_genres,
autodetect=False,
skip_leading_rows=1 # Skip the header row (CSV only; ignored for Parquet input).
)
# load_rawg_api_ratings_data_to_bq = GCSToBigQueryOperator(
# task_id=f'load_ratings_to_bq',
# bucket=rawg_landing_gcs_bucket, # Set your GCS bucket name to pick the file from.
# source_objects=[f'ratings_{rawg_page_number}.parquet'], # Set the name of the Parquet file in GCS
# source_format='PARQUET',
# destination_project_dataset_table=f'{rawg_api_bq_dataset}.ratings', # Set your BigQuery table name to load the data to.
# gcp_conn_id='gcp', # Set your GCP connection ID.
# allow_quoted_newlines=True,
# ignore_unknown_values=True,
# schema_fields=schema_ratings,
# create_disposition='CREATE_IF_NEEDED',
# autodetect=False,
# write_disposition='WRITE_APPEND', # If the table already exists, BigQuery appends the data to the table.
# skip_leading_rows=1 # Skip the header row (CSV only; ignored for Parquet input).
# )

# load_rawg_api_games_data_to_bq = GCSToBigQueryOperator(
# task_id=f'load_games_to_bq',
# bucket=rawg_landing_gcs_bucket, # Set your GCS bucket name to pick the file from.
# source_objects=[f'games_{rawg_page_number}.parquet'], # Set the name of the Parquet file in GCS
# source_format='PARQUET',
# allow_quoted_newlines=True,
# ignore_unknown_values=True,
# destination_project_dataset_table=f'{rawg_api_bq_dataset}.games', # Set your BigQuery table name to load the data to.
# gcp_conn_id='gcp', # Set your GCP connection ID.
# create_disposition='CREATE_IF_NEEDED',
# schema_fields=schema_games,
# autodetect=False,
# write_disposition='WRITE_APPEND', # If the table already exists, BigQuery appends the data to the table.
# skip_leading_rows=1 # Skip the header row (CSV only; ignored for Parquet input).
# )

# load_rawg_api_genres_data_to_bq = GCSToBigQueryOperator(
# task_id=f'load_genres_to_bq',
# bucket=rawg_landing_gcs_bucket, # Set your GCS bucket name to pick the file from.
# source_objects=[f'genres_{rawg_page_number}.parquet'], # Set the name of the Parquet file in GCS
# source_format='PARQUET',
# allow_quoted_newlines=True,
# ignore_unknown_values=True,
# destination_project_dataset_table=f'{rawg_api_bq_dataset}.genres', # Set your BigQuery table name to load the data to.
# gcp_conn_id='gcp', # Set your GCP connection ID.
# create_disposition='CREATE_IF_NEEDED',
# write_disposition='WRITE_APPEND', # If the table already exists, BigQuery appends the data to the table.
# schema_fields=schema_genres,
# autodetect=False,
# skip_leading_rows=1 # Skip the header row (CSV only; ignored for Parquet input).
# )

load_rawg_api_platforms_data_to_bq = GCSToBigQueryOperator(
task_id=f'load_platforms_to_bq',
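The remainder of the platforms loader is collapsed in this view. Since the caller change below casts platform_year_start to str before the Parquet files are written, the loader presumably passes a schema_fields list that declares that column as STRING. The commit does not show that schema, so the sketch below is illustrative only; the schema_platforms name is a guess modeled on schema_ratings/schema_games/schema_genres, and only the column names are taken from rawg_api_caller.py:

# Hypothetical schema -- the real schema_platforms definition is not part of this diff.
schema_platforms = [
    {'name': 'platform_id', 'type': 'INTEGER', 'mode': 'NULLABLE'},
    {'name': 'platform_games_count', 'type': 'INTEGER', 'mode': 'NULLABLE'},
    {'name': 'game_id', 'type': 'INTEGER', 'mode': 'NULLABLE'},
    {'name': 'platform_year_start', 'type': 'STRING', 'mode': 'NULLABLE'},  # lines up with the astype(str) cast below
]

With autodetect=False and an explicit schema like this, a numeric Parquet column landing in a STRING field would fail the load on append, which would explain why the upstream cast matters.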
1 change: 1 addition & 0 deletions dags/utils/rawg_api_caller.py
@@ -182,6 +182,7 @@ def get_game_details_per_id(self, api_key: str, endpoint_ids: list, page_number:
platforms_df['platform_id'] = platforms_df['platform_id'].astype(int)
platforms_df['platform_games_count'] = platforms_df['platform_games_count'].astype(int)
platforms_df['game_id'] = platforms_df['game_id'].astype(int)
platforms_df['platform_year_start'] = platforms_df['platform_year_start'].astype(str)

# Enforcing datatypes for columns of publisher dataframe
publisher_df['id'] = publisher_df['id'].astype(int)
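A minimal standalone sketch of the problem the new cast guards against, using made-up values (only the column names are taken from the diff): when some platforms have no start year, pandas infers a float column for platform_year_start, so the written Parquet column can end up numeric and clash with a STRING column in BigQuery on append. Casting to str keeps the written type stable:

import pandas as pd

# Made-up rows: one platform reports a start year, the other does not.
platforms_df = pd.DataFrame({
    'platform_id': [1, 2],
    'platform_games_count': [10, 5],
    'game_id': [100, 100],
    'platform_year_start': [2013, None],
})

print(platforms_df['platform_year_start'].dtype)   # float64 -- the missing value forces a numeric dtype

platforms_df['platform_year_start'] = platforms_df['platform_year_start'].astype(str)
print(platforms_df['platform_year_start'].dtype)   # object -- written to Parquet as a string column

Note that casting a float-inferred column this way produces strings like '2013.0' and 'nan'; the change here appears aimed at the type mismatch on load rather than at the exact string formatting.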
