[go: up one dir, main page]

Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: Onboard COVID-19 Genome Sequence dataset #460

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Prev Previous commit
Next Next commit
update : violatn pipeline
  • Loading branch information
aurogoogle committed Aug 19, 2022
commit 36af9617a057e700f3211b6979a5fce67ce96f91
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
[
{
"name": "state_number",
"type": "integer",
"description": "This data element identifies the state in which the crash occurred. The codes are from the General Services Administration’s (GSA) publication of worldwide Geographic Location Codes (GLC). For more info on the codes, please look at <C1/V1/D1/PC1/P1/NM1 State Number> section in the pdf: https://crashstats.nhtsa.dot.gov/Api/Public/ViewPublication/812315",
"mode": "NULLABLE"
},
{
"name": "state_name",
"type": "string",
"description": "This data element is the name of the state in which the crash occurred, corresponding to the state_number code. The codes are from the General Services Administration’s (GSA) publication of worldwide Geographic Location Codes (GLC).",
"mode": "NULLABLE"
},
{
"name": "consecutive_number",
"type": "integer",
"description": "This data element is the unique case number assigned to each crash. It appears on each data file and is used to merge information from the data files together. Format xxxxxx: two characters for the state code followed by four characters for the case number.",
"mode": "NULLABLE"
},
{
"name": "vehicle_number",
"type": "integer",
"description": "This data element is the consecutive number assigned to each vehicle in the case. This data element appears on each vehicle level data file and is used in conjunction with the ST_CASE data element to merge information from vehicle level data files. Values 000-999: assigned number of the motor vehicle.",
"mode": "NULLABLE"
},
{
"name": "violations_charged",
"type": "string",
"description": "This data element identifies all violations charged to this driver. For more info on the codes, please look at <D21 Violations Charged> section in the pdf: https://crashstats.nhtsa.dot.gov/Api/Public/ViewPublication/812315",
"mode": "NULLABLE"
},
{
"name": "violations_charged_name",
"type": "string",
"description": "This data element gives the name (text label) of the violation charged to this driver, corresponding to the code in violations_charged. For more info on the codes, please look at <D21 Violations Charged> section in the pdf: https://crashstats.nhtsa.dot.gov/Api/Public/ViewPublication/812315",
"mode": "NULLABLE"
}

]
Original file line number Diff line number Diff line change
Expand Up @@ -8096,7 +8096,64 @@ dag:
request_ephemeral_storage: "10G"
request_cpu: "1"

# Task: run the CSV transform container in a Kubernetes pod for the
# violatn 2015-2020 data. Pipeline-specific values are templated from
# `var.json.nhtsa_traffic_fatalities.violatn_2015_2020.*`.
# NOTE(review): nesting was reconstructed from a flattened listing;
# `resources` is placed under `args` next to `env_vars` - confirm against
# the sibling tasks in this file.
- operator: "KubernetesPodOperator"
  description: "Run CSV transform within kubernetes pod for violatn pipelines"
  args:
    task_id: "violatn_2015_2020_transform_csv"
    startup_timeout_seconds: 600
    name: "violatn"
    namespace: "composer"
    service_account_name: "datasets"
    image_pull_policy: "Always"
    image: "{{ var.json.nhtsa_traffic_fatalities.container_registry.run_csv_transform_kub }}"
    env_vars:
      PIPELINE_NAME: "{{ var.json.nhtsa_traffic_fatalities.violatn_2015_2020.pipeline_name }}"
      SOURCE_URL: "{{ var.json.nhtsa_traffic_fatalities.violatn_2015_2020.source_url }}"
      CHUNKSIZE: "{{ var.json.nhtsa_traffic_fatalities.violatn_2015_2020.chunksize }}"
      SOURCE_ZIPFILE_EXTRACTED: "violatn_2015_2020.csv"
      SOURCE_FILE: "{{ var.json.nhtsa_traffic_fatalities.violatn_2015_2020.source_file }}"
      # Disabled in the original; kept for reference.
      # TARGET_FILE: "{{ var.json.nhtsa_traffic_fatalities.violatn_2015_2020.target_file }}"
      PROJECT_ID: "{{ var.value.gcp_project }}"
      DATASET_ID: "{{ var.json.nhtsa_traffic_fatalities.violatn_2015_2020.dataset_id }}"
      TABLE_ID: "{{ var.json.nhtsa_traffic_fatalities.violatn_2015_2020.destination_table }}"
      START_YEAR: "{{ var.json.nhtsa_traffic_fatalities.violatn_2015_2020.start_year }}"
      END_YEAR: "{{ var.json.nhtsa_traffic_fatalities.violatn_2015_2020.end_year }}"
      DROP_DEST_TABLE: "{{ var.json.nhtsa_traffic_fatalities.violatn_2015_2020.drop_dest_table }}"
      TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}"
      TARGET_GCS_PATH: "{{ var.json.nhtsa_traffic_fatalities.violatn_2015_2020.target_gcs_path }}"
      SCHEMA_PATH: "{{ var.json.nhtsa_traffic_fatalities.violatn_2015_2020.schema_path }}"
      # The three values below are JSON documents passed as strings
      # (folded block scalars, trailing newline stripped).
      # Column names, in the same order as the fields of the violatn schema.
      INPUT_CSV_HEADERS: >-
        [
          "state_number",
          "state_name",
          "consecutive_number",
          "vehicle_number",
          "violations_charged",
          "violations_charged_name"
        ]
      # Every column is declared as "str" here.
      INPUT_DTYPES: >-
        {
          "state_number": "str",
          "state_name": "str",
          "consecutive_number": "str",
          "vehicle_number": "str",
          "violations_charged": "str",
          "violations_charged_name": "str"
        }
      # Presumably source column header -> output column name; verify
      # against the transform script.
      RENAME_MAPPINGS_LIST: >-
        {
          "STATE": "state_number",
          "STATENAME": "state_name",
          "ST_CASE": "consecutive_number",
          "VEH_NO": "vehicle_number",
          "MVIOLATN": "violations_charged",
          "MVIOLATNNAME": "violations_charged_name"
        }
    resources:
      request_ephemeral_storage: "10G"
      request_cpu: "1"



# Task ordering: create_cluster runs first, then the bracketed transform
# tasks, then delete_cluster.
# NOTE(review): the two entries below are identical except that the second
# also lists violatn_2015_2020_transform_csv; the first looks like the
# pre-change line of a diff and is redundant with the second - confirm that
# only the second should remain.
graph_paths:
- "create_cluster >> [ accident_2015_transform_csv,accident_2016_2019_transform_csv,accident_2020_transform_csv,cevent_2015_2020_transform_csv,damage_2015_2020_transform_csv,distract_2015_2020_transform_csv,drimpair_2015_2020_transform_csv,factor_2015_2020_transform_csv,maneuver_2015_2020_transform_csv,nmcrash_2015_2020_transform_csv,nmimpair_2015_2020_transform_csv,parkwork_2015_transform_csv,parkwork_2016_2017_transform_csv,parkwork_2018_transform_csv,parkwork_2019_transform_csv,parkwork_2020_transform_csv,pbtype_transform_csv,person_2015_2017_transform_csv,person_2018_transform_csv,person_2019_transform_csv,person_2020_transform_csv,safetyeq_2015_2016_transform_csv,safetyeq_2017_2020_transform_csv,vehicle_2015_transform_csv,vehicle_2016_2017_transform_csv,vehicle_2018_2019_transform_csv,vehicle_2020_transform_csv,vevent_2015_2020_transform_csv,vindecode_2015_transform_csv ] >> delete_cluster"
- "create_cluster >> [ accident_2015_transform_csv,accident_2016_2019_transform_csv,accident_2020_transform_csv,cevent_2015_2020_transform_csv,damage_2015_2020_transform_csv,distract_2015_2020_transform_csv,drimpair_2015_2020_transform_csv,factor_2015_2020_transform_csv,maneuver_2015_2020_transform_csv,nmcrash_2015_2020_transform_csv,nmimpair_2015_2020_transform_csv,parkwork_2015_transform_csv,parkwork_2016_2017_transform_csv,parkwork_2018_transform_csv,parkwork_2019_transform_csv,parkwork_2020_transform_csv,pbtype_transform_csv,person_2015_2017_transform_csv,person_2018_transform_csv,person_2019_transform_csv,person_2020_transform_csv,safetyeq_2015_2016_transform_csv,safetyeq_2017_2020_transform_csv,vehicle_2015_transform_csv,vehicle_2016_2017_transform_csv,vehicle_2018_2019_transform_csv,vehicle_2020_transform_csv,vevent_2015_2020_transform_csv,vindecode_2015_transform_csv,violatn_2015_2020_transform_csv ] >> delete_cluster"