[go: up one dir, main page]

Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: Onboard COVID-19 Genome Sequence dataset #460

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Prev Previous commit
Next Next commit
update : vevent pipeline
  • Loading branch information
aurogoogle committed Aug 18, 2022
commit ce28547f37e0fc3419120a856742cbba23b10dff
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
[
{
"name": "state_number",
"type": "integer",
"description": "This data element identifies the state in which the crash occurred. The codes are from the General Services Administration’s (GSA) publication of worldwide Geographic Location Codes (GLC). For more info on the codes, please look at <C1/V1/D1/PC1/P1/NM1 State Number> section in the pdf: https://crashstats.nhtsa.dot.gov/Api/Public/ViewPublication/812315",
"mode": "NULLABLE"
},
{
"name": "state_name",
"type": "string",
"description": "This data element identifies the state in which the crash occurred. The codes are from the General Services Administration’s (GSA) publication of worldwide Geographic Location Codes (GLC).",
"mode": "NULLABLE"
},
{
"name": "consecutive_number",
"type": "integer",
"description": "This data element is the unique case number assigned to each crash. It appears on each data file and is used to merge information from the data files together. Format: xxxxxx (two characters for the state code followed by four characters for the case number).",
"mode": "NULLABLE"
},
{
"name": "event_number",
"type": "integer",
"description": "This data element is the consecutive number assigned to each harmful and nonharmful event in a crash, in chronological order. 001-999 Event Number",
"mode": "NULLABLE"
},
{
"name": "vehicle_number",
"type": "integer",
"description": "This data element is the consecutive number assigned to each vehicle in the case. This data element appears on each vehicle level data file and is used in conjunction with the ST_CASE (consecutive_number) data element to merge information from vehicle level data files. Values: 000-999 (assigned number of the motor vehicle).",
"mode": "NULLABLE"
},
{
"name": "vehicle_event_number",
"type": "string",
"description": "This data element is the consecutive number assigned to each harmful and nonharmful event for this vehicle, in chronological order. 001-999 Vehicle Event Number",
"mode": "NULLABLE"
},
{
"name": "vehicle_number_this_vehicle",
"type": "integer",
"description": "This data element identifies the “Vehicle Number” (VEH_NO) of this in-transport motor vehicle described in this event. 1-999 Vehicle Number",
"mode": "NULLABLE"
},
{
"name": "area_of_impact_this_vehicle",
"type": "integer",
"description": "This data element identifies the impact point, if any, on this in-transport motor vehicle that produced property damage or personal injury in this event. For more info on the codes, please look at <C18 Area of Impact (This Vehicle)> section in the pdf: https://crashstats.nhtsa.dot.gov/Api/Public/ViewPublication/812315",
"mode": "NULLABLE"
},
{
"name": "area_of_impact_this_vehicle_name",
"type": "string",
"description": "This data element identifies the impact point, if any, on this in-transport motor vehicle that produced property damage or personal injury in this event. For more info on the codes, please look at <C18 Area of Impact (This Vehicle)> section in the pdf: https://crashstats.nhtsa.dot.gov/Api/Public/ViewPublication/812315",
"mode": "NULLABLE"
},
{
"name": "sequence_of_events",
"type": "integer",
"description": "This data element describes this event. A motor vehicle traffic crash is a series of events resulting from an unstabilized situation. This series of harmful and non-harmful events is recorded in chronological order based on the PAR narrative and diagram. For more info on the codes, please look at <V31 Sequence of Events> section in the pdf: https://crashstats.nhtsa.dot.gov/Api/Public/ViewPublication/812315",
"mode": "NULLABLE"
},
{
"name": "sequence_of_events_name",
"type": "string",
"description": "This data element describes this event. A motor vehicle traffic crash is a series of events resulting from an unstabilized situation. This series of harmful and non-harmful events is recorded in chronological order based on the PAR narrative and diagram. For more info on the codes, please look at <V31 Sequence of Events> section in the pdf: https://crashstats.nhtsa.dot.gov/Api/Public/ViewPublication/812315",
"mode": "NULLABLE"
},
{
"name": "vehicle_number_other_vehicle",
"type": "integer",
"description": "This data element identifies the “Vehicle Number” (VEH_NO) of the other motor vehicle, if any, in this event. Codes: 1-999 = Vehicle Number; 5555 = Non-Harmful Event; 9999 = Not a Motor Vehicle.",
"mode": "NULLABLE"
},
{
"name": "vehicle_number_other_vehicle_name",
"type": "string",
"description": "This data element identifies the “Vehicle Number” (VEH_NO) of the other motor vehicle, if any, in this event. Codes: 1-999 = Vehicle Number; 5555 = Non-Harmful Event; 9999 = Not a Motor Vehicle.",
"mode": "NULLABLE"
},
{
"name": "area_of_impact_other_vehicle",
"type": "integer",
"description": "This data element identifies the impact point on the other motor vehicle, if any, in this event. For more info on the codes, please look at <C18 Area of Impact (Other Vehicle)> section in the pdf: https://crashstats.nhtsa.dot.gov/Api/Public/ViewPublication/812315",
"mode": "NULLABLE"
},
{
"name": "area_of_impact_other_vehicle_name",
"type": "string",
"description": "This data element identifies the impact point on the other motor vehicle, if any, in this event. For more info on the codes, please look at <C18 Area of Impact (Other Vehicle)> section in the pdf: https://crashstats.nhtsa.dot.gov/Api/Public/ViewPublication/812315",
"mode": "NULLABLE"
}

]
Original file line number Diff line number Diff line change
Expand Up @@ -4223,7 +4223,6 @@ dag:
request_ephemeral_storage: "10G"
request_cpu: "1"


- operator: "KubernetesPodOperator"
description: "Run CSV transform within kubernetes pod for person pipelines"
args:
Expand Down Expand Up @@ -6390,7 +6389,6 @@ dag:
request_ephemeral_storage: "10G"
request_cpu: "1"


- operator: "KubernetesPodOperator"
description: "Run CSV transform within kubernetes pod for vehicle pipelines"
args:
Expand Down Expand Up @@ -7663,7 +7661,91 @@ dag:
request_ephemeral_storage: "10G"
request_cpu: "1"

# Task: runs the CSV transform for the vevent pipeline (2015-2020) inside a
# Kubernetes pod, using the same operator type as the other *_transform_csv tasks.
- operator: "KubernetesPodOperator"
description: "Run CSV transform within kubernetes pod for vevent pipelines"
args:
task_id: "vevent_2015_2020_transform_csv"
startup_timeout_seconds: 600
name: "vevent"
namespace: "composer"
service_account_name: "datasets"
image_pull_policy: "Always"
# The image reference is read from the nhtsa_traffic_fatalities templated variable.
image: "{{ var.json.nhtsa_traffic_fatalities.container_registry.run_csv_transform_kub }}"
# Environment handed to the container. Most values are looked up under
# var.json.nhtsa_traffic_fatalities.vevent_2015_2020; PROJECT_ID and
# TARGET_GCS_BUCKET come from top-level variables (var.value.*).
env_vars:
PIPELINE_NAME: "{{ var.json.nhtsa_traffic_fatalities.vevent_2015_2020.pipeline_name }}"
SOURCE_URL: "{{ var.json.nhtsa_traffic_fatalities.vevent_2015_2020.source_url }}"
CHUNKSIZE: "{{ var.json.nhtsa_traffic_fatalities.vevent_2015_2020.chunksize }}"
SOURCE_ZIPFILE_EXTRACTED: "vevent_2015_2020.csv"
SOURCE_FILE: "{{ var.json.nhtsa_traffic_fatalities.vevent_2015_2020.source_file }}"
# NOTE(review): TARGET_FILE is disabled below; confirm the transform does not require it.
# TARGET_FILE: "{{ var.json.nhtsa_traffic_fatalities.vevent_2015_2020.target_file }}"
PROJECT_ID: "{{ var.value.gcp_project }}"
DATASET_ID: "{{ var.json.nhtsa_traffic_fatalities.vevent_2015_2020.dataset_id }}"
TABLE_ID: "{{ var.json.nhtsa_traffic_fatalities.vevent_2015_2020.destination_table }}"
START_YEAR: "{{ var.json.nhtsa_traffic_fatalities.vevent_2015_2020.start_year }}"
END_YEAR: "{{ var.json.nhtsa_traffic_fatalities.vevent_2015_2020.end_year }}"
DROP_DEST_TABLE: "{{ var.json.nhtsa_traffic_fatalities.vevent_2015_2020.drop_dest_table }}"
TARGET_GCS_BUCKET: "{{ var.value.composer_bucket }}"
TARGET_GCS_PATH: "{{ var.json.nhtsa_traffic_fatalities.vevent_2015_2020.target_gcs_path }}"
SCHEMA_PATH: "{{ var.json.nhtsa_traffic_fatalities.vevent_2015_2020.schema_path }}"
# The next three values are JSON documents passed as folded strings and cover the
# same 15 columns: INPUT_CSV_HEADERS lists the column names, INPUT_DTYPES declares
# every column as "str", and RENAME_MAPPINGS_LIST (a JSON object despite its name)
# maps the upper-case source headers such as ST_CASE to those column names.
# Do not add comments inside the folded scalars; they would become part of the value.
INPUT_CSV_HEADERS: >-
[
"state_number",
"state_name",
"consecutive_number",
"event_number",
"vehicle_number",
"vehicle_event_number",
"vehicle_number_this_vehicle",
"area_of_impact_this_vehicle",
"area_of_impact_this_vehicle_name",
"sequence_of_events",
"sequence_of_events_name",
"vehicle_number_other_vehicle",
"vehicle_number_other_vehicle_name",
"area_of_impact_other_vehicle",
"area_of_impact_other_vehicle_name"
]
INPUT_DTYPES: >-
{
"state_number": "str",
"state_name": "str",
"consecutive_number": "str",
"event_number": "str",
"vehicle_number": "str",
"vehicle_event_number": "str",
"vehicle_number_this_vehicle": "str",
"area_of_impact_this_vehicle": "str",
"area_of_impact_this_vehicle_name": "str",
"sequence_of_events": "str",
"sequence_of_events_name": "str",
"vehicle_number_other_vehicle": "str",
"vehicle_number_other_vehicle_name": "str",
"area_of_impact_other_vehicle": "str",
"area_of_impact_other_vehicle_name": "str"
}
RENAME_MAPPINGS_LIST: >-
{
"STATE": "state_number",
"STATENAME": "state_name",
"ST_CASE": "consecutive_number",
"EVENTNUM": "event_number",
"VEH_NO": "vehicle_number",
"VEVENTNUM": "vehicle_event_number",
"VNUMBER1": "vehicle_number_this_vehicle",
"AOI1": "area_of_impact_this_vehicle",
"AOI1NAME": "area_of_impact_this_vehicle_name",
"SOE": "sequence_of_events",
"SOENAME": "sequence_of_events_name",
"VNUMBER2": "vehicle_number_other_vehicle",
"VNUMBER2NAME": "vehicle_number_other_vehicle_name",
"AOI2": "area_of_impact_other_vehicle",
"AOI2NAME": "area_of_impact_other_vehicle_name"
}
resources:
request_ephemeral_storage: "10G"
request_cpu: "1"



graph_paths:
- "create_cluster >> [ accident_2015_transform_csv,accident_2016_2019_transform_csv,accident_2020_transform_csv,cevent_2015_2020_transform_csv,damage_2015_2020_transform_csv,distract_2015_2020_transform_csv,drimpair_2015_2020_transform_csv,factor_2015_2020_transform_csv,maneuver_2015_2020_transform_csv,nmcrash_2015_2020_transform_csv,nmimpair_2015_2020_transform_csv,parkwork_2015_transform_csv,parkwork_2016_2017_transform_csv,parkwork_2018_transform_csv,parkwork_2019_transform_csv,parkwork_2020_transform_csv,pbtype_transform_csv,person_2015_2017_transform_csv,person_2018_transform_csv,person_2019_transform_csv,person_2020_transform_csv,safetyeq_2015_2016_transform_csv,safetyeq_2017_2020_transform_csv,vehicle_2015_transform_csv,vehicle_2016_2017_transform_csv,vehicle_2018_2019_transform_csv,vehicle_2020_transform_csv ] >> delete_cluster"
- "create_cluster >> [ accident_2015_transform_csv,accident_2016_2019_transform_csv,accident_2020_transform_csv,cevent_2015_2020_transform_csv,damage_2015_2020_transform_csv,distract_2015_2020_transform_csv,drimpair_2015_2020_transform_csv,factor_2015_2020_transform_csv,maneuver_2015_2020_transform_csv,nmcrash_2015_2020_transform_csv,nmimpair_2015_2020_transform_csv,parkwork_2015_transform_csv,parkwork_2016_2017_transform_csv,parkwork_2018_transform_csv,parkwork_2019_transform_csv,parkwork_2020_transform_csv,pbtype_transform_csv,person_2015_2017_transform_csv,person_2018_transform_csv,person_2019_transform_csv,person_2020_transform_csv,safetyeq_2015_2016_transform_csv,safetyeq_2017_2020_transform_csv,vehicle_2015_transform_csv,vehicle_2016_2017_transform_csv,vehicle_2018_2019_transform_csv,vehicle_2020_transform_csv,vevent_2015_2020_transform_csv ] >> delete_cluster"