Skip to content

Commit ef3aaa9

Browse files
committed
Add SQLAlchemy ORM model for DimOrganization
1 parent 7130f7c commit ef3aaa9

File tree

5 files changed: +91 additions, -177 deletions (lines changed)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from sqlalchemy import Boolean, Column, DateTime, Integer, String
2+
from sqlalchemy.orm import declarative_base
3+
4+
Base = declarative_base()
5+
6+
7+
class DimOrganization(Base):
    """Declarative ORM mapping for the ``dim_organizations`` table.

    Each attribute is a plain ``Column``. ``key`` is the only column flagged
    as the primary key; no relationships, defaults, or constraints are
    declared. Column order matches the order of declaration below.
    """

    __tablename__ = "dim_organizations"

    # Record identity and descriptive fields.
    key = Column(String, primary_key=True)
    source_record_id = Column(String)
    name = Column(String)
    organization_type = Column(String)
    roles = Column(String)
    itp_id = Column(Integer)
    details = Column(String)
    website = Column(String)
    reporting_category = Column(String)
    hubspot_company_record_id = Column(String)

    # Status and manual-check fields.
    gtfs_static_status = Column(String)
    gtfs_realtime_status = Column(String)
    _deprecated__assessment_status = Column(Boolean)
    manual_check__contact_on_website = Column(String)
    alias = Column(String)
    is_public_entity = Column(Boolean)

    # NTD, RTPA and MPO reference fields.
    ntd_id = Column(String)
    ntd_agency_info_key = Column(String)
    ntd_id_2022 = Column(String)
    rtpa_key = Column(String)
    rtpa_name = Column(String)
    mpo_key = Column(String)
    mpo_name = Column(String)

    # Operating flags.
    public_currently_operating = Column(Boolean)
    public_currently_operating_fixed_route = Column(Boolean)

    # NOTE(review): presumably row-versioning metadata (current flag plus
    # validity window) -- confirm against the warehouse table definition.
    _is_current = Column(Boolean)
    _valid_from = Column(DateTime)
    _valid_to = Column(DateTime)

_shared_utils/shared_utils/schedule_rt_utils.py

Lines changed: 20 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,18 @@
88
import dask_geopandas as dg
99
import geopandas as gpd
1010
import pandas as pd
11-
from calitp_data_analysis.sql import query_sql
1211
from shared_utils.models.bridge_organizations_x_headquarters_county_geography import (
1312
BridgeOrganizationsXHeadquartersCountyGeography,
1413
)
1514
from shared_utils.models.dim_county_geography import DimCountyGeography
1615
from shared_utils.models.dim_gtfs_dataset import DimGtfsDataset
16+
from shared_utils.models.dim_organization import DimOrganization
1717
from shared_utils.models.dim_provider_gtfs_data import DimProviderGtfsData
1818
from shared_utils.models.fct_daily_feed_scheduled_service_summary import (
1919
FctDailyFeedScheduledServiceSummary,
2020
)
21-
from sqlalchemy import DateTime, String, create_engine, select
22-
from sqlalchemy.orm import InstrumentedAttribute, Session
21+
from sqlalchemy import String, create_engine, select
22+
from sqlalchemy.orm import Session
2323
from sqlalchemy.sql.expression import and_, cast, func
2424

2525
PACIFIC_TIMEZONE = "US/Pacific"
@@ -42,27 +42,6 @@ def _get_engine(max_bytes=None, project="cal-itp-data-infra", dataset=None):
4242
)
4343

4444

45-
def _query_sql_with_params(query_template: str, search_criteria: dict, as_df: bool) -> pd.DataFrame:
46-
# TODO: update query_sql to accept parameterized queries and use that instead
47-
search_conditions = ""
48-
search_params = {}
49-
50-
for k, v in (search_criteria or {}).items():
51-
search_conditions = f" AND {k} IN UNNEST(%({k}_values)s)"
52-
search_params[f"{k}_values"] = v
53-
54-
query = query_template.format(search_conditions=search_conditions)
55-
db_engine = _get_engine()
56-
57-
with db_engine.connect() as connection:
58-
if as_df:
59-
result = pd.read_sql(query, connection, params=search_params)
60-
else:
61-
result = connection.execute(query, params=search_params)
62-
63-
return result
64-
65-
6645
def localize_timestamp_col(df: dd.DataFrame, timestamp_col: Union[str, list]) -> dd.DataFrame:
6746
"""
6847
RT vehicle timestamps are given in UTC.
@@ -291,18 +270,29 @@ def filter_dim_organizations(
291270
"""
292271
project = kwargs.get("project", "cal-itp-data-infra")
293272
dataset = kwargs.get("dataset", "mart_transit_database")
294-
columns = []
273+
274+
db_engine = _get_engine(project=project, dataset=dataset)
275+
session = Session(db_engine)
276+
277+
dim_organization_columns = []
295278

296279
for column in keep_cols:
297280
if column == "source_record_id":
298-
columns.append("source_record_id AS organization_source_record_id")
281+
dim_organization_columns.append(DimOrganization.source_record_id.label("organization_source_record_id"))
299282
else:
300-
columns.append(column)
283+
dim_organization_columns.append(getattr(DimOrganization, column))
284+
285+
search_conditions = [DimOrganization._is_current == True]
286+
287+
for k, v in (custom_filtering or {}).items():
288+
search_conditions.append(getattr(DimOrganization, k).in_(v))
301289

302-
query_base = f"SELECT {','.join(columns)} FROM {project}.{dataset}.dim_organizations WHERE _is_current = true"
303-
query_template = query_base + "{search_conditions}"
290+
statement = select(*dim_organization_columns).where(and_(*search_conditions))
304291

305-
return _query_sql_with_params(query_template=query_template, search_criteria=custom_filtering, as_df=get_df)
292+
if get_df:
293+
return pd.read_sql(statement, session.bind)
294+
else:
295+
return session.scalars(statement)
306296

307297

308298
def sample_gtfs_dataset_key_to_organization_crosswalk(

_shared_utils/tests/shared_utils/cassettes/test_schedule_rt_utils/TestScheduleRtUtils.test_filter_dim_organizations.yaml

Lines changed: 10 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,11 @@
11
interactions:
22
- request:
3-
body: null
4-
headers:
5-
Accept:
6-
- '*/*'
7-
Accept-Encoding:
8-
- gzip
9-
Authorization:
10-
- FILTERED
11-
Connection:
12-
- keep-alive
13-
User-Agent:
14-
- sqlalchemy/1.4.46 gl-python/3.11.13 grpc/1.76.0 gax/2.27.0 gapic/3.38.0 gccl/3.38.0
15-
X-Goog-API-Client:
16-
- sqlalchemy/1.4.46 gl-python/3.11.13 grpc/1.76.0 gax/2.27.0 gapic/3.38.0 gccl/3.38.0
17-
x-goog-user-project:
18-
- cal-itp-data-infra-staging
19-
method: GET
20-
uri: https://bigquery.googleapis.com/bigquery/v2/projects/SELECT%20source_record_id%20AS%20organization_source_record_id%20FROM%20cal-itp-data-infra-staging/datasets/test_shared_utils/tables/dim_organizations%20WHERE%20_is_current%20=%20true?prettyPrint=false
21-
response:
22-
body:
23-
string: '{"error":{"code":400,"message":"Invalid resource name projects/SELECT
24-
source_record_id AS organization_source_record_id FROM cal-itp-data-infra-staging;
25-
Project id: SELECT source_record_id AS organization_source_record_id FROM
26-
cal-itp-data-infra-staging","errors":[{"message":"Invalid resource name projects/SELECT
27-
source_record_id AS organization_source_record_id FROM cal-itp-data-infra-staging;
28-
Project id: SELECT source_record_id AS organization_source_record_id FROM
29-
cal-itp-data-infra-staging","domain":"global","reason":"badRequest"}],"status":"INVALID_ARGUMENT"}}'
30-
headers:
31-
Alt-Svc:
32-
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
33-
Content-Type:
34-
- application/json; charset=UTF-8
35-
Date:
36-
- Fri, 31 Oct 2025 20:39:31 GMT
37-
Transfer-Encoding:
38-
- chunked
39-
status:
40-
code: 400
41-
message: Bad Request
42-
- request:
43-
body: '{"maximumBytesBilled": "5000000000", "queryParameters": [], "useLegacySql":
44-
false, "formatOptions": {"useInt64Timestamp": true}, "location": "us-west2",
45-
"query": "SELECT source_record_id AS organization_source_record_id FROM cal-itp-data-infra-staging.test_shared_utils.dim_organizations
46-
WHERE _is_current = true", "maxResults": 5000, "requestId": "4a09c2b2-7eaa-4e1a-8be1-9af72e489683"}'
3+
body: '{"maximumBytesBilled": "5000000000", "defaultDataset": {"projectId": "cal-itp-data-infra-staging",
4+
"datasetId": "test_shared_utils"}, "queryParameters": [], "useLegacySql": false,
5+
"formatOptions": {"useInt64Timestamp": true}, "location": "us-west2", "query":
6+
"SELECT `dim_organizations`.`source_record_id` AS `organization_source_record_id`
7+
\nFROM `dim_organizations` \nWHERE `dim_organizations`.`_is_current` = true",
8+
"maxResults": 5000, "requestId": "fdea5969-f3fd-4e48-a6c2-efc4ae7141a3"}'
479
headers:
4810
Accept:
4911
- '*/*'
@@ -54,7 +16,7 @@ interactions:
5416
Connection:
5517
- keep-alive
5618
Content-Length:
57-
- '387'
19+
- '491'
5820
Content-Type:
5921
- application/json
6022
User-Agent:
@@ -64,10 +26,10 @@ interactions:
6426
x-goog-user-project:
6527
- cal-itp-data-infra-staging
6628
method: POST
67-
uri: https://bigquery.googleapis.com/bigquery/v2/projects/cal-itp-data-infra/queries?prettyPrint=false
29+
uri: https://bigquery.googleapis.com/bigquery/v2/projects/cal-itp-data-infra-staging/queries?prettyPrint=false
6830
response:
6931
body:
70-
string: '{"kind":"bigquery#queryResponse","schema":{"fields":[{"name":"organization_source_record_id","type":"STRING","mode":"NULLABLE"}]},"jobReference":{"projectId":"cal-itp-data-infra","jobId":"job_-HGFXaLlE0Zi_w9ZoDGF5c2eQumk","location":"us-west2"},"totalRows":"3","rows":[{"f":[{"v":"reckGS8egMZryjbX7"}]},{"f":[{"v":"recyqZ1zbZMkeA7Vf"}]},{"f":[{"v":"recOT4QO6t6mRhUEu"}]}],"totalBytesProcessed":"80","jobComplete":true,"cacheHit":false,"queryId":"job_-HGFXaLlE0Zi_w9ZoDGF5c2eQumk","jobCreationReason":{"code":"REQUESTED"},"totalBytesBilled":"10485760","totalSlotMs":"28","location":"us-west2","creationTime":"1761777933940","startTime":"1761777934027","endTime":"1761777934175"}'
32+
string: '{"kind":"bigquery#queryResponse","schema":{"fields":[{"name":"organization_source_record_id","type":"STRING","mode":"NULLABLE"}]},"jobReference":{"projectId":"cal-itp-data-infra-staging","jobId":"job_RI7jjD6r5zHgRoMbU9az15QDP9Aw","location":"us-west2"},"totalRows":"3","rows":[{"f":[{"v":"reckGS8egMZryjbX7"}]},{"f":[{"v":"recyqZ1zbZMkeA7Vf"}]},{"f":[{"v":"recOT4QO6t6mRhUEu"}]}],"totalBytesProcessed":"60","jobComplete":true,"cacheHit":false,"queryId":"job_RI7jjD6r5zHgRoMbU9az15QDP9Aw","jobCreationReason":{"code":"REQUESTED"},"totalBytesBilled":"10485760","totalSlotMs":"19","location":"us-west2","creationTime":"1762221451925","startTime":"1762221451976","endTime":"1762221452093"}'
7133
headers:
7234
Alt-Svc:
7335
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
@@ -76,7 +38,7 @@ interactions:
7638
Content-Type:
7739
- application/json; charset=UTF-8
7840
Date:
79-
- Fri, 31 Oct 2025 20:39:32 GMT
41+
- Tue, 04 Nov 2025 01:57:32 GMT
8042
Server:
8143
- ESF
8244
Transfer-Encoding:

_shared_utils/tests/shared_utils/cassettes/test_schedule_rt_utils/TestScheduleRtUtils.test_filter_dim_organizations_additional_keep_cols.yaml

Lines changed: 11 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,11 @@
11
interactions:
22
- request:
3-
body: null
4-
headers:
5-
Accept:
6-
- '*/*'
7-
Accept-Encoding:
8-
- gzip
9-
Authorization:
10-
- FILTERED
11-
Connection:
12-
- keep-alive
13-
User-Agent:
14-
- sqlalchemy/1.4.46 gl-python/3.11.13 grpc/1.76.0 gax/2.27.0 gapic/3.38.0 gccl/3.38.0
15-
X-Goog-API-Client:
16-
- sqlalchemy/1.4.46 gl-python/3.11.13 grpc/1.76.0 gax/2.27.0 gapic/3.38.0 gccl/3.38.0
17-
x-goog-user-project:
18-
- cal-itp-data-infra-staging
19-
method: GET
20-
uri: https://bigquery.googleapis.com/bigquery/v2/projects/SELECT%20key,name,organization_type%20FROM%20cal-itp-data-infra-staging/datasets/test_shared_utils/tables/dim_organizations%20WHERE%20_is_current%20=%20true?prettyPrint=false
21-
response:
22-
body:
23-
string: '{"error":{"code":400,"message":"Invalid resource name projects/SELECT
24-
key,name,organization_type FROM cal-itp-data-infra-staging; Project id: SELECT
25-
key,name,organization_type FROM cal-itp-data-infra-staging","errors":[{"message":"Invalid
26-
resource name projects/SELECT key,name,organization_type FROM cal-itp-data-infra-staging;
27-
Project id: SELECT key,name,organization_type FROM cal-itp-data-infra-staging","domain":"global","reason":"badRequest"}],"status":"INVALID_ARGUMENT"}}'
28-
headers:
29-
Alt-Svc:
30-
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
31-
Content-Type:
32-
- application/json; charset=UTF-8
33-
Date:
34-
- Fri, 31 Oct 2025 20:39:32 GMT
35-
Transfer-Encoding:
36-
- chunked
37-
status:
38-
code: 400
39-
message: Bad Request
40-
- request:
41-
body: '{"maximumBytesBilled": "5000000000", "queryParameters": [], "useLegacySql":
42-
false, "formatOptions": {"useInt64Timestamp": true}, "location": "us-west2",
43-
"query": "SELECT key,name,organization_type FROM cal-itp-data-infra-staging.test_shared_utils.dim_organizations
44-
WHERE _is_current = true", "maxResults": 5000, "requestId": "88df987d-003b-48f3-ac6c-5c6cf621aa6c"}'
3+
body: '{"maximumBytesBilled": "5000000000", "defaultDataset": {"projectId": "cal-itp-data-infra-staging",
4+
"datasetId": "test_shared_utils"}, "queryParameters": [], "useLegacySql": false,
5+
"formatOptions": {"useInt64Timestamp": true}, "location": "us-west2", "query":
6+
"SELECT `dim_organizations`.`key`, `dim_organizations`.`name`, `dim_organizations`.`organization_type`
7+
\nFROM `dim_organizations` \nWHERE `dim_organizations`.`_is_current` = true",
8+
"maxResults": 5000, "requestId": "8a36dcec-7885-458c-9d74-694ae378ae54"}'
459
headers:
4610
Accept:
4711
- '*/*'
@@ -52,7 +16,7 @@ interactions:
5216
Connection:
5317
- keep-alive
5418
Content-Length:
55-
- '364'
19+
- '512'
5620
Content-Type:
5721
- application/json
5822
User-Agent:
@@ -62,13 +26,13 @@ interactions:
6226
x-goog-user-project:
6327
- cal-itp-data-infra-staging
6428
method: POST
65-
uri: https://bigquery.googleapis.com/bigquery/v2/projects/cal-itp-data-infra/queries?prettyPrint=false
29+
uri: https://bigquery.googleapis.com/bigquery/v2/projects/cal-itp-data-infra-staging/queries?prettyPrint=false
6630
response:
6731
body:
68-
string: '{"kind":"bigquery#queryResponse","schema":{"fields":[{"name":"key","type":"STRING","mode":"NULLABLE"},{"name":"name","type":"STRING","mode":"NULLABLE"},{"name":"organization_type","type":"STRING","mode":"NULLABLE"}]},"jobReference":{"projectId":"cal-itp-data-infra","jobId":"job_Y_3jttHfxwl1ibRDbn80MBehaNad","location":"us-west2"},"totalRows":"3","rows":[{"f":[{"v":"35448956533b3ff4f8c9cf4e7886c974"},{"v":"City
32+
string: '{"kind":"bigquery#queryResponse","schema":{"fields":[{"name":"key","type":"STRING","mode":"NULLABLE"},{"name":"name","type":"STRING","mode":"NULLABLE"},{"name":"organization_type","type":"STRING","mode":"NULLABLE"}]},"jobReference":{"projectId":"cal-itp-data-infra-staging","jobId":"job__yhodyOpJAjm7sfj52CyteNx3vCz","location":"us-west2"},"totalRows":"3","rows":[{"f":[{"v":"35448956533b3ff4f8c9cf4e7886c974"},{"v":"City
6933
of Mission Viejo"},{"v":"City/Town"}]},{"f":[{"v":"02a0e06b1ddb80e5695fc82fcc0c3ccc"},{"v":"City
7034
of Patterson"},{"v":"City/Town"}]},{"f":[{"v":"4cb90bb76f9cd9472a2df6dd9014b4fa"},{"v":"City
71-
of Chula Vista"},{"v":"City/Town"}]}],"totalBytesProcessed":"270","jobComplete":true,"cacheHit":false,"queryId":"job_Y_3jttHfxwl1ibRDbn80MBehaNad","jobCreationReason":{"code":"REQUESTED"},"totalBytesBilled":"10485760","totalSlotMs":"20","location":"us-west2","creationTime":"1761777934983","startTime":"1761777935236","endTime":"1761777935371"}'
35+
of Chula Vista"},{"v":"City/Town"}]}],"totalBytesProcessed":"201","jobComplete":true,"cacheHit":false,"queryId":"job__yhodyOpJAjm7sfj52CyteNx3vCz","jobCreationReason":{"code":"REQUESTED"},"totalBytesBilled":"10485760","totalSlotMs":"16","location":"us-west2","creationTime":"1762221452731","startTime":"1762221452806","endTime":"1762221452921"}'
7236
headers:
7337
Alt-Svc:
7438
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
@@ -77,7 +41,7 @@ interactions:
7741
Content-Type:
7842
- application/json; charset=UTF-8
7943
Date:
80-
- Fri, 31 Oct 2025 20:39:33 GMT
44+
- Tue, 04 Nov 2025 01:57:32 GMT
8145
Server:
8246
- ESF
8347
Transfer-Encoding:

0 commit comments

Comments
 (0)