Skip to content

Commit 74bb539

Browse files
shollyman authored and tswast committed
BigQuery: switch list_partitions helper to a direct metatable read (googleapis#5273)
* simplify list_partitions from legacy query to a metatable read
* add a length assertion to system test
* switch from in test to equality, strip unit test
* reintroduce a unit test for list_partitions, address linter responses
* address reviewer comments and linting
* fix str type in return
* clarify the return is a list of strings in the doc comment
1 parent 8634a9b commit 74bb539

3 files changed

Lines changed: 77 additions & 70 deletions

File tree

bigquery/google/cloud/bigquery/client.py

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1239,6 +1239,31 @@ def insert_rows_json(self, table, json_rows, row_ids=None,
12391239

12401240
return errors
12411241

1242+
def list_partitions(self, table, retry=DEFAULT_RETRY):
    """List the partitions in a table.

    The partition ids are read directly from the table's
    ``$__PARTITIONS_SUMMARY__`` meta-table instead of running a
    legacy SQL query job.

    Arguments:
        table (Union[google.cloud.bigquery.table.Table,
                     google.cloud.bigquery.table.TableReference]):
            The table or reference from which to get partition info
        retry (google.api_core.retry.Retry):
            (Optional) How to retry the RPCs. Applied to both the
            meta-table lookup and the row listing.

    Returns:
        List[str]:
            A list of the partition ids present in the partitioned table
    """
    meta_table_ref = TableReference(
        self.dataset(table.dataset_id, project=table.project),
        '%s$__PARTITIONS_SUMMARY__' % table.table_id)
    # Forward the caller's retry policy to the metadata fetch as well;
    # otherwise only the row listing would honor it.
    meta_table = self.get_table(meta_table_ref, retry=retry)

    # Restrict the read to the ``partition_id`` column of the meta-table.
    subset = [
        col for col in meta_table.schema if col.name == 'partition_id']
    return [
        row[0]
        for row in self.list_rows(
            meta_table, selected_fields=subset, retry=retry)
    ]
1266+
12421267
def list_rows(self, table, selected_fields=None, max_results=None,
12431268
page_token=None, start_index=None, page_size=None,
12441269
retry=DEFAULT_RETRY):
@@ -1326,29 +1351,6 @@ def list_rows(self, table, selected_fields=None, max_results=None,
13261351
extra_params=params)
13271352
return row_iterator
13281353

1329-
def list_partitions(self, table, retry=DEFAULT_RETRY):
    """List the partitions in a table.

    Runs a legacy SQL query against the table's
    ``$__PARTITIONS_SUMMARY__`` meta-table and collects the first
    column of every result row.

    :type table: One of:
                 :class:`~google.cloud.bigquery.table.Table`
                 :class:`~google.cloud.bigquery.table.TableReference`
    :param table: the table to list, or a reference to it.

    :type retry: :class:`google.api_core.retry.Retry`
    :param retry: (Optional) How to retry the RPC.

    :rtype: list
    :returns: a list of time partitions
    """
    legacy_config = job.QueryJobConfig()
    # The '$' meta-table decorator is only understood by legacy SQL.
    legacy_config.use_legacy_sql = True

    sql = 'SELECT partition_id from [%s:%s.%s$__PARTITIONS_SUMMARY__]' % (
        table.project, table.dataset_id, table.table_id)

    partitions = []
    for row in self.query(sql, job_config=legacy_config, retry=retry):
        partitions.append(row[0])
    return partitions
1351-
13521354

13531355
# pylint: disable=unused-argument
13541356
def _item_to_project(iterator, resource):

bigquery/tests/system.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,14 @@ def test_get_table_w_public_dataset(self):
280280
self.assertEqual(
281281
schema_names, ['word', 'word_count', 'corpus', 'corpus_date'])
282282

283+
def test_list_partitions(self):
    """Check list_partitions against a public partitioned sample table.

    Asserts that a known partition id is present and that the total
    number of partitions matches the expected count.
    """
    table_ref = DatasetReference(
        'bigquery-partition-samples',
        'samples').table('stackoverflow_comments')
    all_rows = Config.CLIENT.list_partitions(table_ref)
    self.assertIn('20150508', all_rows)
    # assertEqual, not the deprecated assertEquals alias (removed in
    # Python 3.12).
    self.assertEqual(2066, len(all_rows))
290+
283291
def test_list_tables(self):
284292
DATASET_ID = _make_dataset_id('list_tables')
285293
dataset = self.temp_dataset(DATASET_ID)

bigquery/tests/unit/test_client.py

Lines changed: 44 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -2879,6 +2879,50 @@ def test_insert_rows_json(self):
28792879
path='/%s' % PATH,
28802880
data=SENT)
28812881

2882+
def test_list_partitions(self):
    """list_partitions reads partition ids from the summary meta-table.

    The fake connection answers first with the meta-table resource and
    then with a single page of rows holding the partition ids.
    """
    from google.cloud.bigquery.table import Table

    rows = 3
    column_names = ('project_id', 'dataset_id', 'table_id', 'partition_id')
    meta_table_resource = {
        'tableReference': {
            'projectId': self.PROJECT,
            'datasetId': self.DS_ID,
            'tableId': '%s$__PARTITIONS_SUMMARY__' % self.TABLE_ID,
        },
        'schema': {
            'fields': [
                {'name': column, 'type': 'STRING', 'mode': 'NULLABLE'}
                for column in column_names
            ],
        },
        'etag': 'ETAG',
        'numRows': rows,
    }

    rows_resource = {
        'totalRows': str(rows),
        'rows': [
            {'f': [{'v': partition_id}]}
            for partition_id in ('20180101', '20180102', '20180103')
        ],
    }

    credentials = _make_credentials()
    fake_http = object()
    client = self._make_one(
        project=self.PROJECT, credentials=credentials, _http=fake_http)
    client._connection = _make_connection(
        meta_table_resource, rows_resource)
    partitioned_table = Table(self.TABLE_REF)

    partition_list = client.list_partitions(partitioned_table)

    self.assertEqual(len(partition_list), rows)
    self.assertIn('20180102', partition_list)
2925+
28822926
def test_list_rows(self):
28832927
import datetime
28842928
from google.cloud._helpers import UTC
@@ -3114,53 +3158,6 @@ def test_list_rows_errors(self):
31143158
with self.assertRaises(TypeError):
31153159
client.list_rows(1)
31163160

3117-
def test_list_partitions(self):
    """list_partitions returns the partition ids produced by the query job.

    The fake connection answers with the job resource, a results
    resource carrying a page token, and finally the first page of rows.
    """
    job_reference = {
        'projectId': self.PROJECT,
        'jobId': 'JOB_ID',
    }
    destination_table = {
        'projectId': self.PROJECT,
        'datasetId': 'DS_ID',
        'tableId': 'TABLE_ID',
    }
    job_resource = {
        'jobReference': job_reference,
        'configuration': {
            'query': {
                'query': 'q',
                'destinationTable': destination_table,
            },
        },
        'status': {
            'state': 'DONE',
        },
    }
    results_resource = {
        'jobReference': job_resource['jobReference'],
        'jobComplete': True,
        'schema': {
            'fields': [
                {'name': 'partition_id', 'type': 'INTEGER',
                 'mode': 'REQUIRED'},
            ]
        },
        'totalRows': '2',
        'pageToken': 'next-page',
    }

    # The first page is the results resource plus rows, minus the token.
    first_page = copy.deepcopy(results_resource)
    first_page['rows'] = [
        {'f': [{'v': partition_id}]}
        for partition_id in (20160804, 20160805)
    ]
    first_page.pop('pageToken')

    credentials = _make_credentials()
    fake_http = object()
    client = self._make_one(
        project=self.PROJECT, credentials=credentials, _http=fake_http)
    client._connection = _make_connection(
        job_resource, results_resource, first_page)

    listed = client.list_partitions(self.TABLE_REF)
    self.assertEqual(listed, [20160804, 20160805])
3163-
31643161

31653162
class Test_make_job_id(unittest.TestCase):
31663163
def _call_fut(self, job_id, prefix=None):

0 commit comments

Comments
 (0)