From dddf880cc105656d55034cd5eee2c27d6a988e00 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Wed, 26 Mar 2025 01:53:32 +0000 Subject: [PATCH 1/3] feat!: change default ingress setting for `remote_function` to internal-only --- bigframes/functions/_function_client.py | 2 +- bigframes/functions/_function_session.py | 30 ++++++------------- bigframes/pandas/__init__.py | 6 ++-- bigframes/session/__init__.py | 10 +++---- .../large/functions/test_remote_function.py | 4 +-- 5 files changed, 20 insertions(+), 32 deletions(-) diff --git a/bigframes/functions/_function_client.py b/bigframes/functions/_function_client.py index 37b435eeec..4048e42503 100644 --- a/bigframes/functions/_function_client.py +++ b/bigframes/functions/_function_client.py @@ -324,7 +324,7 @@ def create_cloud_function( is_row_processor=False, vpc_connector=None, memory_mib=1024, - ingress_settings="all", + ingress_settings="internal-only", ): """Create a cloud function from the given user defined function.""" diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py index 1444457c90..a18168f673 100644 --- a/bigframes/functions/_function_session.py +++ b/bigframes/functions/_function_session.py @@ -262,9 +262,9 @@ def remote_function( cloud_function_max_instances: Optional[int] = None, cloud_function_vpc_connector: Optional[str] = None, cloud_function_memory_mib: Optional[int] = 1024, - cloud_function_ingress_settings: Optional[ - Literal["all", "internal-only", "internal-and-gclb"] - ] = None, + cloud_function_ingress_settings: Literal[ + "all", "internal-only", "internal-and-gclb" + ] = "internal-only", ): """Decorator to turn a user defined function into a BigQuery remote function. @@ -451,8 +451,9 @@ def remote_function( https://cloud.google.com/functions/docs/configuring/memory. cloud_function_ingress_settings (str, Optional): Ingress settings controls dictating what traffic can reach the - function. By default `all` will be used. It must be one of: - `all`, `internal-only`, `internal-and-gclb`. See for more details + function. Options are: `all`, `internal-only`, or `internal-and-gclb`. + If no setting is provided, `internal-only` will be used by default. + See for more details https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings. """ # Some defaults may be used from the session if not provided otherwise. @@ -519,24 +520,11 @@ def remote_function( ) if cloud_function_ingress_settings is None: - cloud_function_ingress_settings = "all" - msg = bfe.format_message( - "The `cloud_function_ingress_settings` are set to 'all' by default, " - "which will change to 'internal-only' for enhanced security in future version 2.0 onwards. " - "However, you will be able to explicitly pass cloud_function_ingress_settings='all' if you need. " - "See https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings for details." - ) - warnings.warn(msg, category=FutureWarning, stacklevel=2) - - if cloud_function_ingress_settings is None: - cloud_function_ingress_settings = "all" + cloud_function_ingress_settings = "internal-only" msg = bfe.format_message( - "The `cloud_function_ingress_settings` are set to 'all' by default, " - "which will change to 'internal-only' for enhanced security in future version 2.0 onwards. " - "However, you will be able to explicitly pass cloud_function_ingress_settings='all' if you need. " - "See https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings for details." + "The `cloud_function_ingress_settings` is being set to 'internal-only' by default." ) - warnings.warn(msg, category=FutureWarning, stacklevel=2) + warnings.warn(msg, category=UserWarning, stacklevel=2) bq_connection_manager = session.bqconnectionmanager diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 8ea7e6c320..2de6cc991c 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -80,9 +80,9 @@ def remote_function( cloud_function_max_instances: Optional[int] = None, cloud_function_vpc_connector: Optional[str] = None, cloud_function_memory_mib: Optional[int] = 1024, - cloud_function_ingress_settings: Optional[ - Literal["all", "internal-only", "internal-and-gclb"] - ] = None, + cloud_function_ingress_settings: Literal[ + "all", "internal-only", "internal-and-gclb" + ] = "internal-only", ): return global_session.with_default_session( bigframes.session.Session.remote_function, diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index dfee41c90b..b27ee7df93 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1217,9 +1217,9 @@ def remote_function( cloud_function_max_instances: Optional[int] = None, cloud_function_vpc_connector: Optional[str] = None, cloud_function_memory_mib: Optional[int] = 1024, - cloud_function_ingress_settings: Optional[ - Literal["all", "internal-only", "internal-and-gclb"] - ] = None, + cloud_function_ingress_settings: Literal[ + "all", "internal-only", "internal-and-gclb" + ] = "internal-only", ): """Decorator to turn a user defined function into a BigQuery remote function. Check out the code samples at: https://cloud.google.com/bigquery/docs/remote-functions#bigquery-dataframes. @@ -1392,8 +1392,8 @@ def remote_function( cloud_function_ingress_settings (str, Optional): Ingress settings controls dictating what traffic can reach the function. Options are: `all`, `internal-only`, or `internal-and-gclb`. - If no setting is provided, `all` will be used by default and a warning - will be issued. See for more details + If no setting is provided, `internal-only` will be used by default. + See for more details https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings. Returns: collections.abc.Callable: diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py index 1e5e7ede26..643ce3ef6c 100644 --- a/tests/system/large/functions/test_remote_function.py +++ b/tests/system/large/functions/test_remote_function.py @@ -2356,13 +2356,13 @@ def generate_stats(row: pandas.Series) -> list[int]: [ pytest.param( {}, - functions_v2.ServiceConfig.IngressSettings.ALLOW_ALL, + functions_v2.ServiceConfig.IngressSettings.ALLOW_INTERNAL_ONLY, True, id="no-set", ), pytest.param( {"cloud_function_ingress_settings": None}, - functions_v2.ServiceConfig.IngressSettings.ALLOW_ALL, + functions_v2.ServiceConfig.IngressSettings.ALLOW_INTERNAL_ONLY, True, id="set-none", ), From 19981e66c054ac99160f69f1cfb50d8e22d5e7d6 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Wed, 26 Mar 2025 07:44:59 +0000 Subject: [PATCH 2/3] fix ingress setting test --- tests/system/large/functions/test_remote_function.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py index 643ce3ef6c..1cf19934d2 100644 --- a/tests/system/large/functions/test_remote_function.py +++ b/tests/system/large/functions/test_remote_function.py @@ -2357,7 +2357,7 @@ def generate_stats(row: pandas.Series) -> list[int]: pytest.param( {}, functions_v2.ServiceConfig.IngressSettings.ALLOW_INTERNAL_ONLY, - True, + False, id="no-set", ), pytest.param( @@ -2408,11 +2408,8 @@ def square(x: int) -> int: default_ingress_setting_warnings = [ warn for warn in record - if isinstance(warn.message, FutureWarning) - and "`cloud_function_ingress_settings` are set to 'all' by default" - in warn.message.args[0] - and "will change to 'internal-only' for enhanced security in future" - in warn.message.args[0] + if isinstance(warn.message, UserWarning) + and "The `cloud_function_ingress_settings` is being set to 'internal-only' by default." ] assert len(default_ingress_setting_warnings) == ( 1 if expect_default_ingress_setting_warning else 0 From 86f7aa10b840e4a23a950734a2551b584518f1f6 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Wed, 26 Mar 2025 19:27:43 +0000 Subject: [PATCH 3/3] use "all" ingress setting for some remote function tests --- .../large/functions/test_remote_function.py | 42 ++++++++++++------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py index 1cf19934d2..2d40625e3b 100644 --- a/tests/system/large/functions/test_remote_function.py +++ b/tests/system/large/functions/test_remote_function.py @@ -1290,14 +1290,27 @@ def test_remote_function_via_session_custom_sa(scalars_dfs): try: + # TODO(shobs): Figure out why the default ingress setting + # (internal-only) does not work here @rf_session.remote_function( - [int], int, reuse=False, cloud_function_service_account=gcf_service_account + [int], + int, + reuse=False, + cloud_function_service_account=gcf_service_account, + cloud_function_ingress_settings="all", ) def square_num(x): if x is None: return x return x * x + # assert that the GCF is created with the intended SA + gcf = rf_session.cloudfunctionsclient.get_function( + name=square_num.bigframes_cloud_function + ) + assert gcf.service_config.service_account_email == gcf_service_account + + # assert that the function works as expected on data scalars_df, scalars_pandas_df = scalars_dfs bf_int64_col = scalars_df["int64_col"] @@ -1309,12 +1322,6 @@ def square_num(x): pd_result = pd_int64_col.to_frame().assign(result=pd_result_col) assert_pandas_df_equal(bf_result, pd_result, check_dtype=False) - - # Assert that the GCF is created with the intended SA - gcf = rf_session.cloudfunctionsclient.get_function( - name=square_num.bigframes_cloud_function - ) - assert gcf.service_config.service_account_email == gcf_service_account finally: # clean up the gcp assets created for the remote function cleanup_function_assets( @@ -1452,10 +1459,23 @@ def square_num(x): return x return x * x + # TODO(shobs): See if the test vpc can be configured to make this flow + # work with the default ingress setting (internal-only) square_num_remote = rf_session.remote_function( - [int], int, reuse=False, cloud_function_vpc_connector=gcf_vpc_connector + [int], + int, + reuse=False, + cloud_function_vpc_connector=gcf_vpc_connector, + cloud_function_ingress_settings="all", )(square_num) + # assert that the GCF is created with the intended vpc connector + gcf = rf_session.cloudfunctionsclient.get_function( + name=square_num_remote.bigframes_cloud_function + ) + assert gcf.service_config.vpc_connector == gcf_vpc_connector + + # assert that the function works as expected on data scalars_df, scalars_pandas_df = scalars_dfs bf_int64_col = scalars_df["int64_col"] @@ -1467,12 +1487,6 @@ def square_num(x): pd_result = pd_int64_col.to_frame().assign(result=pd_result_col) assert_pandas_df_equal(bf_result, pd_result, check_dtype=False) - - # Assert that the GCF is created with the intended vpc connector - gcf = rf_session.cloudfunctionsclient.get_function( - name=square_num_remote.bigframes_cloud_function - ) - assert gcf.service_config.vpc_connector == gcf_vpc_connector finally: # clean up the gcp assets created for the remote function cleanup_function_assets(