From 6a7be0c0b30268e9987ec115c323d8e5eaf50cfc Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Fri, 21 Feb 2025 22:56:21 +0000 Subject: [PATCH 1/4] feat: warn if default ingress_settings is used in remote_functions --- bigframes/functions/_function_session.py | 20 ++++++++++++++----- bigframes/pandas/__init__.py | 6 +++--- bigframes/session/__init__.py | 11 +++++----- .../large/functions/test_remote_function.py | 5 +++++ 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py index 93b5c4c596..43dc6d95da 100644 --- a/bigframes/functions/_function_session.py +++ b/bigframes/functions/_function_session.py @@ -120,9 +120,9 @@ def remote_function( cloud_function_max_instances: Optional[int] = None, cloud_function_vpc_connector: Optional[str] = None, cloud_function_memory_mib: Optional[int] = 1024, - cloud_function_ingress_settings: Literal[ - "all", "internal-only", "internal-and-gclb" - ] = "all", + cloud_function_ingress_settings: Optional[ + Literal["all", "internal-only", "internal-and-gclb"] + ] = None, ): """Decorator to turn a user defined function into a BigQuery remote function. @@ -302,8 +302,9 @@ def remote_function( https://cloud.google.com/functions/docs/configuring/memory. cloud_function_ingress_settings (str, Optional): Ingress settings controls dictating what traffic can reach the - function. By default `all` will be used. It must be one of: - `all`, `internal-only`, `internal-and-gclb`. See for more details + function. Options are: `all`, `internal-only`, or `internal-and-gclb`. + If no setting is provided, `all` will be used by default and a warning + will be issued. See for more details https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings. """ # Some defaults may be used from the session if not provided otherwise @@ -400,6 +401,15 @@ def remote_function( " For more details see https://cloud.google.com/functions/docs/securing/cmek#before_you_begin" ) + if cloud_function_ingress_settings is None: + cloud_function_ingress_settings = "all" + msg = ( + "The `cloud_function_ingress_settings` are set to 'all' by default, " + "which may change. Consider using 'internal-only' for enhanced security. " + "See https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings for details." + ) + warnings.warn(msg, category=UserWarning) + bq_connection_manager = session.bqconnectionmanager def wrapper(func): diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 93c08a22aa..50a71c9b9e 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -80,9 +80,9 @@ def remote_function( cloud_function_max_instances: Optional[int] = None, cloud_function_vpc_connector: Optional[str] = None, cloud_function_memory_mib: Optional[int] = 1024, - cloud_function_ingress_settings: Literal[ - "all", "internal-only", "internal-and-gclb" - ] = "all", + cloud_function_ingress_settings: Optional[ + Literal["all", "internal-only", "internal-and-gclb"] + ] = None, ): return global_session.with_default_session( bigframes.session.Session.remote_function, diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index b71a4447e5..600992b9eb 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1199,9 +1199,9 @@ def remote_function( cloud_function_max_instances: Optional[int] = None, cloud_function_vpc_connector: Optional[str] = None, cloud_function_memory_mib: Optional[int] = 1024, - cloud_function_ingress_settings: Literal[ - "all", "internal-only", "internal-and-gclb" - ] = "all", + cloud_function_ingress_settings: Optional[ + Literal["all", "internal-only", "internal-and-gclb"] + ] = None, ): """Decorator to turn a user defined function into a BigQuery remote function. Check out the code samples at: https://cloud.google.com/bigquery/docs/remote-functions#bigquery-dataframes. @@ -1365,8 +1365,9 @@ def remote_function( https://cloud.google.com/functions/docs/configuring/memory. cloud_function_ingress_settings (str, Optional): Ingress settings controls dictating what traffic can reach the - function. By default `all` will be used. It must be one of: - `all`, `internal-only`, `internal-and-gclb`. See for more details + function. Options are: `all`, `internal-only`, or `internal-and-gclb`. + If no setting is provided, `all` will be used by default and a warning + will be issued. See for more details https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings. Returns: collections.abc.Callable: diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py index 7363e370bb..32c146eed2 100644 --- a/tests/system/large/functions/test_remote_function.py +++ b/tests/system/large/functions/test_remote_function.py @@ -2358,6 +2358,11 @@ def generate_stats(row: pandas.Series) -> list[int]: pytest.param( {}, functions_v2.ServiceConfig.IngressSettings.ALLOW_ALL, id="no-set" ), + pytest.param( + {"cloud_function_ingress_settings": None}, + functions_v2.ServiceConfig.IngressSettings.ALLOW_ALL, + id="set-none", + ), pytest.param( {"cloud_function_ingress_settings": "all"}, functions_v2.ServiceConfig.IngressSettings.ALLOW_ALL, From 475269ab4ccd1e72f1883822f11208983d4c6314 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Thu, 27 Feb 2025 11:31:59 -0800 Subject: [PATCH 2/4] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Tim Sweña (Swast) --- bigframes/functions/_function_session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py index 43dc6d95da..0243ad2ad7 100644 --- a/bigframes/functions/_function_session.py +++ b/bigframes/functions/_function_session.py @@ -408,7 +408,7 @@ def remote_function( "which may change. Consider using 'internal-only' for enhanced security. " "See https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings for details." ) - warnings.warn(msg, category=UserWarning) + warnings.warn(msg, category=FutureWarning, stacklevel=2) bq_connection_manager = session.bqconnectionmanager From 8c762af702649a831b723fa11d5528749da0323f Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Thu, 27 Feb 2025 22:59:36 +0000 Subject: [PATCH 3/4] add tests to verify warning --- .../large/functions/test_remote_function.py | 36 ++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py index 32c146eed2..c42c47b1ae 100644 --- a/tests/system/large/functions/test_remote_function.py +++ b/tests/system/large/functions/test_remote_function.py @@ -21,6 +21,7 @@ import sys import tempfile import textwrap +import warnings import google.api_core.exceptions from google.cloud import bigquery, functions_v2, storage @@ -2353,45 +2354,64 @@ def generate_stats(row: pandas.Series) -> list[int]: @pytest.mark.parametrize( - ("ingress_settings_args", "effective_ingress_settings"), + ("ingress_settings_args", "effective_ingress_settings", "expected_warning"), [ pytest.param( - {}, functions_v2.ServiceConfig.IngressSettings.ALLOW_ALL, id="no-set" + {}, + functions_v2.ServiceConfig.IngressSettings.ALLOW_ALL, + FutureWarning, + id="no-set", ), pytest.param( {"cloud_function_ingress_settings": None}, functions_v2.ServiceConfig.IngressSettings.ALLOW_ALL, + FutureWarning, id="set-none", ), pytest.param( {"cloud_function_ingress_settings": "all"}, functions_v2.ServiceConfig.IngressSettings.ALLOW_ALL, + None, id="set-all", ), pytest.param( {"cloud_function_ingress_settings": "internal-only"}, functions_v2.ServiceConfig.IngressSettings.ALLOW_INTERNAL_ONLY, + None, id="set-internal-only", ), pytest.param( {"cloud_function_ingress_settings": "internal-and-gclb"}, functions_v2.ServiceConfig.IngressSettings.ALLOW_INTERNAL_AND_GCLB, + None, id="set-internal-and-gclb", ), ], ) @pytest.mark.flaky(retries=2, delay=120) def test_remote_function_ingress_settings( - session, scalars_dfs, ingress_settings_args, effective_ingress_settings + session, + scalars_dfs, + ingress_settings_args, + effective_ingress_settings, + expected_warning, ): try: + # Verify the function raises the expected security warning message. + with warnings.catch_warnings(record=True) as w: - def square(x: int) -> int: - return x * x + def square(x: int) -> int: + return x * x - square_remote = session.remote_function(reuse=False, **ingress_settings_args)( - square - ) + square_remote = session.remote_function( + reuse=False, **ingress_settings_args + )(square) + + if expected_warning is not None: + assert issubclass(w[0].category, FutureWarning) + assert "Consider using 'internal-only' for enhanced security." in str( + w[0].message + ) # Assert that the GCF is created with the intended maximum timeout gcf = session.cloudfunctionsclient.get_function( From 400e0590aba10451d5ec731bb763c3a560b15e18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Fri, 28 Feb 2025 12:11:58 -0600 Subject: [PATCH 4/4] Update bigframes/functions/_function_session.py Co-authored-by: Shobhit Singh --- bigframes/functions/_function_session.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py index 0243ad2ad7..d8b5bf0f32 100644 --- a/bigframes/functions/_function_session.py +++ b/bigframes/functions/_function_session.py @@ -405,7 +405,8 @@ def remote_function( cloud_function_ingress_settings = "all" msg = ( "The `cloud_function_ingress_settings` are set to 'all' by default, " - "which may change. Consider using 'internal-only' for enhanced security. " + "which will change to 'internal-only' for enhanced security in future version 2.0 onwards. " + "However, you will be able to explicitly pass cloud_function_ingress_settings='all' if you need. " "See https://cloud.google.com/functions/docs/networking/network-settings#ingress_settings for details." ) warnings.warn(msg, category=FutureWarning, stacklevel=2)