Commit 0b391d8

Merge pull request #1478 from bijay27bit/E2EgcsNewChangesSink_BT
E2E GCS Sink additional test scenarios.
2 parents 74fb78d + 9181e54 commit 0b391d8

5 files changed: 171 additions & 3 deletions

File tree

Lines changed: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
+@GCS_Sink
+Feature: GCS sink - Verification of GCS Sink plugin macro scenarios
+
+  @BQ_SOURCE_DATATYPE_TEST @GCS_SINK_TEST
+  Scenario: Validate successful records transfer from BigQuery to GCS sink with macro fields
+    Given Open Datafusion Project to configure pipeline
+    Then Select plugin: "BigQuery" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "GCS" from the plugins list as: "Sink"
+    Then Open BigQuery source properties
+    Then Enter BigQuery property reference name
+    Then Enter BigQuery property projectId "projectId"
+    Then Enter BigQuery property datasetProjectId "projectId"
+    Then Override Service account details if set in environment variables
+    Then Enter BigQuery property dataset "dataset"
+    Then Enter BigQuery source property table name
+    Then Validate output schema with expectedSchema "bqSourceSchemaDatatype"
+    Then Validate "BigQuery" plugin properties
+    Then Close the BigQuery properties
+    Then Open GCS sink properties
+    Then Override Service account details if set in environment variables
+    Then Enter the GCS sink mandatory properties
+    Then Enter GCS property "projectId" as macro argument "gcsProjectId"
+    Then Enter GCS property "serviceAccountType" as macro argument "serviceAccountType"
+    Then Enter GCS property "serviceAccountFilePath" as macro argument "serviceAccount"
+    Then Enter GCS property "path" as macro argument "gcsSinkPath"
+    Then Enter GCS sink property "pathSuffix" as macro argument "gcsPathSuffix"
+    Then Enter GCS property "format" as macro argument "gcsFormat"
+    Then Click on the Macro button of Property: "writeHeader" and set the value to: "WriteHeader"
+    Then Click on the Macro button of Property: "location" and set the value to: "gcsSinkLocation"
+    Then Click on the Macro button of Property: "contentType" and set the value to: "gcsContentType"
+    Then Click on the Macro button of Property: "outputFileNameBase" and set the value to: "OutFileNameBase"
+    Then Click on the Macro button of Property: "fileSystemProperties" and set the value to: "FileSystemPr"
+    Then Validate "GCS" plugin properties
+    Then Close the GCS properties
+    Then Connect source as "BigQuery" and sink as "GCS" to establish connection
+    Then Save the pipeline
+    Then Preview and run the pipeline
+    Then Enter runtime argument value "projectId" for key "gcsProjectId"
+    Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType"
+    Then Enter runtime argument value "serviceAccount" for key "serviceAccount"
+    Then Enter runtime argument value for GCS sink property path key "gcsSinkPath"
+    Then Enter runtime argument value "gcsPathDateSuffix" for key "gcsPathSuffix"
+    Then Enter runtime argument value "jsonFormat" for key "gcsFormat"
+    Then Enter runtime argument value "writeHeader" for key "WriteHeader"
+    Then Enter runtime argument value "contentType" for key "gcsContentType"
+    Then Enter runtime argument value "gcsSinkBucketLocation" for key "gcsSinkLocation"
+    Then Enter runtime argument value "outputFileNameBase" for key "OutFileNameBase"
+    Then Enter runtime argument value "gcsCSVFileSysProperty" for key "FileSystemPr"
+    Then Run the preview of pipeline with runtime arguments
+    Then Wait till pipeline preview is in running state
+    Then Open and capture pipeline preview logs
+    Then Verify the preview run status of pipeline in the logs is "succeeded"
+    Then Close the pipeline logs
+    Then Click on preview data for GCS sink
+    Then Verify preview output schema matches the outputSchema captured in properties
+    Then Close the preview data
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Enter runtime argument value "projectId" for key "gcsProjectId"
+    Then Enter runtime argument value "serviceAccountType" for key "serviceAccountType"
+    Then Enter runtime argument value "serviceAccount" for key "serviceAccount"
+    Then Enter runtime argument value for GCS sink property path key "gcsSinkPath"
+    Then Enter runtime argument value "gcsPathDateSuffix" for key "gcsPathSuffix"
+    Then Enter runtime argument value "jsonFormat" for key "gcsFormat"
+    Then Enter runtime argument value "writeHeader" for key "WriteHeader"
+    Then Enter runtime argument value "contentType" for key "gcsContentType"
+    Then Enter runtime argument value "gcsSinkBucketLocation" for key "gcsSinkLocation"
+    Then Enter runtime argument value "outputFileNameBase" for key "OutFileNameBase"
+    Then Enter runtime argument value "gcsCSVFileSysProperty" for key "FileSystemPr"
+    Then Run the Pipeline in Runtime with runtime arguments
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Succeeded"
+    Then Verify data is transferred to target GCS bucket
+    Then Validate the values of records transferred to GCS bucket is equal to the values from source BigQuery table
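In the scenario above every GCS sink property is set as a macro (for example `${gcsSinkPath}`), and concrete values are then supplied as runtime arguments twice: once for the preview run and once after deploy. Conceptually, macro resolution is a string substitution over the plugin configuration at run time; the Python sketch below illustrates the idea only (`resolve_macros` is a hypothetical helper, not a CDAP API):

```python
import re

def resolve_macros(config: dict, runtime_args: dict) -> dict:
    """Replace ${key} placeholders in string config values with the
    matching runtime argument; unknown keys are left untouched."""
    def substitute(value):
        if isinstance(value, str):
            return re.sub(r"\$\{([^}]+)\}",
                          lambda m: runtime_args.get(m.group(1), m.group(0)),
                          value)
        return value
    return {k: substitute(v) for k, v in config.items()}

# Illustrative values mirroring the scenario's sink properties:
config = {"path": "${gcsSinkPath}", "format": "${gcsFormat}", "writeHeader": "${WriteHeader}"}
args = {"gcsSinkPath": "gs://e2e-test-bucket/out", "gcsFormat": "json", "WriteHeader": "true"}
print(resolve_macros(config, args))
```

This is why the scenario enters the runtime arguments again after deploy: macros are resolved per run, not per pipeline.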

src/e2e-test/features/gcs/sink/GCSSink.feature

Lines changed: 51 additions & 1 deletion
@@ -95,7 +95,7 @@ Feature: GCS sink - Verification of GCS Sink plugin
       | parquet | application/octet-stream |
       | orc     | application/octet-stream |
 
-  @GCS_SINK_TEST @BQ_SOURCE_TEST
+  @BQ_SOURCE_TEST @GCS_SINK_TEST
   Scenario Outline: To verify data is getting transferred successfully from BigQuery to GCS with combinations of contenttype
     Given Open Datafusion Project to configure pipeline
     When Source is BigQuery
@@ -265,3 +265,53 @@ Feature: GCS sink - Verification of GCS Sink plugin
     Then Open and capture logs
     Then Verify the pipeline status is "Succeeded"
     Then Verify data is transferred to target GCS bucket
+
+  @GCS_AVRO_FILE @GCS_SINK_TEST
+  Scenario Outline: To verify data transferred successfully from GCS Source to GCS Sink with datatypes and write header true
+    Given Open Datafusion Project to configure pipeline
+    When Select plugin: "GCS" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "GCS" from the plugins list as: "Sink"
+    Then Connect plugins: "GCS" and "GCS2" to establish connection
+    Then Navigate to the properties page of plugin: "GCS"
+    Then Replace input plugin property: "project" with value: "projectId"
+    Then Override Service account details if set in environment variables
+    Then Enter input plugin property: "referenceName" with value: "sourceRef"
+    Then Enter GCS source property path "gcsAvroAllDataFile"
+    Then Select GCS property format "avro"
+    Then Click on the Get Schema button
+    Then Verify the Output Schema matches the Expected Schema: "gcsAvroAllTypeDataSchema"
+    Then Validate "GCS" plugin properties
+    Then Close the Plugin Properties page
+    Then Navigate to the properties page of plugin: "GCS2"
+    Then Enter GCS property projectId and reference name
+    Then Enter GCS sink property path
+    Then Select GCS property format "<FileFormat>"
+    Then Select GCS sink property contentType "<contentType>"
+    Then Enter GCS File system properties field "gcsCSVFileSysProperty"
+    Then Click on the Macro button of Property: "writeHeader" and set the value to: "WriteHeader"
+    Then Validate "GCS" plugin properties
+    Then Close the GCS properties
+    Then Save the pipeline
+    Then Preview and run the pipeline
+    Then Enter runtime argument value "writeHeader" for key "WriteHeader"
+    Then Run the preview of pipeline with runtime arguments
+    Then Wait till pipeline preview is in running state
+    Then Open and capture pipeline preview logs
+    Then Verify the preview run status of pipeline in the logs is "succeeded"
+    Then Close the pipeline logs
+    Then Close the preview
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Enter runtime argument value "writeHeader" for key "WriteHeader"
+    Then Run the Pipeline in Runtime with runtime arguments
+    Then Wait till pipeline is in running state
+    Then Open and capture logs
+    Then Verify the pipeline status is "Succeeded"
+    Then Verify data is transferred to target GCS bucket
+    Then Validate the data from GCS Source to GCS Sink with expected csv file and target data in GCS bucket
+    Examples:
+      | FileFormat | contentType |
+      | csv        | text/csv    |
+      | tsv        | text/plain  |
+      | delimited  | text/csv    |
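A Scenario Outline runs once per row of its Examples table, with each `<placeholder>` in the steps replaced by that row's value, so the outline above produces three concrete runs (csv, tsv, delimited). A rough sketch of that expansion (a hypothetical helper, not Cucumber's actual implementation):

```python
def expand_outline(steps, header, rows):
    """Expand a Scenario Outline: one concrete scenario per Examples row,
    with each <name> token replaced by that row's value."""
    scenarios = []
    for row in rows:
        values = dict(zip(header, row))
        expanded = []
        for step in steps:
            for name, value in values.items():
                step = step.replace(f"<{name}>", value)
            expanded.append(step)
        scenarios.append(expanded)
    return scenarios

# The two parameterized steps from the outline above:
steps = ['Then Select GCS property format "<FileFormat>"',
         'Then Select GCS sink property contentType "<contentType>"']
rows = [["csv", "text/csv"], ["tsv", "text/plain"], ["delimited", "text/csv"]]
for scenario in expand_outline(steps, ["FileFormat", "contentType"], rows):
    print(scenario)
```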

src/e2e-test/features/gcs/sink/GCSSinkError.feature

Lines changed: 36 additions & 0 deletions
@@ -65,3 +65,39 @@ Feature: GCS sink - Verify GCS Sink plugin error scenarios
     Then Select GCS property format "csv"
     Then Click on the Validate button
     Then Verify that the Plugin Property: "format" is displaying an in-line error message: "errorMessageInvalidFormat"
+
+  @BQ_SOURCE_TEST @GCS_SINK_TEST
+  Scenario: To verify and validate the Error message in pipeline logs after deploy with invalid bucket path
+    Given Open Datafusion Project to configure pipeline
+    When Select plugin: "BigQuery" from the plugins list as: "Source"
+    When Expand Plugin group in the LHS plugins list: "Sink"
+    When Select plugin: "GCS" from the plugins list as: "Sink"
+    Then Connect source as "BigQuery" and sink as "GCS" to establish connection
+    Then Open BigQuery source properties
+    Then Enter the BigQuery source mandatory properties
+    Then Validate "BigQuery" plugin properties
+    Then Close the BigQuery properties
+    Then Open GCS sink properties
+    Then Enter GCS property projectId and reference name
+    Then Enter GCS property "path" as macro argument "gcsSinkPath"
+    Then Select GCS property format "csv"
+    Then Click on the Validate button
+    Then Close the GCS properties
+    Then Save the pipeline
+    Then Preview and run the pipeline
+    Then Enter runtime argument value "gcsInvalidBucketNameSink" for key "gcsSinkPath"
+    Then Run the preview of pipeline with runtime arguments
+    Then Wait till pipeline preview is in running state and check if any error occurs
+    Then Open and capture pipeline preview logs
+    Then Close the pipeline logs
+    Then Close the preview
+    Then Deploy the pipeline
+    Then Run the Pipeline in Runtime
+    Then Enter runtime argument value "gcsInvalidBucketNameSink" for key "gcsSinkPath"
+    Then Run the Pipeline in Runtime with runtime arguments
+    Then Wait till pipeline is in running state
+    Then Verify the pipeline status is "Failed"
+    Then Open Pipeline logs and verify Log entries having below listed Level and Message:
+      | Level | Message                           |
+      | ERROR | errorMessageInvalidBucketNameSink |
+    Then Close the pipeline logs
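The final log assertion resolves the key `errorMessageInvalidBucketNameSink` against `errorMessage.properties` and checks the deployed pipeline's logs for an ERROR entry containing that text. A minimal sketch of such a check (hypothetical helper name and an assumed log-line format; the real CDAP log layout may differ):

```python
def verify_log_entries(log_lines, expected, messages):
    """Check that each (level, message_key) pair appears in the logs.
    `messages` maps keys from errorMessage.properties to literal text."""
    for level, key in expected:
        text = messages[key]
        if not any(level in line and text in line for line in log_lines):
            return False
    return True

# Assumed log format for illustration:
logs = ["2024-05-01 12:00:01 ERROR Unable to read or access GCS bucket."]
messages = {"errorMessageInvalidBucketNameSink": "Unable to read or access GCS bucket."}
print(verify_log_entries(logs, [("ERROR", "errorMessageInvalidBucketNameSink")], messages))
```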

src/e2e-test/resources/errorMessage.properties

Lines changed: 1 addition & 1 deletion
@@ -34,4 +34,4 @@ errorMessageMultipleFileWithoutClearDefaultSchema=Found a row with 4 fields when
 errorMessageInvalidSourcePath=Invalid bucket name in path 'abc@'. Bucket name should
 errorMessageInvalidDestPath=Invalid bucket name in path 'abc@'. Bucket name should
 errorMessageInvalidEncryptionKey=CryptoKeyName.parse: formattedString not in valid format: Parameter "abc@" must be
-
+errorMessageInvalidBucketNameSink=Unable to read or access GCS bucket.
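The keys used in the feature files resolve against Java-style `.properties` files like this one. A minimal parser sketch, assuming only `key=value` lines, `#` comments, and backslash line continuations (real Java properties parsing supports more escape rules):

```python
def parse_properties(text: str) -> dict:
    """Minimal Java-style .properties reader: key=value lines, '#'
    comments, and backslash line continuations. Deliberately simplified;
    it does not handle Unicode escapes or ':' separators."""
    props, pending = {}, ""
    for raw in text.splitlines():
        line = (pending + raw).strip()
        pending = ""
        if not line or line.startswith("#"):
            continue
        if line.endswith("\\"):
            pending = line[:-1]   # join with the next line
            continue
        key, _, value = line.partition("=")
        props[key.strip()] = value.strip()
    return props

print(parse_properties("errorMessageInvalidBucketNameSink=Unable to read or access GCS bucket."))
```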

src/e2e-test/resources/pluginParameters.properties

Lines changed: 7 additions & 1 deletion
@@ -109,7 +109,6 @@ gcsDataTypeTest2File=testdata/GCS_DATATYPE_TEST_2.csv
 gcsReadRecursivePath=testdata/GCS_RECURSIVE_TEST
 gcsReadWildcardPath=testdata/GCS_WILDCARD_TEST,testdata/GCS_WILDCARD_TEST/test
 gcsFileSysProperty={"textinputformat.record.delimiter": "@"}
-gcsCSVFileSysProperty={"csvinputformat.record.csv": "1"}
 gcsDatatypeChange=[{"key":"createddate","value":"datetime"},{"key":"revenue","value":"double"},\
 {"key":"points","value":"decimal"},{"key":"BytesData","value":"bytes"}]
 gcsDataTypeTestFileSchema=[{"key":"id","value":"int"},{"key":"name","value":"string"},\
@@ -175,6 +174,13 @@ encryptedMetadataSuffix=.metadata
 gcsPathFieldOutputSchema={ "type": "record", "name": "text", "fields": [ \
 { "name": "EmployeeDepartment", "type": "string" }, { "name": "Employeename", "type": "string" }, \
 { "name": "Salary", "type": "int" }, { "name": "wotkhours", "type": "int" }, { "name": "pathFieldColumn", "type": "string" } ] }
+gcsInvalidBucketNameSink=ggg
+writeHeader=true
+gcsSinkBucketLocation=US
+contentType=application/octet-stream
+outputFileNameBase=part
+gcsCSVFileSysProperty={"csvinputformat.record.csv": "1"}
+jsonFormat=json
 ## GCS-PLUGIN-PROPERTIES-END
 
 ## BIGQUERY-PLUGIN-PROPERTIES-START
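This commit moves `gcsCSVFileSysProperty` from the general section into the GCS plugin block. When entries are relocated like that, a quick duplicate-key check helps catch a copy left behind; a naive sketch (continuation lines that happen to contain `=` could be miscounted):

```python
from collections import Counter

def duplicate_keys(text: str):
    """Report keys defined more than once in a .properties file.
    Naive: treats every line containing '=' as a definition."""
    keys = [line.split("=", 1)[0].strip()
            for line in text.splitlines()
            if line and not line.startswith("#") and "=" in line]
    return [k for k, n in Counter(keys).items() if n > 1]

print(duplicate_keys("a=1\nb=2\na=3"))
```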
