diff --git a/.coveragerc b/.coveragerc index 8620651a..2c6e2b9a 100644 --- a/.coveragerc +++ b/.coveragerc @@ -15,4 +15,4 @@ exclude_lines = # This is added at the module level as a safeguard for if someone # generates the code and tries to run it without pip installing. This # makes it virtually impossible to test properly. - except pkg_resources.DistributionNotFound \ No newline at end of file + except pkg_resources.DistributionNotFound diff --git a/.flake8 b/.flake8 index ed931638..29227d4c 100644 --- a/.flake8 +++ b/.flake8 @@ -26,6 +26,7 @@ exclude = *_pb2.py # Standard linting exemptions. + **/.nox/** __pycache__, .git, *.pyc, diff --git a/.github/header-checker-lint.yml b/.github/header-checker-lint.yml new file mode 100644 index 00000000..fc281c05 --- /dev/null +++ b/.github/header-checker-lint.yml @@ -0,0 +1,15 @@ +{"allowedCopyrightHolders": ["Google LLC"], + "allowedLicenses": ["Apache-2.0", "MIT", "BSD-3"], + "ignoreFiles": ["**/requirements.txt", "**/requirements-test.txt"], + "sourceFileExtensions": [ + "ts", + "js", + "java", + "sh", + "Dockerfile", + "yaml", + "py", + "html", + "txt" + ] +} \ No newline at end of file diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml new file mode 100644 index 00000000..af599353 --- /dev/null +++ b/.github/sync-repo-settings.yaml @@ -0,0 +1,13 @@ +# https://github.com/googleapis/repo-automation-bots/tree/master/packages/sync-repo-settings +# Rules for master branch protection +branchProtectionRules: +# Identifies the protection rule pattern. Name of the branch to be protected. 
+# Defaults to `master` +- pattern: master + requiredStatusCheckContexts: + - 'Kokoro' + - 'cla/google' + - 'Samples - Lint' + - 'Samples - Python 3.6' + - 'Samples - Python 3.7' + - 'Samples - Python 3.8' diff --git a/.gitignore b/.gitignore index b9daa52f..b4243ced 100644 --- a/.gitignore +++ b/.gitignore @@ -50,8 +50,10 @@ docs.metadata # Virtual environment env/ + +# Test logs coverage.xml -sponge_log.xml +*sponge_log.xml # System test environment variables. system_tests/local_test_setup diff --git a/.kokoro/build.sh b/.kokoro/build.sh index 4eb2a8df..83e6a9e7 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -15,7 +15,11 @@ set -eo pipefail -cd github/python-documentai +if [[ -z "${PROJECT_ROOT:-}" ]]; then + PROJECT_ROOT="github/python-documentai" +fi + +cd "${PROJECT_ROOT}" # Disable buffering, so that the logs stream through. export PYTHONUNBUFFERED=1 @@ -30,16 +34,26 @@ export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json") # Remove old nox -python3.6 -m pip uninstall --yes --quiet nox-automation +python3 -m pip uninstall --yes --quiet nox-automation # Install nox -python3.6 -m pip install --upgrade --quiet nox -python3.6 -m nox --version +python3 -m pip install --upgrade --quiet nox +python3 -m nox --version + +# If this is a continuous build, send the test log to the FlakyBot. +# See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. +if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"continuous"* ]]; then + cleanup() { + chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot + $KOKORO_GFILE_DIR/linux_amd64/flakybot + } + trap cleanup EXIT HUP +fi # If NOX_SESSION is set, it only runs the specified session, # otherwise run all the sessions. 
if [[ -n "${NOX_SESSION:-}" ]]; then - python3.6 -m nox -s "${NOX_SESSION:-}" + python3 -m nox -s ${NOX_SESSION:-} else - python3.6 -m nox + python3 -m nox fi diff --git a/.kokoro/docs/common.cfg b/.kokoro/docs/common.cfg index fcc98d5e..c90da11c 100644 --- a/.kokoro/docs/common.cfg +++ b/.kokoro/docs/common.cfg @@ -30,7 +30,7 @@ env_vars: { env_vars: { key: "V2_STAGING_BUCKET" - value: "docs-staging-v2-staging" + value: "docs-staging-v2" } # It will upload the docker image after successful builds. diff --git a/.kokoro/docs/docs-presubmit.cfg b/.kokoro/docs/docs-presubmit.cfg index 11181078..c1cf9b5a 100644 --- a/.kokoro/docs/docs-presubmit.cfg +++ b/.kokoro/docs/docs-presubmit.cfg @@ -15,3 +15,14 @@ env_vars: { key: "TRAMPOLINE_IMAGE_UPLOAD" value: "false" } + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-documentai/.kokoro/build.sh" +} + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "docs docfx" +} diff --git a/.kokoro/samples/python3.6/common.cfg b/.kokoro/samples/python3.6/common.cfg index f754d781..a042a54a 100644 --- a/.kokoro/samples/python3.6/common.cfg +++ b/.kokoro/samples/python3.6/common.cfg @@ -13,6 +13,12 @@ env_vars: { value: "py-3.6" } +# Declare build specific Cloud project. 
+env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-py36" +} + env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-documentai/.kokoro/test-samples.sh" diff --git a/.kokoro/samples/python3.6/periodic-head.cfg b/.kokoro/samples/python3.6/periodic-head.cfg new file mode 100644 index 00000000..f9cfcd33 --- /dev/null +++ b/.kokoro/samples/python3.6/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-documentai/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/samples/python3.7/common.cfg b/.kokoro/samples/python3.7/common.cfg index ac8e6e0a..10b51166 100644 --- a/.kokoro/samples/python3.7/common.cfg +++ b/.kokoro/samples/python3.7/common.cfg @@ -13,6 +13,12 @@ env_vars: { value: "py-3.7" } +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-py37" +} + env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-documentai/.kokoro/test-samples.sh" diff --git a/.kokoro/samples/python3.7/periodic-head.cfg b/.kokoro/samples/python3.7/periodic-head.cfg new file mode 100644 index 00000000..f9cfcd33 --- /dev/null +++ b/.kokoro/samples/python3.7/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-documentai/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/samples/python3.8/common.cfg b/.kokoro/samples/python3.8/common.cfg index 72a772e8..9a69601d 100644 --- a/.kokoro/samples/python3.8/common.cfg +++ b/.kokoro/samples/python3.8/common.cfg @@ -13,6 +13,12 @@ env_vars: { value: "py-3.8" } +# Declare build specific Cloud project. 
+env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-py38" +} + env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-documentai/.kokoro/test-samples.sh" diff --git a/.kokoro/samples/python3.8/periodic-head.cfg b/.kokoro/samples/python3.8/periodic-head.cfg new file mode 100644 index 00000000..f9cfcd33 --- /dev/null +++ b/.kokoro/samples/python3.8/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-documentai/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/test-samples-against-head.sh b/.kokoro/test-samples-against-head.sh new file mode 100755 index 00000000..d04ee4fd --- /dev/null +++ b/.kokoro/test-samples-against-head.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# A customized test runner for samples. +# +# For periodic builds, you can specify this file for testing against head. 
+ +# `-e` enables the script to automatically fail when a command fails +# `-o pipefail` sets the exit code to the rightmost command to exit with a non-zero +set -eo pipefail +# Enables `**` to include files nested inside sub-folders +shopt -s globstar + +cd github/python-documentai + +exec .kokoro/test-samples-impl.sh diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh new file mode 100755 index 00000000..cf5de74c --- /dev/null +++ b/.kokoro/test-samples-impl.sh @@ -0,0 +1,102 @@ +#!/bin/bash +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# `-e` enables the script to automatically fail when a command fails +# `-o pipefail` sets the exit code to the rightmost command to exit with a non-zero +set -eo pipefail +# Enables `**` to include files nested inside sub-folders +shopt -s globstar + +# Exit early if samples directory doesn't exist +if [ ! -d "./samples" ]; then + echo "No tests run. `./samples` not found" + exit 0 +fi + +# Disable buffering, so that the logs stream through. 
+export PYTHONUNBUFFERED=1 + +# Debug: show build environment +env | grep KOKORO + +# Install nox +python3.6 -m pip install --upgrade --quiet nox + +# Use secrets accessor service account to get secrets +if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then + gcloud auth activate-service-account \ + --key-file="${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" \ + --project="cloud-devrel-kokoro-resources" +fi + +# This script will create 3 files: +# - testing/test-env.sh +# - testing/service-account.json +# - testing/client-secrets.json +./scripts/decrypt-secrets.sh + +source ./testing/test-env.sh +export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/testing/service-account.json + +# For cloud-run session, we activate the service account for gcloud sdk. +gcloud auth activate-service-account \ + --key-file "${GOOGLE_APPLICATION_CREDENTIALS}" + +export GOOGLE_CLIENT_SECRETS=$(pwd)/testing/client-secrets.json + +echo -e "\n******************** TESTING PROJECTS ********************" + +# Switch to 'fail at end' to allow all tests to complete before exiting. +set +e +# Use RTN to return a non-zero value if the test fails. +RTN=0 +ROOT=$(pwd) +# Find all requirements.txt in the samples directory (may break on whitespace). +for file in samples/**/requirements.txt; do + cd "$ROOT" + # Navigate to the project folder. + file=$(dirname "$file") + cd "$file" + + echo "------------------------------------------------------------" + echo "- testing $file" + echo "------------------------------------------------------------" + + # Use nox to execute the tests for the project. + python3.6 -m nox -s "$RUN_TESTS_SESSION" + EXIT=$? + + # If this is a periodic build, send the test log to the FlakyBot. + # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. 
+ if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then + chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot + $KOKORO_GFILE_DIR/linux_amd64/flakybot + fi + + if [[ $EXIT -ne 0 ]]; then + RTN=1 + echo -e "\n Testing failed: Nox returned a non-zero exit code. \n" + else + echo -e "\n Testing completed.\n" + fi + +done +cd "$ROOT" + +# Workaround for Kokoro permissions issue: delete secrets +rm testing/{test-env.sh,client-secrets.json,service-account.json} + +exit "$RTN" diff --git a/.kokoro/test-samples.sh b/.kokoro/test-samples.sh index 557116a9..7dd0adac 100755 --- a/.kokoro/test-samples.sh +++ b/.kokoro/test-samples.sh @@ -13,6 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +# The default test runner for samples. +# +# For periodic builds, we rewind the repo to the latest release, and +# run test-samples-impl.sh. # `-e` enables the script to automatically fail when a command fails # `-o pipefail` sets the exit code to the rightmost comment to exit with a non-zero @@ -24,81 +28,19 @@ cd github/python-documentai # Run periodic samples tests at latest release if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then + # preserving the test runner implementation. + cp .kokoro/test-samples-impl.sh "${TMPDIR}/test-samples-impl.sh" + echo "--- IMPORTANT IMPORTANT IMPORTANT ---" + echo "Now we rewind the repo back to the latest release..." LATEST_RELEASE=$(git describe --abbrev=0 --tags) git checkout $LATEST_RELEASE -fi - -# Disable buffering, so that the logs stream through. 
-export PYTHONUNBUFFERED=1 - -# Debug: show build environment -env | grep KOKORO - -# Install nox -python3.6 -m pip install --upgrade --quiet nox - -# Use secrets acessor service account to get secrets -if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then - gcloud auth activate-service-account \ - --key-file="${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" \ - --project="cloud-devrel-kokoro-resources" -fi - -# This script will create 3 files: -# - testing/test-env.sh -# - testing/service-account.json -# - testing/client-secrets.json -./scripts/decrypt-secrets.sh - -source ./testing/test-env.sh -export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/testing/service-account.json - -# For cloud-run session, we activate the service account for gcloud sdk. -gcloud auth activate-service-account \ - --key-file "${GOOGLE_APPLICATION_CREDENTIALS}" - -export GOOGLE_CLIENT_SECRETS=$(pwd)/testing/client-secrets.json - -echo -e "\n******************** TESTING PROJECTS ********************" - -# Switch to 'fail at end' to allow all tests to complete before exiting. -set +e -# Use RTN to return a non-zero value if the test fails. -RTN=0 -ROOT=$(pwd) -# Find all requirements.txt in the samples directory (may break on whitespace). -for file in samples/**/requirements.txt; do - cd "$ROOT" - # Navigate to the project folder. - file=$(dirname "$file") - cd "$file" - - echo "------------------------------------------------------------" - echo "- testing $file" - echo "------------------------------------------------------------" - - # Use nox to execute the tests for the project. - python3.6 -m nox -s "$RUN_TESTS_SESSION" - EXIT=$? - - # If this is a periodic build, send the test log to the Build Cop Bot. - # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/buildcop. 
- if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then - chmod +x $KOKORO_GFILE_DIR/linux_amd64/buildcop - $KOKORO_GFILE_DIR/linux_amd64/buildcop + echo "The current head is: " + echo $(git rev-parse --verify HEAD) + echo "--- IMPORTANT IMPORTANT IMPORTANT ---" + # move back the test runner implementation if there's no file. + if [ ! -f .kokoro/test-samples-impl.sh ]; then + cp "${TMPDIR}/test-samples-impl.sh" .kokoro/test-samples-impl.sh fi +fi - if [[ $EXIT -ne 0 ]]; then - RTN=1 - echo -e "\n Testing failed: Nox returned a non-zero exit code. \n" - else - echo -e "\n Testing completed.\n" - fi - -done -cd "$ROOT" - -# Workaround for Kokoro permissions issue: delete secrets -rm testing/{test-env.sh,client-secrets.json,service-account.json} - -exit "$RTN" \ No newline at end of file +exec .kokoro/test-samples-impl.sh diff --git a/.kokoro/trampoline_v2.sh b/.kokoro/trampoline_v2.sh index 719bcd5b..4af6cdc2 100755 --- a/.kokoro/trampoline_v2.sh +++ b/.kokoro/trampoline_v2.sh @@ -159,7 +159,7 @@ if [[ -n "${KOKORO_BUILD_ID:-}" ]]; then "KOKORO_GITHUB_COMMIT" "KOKORO_GITHUB_PULL_REQUEST_NUMBER" "KOKORO_GITHUB_PULL_REQUEST_COMMIT" - # For Build Cop Bot + # For FlakyBot "KOKORO_GITHUB_COMMIT_URL" "KOKORO_GITHUB_PULL_REQUEST_URL" ) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..32302e48 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,17 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml +- repo: https://github.com/psf/black + rev: 19.10b0 + hooks: + - id: black +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.9.0 + hooks: + - id: flake8 diff --git a/.trampolinerc b/.trampolinerc index 995ee291..383b6ec8 100644 --- a/.trampolinerc +++ b/.trampolinerc @@ -24,6 +24,7 @@ required_envvars+=( 
pass_down_envvars+=( "STAGING_BUCKET" "V2_STAGING_BUCKET" + "NOX_SESSION" ) # Prevent unintentional override on the default image. diff --git a/CHANGELOG.md b/CHANGELOG.md index 839b914e..fa0af008 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,32 @@ # Changelog +## [0.4.0](https://www.github.com/googleapis/python-documentai/compare/v0.3.0...v0.4.0) (2021-03-25) + + +### Features + +* add 'from_service_account_info' factory to clients ([d6f183a](https://www.github.com/googleapis/python-documentai/commit/d6f183a696b211c6d29bc28e9bbd0a8537f65577)) +* add common resource path helpers, expose client transport ([#43](https://www.github.com/googleapis/python-documentai/issues/43)) ([4918e62](https://www.github.com/googleapis/python-documentai/commit/4918e62033b4c118bf99ba83730377b4ecc86d17)) +* add documentai v1 ([#101](https://www.github.com/googleapis/python-documentai/issues/101)) ([74fabb5](https://www.github.com/googleapis/python-documentai/commit/74fabb5e260ecc27e9cf005502d79590fa7f72e4)) +* add from_service_account_info factory and fix sphinx identifiers ([#80](https://www.github.com/googleapis/python-documentai/issues/80)) ([d6f183a](https://www.github.com/googleapis/python-documentai/commit/d6f183a696b211c6d29bc28e9bbd0a8537f65577)) + + +### Bug Fixes + +* added if statement to filter out dir blob files ([#63](https://www.github.com/googleapis/python-documentai/issues/63)) ([7f7f541](https://www.github.com/googleapis/python-documentai/commit/7f7f541bcf4d2f42b2f619c2ceb45f53c5d0e9eb)) +* adds comment with explicit hostname change ([#94](https://www.github.com/googleapis/python-documentai/issues/94)) ([bb639f9](https://www.github.com/googleapis/python-documentai/commit/bb639f9470304b9c408143a3e8091a4ca8c54160)) +* fix sphinx identifiers ([d6f183a](https://www.github.com/googleapis/python-documentai/commit/d6f183a696b211c6d29bc28e9bbd0a8537f65577)) +* moves import statment inside region tags 
([#71](https://www.github.com/googleapis/python-documentai/issues/71)) ([a04fbea](https://www.github.com/googleapis/python-documentai/commit/a04fbeaf026d3d204dbb6c6cecf181068ddcc882)) +* remove client recv msg limit and add enums to `types/__init__.py` ([#72](https://www.github.com/googleapis/python-documentai/issues/72)) ([c94afd5](https://www.github.com/googleapis/python-documentai/commit/c94afd55124b0abc8978bf86b84743dd4afb0778)) +* removes C-style semicolons and slash comments ([#59](https://www.github.com/googleapis/python-documentai/issues/59)) ([1b24bfd](https://www.github.com/googleapis/python-documentai/commit/1b24bfdfc603952db8d1c633dfde108a396aa707)) +* **samples:** swaps 'continue' for 'return' ([#93](https://www.github.com/googleapis/python-documentai/issues/93)) ([dabe48e](https://www.github.com/googleapis/python-documentai/commit/dabe48e8c1439ceb8a50c18aa3c7dca848a9117a)) + + +### Documentation + +* fix pypi link ([#46](https://www.github.com/googleapis/python-documentai/issues/46)) ([5162674](https://www.github.com/googleapis/python-documentai/commit/5162674091b9a2111b90eb26739b4e11f9119582)) +* **samples:** new Doc AI samples for v1beta3 ([#44](https://www.github.com/googleapis/python-documentai/issues/44)) ([cc8c58d](https://www.github.com/googleapis/python-documentai/commit/cc8c58d1bade4be53fde08f6a3497eb3f79f63b1)) + ## [0.3.0](https://www.github.com/googleapis/python-documentai/compare/v0.2.0...v0.3.0) (2020-09-30) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index b3d1f602..039f4368 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,44 +1,95 @@ -# Contributor Code of Conduct +# Code of Conduct -As contributors and maintainers of this project, -and in the interest of fostering an open and welcoming community, -we pledge to respect all people who contribute through reporting issues, -posting feature requests, updating documentation, -submitting pull requests or patches, and other activities. 
+## Our Pledge -We are committed to making participation in this project -a harassment-free experience for everyone, -regardless of level of experience, gender, gender identity and expression, -sexual orientation, disability, personal appearance, -body size, race, ethnicity, age, religion, or nationality. +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, gender identity and expression, level of +experience, education, socio-economic status, nationality, personal appearance, +race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members Examples of unacceptable behavior by participants include: -* The use of sexualized language or imagery -* Personal attacks -* Trolling or insulting/derogatory comments -* Public or private harassment -* Publishing other's private information, -such as physical or electronic -addresses, without explicit permission -* Other unethical or unprofessional conduct. 
+* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject -comments, commits, code, wiki edits, issues, and other contributions -that are not aligned to this Code of Conduct. -By adopting this Code of Conduct, -project maintainers commit themselves to fairly and consistently -applying these principles to every aspect of managing this project. -Project maintainers who do not follow or enforce the Code of Conduct -may be permanently removed from the project team. - -This code of conduct applies both within project spaces and in public spaces -when an individual is representing the project or its community. - -Instances of abusive, harassing, or otherwise unacceptable behavior -may be reported by opening an issue -or contacting one or more of the project maintainers. - -This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org), version 1.2.0, -available at [http://contributor-covenant.org/version/1/2/0/](http://contributor-covenant.org/version/1/2/0/) +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, or to ban temporarily or permanently any +contributor for other behaviors that they deem inappropriate, threatening, +offensive, or harmful. 
+ +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. + +This Code of Conduct also applies outside the project spaces when the Project +Steward has a reasonable belief that an individual's behavior may have a +negative impact on the project or its community. + +## Conflict Resolution + +We do not believe that all conflict is bad; healthy debate and disagreement +often yield positive results. However, it is never okay to be disrespectful or +to engage in behavior that violates the project’s code of conduct. + +If you see someone violating the code of conduct, you are encouraged to address +the behavior directly with those involved. Many issues can be resolved quickly +and easily, and this gives people more control over the outcome of their +dispute. If you are unable to resolve the matter for any reason, or if the +behavior is threatening or harassing, report it. We are dedicated to providing +an environment where participants feel welcome and safe. + + +Reports should be directed to *googleapis-stewards@google.com*, the +Project Steward(s) for *Google Cloud Client Libraries*. It is the Project Steward’s duty to +receive and address reported violations of the code of conduct. They will then +work with a committee consisting of representatives from the Open Source +Programs Office and the Google Open Source Strategy team. If for any reason you +are uncomfortable reaching out to the Project Steward, please email +opensource@google.com. + +We will investigate every complaint, but you may not receive a direct response. 
+We will use our discretion in determining when and how to follow up on reported +incidents, which may range from not taking action to permanent expulsion from +the project and project-sponsored spaces. We will notify the accused of the +report and provide them an opportunity to discuss it before any action is taken. +The identity of the reporter will be omitted from the details of the report +supplied to the accused. In potentially harmful situations, such as ongoing +harassment or threats to anyone's safety, we may take action without notice. + +## Attribution + +This Code of Conduct is adapted from the Contributor Covenant, version 1.4, +available at +https://www.contributor-covenant.org/version/1/4/code-of-conduct.html \ No newline at end of file diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 9ea187b2..7307ccad 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -21,8 +21,8 @@ In order to add a feature: - The feature must be documented in both the API and narrative documentation. -- The feature must work fully on the following CPython versions: 2.7, - 3.5, 3.6, 3.7 and 3.8 on both UNIX and Windows. +- The feature must work fully on the following CPython versions: + 3.6, 3.7, 3.8 and 3.9 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -70,9 +70,14 @@ We use `nox `__ to instrument our tests. - To test your changes, run unit tests with ``nox``:: $ nox -s unit-2.7 - $ nox -s unit-3.7 + $ nox -s unit-3.8 $ ... +- Args to pytest can be passed through the nox command separated by a `--`. For + example, to run a single test:: + + $ nox -s unit-3.8 -- -k + .. note:: The unit tests and system tests are described in the @@ -93,8 +98,12 @@ On Debian/Ubuntu:: ************ Coding Style ************ +- We use the automatic code formatter ``black``. You can run it using + the nox session ``blacken``. This will eliminate many lint errors. 
Run via:: -- PEP8 compliance, with exceptions defined in the linter configuration. + $ nox -s blacken + +- PEP8 compliance is required, with exceptions defined in the linter configuration. If you have ``nox`` installed, you can test that you have not introduced any non-compliant code via:: @@ -111,6 +120,16 @@ Coding Style should point to the official ``googleapis`` checkout and the the branch should be the main branch on that remote (``master``). +- This repository contains configuration for the + `pre-commit `__ tool, which automates checking + our linters during a commit. If you have it installed on your ``$PATH``, + you can enable enforcing those checks via: + +.. code-block:: bash + + $ pre-commit install + pre-commit installed at .git/hooks/pre-commit + Exceptions to PEP8: - Many unit tests use a helper method, ``_call_fut`` ("FUT" is short for @@ -123,13 +142,18 @@ Running System Tests - To run system tests, you can execute:: - $ nox -s system-3.7 + # Run all system tests + $ nox -s system-3.8 $ nox -s system-2.7 + # Run a single system test + $ nox -s system-3.8 -- -k + + .. note:: System tests are only configured to run under Python 2.7 and - Python 3.7. For expediency, we do not run them in older versions + Python 3.8. For expediency, we do not run them in older versions of Python 3. This alone will not run the tests. You'll need to change some local @@ -192,25 +216,24 @@ Supported Python Versions We support: -- `Python 3.5`_ - `Python 3.6`_ - `Python 3.7`_ - `Python 3.8`_ +- `Python 3.9`_ -.. _Python 3.5: https://docs.python.org/3.5/ .. _Python 3.6: https://docs.python.org/3.6/ .. _Python 3.7: https://docs.python.org/3.7/ .. _Python 3.8: https://docs.python.org/3.8/ +.. _Python 3.9: https://docs.python.org/3.9/ Supported versions can be found in our ``noxfile.py`` `config`_. .. _config: https://github.com/googleapis/python-documentai/blob/master/noxfile.py -Python 2.7 support is deprecated. 
All code changes should maintain Python 2.7 compatibility until January 1, 2020. We also explicitly decided to support Python 3 beginning with version -3.5. Reasons for this include: +3.6. Reasons for this include: - Encouraging use of newest versions of Python 3 - Taking the lead of `prominent`_ open-source `projects`_ diff --git a/LICENSE b/LICENSE index a8ee855d..d6456956 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,7 @@ - Apache License + + Apache License Version 2.0, January 2004 - https://www.apache.org/licenses/ + http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION @@ -192,7 +193,7 @@ you may not use this file except in compliance with the License. You may obtain a copy of the License at - https://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/MANIFEST.in b/MANIFEST.in index e9e29d12..e783f4c6 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -16,10 +16,10 @@ # Generated by synthtool. DO NOT EDIT! include README.rst LICENSE -recursive-include google *.json *.proto +recursive-include google *.json *.proto py.typed recursive-include tests * global-exclude *.py[co] global-exclude __pycache__ # Exclude scripts for samples readmegen -prune scripts/readme-gen \ No newline at end of file +prune scripts/readme-gen diff --git a/README.rst b/README.rst index f797d7a3..bbf479cd 100644 --- a/README.rst +++ b/README.rst @@ -14,8 +14,8 @@ language, computer vision, translation, and AutoML. :target: https://github.com/googleapis/google-cloud-python/blob/master/README.rst#beta-support .. |pypi| image:: https://img.shields.io/pypi/v/google-cloud-service-directory.svg :target: https://pypi.org/project/google-cloud-service-directory/ -.. 
|versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-service-directory.svg - :target: https://pypi.org/project/google-cloud-service-directory/ +.. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-documentai.svg + :target: https://pypi.org/project/google-cloud-documentai/ .. _Cloud Document AI API: https://cloud.google.com/document-understanding/docs/ .. _Client Library Documentation: https://googleapis.dev/python/documentai/latest .. _Product Documentation: https://cloud.google.com/document-understanding/docs/ @@ -81,4 +81,4 @@ Next Steps APIs that we cover. .. _Cloud Document AI API Product documentation: https://cloud.google.com/document-understanding/docs/ -.. _README: https://github.com/googleapis/google-cloud-python/blob/master/README.rst \ No newline at end of file +.. _README: https://github.com/googleapis/google-cloud-python/blob/master/README.rst diff --git a/docs/_static/custom.css b/docs/_static/custom.css index 0abaf229..bcd37bbd 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,4 +1,9 @@ div#python2-eol { border-color: red; border-width: medium; -} \ No newline at end of file +} + +/* Ensure minimum width for 'Parameters' / 'Returns' column */ +dl.field-list > dt { + min-width: 100px +} diff --git a/docs/conf.py b/docs/conf.py index d5ee4abd..4982dd92 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -345,10 +345,11 @@ # Example configuration for intersphinx: refer to the Python standard library. 
intersphinx_mapping = { - "python": ("http://python.readthedocs.org/en/latest/", None), - "google-auth": ("https://google-auth.readthedocs.io/en/stable", None), + "python": ("https://python.readthedocs.org/en/latest/", None), + "google-auth": ("https://googleapis.dev/python/google-auth/latest/", None), "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,), - "grpc": ("https://grpc.io/grpc/python/", None), + "grpc": ("https://grpc.github.io/grpc/python/", None), + "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), } diff --git a/docs/documentai_v1/document_processor_service.rst b/docs/documentai_v1/document_processor_service.rst new file mode 100644 index 00000000..3918355b --- /dev/null +++ b/docs/documentai_v1/document_processor_service.rst @@ -0,0 +1,6 @@ +DocumentProcessorService +------------------------------------------ + +.. automodule:: google.cloud.documentai_v1.services.document_processor_service + :members: + :inherited-members: diff --git a/docs/documentai_v1/services.rst b/docs/documentai_v1/services.rst new file mode 100644 index 00000000..551bb666 --- /dev/null +++ b/docs/documentai_v1/services.rst @@ -0,0 +1,6 @@ +Services for Google Cloud Documentai v1 API +=========================================== +.. toctree:: + :maxdepth: 2 + + document_processor_service diff --git a/docs/documentai_v1/types.rst b/docs/documentai_v1/types.rst new file mode 100644 index 00000000..68ac7119 --- /dev/null +++ b/docs/documentai_v1/types.rst @@ -0,0 +1,7 @@ +Types for Google Cloud Documentai v1 API +======================================== + +.. 
automodule:: google.cloud.documentai_v1.types + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/documentai_v1beta2/document_understanding_service.rst b/docs/documentai_v1beta2/document_understanding_service.rst new file mode 100644 index 00000000..a0d0da7e --- /dev/null +++ b/docs/documentai_v1beta2/document_understanding_service.rst @@ -0,0 +1,6 @@ +DocumentUnderstandingService +---------------------------------------------- + +.. automodule:: google.cloud.documentai_v1beta2.services.document_understanding_service + :members: + :inherited-members: diff --git a/docs/documentai_v1beta2/services.rst b/docs/documentai_v1beta2/services.rst index b1f00952..13f4a238 100644 --- a/docs/documentai_v1beta2/services.rst +++ b/docs/documentai_v1beta2/services.rst @@ -1,6 +1,6 @@ Services for Google Cloud Documentai v1beta2 API ================================================ +.. toctree:: + :maxdepth: 2 -.. automodule:: google.cloud.documentai_v1beta2.services.document_understanding_service - :members: - :inherited-members: + document_understanding_service diff --git a/docs/documentai_v1beta2/types.rst b/docs/documentai_v1beta2/types.rst index 2a437e9d..9edede43 100644 --- a/docs/documentai_v1beta2/types.rst +++ b/docs/documentai_v1beta2/types.rst @@ -3,3 +3,5 @@ Types for Google Cloud Documentai v1beta2 API .. automodule:: google.cloud.documentai_v1beta2.types :members: + :undoc-members: + :show-inheritance: diff --git a/docs/documentai_v1beta3/document_processor_service.rst b/docs/documentai_v1beta3/document_processor_service.rst new file mode 100644 index 00000000..6a2caa2b --- /dev/null +++ b/docs/documentai_v1beta3/document_processor_service.rst @@ -0,0 +1,6 @@ +DocumentProcessorService +------------------------------------------ + +.. 
automodule:: google.cloud.documentai_v1beta3.services.document_processor_service + :members: + :inherited-members: diff --git a/docs/documentai_v1beta3/services.rst b/docs/documentai_v1beta3/services.rst index b4a1011c..d19a944b 100644 --- a/docs/documentai_v1beta3/services.rst +++ b/docs/documentai_v1beta3/services.rst @@ -1,6 +1,6 @@ Services for Google Cloud Documentai v1beta3 API ================================================ +.. toctree:: + :maxdepth: 2 -.. automodule:: google.cloud.documentai_v1beta3.services.document_processor_service - :members: - :inherited-members: + document_processor_service diff --git a/docs/documentai_v1beta3/types.rst b/docs/documentai_v1beta3/types.rst index 03bcbfa7..7e22aabc 100644 --- a/docs/documentai_v1beta3/types.rst +++ b/docs/documentai_v1beta3/types.rst @@ -3,3 +3,5 @@ Types for Google Cloud Documentai v1beta3 API .. automodule:: google.cloud.documentai_v1beta3.types :members: + :undoc-members: + :show-inheritance: diff --git a/docs/index.rst b/docs/index.rst index c6c5efde..fd5e2754 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -7,6 +7,8 @@ API Reference .. toctree:: :maxdepth: 2 + documentai_v1/services + documentai_v1/types documentai_v1beta3/services documentai_v1beta3/types documentai_v1beta2/services diff --git a/google/cloud/documentai/__init__.py b/google/cloud/documentai/__init__.py index edd80431..b488ee65 100644 --- a/google/cloud/documentai/__init__.py +++ b/google/cloud/documentai/__init__.py @@ -15,52 +15,68 @@ # limitations under the License. 
# -from google.cloud.documentai_v1beta3.services.document_processor_service.async_client import ( +from google.cloud.documentai_v1.services.document_processor_service.async_client import ( DocumentProcessorServiceAsyncClient, ) -from google.cloud.documentai_v1beta3.services.document_processor_service.client import ( +from google.cloud.documentai_v1.services.document_processor_service.client import ( DocumentProcessorServiceClient, ) -from google.cloud.documentai_v1beta3.types.document import Document -from google.cloud.documentai_v1beta3.types.document_processor_service import ( +from google.cloud.documentai_v1.types.document import Document +from google.cloud.documentai_v1.types.document_io import BatchDocumentsInputConfig +from google.cloud.documentai_v1.types.document_io import DocumentOutputConfig +from google.cloud.documentai_v1.types.document_io import GcsDocument +from google.cloud.documentai_v1.types.document_io import GcsDocuments +from google.cloud.documentai_v1.types.document_io import GcsPrefix +from google.cloud.documentai_v1.types.document_io import RawDocument +from google.cloud.documentai_v1.types.document_processor_service import ( BatchProcessMetadata, ) -from google.cloud.documentai_v1beta3.types.document_processor_service import ( +from google.cloud.documentai_v1.types.document_processor_service import ( BatchProcessRequest, ) -from google.cloud.documentai_v1beta3.types.document_processor_service import ( +from google.cloud.documentai_v1.types.document_processor_service import ( BatchProcessResponse, ) -from google.cloud.documentai_v1beta3.types.document_processor_service import ( - ProcessRequest, +from google.cloud.documentai_v1.types.document_processor_service import ( + CommonOperationMetadata, ) -from google.cloud.documentai_v1beta3.types.document_processor_service import ( - ProcessResponse, +from google.cloud.documentai_v1.types.document_processor_service import ( + HumanReviewStatus, ) -from 
google.cloud.documentai_v1beta3.types.document_processor_service import ( +from google.cloud.documentai_v1.types.document_processor_service import ProcessRequest +from google.cloud.documentai_v1.types.document_processor_service import ProcessResponse +from google.cloud.documentai_v1.types.document_processor_service import ( ReviewDocumentOperationMetadata, ) -from google.cloud.documentai_v1beta3.types.document_processor_service import ( +from google.cloud.documentai_v1.types.document_processor_service import ( ReviewDocumentRequest, ) -from google.cloud.documentai_v1beta3.types.document_processor_service import ( +from google.cloud.documentai_v1.types.document_processor_service import ( ReviewDocumentResponse, ) -from google.cloud.documentai_v1beta3.types.geometry import BoundingPoly -from google.cloud.documentai_v1beta3.types.geometry import NormalizedVertex -from google.cloud.documentai_v1beta3.types.geometry import Vertex +from google.cloud.documentai_v1.types.geometry import BoundingPoly +from google.cloud.documentai_v1.types.geometry import NormalizedVertex +from google.cloud.documentai_v1.types.geometry import Vertex __all__ = ( + "BatchDocumentsInputConfig", "BatchProcessMetadata", "BatchProcessRequest", "BatchProcessResponse", "BoundingPoly", + "CommonOperationMetadata", "Document", + "DocumentOutputConfig", "DocumentProcessorServiceAsyncClient", "DocumentProcessorServiceClient", + "GcsDocument", + "GcsDocuments", + "GcsPrefix", + "HumanReviewStatus", "NormalizedVertex", "ProcessRequest", "ProcessResponse", + "RawDocument", "ReviewDocumentOperationMetadata", "ReviewDocumentRequest", "ReviewDocumentResponse", diff --git a/google/cloud/documentai_v1/__init__.py b/google/cloud/documentai_v1/__init__.py new file mode 100644 index 00000000..84d917be --- /dev/null +++ b/google/cloud/documentai_v1/__init__.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not 
use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from .services.document_processor_service import DocumentProcessorServiceClient +from .types.document import Document +from .types.document_io import BatchDocumentsInputConfig +from .types.document_io import DocumentOutputConfig +from .types.document_io import GcsDocument +from .types.document_io import GcsDocuments +from .types.document_io import GcsPrefix +from .types.document_io import RawDocument +from .types.document_processor_service import BatchProcessMetadata +from .types.document_processor_service import BatchProcessRequest +from .types.document_processor_service import BatchProcessResponse +from .types.document_processor_service import CommonOperationMetadata +from .types.document_processor_service import HumanReviewStatus +from .types.document_processor_service import ProcessRequest +from .types.document_processor_service import ProcessResponse +from .types.document_processor_service import ReviewDocumentOperationMetadata +from .types.document_processor_service import ReviewDocumentRequest +from .types.document_processor_service import ReviewDocumentResponse +from .types.geometry import BoundingPoly +from .types.geometry import NormalizedVertex +from .types.geometry import Vertex + + +__all__ = ( + "BatchDocumentsInputConfig", + "BatchProcessMetadata", + "BatchProcessRequest", + "BatchProcessResponse", + "BoundingPoly", + "CommonOperationMetadata", + "Document", + "DocumentOutputConfig", + "GcsDocument", + "GcsDocuments", + "GcsPrefix", + "HumanReviewStatus", + "NormalizedVertex", + 
"ProcessRequest", + "ProcessResponse", + "RawDocument", + "ReviewDocumentOperationMetadata", + "ReviewDocumentRequest", + "ReviewDocumentResponse", + "Vertex", + "DocumentProcessorServiceClient", +) diff --git a/google/cloud/documentai_v1/py.typed b/google/cloud/documentai_v1/py.typed new file mode 100644 index 00000000..81b45001 --- /dev/null +++ b/google/cloud/documentai_v1/py.typed @@ -0,0 +1,2 @@ +# Marker file for PEP 561. +# The google-cloud-documentai package uses inline types. diff --git a/google/cloud/documentai_v1/services/__init__.py b/google/cloud/documentai_v1/services/__init__.py new file mode 100644 index 00000000..42ffdf2b --- /dev/null +++ b/google/cloud/documentai_v1/services/__init__.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/google/cloud/documentai_v1/services/document_processor_service/__init__.py b/google/cloud/documentai_v1/services/document_processor_service/__init__.py new file mode 100644 index 00000000..9f87d9f4 --- /dev/null +++ b/google/cloud/documentai_v1/services/document_processor_service/__init__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from .client import DocumentProcessorServiceClient +from .async_client import DocumentProcessorServiceAsyncClient + +__all__ = ( + "DocumentProcessorServiceClient", + "DocumentProcessorServiceAsyncClient", +) diff --git a/google/cloud/documentai_v1/services/document_processor_service/async_client.py b/google/cloud/documentai_v1/services/document_processor_service/async_client.py new file mode 100644 index 00000000..42cf58a4 --- /dev/null +++ b/google/cloud/documentai_v1/services/document_processor_service/async_client.py @@ -0,0 +1,476 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from collections import OrderedDict +import functools +import re +from typing import Dict, Sequence, Tuple, Type, Union +import pkg_resources + +import google.api_core.client_options as ClientOptions # type: ignore +from google.api_core import exceptions # type: ignore +from google.api_core import gapic_v1 # type: ignore +from google.api_core import retry as retries # type: ignore +from google.auth import credentials # type: ignore +from google.oauth2 import service_account # type: ignore + +from google.api_core import operation # type: ignore +from google.api_core import operation_async # type: ignore +from google.cloud.documentai_v1.types import document +from google.cloud.documentai_v1.types import document_processor_service + +from .transports.base import DocumentProcessorServiceTransport, DEFAULT_CLIENT_INFO +from .transports.grpc_asyncio import DocumentProcessorServiceGrpcAsyncIOTransport +from .client import DocumentProcessorServiceClient + + +class DocumentProcessorServiceAsyncClient: + """Service to call Cloud DocumentAI to process documents + according to the processor's definition. Processors are built + using state-of-the-art Google AI such as natural language, + computer vision, and translation to extract structured + information from unstructured or semi-structured documents. 
+ """ + + _client: DocumentProcessorServiceClient + + DEFAULT_ENDPOINT = DocumentProcessorServiceClient.DEFAULT_ENDPOINT + DEFAULT_MTLS_ENDPOINT = DocumentProcessorServiceClient.DEFAULT_MTLS_ENDPOINT + + human_review_config_path = staticmethod( + DocumentProcessorServiceClient.human_review_config_path + ) + parse_human_review_config_path = staticmethod( + DocumentProcessorServiceClient.parse_human_review_config_path + ) + processor_path = staticmethod(DocumentProcessorServiceClient.processor_path) + parse_processor_path = staticmethod( + DocumentProcessorServiceClient.parse_processor_path + ) + + common_billing_account_path = staticmethod( + DocumentProcessorServiceClient.common_billing_account_path + ) + parse_common_billing_account_path = staticmethod( + DocumentProcessorServiceClient.parse_common_billing_account_path + ) + + common_folder_path = staticmethod(DocumentProcessorServiceClient.common_folder_path) + parse_common_folder_path = staticmethod( + DocumentProcessorServiceClient.parse_common_folder_path + ) + + common_organization_path = staticmethod( + DocumentProcessorServiceClient.common_organization_path + ) + parse_common_organization_path = staticmethod( + DocumentProcessorServiceClient.parse_common_organization_path + ) + + common_project_path = staticmethod( + DocumentProcessorServiceClient.common_project_path + ) + parse_common_project_path = staticmethod( + DocumentProcessorServiceClient.parse_common_project_path + ) + + common_location_path = staticmethod( + DocumentProcessorServiceClient.common_location_path + ) + parse_common_location_path = staticmethod( + DocumentProcessorServiceClient.parse_common_location_path + ) + + @classmethod + def from_service_account_info(cls, info: dict, *args, **kwargs): + """Creates an instance of this client using the provided credentials info. + + Args: + info (dict): The service account private key info. + args: Additional arguments to pass to the constructor. 
+ kwargs: Additional arguments to pass to the constructor. + + Returns: + DocumentProcessorServiceAsyncClient: The constructed client. + """ + return DocumentProcessorServiceClient.from_service_account_info.__func__(DocumentProcessorServiceAsyncClient, info, *args, **kwargs) # type: ignore + + @classmethod + def from_service_account_file(cls, filename: str, *args, **kwargs): + """Creates an instance of this client using the provided credentials + file. + + Args: + filename (str): The path to the service account private key json + file. + args: Additional arguments to pass to the constructor. + kwargs: Additional arguments to pass to the constructor. + + Returns: + DocumentProcessorServiceAsyncClient: The constructed client. + """ + return DocumentProcessorServiceClient.from_service_account_file.__func__(DocumentProcessorServiceAsyncClient, filename, *args, **kwargs) # type: ignore + + from_service_account_json = from_service_account_file + + @property + def transport(self) -> DocumentProcessorServiceTransport: + """Return the transport used by the client instance. + + Returns: + DocumentProcessorServiceTransport: The transport used by the client instance. + """ + return self._client.transport + + get_transport_class = functools.partial( + type(DocumentProcessorServiceClient).get_transport_class, + type(DocumentProcessorServiceClient), + ) + + def __init__( + self, + *, + credentials: credentials.Credentials = None, + transport: Union[str, DocumentProcessorServiceTransport] = "grpc_asyncio", + client_options: ClientOptions = None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + ) -> None: + """Instantiate the document processor service client. + + Args: + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. 
+ transport (Union[str, ~.DocumentProcessorServiceTransport]): The + transport to use. If set to None, a transport is chosen + automatically. + client_options (ClientOptions): Custom options for the client. It + won't take effect if a ``transport`` instance is provided. + (1) The ``api_endpoint`` property can be used to override the + default endpoint provided by the client. GOOGLE_API_USE_MTLS_ENDPOINT + environment variable can also be used to override the endpoint: + "always" (always use the default mTLS endpoint), "never" (always + use the default regular endpoint) and "auto" (auto switch to the + default mTLS endpoint if client certificate is present, this is + the default value). However, the ``api_endpoint`` property takes + precedence if provided. + (2) If GOOGLE_API_USE_CLIENT_CERTIFICATE environment variable + is "true", then the ``client_cert_source`` property can be used + to provide client certificate for mutual TLS transport. If + not provided, the default SSL client certificate will be used if + present. If GOOGLE_API_USE_CLIENT_CERTIFICATE is "false" or not + set, no client certificate will be used. + + Raises: + google.auth.exceptions.MutualTlsChannelError: If mutual TLS transport + creation failed for any reason. + """ + + self._client = DocumentProcessorServiceClient( + credentials=credentials, + transport=transport, + client_options=client_options, + client_info=client_info, + ) + + async def process_document( + self, + request: document_processor_service.ProcessRequest = None, + *, + name: str = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> document_processor_service.ProcessResponse: + r"""Processes a single document. + + Args: + request (:class:`google.cloud.documentai_v1.types.ProcessRequest`): + The request object. Request message for the process + document method. + name (:class:`str`): + Required. The processor resource + name. 
+ + This corresponds to the ``name`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.cloud.documentai_v1.types.ProcessResponse: + Response message for the process + document method. + + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([name]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + request = document_processor_service.ProcessRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if name is not None: + request.name = name + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method_async.wrap_method( + self._client._transport.process_document, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, + ), + deadline=120.0, + ), + default_timeout=120.0, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("name", request.name),)), + ) + + # Send the request. + response = await rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Done; return the response. 
+ return response + + async def batch_process_documents( + self, + request: document_processor_service.BatchProcessRequest = None, + *, + name: str = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> operation_async.AsyncOperation: + r"""LRO endpoint to batch process many documents. The output is + written to Cloud Storage as JSON in the [Document] format. + + Args: + request (:class:`google.cloud.documentai_v1.types.BatchProcessRequest`): + The request object. Request message for batch process + document method. + name (:class:`str`): + Required. The processor resource + name. + + This corresponds to the ``name`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. + + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.api_core.operation_async.AsyncOperation: + An object representing a long-running operation. + + The result type for the operation will be + :class:`google.cloud.documentai_v1.types.BatchProcessResponse` + Response message for batch process document method. + + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([name]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + request = document_processor_service.BatchProcessRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. 
+ + if name is not None: + request.name = name + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method_async.wrap_method( + self._client._transport.batch_process_documents, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, + ), + deadline=120.0, + ), + default_timeout=120.0, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata((("name", request.name),)), + ) + + # Send the request. + response = await rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Wrap the response in an operation future. + response = operation_async.from_gapic( + response, + self._client._transport.operations_client, + document_processor_service.BatchProcessResponse, + metadata_type=document_processor_service.BatchProcessMetadata, + ) + + # Done; return the response. + return response + + async def review_document( + self, + request: document_processor_service.ReviewDocumentRequest = None, + *, + human_review_config: str = None, + retry: retries.Retry = gapic_v1.method.DEFAULT, + timeout: float = None, + metadata: Sequence[Tuple[str, str]] = (), + ) -> operation_async.AsyncOperation: + r"""Send a document for Human Review. The input document + should be processed by the specified processor. + + Args: + request (:class:`google.cloud.documentai_v1.types.ReviewDocumentRequest`): + The request object. Request message for review document + method. + human_review_config (:class:`str`): + Required. The resource name of the + HumanReviewConfig that the document will + be reviewed with. + + This corresponds to the ``human_review_config`` field + on the ``request`` instance; if ``request`` is provided, this + should not be set. 
+ + retry (google.api_core.retry.Retry): Designation of what errors, if any, + should be retried. + timeout (float): The timeout for this request. + metadata (Sequence[Tuple[str, str]]): Strings which should be + sent along with the request as metadata. + + Returns: + google.api_core.operation_async.AsyncOperation: + An object representing a long-running operation. + + The result type for the operation will be + :class:`google.cloud.documentai_v1.types.ReviewDocumentResponse` + Response message for review document method. + + """ + # Create or coerce a protobuf request object. + # Sanity check: If we got a request object, we should *not* have + # gotten any keyword arguments that map to the request. + has_flattened_params = any([human_review_config]) + if request is not None and has_flattened_params: + raise ValueError( + "If the `request` argument is set, then none of " + "the individual field arguments should be set." + ) + + request = document_processor_service.ReviewDocumentRequest(request) + + # If we have keyword arguments corresponding to fields on the + # request, apply these. + + if human_review_config is not None: + request.human_review_config = human_review_config + + # Wrap the RPC method; this adds retry and timeout information, + # and friendly error handling. + rpc = gapic_v1.method_async.wrap_method( + self._client._transport.review_document, + default_retry=retries.Retry( + initial=0.1, + maximum=60.0, + multiplier=1.3, + predicate=retries.if_exception_type( + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, + ), + deadline=120.0, + ), + default_timeout=120.0, + client_info=DEFAULT_CLIENT_INFO, + ) + + # Certain fields should be provided within the metadata header; + # add these here. + metadata = tuple(metadata) + ( + gapic_v1.routing_header.to_grpc_metadata( + (("human_review_config", request.human_review_config),) + ), + ) + + # Send the request. 
+ response = await rpc(request, retry=retry, timeout=timeout, metadata=metadata,) + + # Wrap the response in an operation future. + response = operation_async.from_gapic( + response, + self._client._transport.operations_client, + document_processor_service.ReviewDocumentResponse, + metadata_type=document_processor_service.ReviewDocumentOperationMetadata, + ) + + # Done; return the response. + return response + + +try: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo( + gapic_version=pkg_resources.get_distribution( + "google-cloud-documentai", + ).version, + ) +except pkg_resources.DistributionNotFound: + DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo() + + +__all__ = ("DocumentProcessorServiceAsyncClient",) diff --git a/google/cloud/documentai_v1/services/document_processor_service/client.py b/google/cloud/documentai_v1/services/document_processor_service/client.py new file mode 100644 index 00000000..46160b76 --- /dev/null +++ b/google/cloud/documentai_v1/services/document_processor_service/client.py @@ -0,0 +1,631 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class DocumentProcessorServiceClientMeta(type):
    """Metaclass for the DocumentProcessorService client.

    This provides class-level methods for building and retrieving
    support objects (e.g. transport) without polluting the client instance
    objects.
    """

    # Insertion order matters: the first registered transport is the default.
    _transport_registry = (
        OrderedDict()
    )  # type: Dict[str, Type[DocumentProcessorServiceTransport]]
    _transport_registry["grpc"] = DocumentProcessorServiceGrpcTransport
    _transport_registry["grpc_asyncio"] = DocumentProcessorServiceGrpcAsyncIOTransport

    def get_transport_class(
        cls, label: str = None,
    ) -> Type[DocumentProcessorServiceTransport]:
        """Return an appropriate transport class.

        Args:
            label: The name of the desired transport. If none is
                provided, then the first transport in the registry is used.

        Returns:
            The transport class to use.

        Raises:
            KeyError: If ``label`` is given but is not a registered transport.
        """
        # If a specific transport is requested, return that one.
        if label:
            return cls._transport_registry[label]

        # No transport is requested; return the default (that is, the first one
        # in the dictionary).
        return next(iter(cls._transport_registry.values()))


class DocumentProcessorServiceClient(metaclass=DocumentProcessorServiceClientMeta):
    """Service to call Cloud DocumentAI to process documents
    according to the processor's definition. Processors are built
    using state-of-the-art Google AI such as natural language,
    computer vision, and translation to extract structured
    information from unstructured or semi-structured documents.
    """

    @staticmethod
    def _get_default_mtls_endpoint(api_endpoint):
        """Convert api endpoint to mTLS endpoint.

        Convert "*.sandbox.googleapis.com" and "*.googleapis.com" to
        "*.mtls.sandbox.googleapis.com" and "*.mtls.googleapis.com" respectively.

        Args:
            api_endpoint (Optional[str]): the api endpoint to convert.

        Returns:
            str: converted mTLS api endpoint. The input is returned unchanged
                when it is empty, already an mTLS endpoint, or not a
                ``googleapis.com`` host.
        """
        if not api_endpoint:
            return api_endpoint

        # The four named groups are unpacked positionally below; every group
        # needs its name for the pattern to compile.
        mtls_endpoint_re = re.compile(
            r"(?P<name>[^.]+)(?P<mtls>\.mtls)?(?P<sandbox>\.sandbox)?(?P<googledomain>\.googleapis\.com)?"
        )

        m = mtls_endpoint_re.match(api_endpoint)
        if m is None:
            # Nothing recognizable (e.g. the endpoint starts with a dot);
            # leave the caller's value untouched.
            return api_endpoint

        name, mtls, sandbox, googledomain = m.groups()
        if mtls or not googledomain:
            return api_endpoint

        if sandbox:
            return api_endpoint.replace(
                "sandbox.googleapis.com", "mtls.sandbox.googleapis.com"
            )

        return api_endpoint.replace(".googleapis.com", ".mtls.googleapis.com")

    DEFAULT_ENDPOINT = "us-documentai.googleapis.com"
    # Computed while the class body executes, hence the ``__func__`` unwrap of
    # the staticmethod object.
    DEFAULT_MTLS_ENDPOINT = _get_default_mtls_endpoint.__func__(  # type: ignore
        DEFAULT_ENDPOINT
    )

    @classmethod
    def from_service_account_info(cls, info: dict, *args, **kwargs):
        """Creates an instance of this client using the provided credentials info.

        Args:
            info (dict): The service account private key info.
            args: Additional arguments to pass to the constructor.
            kwargs: Additional arguments to pass to the constructor.

        Returns:
            DocumentProcessorServiceClient: The constructed client.
        """
        credentials = service_account.Credentials.from_service_account_info(info)
        kwargs["credentials"] = credentials
        return cls(*args, **kwargs)

    @classmethod
    def from_service_account_file(cls, filename: str, *args, **kwargs):
        """Creates an instance of this client using the provided credentials
        file.

        Args:
            filename (str): The path to the service account private key json
                file.
            args: Additional arguments to pass to the constructor.
            kwargs: Additional arguments to pass to the constructor.

        Returns:
            DocumentProcessorServiceClient: The constructed client.
        """
        credentials = service_account.Credentials.from_service_account_file(filename)
        kwargs["credentials"] = credentials
        return cls(*args, **kwargs)

    # Alias of ``from_service_account_file``.
    from_service_account_json = from_service_account_file

    @property
    def transport(self) -> DocumentProcessorServiceTransport:
        """Return the transport used by the client instance.

        Returns:
            DocumentProcessorServiceTransport: The transport used by the client instance.
        """
        return self._transport

    @staticmethod
    def human_review_config_path(project: str, location: str, processor: str,) -> str:
        """Return a fully-qualified human_review_config string."""
        return "projects/{project}/locations/{location}/processors/{processor}/humanReviewConfig".format(
            project=project, location=location, processor=processor,
        )

    @staticmethod
    def parse_human_review_config_path(path: str) -> Dict[str, str]:
        """Parse a human_review_config path into its component segments.

        Returns an empty dict when ``path`` does not match the expected shape.
        """
        m = re.match(
            r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)/processors/(?P<processor>.+?)/humanReviewConfig$",
            path,
        )
        return m.groupdict() if m else {}

    @staticmethod
    def processor_path(project: str, location: str, processor: str,) -> str:
        """Return a fully-qualified processor string."""
        return "projects/{project}/locations/{location}/processors/{processor}".format(
            project=project, location=location, processor=processor,
        )

    @staticmethod
    def parse_processor_path(path: str) -> Dict[str, str]:
        """Parse a processor path into its component segments.

        Returns an empty dict when ``path`` does not match the expected shape.
        """
        m = re.match(
            r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)/processors/(?P<processor>.+?)$",
            path,
        )
        return m.groupdict() if m else {}

    @staticmethod
    def common_billing_account_path(billing_account: str,) -> str:
        """Return a fully-qualified billing_account string."""
        return "billingAccounts/{billing_account}".format(
            billing_account=billing_account,
        )

    @staticmethod
    def parse_common_billing_account_path(path: str) -> Dict[str, str]:
        """Parse a billing_account path into its component segments."""
        m = re.match(r"^billingAccounts/(?P<billing_account>.+?)$", path)
        return m.groupdict() if m else {}

    @staticmethod
    def common_folder_path(folder: str,) -> str:
        """Return a fully-qualified folder string."""
        return "folders/{folder}".format(folder=folder,)

    @staticmethod
    def parse_common_folder_path(path: str) -> Dict[str, str]:
        """Parse a folder path into its component segments."""
        m = re.match(r"^folders/(?P<folder>.+?)$", path)
        return m.groupdict() if m else {}

    @staticmethod
    def common_organization_path(organization: str,) -> str:
        """Return a fully-qualified organization string."""
        return "organizations/{organization}".format(organization=organization,)

    @staticmethod
    def parse_common_organization_path(path: str) -> Dict[str, str]:
        """Parse a organization path into its component segments."""
        m = re.match(r"^organizations/(?P<organization>.+?)$", path)
        return m.groupdict() if m else {}

    @staticmethod
    def common_project_path(project: str,) -> str:
        """Return a fully-qualified project string."""
        return "projects/{project}".format(project=project,)

    @staticmethod
    def parse_common_project_path(path: str) -> Dict[str, str]:
        """Parse a project path into its component segments."""
        m = re.match(r"^projects/(?P<project>.+?)$", path)
        return m.groupdict() if m else {}

    @staticmethod
    def common_location_path(project: str, location: str,) -> str:
        """Return a fully-qualified location string."""
        return "projects/{project}/locations/{location}".format(
            project=project, location=location,
        )

    @staticmethod
    def parse_common_location_path(path: str) -> Dict[str, str]:
        """Parse a location path into its component segments."""
        m = re.match(r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)$", path)
        return m.groupdict() if m else {}

    def __init__(
        self,
        *,
        credentials: Optional[credentials.Credentials] = None,
        transport: Union[str, DocumentProcessorServiceTransport, None] = None,
        client_options: Optional[client_options_lib.ClientOptions] = None,
        client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO,
    ) -> None:
        """Instantiate the document processor service client.

        Args:
            credentials (Optional[google.auth.credentials.Credentials]): The
                authorization credentials to attach to requests. These
                credentials identify the application to the service; if none
                are specified, the client will attempt to ascertain the
                credentials from the environment.
            transport (Union[str, DocumentProcessorServiceTransport]): The
                transport to use. If set to None, a transport is chosen
                automatically.
            client_options (google.api_core.client_options.ClientOptions): Custom options for the
                client. It won't take effect if a ``transport`` instance is provided.
                (1) The ``api_endpoint`` property can be used to override the
                default endpoint provided by the client. GOOGLE_API_USE_MTLS_ENDPOINT
                environment variable can also be used to override the endpoint:
                "always" (always use the default mTLS endpoint), "never" (always
                use the default regular endpoint) and "auto" (auto switch to the
                default mTLS endpoint if client certificate is present, this is
                the default value). However, the ``api_endpoint`` property takes
                precedence if provided.
                (2) If GOOGLE_API_USE_CLIENT_CERTIFICATE environment variable
                is "true", then the ``client_cert_source`` property can be used
                to provide client certificate for mutual TLS transport. If
                not provided, the default SSL client certificate will be used if
                present. If GOOGLE_API_USE_CLIENT_CERTIFICATE is "false" or not
                set, no client certificate will be used.
            client_info (google.api_core.gapic_v1.client_info.ClientInfo):
                The client info used to send a user-agent string along with
                API requests. If ``None``, then default info will be used.
                Generally, you only need to set this if you're developing
                your own client library.

        Raises:
            google.auth.exceptions.MutualTLSChannelError: If mutual TLS transport
                creation failed for any reason.
            ValueError: If a transport instance is supplied together with
                credentials, a credentials file, or scopes.
        """
        if isinstance(client_options, dict):
            client_options = client_options_lib.from_dict(client_options)
        if client_options is None:
            client_options = client_options_lib.ClientOptions()

        # Create SSL credentials for mutual TLS if needed.
        # NOTE(review): ``distutils`` is deprecated (PEP 632) and absent from
        # Python 3.12+; this call needs a replacement before targeting those
        # interpreters.
        use_client_cert = bool(
            util.strtobool(os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE", "false"))
        )

        client_cert_source_func = None
        is_mtls = False
        if use_client_cert:
            if client_options.client_cert_source:
                is_mtls = True
                client_cert_source_func = client_options.client_cert_source
            else:
                is_mtls = mtls.has_default_client_cert_source()
                client_cert_source_func = (
                    mtls.default_client_cert_source() if is_mtls else None
                )

        # Figure out which api endpoint to use.
        if client_options.api_endpoint is not None:
            api_endpoint = client_options.api_endpoint
        else:
            use_mtls_env = os.getenv("GOOGLE_API_USE_MTLS_ENDPOINT", "auto")
            if use_mtls_env == "never":
                api_endpoint = self.DEFAULT_ENDPOINT
            elif use_mtls_env == "always":
                api_endpoint = self.DEFAULT_MTLS_ENDPOINT
            elif use_mtls_env == "auto":
                api_endpoint = (
                    self.DEFAULT_MTLS_ENDPOINT if is_mtls else self.DEFAULT_ENDPOINT
                )
            else:
                raise MutualTLSChannelError(
                    "Unsupported GOOGLE_API_USE_MTLS_ENDPOINT value. Accepted values: never, auto, always"
                )

        # Save or instantiate the transport.
        # Ordinarily, we provide the transport, but allowing a custom transport
        # instance provides an extensibility point for unusual situations.
        if isinstance(transport, DocumentProcessorServiceTransport):
            # transport is a DocumentProcessorServiceTransport instance.
            if credentials or client_options.credentials_file:
                raise ValueError(
                    "When providing a transport instance, "
                    "provide its credentials directly."
                )
            if client_options.scopes:
                raise ValueError(
                    "When providing a transport instance, "
                    "provide its scopes directly."
                )
            self._transport = transport
        else:
            Transport = type(self).get_transport_class(transport)
            self._transport = Transport(
                credentials=credentials,
                credentials_file=client_options.credentials_file,
                host=api_endpoint,
                scopes=client_options.scopes,
                client_cert_source_for_mtls=client_cert_source_func,
                quota_project_id=client_options.quota_project_id,
                client_info=client_info,
            )

    def process_document(
        self,
        request: document_processor_service.ProcessRequest = None,
        *,
        name: str = None,
        retry: retries.Retry = gapic_v1.method.DEFAULT,
        timeout: float = None,
        metadata: Sequence[Tuple[str, str]] = (),
    ) -> document_processor_service.ProcessResponse:
        r"""Processes a single document.

        Args:
            request (google.cloud.documentai_v1.types.ProcessRequest):
                The request object. Request message for the process
                document method.
            name (str):
                Required. The processor resource
                name.

                This corresponds to the ``name`` field
                on the ``request`` instance; if ``request`` is provided, this
                should not be set.

            retry (google.api_core.retry.Retry): Designation of what errors, if any,
                should be retried.
            timeout (float): The timeout for this request.
            metadata (Sequence[Tuple[str, str]]): Strings which should be
                sent along with the request as metadata.

        Returns:
            google.cloud.documentai_v1.types.ProcessResponse:
                Response message for the process
                document method.

        Raises:
            ValueError: If both ``request`` and a flattened field argument
                are supplied.
        """
        # Create or coerce a protobuf request object.
        # Sanity check: If we got a request object, we should *not* have
        # gotten any keyword arguments that map to the request.
        has_flattened_params = any([name])
        if request is not None and has_flattened_params:
            raise ValueError(
                "If the `request` argument is set, then none of "
                "the individual field arguments should be set."
            )

        # Minor optimization to avoid making a copy if the user passes
        # in a document_processor_service.ProcessRequest.
        # There's no risk of modifying the input as we've already verified
        # there are no flattened fields.
        if not isinstance(request, document_processor_service.ProcessRequest):
            request = document_processor_service.ProcessRequest(request)

            # If we have keyword arguments corresponding to fields on the
            # request, apply these.

            if name is not None:
                request.name = name

        # Wrap the RPC method; this adds retry and timeout information,
        # and friendly error handling.
        rpc = self._transport._wrapped_methods[self._transport.process_document]

        # Certain fields should be provided within the metadata header;
        # add these here.
        metadata = tuple(metadata) + (
            gapic_v1.routing_header.to_grpc_metadata((("name", request.name),)),
        )

        # Send the request.
        response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,)

        # Done; return the response.
        return response

    def batch_process_documents(
        self,
        request: document_processor_service.BatchProcessRequest = None,
        *,
        name: str = None,
        retry: retries.Retry = gapic_v1.method.DEFAULT,
        timeout: float = None,
        metadata: Sequence[Tuple[str, str]] = (),
    ) -> operation.Operation:
        r"""LRO endpoint to batch process many documents. The output is
        written to Cloud Storage as JSON in the [Document] format.

        Args:
            request (google.cloud.documentai_v1.types.BatchProcessRequest):
                The request object. Request message for batch process
                document method.
            name (str):
                Required. The processor resource
                name.

                This corresponds to the ``name`` field
                on the ``request`` instance; if ``request`` is provided, this
                should not be set.

            retry (google.api_core.retry.Retry): Designation of what errors, if any,
                should be retried.
            timeout (float): The timeout for this request.
            metadata (Sequence[Tuple[str, str]]): Strings which should be
                sent along with the request as metadata.

        Returns:
            google.api_core.operation.Operation:
                An object representing a long-running operation.

                The result type for the operation will be
                :class:`google.cloud.documentai_v1.types.BatchProcessResponse`
                Response message for batch process document method.

        Raises:
            ValueError: If both ``request`` and a flattened field argument
                are supplied.
        """
        # Create or coerce a protobuf request object.
        # Sanity check: If we got a request object, we should *not* have
        # gotten any keyword arguments that map to the request.
        has_flattened_params = any([name])
        if request is not None and has_flattened_params:
            raise ValueError(
                "If the `request` argument is set, then none of "
                "the individual field arguments should be set."
            )

        # Minor optimization to avoid making a copy if the user passes
        # in a document_processor_service.BatchProcessRequest.
        # There's no risk of modifying the input as we've already verified
        # there are no flattened fields.
        if not isinstance(request, document_processor_service.BatchProcessRequest):
            request = document_processor_service.BatchProcessRequest(request)

            # If we have keyword arguments corresponding to fields on the
            # request, apply these.

            if name is not None:
                request.name = name

        # Wrap the RPC method; this adds retry and timeout information,
        # and friendly error handling.
        rpc = self._transport._wrapped_methods[self._transport.batch_process_documents]

        # Certain fields should be provided within the metadata header;
        # add these here.
        metadata = tuple(metadata) + (
            gapic_v1.routing_header.to_grpc_metadata((("name", request.name),)),
        )

        # Send the request.
        response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,)

        # Wrap the response in an operation future.
        response = operation.from_gapic(
            response,
            self._transport.operations_client,
            document_processor_service.BatchProcessResponse,
            metadata_type=document_processor_service.BatchProcessMetadata,
        )

        # Done; return the response.
        return response

    def review_document(
        self,
        request: document_processor_service.ReviewDocumentRequest = None,
        *,
        human_review_config: str = None,
        retry: retries.Retry = gapic_v1.method.DEFAULT,
        timeout: float = None,
        metadata: Sequence[Tuple[str, str]] = (),
    ) -> operation.Operation:
        r"""Send a document for Human Review. The input document
        should be processed by the specified processor.

        Args:
            request (google.cloud.documentai_v1.types.ReviewDocumentRequest):
                The request object. Request message for review document
                method.
            human_review_config (str):
                Required. The resource name of the
                HumanReviewConfig that the document will
                be reviewed with.

                This corresponds to the ``human_review_config`` field
                on the ``request`` instance; if ``request`` is provided, this
                should not be set.

            retry (google.api_core.retry.Retry): Designation of what errors, if any,
                should be retried.
            timeout (float): The timeout for this request.
            metadata (Sequence[Tuple[str, str]]): Strings which should be
                sent along with the request as metadata.

        Returns:
            google.api_core.operation.Operation:
                An object representing a long-running operation.

                The result type for the operation will be
                :class:`google.cloud.documentai_v1.types.ReviewDocumentResponse`
                Response message for review document method.

        Raises:
            ValueError: If both ``request`` and a flattened field argument
                are supplied.
        """
        # Create or coerce a protobuf request object.
        # Sanity check: If we got a request object, we should *not* have
        # gotten any keyword arguments that map to the request.
        has_flattened_params = any([human_review_config])
        if request is not None and has_flattened_params:
            raise ValueError(
                "If the `request` argument is set, then none of "
                "the individual field arguments should be set."
            )

        # Minor optimization to avoid making a copy if the user passes
        # in a document_processor_service.ReviewDocumentRequest.
        # There's no risk of modifying the input as we've already verified
        # there are no flattened fields.
        if not isinstance(request, document_processor_service.ReviewDocumentRequest):
            request = document_processor_service.ReviewDocumentRequest(request)

            # If we have keyword arguments corresponding to fields on the
            # request, apply these.

            if human_review_config is not None:
                request.human_review_config = human_review_config

        # Wrap the RPC method; this adds retry and timeout information,
        # and friendly error handling.
        rpc = self._transport._wrapped_methods[self._transport.review_document]

        # Certain fields should be provided within the metadata header;
        # add these here.
        metadata = tuple(metadata) + (
            gapic_v1.routing_header.to_grpc_metadata(
                (("human_review_config", request.human_review_config),)
            ),
        )

        # Send the request.
        response = rpc(request, retry=retry, timeout=timeout, metadata=metadata,)

        # Wrap the response in an operation future.
        response = operation.from_gapic(
            response,
            self._transport.operations_client,
            document_processor_service.ReviewDocumentResponse,
            metadata_type=document_processor_service.ReviewDocumentOperationMetadata,
        )

        # Done; return the response.
        return response


# Rebinds the imported DEFAULT_CLIENT_INFO using the installed distribution's
# version; falls back to a version-less ClientInfo when the package metadata
# is not available.
try:
    DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo(
        gapic_version=pkg_resources.get_distribution(
            "google-cloud-documentai",
        ).version,
    )
except pkg_resources.DistributionNotFound:
    DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo()


__all__ = ("DocumentProcessorServiceClient",)
# Registry of available transports, keyed by label; insertion order is
# preserved so "grpc" stays first.
_transport_registry = OrderedDict(
    (
        ("grpc", DocumentProcessorServiceGrpcTransport),
        ("grpc_asyncio", DocumentProcessorServiceGrpcAsyncIOTransport),
    )
)  # type: Dict[str, Type[DocumentProcessorServiceTransport]]

# Names re-exported by the transports package.
__all__ = (
    "DocumentProcessorServiceTransport",
    "DocumentProcessorServiceGrpcTransport",
    "DocumentProcessorServiceGrpcAsyncIOTransport",
)
# Build the default client info from the installed distribution's version;
# fall back to a version-less ClientInfo when the package metadata is missing.
try:
    DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo(
        gapic_version=pkg_resources.get_distribution(
            "google-cloud-documentai",
        ).version,
    )
except pkg_resources.DistributionNotFound:
    DEFAULT_CLIENT_INFO = gapic_v1.client_info.ClientInfo()


class DocumentProcessorServiceTransport(abc.ABC):
    """Abstract transport class for DocumentProcessorService."""

    AUTH_SCOPES = ("https://www.googleapis.com/auth/cloud-platform",)

    def __init__(
        self,
        *,
        host: str = "us-documentai.googleapis.com",
        credentials: credentials.Credentials = None,
        credentials_file: typing.Optional[str] = None,
        scopes: typing.Optional[typing.Sequence[str]] = AUTH_SCOPES,
        quota_project_id: typing.Optional[str] = None,
        client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO,
        **kwargs,
    ) -> None:
        """Instantiate the transport.

        Args:
            host (Optional[str]): The hostname to connect to.
            credentials (Optional[google.auth.credentials.Credentials]): The
                authorization credentials to attach to requests. These
                credentials identify the application to the service; if none
                are specified, the client will attempt to ascertain the
                credentials from the environment.
            credentials_file (Optional[str]): A file with credentials that can
                be loaded with :func:`google.auth.load_credentials_from_file`.
                This argument is mutually exclusive with credentials.
            scopes (Optional[Sequence[str]]): A list of scopes.
            quota_project_id (Optional[str]): An optional project to use for billing
                and quota.
            client_info (google.api_core.gapic_v1.client_info.ClientInfo):
                The client info used to send a user-agent string along with
                API requests. If ``None``, then default info will be used.
                Generally, you only need to set this if you're developing
                your own client library.

        Raises:
            google.api_core.exceptions.DuplicateCredentialArgs: If both
                ``credentials`` and ``credentials_file`` are passed.
        """
        # A host without an explicit port gets the HTTPS default, 443.
        self._host = host if ":" in host else host + ":443"

        # Fall back to the class-level scopes when none (or empty) are given.
        self._scopes = scopes or self.AUTH_SCOPES

        if credentials and credentials_file:
            raise exceptions.DuplicateCredentialArgs(
                "'credentials_file' and 'credentials' are mutually exclusive"
            )

        # Resolve credentials: an explicit file wins, then explicit
        # credentials, and finally the environment defaults.
        if credentials_file is not None:
            resolved, _ = auth.load_credentials_from_file(
                credentials_file, scopes=self._scopes, quota_project_id=quota_project_id
            )
        elif credentials is None:
            resolved, _ = auth.default(
                scopes=self._scopes, quota_project_id=quota_project_id
            )
        else:
            resolved = credentials

        self._credentials = resolved

    def _prep_wrapped_messages(self, client_info):
        """Precompute the retry/timeout-wrapped callables for every RPC.

        The result is stored in ``self._wrapped_methods``, keyed by the
        transport's own RPC callables.
        """

        def _with_defaults(rpc):
            # Each RPC gets its own Retry instance with identical settings.
            return gapic_v1.method.wrap_method(
                rpc,
                default_retry=retries.Retry(
                    initial=0.1,
                    maximum=60.0,
                    multiplier=1.3,
                    predicate=retries.if_exception_type(
                        exceptions.DeadlineExceeded, exceptions.ServiceUnavailable,
                    ),
                    deadline=120.0,
                ),
                default_timeout=120.0,
                client_info=client_info,
            )

        self._wrapped_methods = {
            self.process_document: _with_defaults(self.process_document),
            self.batch_process_documents: _with_defaults(
                self.batch_process_documents
            ),
            self.review_document: _with_defaults(self.review_document),
        }

    @property
    def operations_client(self) -> operations_v1.OperationsClient:
        """Return the client designed to process long-running operations."""
        raise NotImplementedError()

    @property
    def process_document(
        self,
    ) -> typing.Callable[
        [document_processor_service.ProcessRequest],
        typing.Union[
            document_processor_service.ProcessResponse,
            typing.Awaitable[document_processor_service.ProcessResponse],
        ],
    ]:
        """Callable for the process-document RPC; concrete transports override this."""
        raise NotImplementedError()

    @property
    def batch_process_documents(
        self,
    ) -> typing.Callable[
        [document_processor_service.BatchProcessRequest],
        typing.Union[operations.Operation, typing.Awaitable[operations.Operation]],
    ]:
        """Callable for the batch-process RPC; concrete transports override this."""
        raise NotImplementedError()

    @property
    def review_document(
        self,
    ) -> typing.Callable[
        [document_processor_service.ReviewDocumentRequest],
        typing.Union[operations.Operation, typing.Awaitable[operations.Operation]],
    ]:
        """Callable for the review-document RPC; concrete transports override this."""
        raise NotImplementedError()


__all__ = ("DocumentProcessorServiceTransport",)
Processors are built + using state-of-the-art Google AI such as natural language, + computer vision, and translation to extract structured + information from unstructured or semi-structured documents. + + This class defines the same methods as the primary client, so the + primary client can load the underlying transport implementation + and call it. + + It sends protocol buffers over the wire using gRPC (which is built on + top of HTTP/2); the ``grpcio`` package must be installed. + """ + + _stubs: Dict[str, Callable] + + def __init__( + self, + *, + host: str = "us-documentai.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: str = None, + scopes: Sequence[str] = None, + channel: grpc.Channel = None, + api_mtls_endpoint: str = None, + client_cert_source: Callable[[], Tuple[bytes, bytes]] = None, + ssl_channel_credentials: grpc.ChannelCredentials = None, + client_cert_source_for_mtls: Callable[[], Tuple[bytes, bytes]] = None, + quota_project_id: Optional[str] = None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + ) -> None: + """Instantiate the transport. + + Args: + host (Optional[str]): The hostname to connect to. + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests. These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + This argument is ignored if ``channel`` is provided. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is ignored if ``channel`` is provided. + scopes (Optional(Sequence[str])): A list of scopes. This argument is + ignored if ``channel`` is provided. + channel (Optional[grpc.Channel]): A ``Channel`` instance through + which to make calls. + api_mtls_endpoint (Optional[str]): Deprecated. The mutual TLS endpoint. 
+ If provided, it overrides the ``host`` argument and tries to create + a mutual TLS channel with client SSL credentials from + ``client_cert_source`` or applicatin default SSL credentials. + client_cert_source (Optional[Callable[[], Tuple[bytes, bytes]]]): + Deprecated. A callback to provide client SSL certificate bytes and + private key bytes, both in PEM format. It is ignored if + ``api_mtls_endpoint`` is None. + ssl_channel_credentials (grpc.ChannelCredentials): SSL credentials + for grpc channel. It is ignored if ``channel`` is provided. + client_cert_source_for_mtls (Optional[Callable[[], Tuple[bytes, bytes]]]): + A callback to provide client certificate bytes and private key bytes, + both in PEM format. It is used to configure mutual TLS channel. It is + ignored if ``channel`` or ``ssl_channel_credentials`` is provided. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing + your own client library. + + Raises: + google.auth.exceptions.MutualTLSChannelError: If mutual TLS transport + creation failed for any reason. + google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` + and ``credentials_file`` are passed. + """ + self._grpc_channel = None + self._ssl_channel_credentials = ssl_channel_credentials + self._stubs: Dict[str, Callable] = {} + self._operations_client = None + + if api_mtls_endpoint: + warnings.warn("api_mtls_endpoint is deprecated", DeprecationWarning) + if client_cert_source: + warnings.warn("client_cert_source is deprecated", DeprecationWarning) + + if channel: + # Ignore credentials if a channel was passed. + credentials = False + # If a channel was explicitly provided, set it. 
+ self._grpc_channel = channel + self._ssl_channel_credentials = None + + else: + if api_mtls_endpoint: + host = api_mtls_endpoint + + # Create SSL credentials with client_cert_source or application + # default SSL credentials. + if client_cert_source: + cert, key = client_cert_source() + self._ssl_channel_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + else: + self._ssl_channel_credentials = SslCredentials().ssl_credentials + + else: + if client_cert_source_for_mtls and not ssl_channel_credentials: + cert, key = client_cert_source_for_mtls() + self._ssl_channel_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + + # The base transport sets the host, credentials and scopes + super().__init__( + host=host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes, + quota_project_id=quota_project_id, + client_info=client_info, + ) + + if not self._grpc_channel: + self._grpc_channel = type(self).create_channel( + self._host, + credentials=self._credentials, + credentials_file=credentials_file, + scopes=self._scopes, + ssl_credentials=self._ssl_channel_credentials, + quota_project_id=quota_project_id, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + + # Wrap messages. This must be done after self._grpc_channel exists + self._prep_wrapped_messages(client_info) + + @classmethod + def create_channel( + cls, + host: str = "us-documentai.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: str = None, + scopes: Optional[Sequence[str]] = None, + quota_project_id: Optional[str] = None, + **kwargs, + ) -> grpc.Channel: + """Create and return a gRPC channel object. + Args: + host (Optional[str]): The host for the channel to use. + credentials (Optional[~.Credentials]): The + authorization credentials to attach to requests. These + credentials identify this application to the service. 
If + none are specified, the client will attempt to ascertain + the credentials from the environment. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is mutually exclusive with credentials. + scopes (Optional[Sequence[str]]): An optional list of scopes needed for this + service. These are only used when credentials are not specified and + are passed to :func:`google.auth.default`. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + kwargs (Optional[dict]): Keyword arguments, which are passed to the + channel creation. + Returns: + grpc.Channel: A gRPC channel object. + + Raises: + google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` + and ``credentials_file`` are passed. + """ + scopes = scopes or cls.AUTH_SCOPES + return grpc_helpers.create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes, + quota_project_id=quota_project_id, + **kwargs, + ) + + @property + def grpc_channel(self) -> grpc.Channel: + """Return the channel designed to connect to this service. + """ + return self._grpc_channel + + @property + def operations_client(self) -> operations_v1.OperationsClient: + """Create the client designed to process long-running operations. + + This property caches on the instance; repeated calls return the same + client. + """ + # Sanity check: Only create a new client if we do not already have one. + if self._operations_client is None: + self._operations_client = operations_v1.OperationsClient(self.grpc_channel) + + # Return the client from cache. + return self._operations_client + + @property + def process_document( + self, + ) -> Callable[ + [document_processor_service.ProcessRequest], + document_processor_service.ProcessResponse, + ]: + r"""Return a callable for the process document method over gRPC. + + Processes a single document.
+ + Returns: + Callable[[~.ProcessRequest], + ~.ProcessResponse]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "process_document" not in self._stubs: + self._stubs["process_document"] = self.grpc_channel.unary_unary( + "/google.cloud.documentai.v1.DocumentProcessorService/ProcessDocument", + request_serializer=document_processor_service.ProcessRequest.serialize, + response_deserializer=document_processor_service.ProcessResponse.deserialize, + ) + return self._stubs["process_document"] + + @property + def batch_process_documents( + self, + ) -> Callable[ + [document_processor_service.BatchProcessRequest], operations.Operation + ]: + r"""Return a callable for the batch process documents method over gRPC. + + LRO endpoint to batch process many documents. The output is + written to Cloud Storage as JSON in the [Document] format. + + Returns: + Callable[[~.BatchProcessRequest], + ~.Operation]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. 
+ if "batch_process_documents" not in self._stubs: + self._stubs["batch_process_documents"] = self.grpc_channel.unary_unary( + "/google.cloud.documentai.v1.DocumentProcessorService/BatchProcessDocuments", + request_serializer=document_processor_service.BatchProcessRequest.serialize, + response_deserializer=operations.Operation.FromString, + ) + return self._stubs["batch_process_documents"] + + @property + def review_document( + self, + ) -> Callable[ + [document_processor_service.ReviewDocumentRequest], operations.Operation + ]: + r"""Return a callable for the review document method over gRPC. + + Send a document for Human Review. The input document + should be processed by the specified processor. + + Returns: + Callable[[~.ReviewDocumentRequest], + ~.Operation]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. 
+ if "review_document" not in self._stubs: + self._stubs["review_document"] = self.grpc_channel.unary_unary( + "/google.cloud.documentai.v1.DocumentProcessorService/ReviewDocument", + request_serializer=document_processor_service.ReviewDocumentRequest.serialize, + response_deserializer=operations.Operation.FromString, + ) + return self._stubs["review_document"] + + +__all__ = ("DocumentProcessorServiceGrpcTransport",) diff --git a/google/cloud/documentai_v1/services/document_processor_service/transports/grpc_asyncio.py b/google/cloud/documentai_v1/services/document_processor_service/transports/grpc_asyncio.py new file mode 100644 index 00000000..3b172f81 --- /dev/null +++ b/google/cloud/documentai_v1/services/document_processor_service/transports/grpc_asyncio.py @@ -0,0 +1,341 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import warnings +from typing import Awaitable, Callable, Dict, Optional, Sequence, Tuple + +from google.api_core import gapic_v1 # type: ignore +from google.api_core import grpc_helpers_async # type: ignore +from google.api_core import operations_v1 # type: ignore +from google import auth # type: ignore +from google.auth import credentials # type: ignore +from google.auth.transport.grpc import SslCredentials # type: ignore + +import grpc # type: ignore +from grpc.experimental import aio # type: ignore + +from google.cloud.documentai_v1.types import document_processor_service +from google.longrunning import operations_pb2 as operations # type: ignore + +from .base import DocumentProcessorServiceTransport, DEFAULT_CLIENT_INFO +from .grpc import DocumentProcessorServiceGrpcTransport + + +class DocumentProcessorServiceGrpcAsyncIOTransport(DocumentProcessorServiceTransport): + """gRPC AsyncIO backend transport for DocumentProcessorService. + + Service to call Cloud DocumentAI to process documents + according to the processor's definition. Processors are built + using state-of-the-art Google AI such as natural language, + computer vision, and translation to extract structured + information from unstructured or semi-structured documents. + + This class defines the same methods as the primary client, so the + primary client can load the underlying transport implementation + and call it. + + It sends protocol buffers over the wire using gRPC (which is built on + top of HTTP/2); the ``grpcio`` package must be installed. + """ + + _grpc_channel: aio.Channel + _stubs: Dict[str, Callable] = {} + + @classmethod + def create_channel( + cls, + host: str = "us-documentai.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: Optional[str] = None, + scopes: Optional[Sequence[str]] = None, + quota_project_id: Optional[str] = None, + **kwargs, + ) -> aio.Channel: + """Create and return a gRPC AsyncIO channel object. 
+ Args: + host (Optional[str]): The host for the channel to use. + credentials (Optional[~.Credentials]): The + authorization credentials to attach to requests. These + credentials identify this application to the service. If + none are specified, the client will attempt to ascertain + the credentials from the environment. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is mutually exclusive with credentials. + scopes (Optional[Sequence[str]]): An optional list of scopes needed for this + service. These are only used when credentials are not specified and + are passed to :func:`google.auth.default`. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + kwargs (Optional[dict]): Keyword arguments, which are passed to the + channel creation. + Returns: + aio.Channel: A gRPC AsyncIO channel object. + """ + scopes = scopes or cls.AUTH_SCOPES + return grpc_helpers_async.create_channel( + host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes, + quota_project_id=quota_project_id, + **kwargs, + ) + + def __init__( + self, + *, + host: str = "us-documentai.googleapis.com", + credentials: credentials.Credentials = None, + credentials_file: Optional[str] = None, + scopes: Optional[Sequence[str]] = None, + channel: aio.Channel = None, + api_mtls_endpoint: str = None, + client_cert_source: Callable[[], Tuple[bytes, bytes]] = None, + ssl_channel_credentials: grpc.ChannelCredentials = None, + client_cert_source_for_mtls: Callable[[], Tuple[bytes, bytes]] = None, + quota_project_id=None, + client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, + ) -> None: + """Instantiate the transport. + + Args: + host (Optional[str]): The hostname to connect to. + credentials (Optional[google.auth.credentials.Credentials]): The + authorization credentials to attach to requests.
These + credentials identify the application to the service; if none + are specified, the client will attempt to ascertain the + credentials from the environment. + This argument is ignored if ``channel`` is provided. + credentials_file (Optional[str]): A file with credentials that can + be loaded with :func:`google.auth.load_credentials_from_file`. + This argument is ignored if ``channel`` is provided. + scopes (Optional[Sequence[str]]): An optional list of scopes needed for this + service. These are only used when credentials are not specified and + are passed to :func:`google.auth.default`. + channel (Optional[aio.Channel]): A ``Channel`` instance through + which to make calls. + api_mtls_endpoint (Optional[str]): Deprecated. The mutual TLS endpoint. + If provided, it overrides the ``host`` argument and tries to create + a mutual TLS channel with client SSL credentials from + ``client_cert_source`` or application default SSL credentials. + client_cert_source (Optional[Callable[[], Tuple[bytes, bytes]]]): + Deprecated. A callback to provide client SSL certificate bytes and + private key bytes, both in PEM format. It is ignored if + ``api_mtls_endpoint`` is None. + ssl_channel_credentials (grpc.ChannelCredentials): SSL credentials + for grpc channel. It is ignored if ``channel`` is provided. + client_cert_source_for_mtls (Optional[Callable[[], Tuple[bytes, bytes]]]): + A callback to provide client certificate bytes and private key bytes, + both in PEM format. It is used to configure mutual TLS channel. It is + ignored if ``channel`` or ``ssl_channel_credentials`` is provided. + quota_project_id (Optional[str]): An optional project to use for billing + and quota. + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing + your own client library.
+ + Raises: + google.auth.exceptions.MutualTLSChannelError: If mutual TLS transport + creation failed for any reason. + google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` + and ``credentials_file`` are passed. + """ + self._grpc_channel = None + self._ssl_channel_credentials = ssl_channel_credentials + self._stubs: Dict[str, Callable] = {} + self._operations_client = None + + if api_mtls_endpoint: + warnings.warn("api_mtls_endpoint is deprecated", DeprecationWarning) + if client_cert_source: + warnings.warn("client_cert_source is deprecated", DeprecationWarning) + + if channel: + # Ignore credentials if a channel was passed. + credentials = False + # If a channel was explicitly provided, set it. + self._grpc_channel = channel + self._ssl_channel_credentials = None + + else: + if api_mtls_endpoint: + host = api_mtls_endpoint + + # Create SSL credentials with client_cert_source or application + # default SSL credentials. + if client_cert_source: + cert, key = client_cert_source() + self._ssl_channel_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + else: + self._ssl_channel_credentials = SslCredentials().ssl_credentials + + else: + if client_cert_source_for_mtls and not ssl_channel_credentials: + cert, key = client_cert_source_for_mtls() + self._ssl_channel_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + + # The base transport sets the host, credentials and scopes + super().__init__( + host=host, + credentials=credentials, + credentials_file=credentials_file, + scopes=scopes, + quota_project_id=quota_project_id, + client_info=client_info, + ) + + if not self._grpc_channel: + self._grpc_channel = type(self).create_channel( + self._host, + credentials=self._credentials, + credentials_file=credentials_file, + scopes=self._scopes, + ssl_credentials=self._ssl_channel_credentials, + quota_project_id=quota_project_id, + options=[ + ("grpc.max_send_message_length",
-1), + ("grpc.max_receive_message_length", -1), + ], + ) + + # Wrap messages. This must be done after self._grpc_channel exists + self._prep_wrapped_messages(client_info) + + @property + def grpc_channel(self) -> aio.Channel: + """Create the channel designed to connect to this service. + + This property caches on the instance; repeated calls return + the same channel. + """ + # Return the channel from cache. + return self._grpc_channel + + @property + def operations_client(self) -> operations_v1.OperationsAsyncClient: + """Create the client designed to process long-running operations. + + This property caches on the instance; repeated calls return the same + client. + """ + # Sanity check: Only create a new client if we do not already have one. + if self._operations_client is None: + self._operations_client = operations_v1.OperationsAsyncClient( + self.grpc_channel + ) + + # Return the client from cache. + return self._operations_client + + @property + def process_document( + self, + ) -> Callable[ + [document_processor_service.ProcessRequest], + Awaitable[document_processor_service.ProcessResponse], + ]: + r"""Return a callable for the process document method over gRPC. + + Processes a single document. + + Returns: + Callable[[~.ProcessRequest], + Awaitable[~.ProcessResponse]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. 
+ if "process_document" not in self._stubs: + self._stubs["process_document"] = self.grpc_channel.unary_unary( + "/google.cloud.documentai.v1.DocumentProcessorService/ProcessDocument", + request_serializer=document_processor_service.ProcessRequest.serialize, + response_deserializer=document_processor_service.ProcessResponse.deserialize, + ) + return self._stubs["process_document"] + + @property + def batch_process_documents( + self, + ) -> Callable[ + [document_processor_service.BatchProcessRequest], + Awaitable[operations.Operation], + ]: + r"""Return a callable for the batch process documents method over gRPC. + + LRO endpoint to batch process many documents. The output is + written to Cloud Storage as JSON in the [Document] format. + + Returns: + Callable[[~.BatchProcessRequest], + Awaitable[~.Operation]]: + A function that, when called, will call the underlying RPC + on the server. + """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "batch_process_documents" not in self._stubs: + self._stubs["batch_process_documents"] = self.grpc_channel.unary_unary( + "/google.cloud.documentai.v1.DocumentProcessorService/BatchProcessDocuments", + request_serializer=document_processor_service.BatchProcessRequest.serialize, + response_deserializer=operations.Operation.FromString, + ) + return self._stubs["batch_process_documents"] + + @property + def review_document( + self, + ) -> Callable[ + [document_processor_service.ReviewDocumentRequest], + Awaitable[operations.Operation], + ]: + r"""Return a callable for the review document method over gRPC. + + Send a document for Human Review. The input document + should be processed by the specified processor. + + Returns: + Callable[[~.ReviewDocumentRequest], + Awaitable[~.Operation]]: + A function that, when called, will call the underlying RPC + on the server. 
+ """ + # Generate a "stub function" on-the-fly which will actually make + # the request. + # gRPC handles serialization and deserialization, so we just need + # to pass in the functions for each. + if "review_document" not in self._stubs: + self._stubs["review_document"] = self.grpc_channel.unary_unary( + "/google.cloud.documentai.v1.DocumentProcessorService/ReviewDocument", + request_serializer=document_processor_service.ReviewDocumentRequest.serialize, + response_deserializer=operations.Operation.FromString, + ) + return self._stubs["review_document"] + + +__all__ = ("DocumentProcessorServiceGrpcAsyncIOTransport",) diff --git a/google/cloud/documentai_v1/types/__init__.py b/google/cloud/documentai_v1/types/__init__.py new file mode 100644 index 00000000..0d60bd37 --- /dev/null +++ b/google/cloud/documentai_v1/types/__init__.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from .document import Document +from .document_io import ( + BatchDocumentsInputConfig, + DocumentOutputConfig, + GcsDocument, + GcsDocuments, + GcsPrefix, + RawDocument, +) +from .document_processor_service import ( + BatchProcessMetadata, + BatchProcessRequest, + BatchProcessResponse, + CommonOperationMetadata, + HumanReviewStatus, + ProcessRequest, + ProcessResponse, + ReviewDocumentOperationMetadata, + ReviewDocumentRequest, + ReviewDocumentResponse, +) +from .geometry import ( + BoundingPoly, + NormalizedVertex, + Vertex, +) + +__all__ = ( + "Document", + "BatchDocumentsInputConfig", + "DocumentOutputConfig", + "GcsDocument", + "GcsDocuments", + "GcsPrefix", + "RawDocument", + "BatchProcessMetadata", + "BatchProcessRequest", + "BatchProcessResponse", + "CommonOperationMetadata", + "HumanReviewStatus", + "ProcessRequest", + "ProcessResponse", + "ReviewDocumentOperationMetadata", + "ReviewDocumentRequest", + "ReviewDocumentResponse", + "BoundingPoly", + "NormalizedVertex", + "Vertex", +) diff --git a/google/cloud/documentai_v1/types/document.py b/google/cloud/documentai_v1/types/document.py new file mode 100644 index 00000000..781e3c55 --- /dev/null +++ b/google/cloud/documentai_v1/types/document.py @@ -0,0 +1,1109 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import proto # type: ignore + + +from google.cloud.documentai_v1.types import geometry +from google.protobuf import timestamp_pb2 as timestamp # type: ignore +from google.rpc import status_pb2 as status # type: ignore +from google.type import color_pb2 as gt_color # type: ignore +from google.type import date_pb2 as date # type: ignore +from google.type import datetime_pb2 as datetime # type: ignore +from google.type import money_pb2 as money # type: ignore +from google.type import postal_address_pb2 as postal_address # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.documentai.v1", manifest={"Document",}, +) + + +class Document(proto.Message): + r"""Document represents the canonical document resource in + Document Understanding AI. + It is an interchange format that provides insights into + documents and allows for collaboration between users and + Document Understanding AI to iterate and optimize for quality. + + Attributes: + uri (str): + Optional. Currently supports Google Cloud Storage URI of the + form ``gs://bucket_name/object_name``. Object versioning is + not supported. See `Google Cloud Storage Request + URIs `__ + for more info. + content (bytes): + Optional. Inline document content, represented as a stream + of bytes. Note: As with all ``bytes`` fields, protobuffers + use a pure binary representation, whereas JSON + representations use base64. + mime_type (str): + An IANA published MIME type (also referred to + as media type). For more information, see + https://www.iana.org/assignments/media- + types/media-types.xhtml. + text (str): + Optional. UTF-8 encoded text in reading order + from the document. + text_styles (Sequence[google.cloud.documentai_v1.types.Document.Style]): + Styles for the + [Document.text][google.cloud.documentai.v1.Document.text]. + pages (Sequence[google.cloud.documentai_v1.types.Document.Page]): + Visual page layout for the + [Document][google.cloud.documentai.v1.Document]. 
+ entities (Sequence[google.cloud.documentai_v1.types.Document.Entity]): + A list of entities detected on + [Document.text][google.cloud.documentai.v1.Document.text]. + For document shards, entities in this list may cross shard + boundaries. + entity_relations (Sequence[google.cloud.documentai_v1.types.Document.EntityRelation]): + Relationship among + [Document.entities][google.cloud.documentai.v1.Document.entities]. + text_changes (Sequence[google.cloud.documentai_v1.types.Document.TextChange]): + A list of text corrections made to [Document.text]. This is + usually used for annotating corrections to OCR mistakes. + Text changes for a given revision may not overlap with each + other. + shard_info (google.cloud.documentai_v1.types.Document.ShardInfo): + Information about the sharding if this + document is sharded part of a larger document. + If the document is not sharded, this message is + not specified. + error (google.rpc.status_pb2.Status): + Any error that occurred while processing this + document. + revisions (Sequence[google.cloud.documentai_v1.types.Document.Revision]): + Revision history of this document. + """ + + class ShardInfo(proto.Message): + r"""For a large document, sharding may be performed to produce + several document shards. Each document shard contains this field + to detail which shard it is. + + Attributes: + shard_index (int): + The 0-based index of this shard. + shard_count (int): + Total number of shards. + text_offset (int): + The index of the first character in + [Document.text][google.cloud.documentai.v1.Document.text] in + the overall document global text. + """ + + shard_index = proto.Field(proto.INT64, number=1) + + shard_count = proto.Field(proto.INT64, number=2) + + text_offset = proto.Field(proto.INT64, number=3) + + class Style(proto.Message): + r"""Annotation for common text style attributes. This adheres to + CSS conventions as much as possible. 
+ + Attributes: + text_anchor (google.cloud.documentai_v1.types.Document.TextAnchor): + Text anchor indexing into the + [Document.text][google.cloud.documentai.v1.Document.text]. + color (google.type.color_pb2.Color): + Text color. + background_color (google.type.color_pb2.Color): + Text background color. + font_weight (str): + Font weight. Possible values are normal, bold, bolder, and + lighter. https://www.w3schools.com/cssref/pr_font_weight.asp + text_style (str): + Text style. Possible values are normal, italic, and oblique. + https://www.w3schools.com/cssref/pr_font_font-style.asp + text_decoration (str): + Text decoration. Follows CSS standard. + https://www.w3schools.com/cssref/pr_text_text-decoration.asp + font_size (google.cloud.documentai_v1.types.Document.Style.FontSize): + Font size. + """ + + class FontSize(proto.Message): + r"""Font size with unit. + + Attributes: + size (float): + Font size for the text. + unit (str): + Unit for the font size. Follows CSS naming + (in, px, pt, etc.). + """ + + size = proto.Field(proto.FLOAT, number=1) + + unit = proto.Field(proto.STRING, number=2) + + text_anchor = proto.Field( + proto.MESSAGE, number=1, message="Document.TextAnchor", + ) + + color = proto.Field(proto.MESSAGE, number=2, message=gt_color.Color,) + + background_color = proto.Field(proto.MESSAGE, number=3, message=gt_color.Color,) + + font_weight = proto.Field(proto.STRING, number=4) + + text_style = proto.Field(proto.STRING, number=5) + + text_decoration = proto.Field(proto.STRING, number=6) + + font_size = proto.Field( + proto.MESSAGE, number=7, message="Document.Style.FontSize", + ) + + class Page(proto.Message): + r"""A page in a [Document][google.cloud.documentai.v1.Document]. + + Attributes: + page_number (int): + 1-based index for current + [Page][google.cloud.documentai.v1.Document.Page] in a parent + [Document][google.cloud.documentai.v1.Document]. 
Useful when + a page is taken out of a + [Document][google.cloud.documentai.v1.Document] for + individual processing. + image (google.cloud.documentai_v1.types.Document.Page.Image): + Rendered image for this page. This image is + preprocessed to remove any skew, rotation, and + distortions such that the annotation bounding + boxes can be upright and axis-aligned. + transforms (Sequence[google.cloud.documentai_v1.types.Document.Page.Matrix]): + Transformation matrices that were applied to the original + document image to produce + [Page.image][google.cloud.documentai.v1.Document.Page.image]. + dimension (google.cloud.documentai_v1.types.Document.Page.Dimension): + Physical dimension of the page. + layout (google.cloud.documentai_v1.types.Document.Page.Layout): + [Layout][google.cloud.documentai.v1.Document.Page.Layout] + for the page. + detected_languages (Sequence[google.cloud.documentai_v1.types.Document.Page.DetectedLanguage]): + A list of detected languages together with + confidence. + blocks (Sequence[google.cloud.documentai_v1.types.Document.Page.Block]): + A list of visually detected text blocks on + the page. A block has a set of lines (collected + into paragraphs) that have a common line-spacing + and orientation. + paragraphs (Sequence[google.cloud.documentai_v1.types.Document.Page.Paragraph]): + A list of visually detected text paragraphs + on the page. A collection of lines that a human + would perceive as a paragraph. + lines (Sequence[google.cloud.documentai_v1.types.Document.Page.Line]): + A list of visually detected text lines on the + page. A collection of tokens that a human would + perceive as a line. + tokens (Sequence[google.cloud.documentai_v1.types.Document.Page.Token]): + A list of visually detected tokens on the + page. + visual_elements (Sequence[google.cloud.documentai_v1.types.Document.Page.VisualElement]): + A list of detected non-text visual elements + e.g. checkbox, signature etc. on the page. 
+ tables (Sequence[google.cloud.documentai_v1.types.Document.Page.Table]): + A list of visually detected tables on the + page. + form_fields (Sequence[google.cloud.documentai_v1.types.Document.Page.FormField]): + A list of visually detected form fields on + the page. + """ + + class Dimension(proto.Message): + r"""Dimension for the page. + + Attributes: + width (float): + Page width. + height (float): + Page height. + unit (str): + Dimension unit. + """ + + width = proto.Field(proto.FLOAT, number=1) + + height = proto.Field(proto.FLOAT, number=2) + + unit = proto.Field(proto.STRING, number=3) + + class Image(proto.Message): + r"""Rendered image contents for this page. + + Attributes: + content (bytes): + Raw byte content of the image. + mime_type (str): + Encoding mime type for the image. + width (int): + Width of the image in pixels. + height (int): + Height of the image in pixels. + """ + + content = proto.Field(proto.BYTES, number=1) + + mime_type = proto.Field(proto.STRING, number=2) + + width = proto.Field(proto.INT32, number=3) + + height = proto.Field(proto.INT32, number=4) + + class Matrix(proto.Message): + r"""Representation for transformation matrix, intended to be + compatible and used with OpenCV format for image manipulation. + + Attributes: + rows (int): + Number of rows in the matrix. + cols (int): + Number of columns in the matrix. + type_ (int): + This encodes information about what data type the matrix + uses. For example, 0 (CV_8U) is an unsigned 8-bit image. For + the full list of OpenCV primitive data types, please refer + to + https://docs.opencv.org/4.3.0/d1/d1b/group__core__hal__interface.html + data (bytes): + The matrix data. + """ + + rows = proto.Field(proto.INT32, number=1) + + cols = proto.Field(proto.INT32, number=2) + + type_ = proto.Field(proto.INT32, number=3) + + data = proto.Field(proto.BYTES, number=4) + + class Layout(proto.Message): + r"""Visual element describing a layout unit on a page. 
+ + Attributes: + text_anchor (google.cloud.documentai_v1.types.Document.TextAnchor): + Text anchor indexing into the + [Document.text][google.cloud.documentai.v1.Document.text]. + confidence (float): + Confidence of the current + [Layout][google.cloud.documentai.v1.Document.Page.Layout] + within context of the object this layout is for. e.g. + confidence can be for a single token, a table, a visual + element, etc. depending on context. Range [0, 1]. + bounding_poly (google.cloud.documentai_v1.types.BoundingPoly): + The bounding polygon for the + [Layout][google.cloud.documentai.v1.Document.Page.Layout]. + orientation (google.cloud.documentai_v1.types.Document.Page.Layout.Orientation): + Detected orientation for the + [Layout][google.cloud.documentai.v1.Document.Page.Layout]. + """ + + class Orientation(proto.Enum): + r"""Detected human reading orientation.""" + ORIENTATION_UNSPECIFIED = 0 + PAGE_UP = 1 + PAGE_RIGHT = 2 + PAGE_DOWN = 3 + PAGE_LEFT = 4 + + text_anchor = proto.Field( + proto.MESSAGE, number=1, message="Document.TextAnchor", + ) + + confidence = proto.Field(proto.FLOAT, number=2) + + bounding_poly = proto.Field( + proto.MESSAGE, number=3, message=geometry.BoundingPoly, + ) + + orientation = proto.Field( + proto.ENUM, number=4, enum="Document.Page.Layout.Orientation", + ) + + class Block(proto.Message): + r"""A block has a set of lines (collected into paragraphs) that + have a common line-spacing and orientation. + + Attributes: + layout (google.cloud.documentai_v1.types.Document.Page.Layout): + [Layout][google.cloud.documentai.v1.Document.Page.Layout] + for [Block][google.cloud.documentai.v1.Document.Page.Block]. + detected_languages (Sequence[google.cloud.documentai_v1.types.Document.Page.DetectedLanguage]): + A list of detected languages together with + confidence. + provenance (google.cloud.documentai_v1.types.Document.Provenance): + The history of this annotation. 
+ """ + + layout = proto.Field( + proto.MESSAGE, number=1, message="Document.Page.Layout", + ) + + detected_languages = proto.RepeatedField( + proto.MESSAGE, number=2, message="Document.Page.DetectedLanguage", + ) + + provenance = proto.Field( + proto.MESSAGE, number=3, message="Document.Provenance", + ) + + class Paragraph(proto.Message): + r"""A collection of lines that a human would perceive as a + paragraph. + + Attributes: + layout (google.cloud.documentai_v1.types.Document.Page.Layout): + [Layout][google.cloud.documentai.v1.Document.Page.Layout] + for + [Paragraph][google.cloud.documentai.v1.Document.Page.Paragraph]. + detected_languages (Sequence[google.cloud.documentai_v1.types.Document.Page.DetectedLanguage]): + A list of detected languages together with + confidence. + provenance (google.cloud.documentai_v1.types.Document.Provenance): + The history of this annotation. + """ + + layout = proto.Field( + proto.MESSAGE, number=1, message="Document.Page.Layout", + ) + + detected_languages = proto.RepeatedField( + proto.MESSAGE, number=2, message="Document.Page.DetectedLanguage", + ) + + provenance = proto.Field( + proto.MESSAGE, number=3, message="Document.Provenance", + ) + + class Line(proto.Message): + r"""A collection of tokens that a human would perceive as a line. + Does not cross column boundaries, can be horizontal, vertical, + etc. + + Attributes: + layout (google.cloud.documentai_v1.types.Document.Page.Layout): + [Layout][google.cloud.documentai.v1.Document.Page.Layout] + for [Line][google.cloud.documentai.v1.Document.Page.Line]. + detected_languages (Sequence[google.cloud.documentai_v1.types.Document.Page.DetectedLanguage]): + A list of detected languages together with + confidence. + provenance (google.cloud.documentai_v1.types.Document.Provenance): + The history of this annotation. 
+ """ + + layout = proto.Field( + proto.MESSAGE, number=1, message="Document.Page.Layout", + ) + + detected_languages = proto.RepeatedField( + proto.MESSAGE, number=2, message="Document.Page.DetectedLanguage", + ) + + provenance = proto.Field( + proto.MESSAGE, number=3, message="Document.Provenance", + ) + + class Token(proto.Message): + r"""A detected token. + + Attributes: + layout (google.cloud.documentai_v1.types.Document.Page.Layout): + [Layout][google.cloud.documentai.v1.Document.Page.Layout] + for [Token][google.cloud.documentai.v1.Document.Page.Token]. + detected_break (google.cloud.documentai_v1.types.Document.Page.Token.DetectedBreak): + Detected break at the end of a + [Token][google.cloud.documentai.v1.Document.Page.Token]. + detected_languages (Sequence[google.cloud.documentai_v1.types.Document.Page.DetectedLanguage]): + A list of detected languages together with + confidence. + provenance (google.cloud.documentai_v1.types.Document.Provenance): + The history of this annotation. + """ + + class DetectedBreak(proto.Message): + r"""Detected break at the end of a + [Token][google.cloud.documentai.v1.Document.Page.Token]. + + Attributes: + type_ (google.cloud.documentai_v1.types.Document.Page.Token.DetectedBreak.Type): + Detected break type. 
+ """ + + class Type(proto.Enum): + r"""Enum to denote the type of break found.""" + TYPE_UNSPECIFIED = 0 + SPACE = 1 + WIDE_SPACE = 2 + HYPHEN = 3 + + type_ = proto.Field( + proto.ENUM, number=1, enum="Document.Page.Token.DetectedBreak.Type", + ) + + layout = proto.Field( + proto.MESSAGE, number=1, message="Document.Page.Layout", + ) + + detected_break = proto.Field( + proto.MESSAGE, number=2, message="Document.Page.Token.DetectedBreak", + ) + + detected_languages = proto.RepeatedField( + proto.MESSAGE, number=3, message="Document.Page.DetectedLanguage", + ) + + provenance = proto.Field( + proto.MESSAGE, number=4, message="Document.Provenance", + ) + + class VisualElement(proto.Message): + r"""Detected non-text visual elements e.g. checkbox, signature + etc. on the page. + + Attributes: + layout (google.cloud.documentai_v1.types.Document.Page.Layout): + [Layout][google.cloud.documentai.v1.Document.Page.Layout] + for + [VisualElement][google.cloud.documentai.v1.Document.Page.VisualElement]. + type_ (str): + Type of the + [VisualElement][google.cloud.documentai.v1.Document.Page.VisualElement]. + detected_languages (Sequence[google.cloud.documentai_v1.types.Document.Page.DetectedLanguage]): + A list of detected languages together with + confidence. + """ + + layout = proto.Field( + proto.MESSAGE, number=1, message="Document.Page.Layout", + ) + + type_ = proto.Field(proto.STRING, number=2) + + detected_languages = proto.RepeatedField( + proto.MESSAGE, number=3, message="Document.Page.DetectedLanguage", + ) + + class Table(proto.Message): + r"""A table representation similar to HTML table structure. + + Attributes: + layout (google.cloud.documentai_v1.types.Document.Page.Layout): + [Layout][google.cloud.documentai.v1.Document.Page.Layout] + for [Table][google.cloud.documentai.v1.Document.Page.Table]. + header_rows (Sequence[google.cloud.documentai_v1.types.Document.Page.Table.TableRow]): + Header rows of the table. 
+ body_rows (Sequence[google.cloud.documentai_v1.types.Document.Page.Table.TableRow]): + Body rows of the table. + detected_languages (Sequence[google.cloud.documentai_v1.types.Document.Page.DetectedLanguage]): + A list of detected languages together with + confidence. + """ + + class TableRow(proto.Message): + r"""A row of table cells. + + Attributes: + cells (Sequence[google.cloud.documentai_v1.types.Document.Page.Table.TableCell]): + Cells that make up this row. + """ + + cells = proto.RepeatedField( + proto.MESSAGE, number=1, message="Document.Page.Table.TableCell", + ) + + class TableCell(proto.Message): + r"""A cell representation inside the table. + + Attributes: + layout (google.cloud.documentai_v1.types.Document.Page.Layout): + [Layout][google.cloud.documentai.v1.Document.Page.Layout] + for + [TableCell][google.cloud.documentai.v1.Document.Page.Table.TableCell]. + row_span (int): + How many rows this cell spans. + col_span (int): + How many columns this cell spans. + detected_languages (Sequence[google.cloud.documentai_v1.types.Document.Page.DetectedLanguage]): + A list of detected languages together with + confidence. + """ + + layout = proto.Field( + proto.MESSAGE, number=1, message="Document.Page.Layout", + ) + + row_span = proto.Field(proto.INT32, number=2) + + col_span = proto.Field(proto.INT32, number=3) + + detected_languages = proto.RepeatedField( + proto.MESSAGE, number=4, message="Document.Page.DetectedLanguage", + ) + + layout = proto.Field( + proto.MESSAGE, number=1, message="Document.Page.Layout", + ) + + header_rows = proto.RepeatedField( + proto.MESSAGE, number=2, message="Document.Page.Table.TableRow", + ) + + body_rows = proto.RepeatedField( + proto.MESSAGE, number=3, message="Document.Page.Table.TableRow", + ) + + detected_languages = proto.RepeatedField( + proto.MESSAGE, number=4, message="Document.Page.DetectedLanguage", + ) + + class FormField(proto.Message): + r"""A form field detected on the page. 
+ + Attributes: + field_name (google.cloud.documentai_v1.types.Document.Page.Layout): + [Layout][google.cloud.documentai.v1.Document.Page.Layout] + for the + [FormField][google.cloud.documentai.v1.Document.Page.FormField] + name. e.g. ``Address``, ``Email``, ``Grand total``, + ``Phone number``, etc. + field_value (google.cloud.documentai_v1.types.Document.Page.Layout): + [Layout][google.cloud.documentai.v1.Document.Page.Layout] + for the + [FormField][google.cloud.documentai.v1.Document.Page.FormField] + value. + name_detected_languages (Sequence[google.cloud.documentai_v1.types.Document.Page.DetectedLanguage]): + A list of detected languages for name + together with confidence. + value_detected_languages (Sequence[google.cloud.documentai_v1.types.Document.Page.DetectedLanguage]): + A list of detected languages for value + together with confidence. + value_type (str): + If the value is non-textual, this field represents the type. + Current valid values are: + + - blank (this indicates the field_value is normal text) + - "unfilled_checkbox" + - "filled_checkbox". + """ + + field_name = proto.Field( + proto.MESSAGE, number=1, message="Document.Page.Layout", + ) + + field_value = proto.Field( + proto.MESSAGE, number=2, message="Document.Page.Layout", + ) + + name_detected_languages = proto.RepeatedField( + proto.MESSAGE, number=3, message="Document.Page.DetectedLanguage", + ) + + value_detected_languages = proto.RepeatedField( + proto.MESSAGE, number=4, message="Document.Page.DetectedLanguage", + ) + + value_type = proto.Field(proto.STRING, number=5) + + class DetectedLanguage(proto.Message): + r"""Detected language for a structural component. + + Attributes: + language_code (str): + The BCP-47 language code, such as "en-US" or "sr-Latn". For + more information, see + http://www.unicode.org/reports/tr35/#Unicode_locale_identifier. + confidence (float): + Confidence of detected language. Range [0, 1]. 
+ """ + + language_code = proto.Field(proto.STRING, number=1) + + confidence = proto.Field(proto.FLOAT, number=2) + + page_number = proto.Field(proto.INT32, number=1) + + image = proto.Field(proto.MESSAGE, number=13, message="Document.Page.Image",) + + transforms = proto.RepeatedField( + proto.MESSAGE, number=14, message="Document.Page.Matrix", + ) + + dimension = proto.Field( + proto.MESSAGE, number=2, message="Document.Page.Dimension", + ) + + layout = proto.Field(proto.MESSAGE, number=3, message="Document.Page.Layout",) + + detected_languages = proto.RepeatedField( + proto.MESSAGE, number=4, message="Document.Page.DetectedLanguage", + ) + + blocks = proto.RepeatedField( + proto.MESSAGE, number=5, message="Document.Page.Block", + ) + + paragraphs = proto.RepeatedField( + proto.MESSAGE, number=6, message="Document.Page.Paragraph", + ) + + lines = proto.RepeatedField( + proto.MESSAGE, number=7, message="Document.Page.Line", + ) + + tokens = proto.RepeatedField( + proto.MESSAGE, number=8, message="Document.Page.Token", + ) + + visual_elements = proto.RepeatedField( + proto.MESSAGE, number=9, message="Document.Page.VisualElement", + ) + + tables = proto.RepeatedField( + proto.MESSAGE, number=10, message="Document.Page.Table", + ) + + form_fields = proto.RepeatedField( + proto.MESSAGE, number=11, message="Document.Page.FormField", + ) + + class Entity(proto.Message): + r"""A phrase in the text that is a known entity type, such as a + person, an organization, or location. + + Attributes: + text_anchor (google.cloud.documentai_v1.types.Document.TextAnchor): + Optional. Provenance of the entity. Text anchor indexing + into the + [Document.text][google.cloud.documentai.v1.Document.text]. + type_ (str): + Entity type from a schema e.g. ``Address``. + mention_text (str): + Optional. Text value in the document e.g. + ``1600 Amphitheatre Pkwy``. + mention_id (str): + Optional. Deprecated. Use ``id`` field instead. + confidence (float): + Optional. 
Confidence of detected Schema entity. Range [0, + 1]. + page_anchor (google.cloud.documentai_v1.types.Document.PageAnchor): + Optional. Represents the provenance of this + entity wrt. the location on the page where it + was found. + id (str): + Optional. Canonical id. This will be a unique + value in the entity list for this document. + normalized_value (google.cloud.documentai_v1.types.Document.Entity.NormalizedValue): + Optional. Normalized entity value. Absent if + the extracted value could not be converted or + the type (e.g. address) is not supported for + certain parsers. This field is also only + populated for certain supported document types. + properties (Sequence[google.cloud.documentai_v1.types.Document.Entity]): + Optional. Entities can be nested to form a + hierarchical data structure representing the + content in the document. + provenance (google.cloud.documentai_v1.types.Document.Provenance): + Optional. The history of this annotation. + redacted (bool): + Optional. Whether the entity will be redacted + for de-identification purposes. + """ + + class NormalizedValue(proto.Message): + r"""Parsed and normalized entity value. + + Attributes: + money_value (google.type.money_pb2.Money): + Money value. See also: + https://github.com/googleapis/googleapis/blob/master/google/type/money.proto + date_value (google.type.date_pb2.Date): + Date value. Includes year, month, day. See + also: + https://github.com/googleapis/googleapis/blob/master/google/type/date.proto + datetime_value (google.type.datetime_pb2.DateTime): + DateTime value. Includes date, time, and + timezone. See also: + https://github.com/googleapis/googleapis/blob/master/google/type/datetime.proto + address_value (google.type.postal_address_pb2.PostalAddress): + Postal address. See also: + https://github.com/googleapis/googleapis/blob/master/google/type/postal_address.proto + boolean_value (bool): + Boolean value. Can be used for entities with + binary values, or for checkboxes. 
+ text (str): + Required. Normalized entity value stored as a string. This + field is populated for supported document type (e.g. + Invoice). For some entity types, one of respective + 'structured_value' fields may also be populated. + + - Money/Currency type (``money_value``) is in the ISO 4217 + text format. + - Date type (``date_value``) is in the ISO 8601 text + format. + - Datetime type (``datetime_value``) is in the ISO 8601 + text format. + """ + + money_value = proto.Field( + proto.MESSAGE, number=2, oneof="structured_value", message=money.Money, + ) + + date_value = proto.Field( + proto.MESSAGE, number=3, oneof="structured_value", message=date.Date, + ) + + datetime_value = proto.Field( + proto.MESSAGE, + number=4, + oneof="structured_value", + message=datetime.DateTime, + ) + + address_value = proto.Field( + proto.MESSAGE, + number=5, + oneof="structured_value", + message=postal_address.PostalAddress, + ) + + boolean_value = proto.Field(proto.BOOL, number=6, oneof="structured_value") + + text = proto.Field(proto.STRING, number=1) + + text_anchor = proto.Field( + proto.MESSAGE, number=1, message="Document.TextAnchor", + ) + + type_ = proto.Field(proto.STRING, number=2) + + mention_text = proto.Field(proto.STRING, number=3) + + mention_id = proto.Field(proto.STRING, number=4) + + confidence = proto.Field(proto.FLOAT, number=5) + + page_anchor = proto.Field( + proto.MESSAGE, number=6, message="Document.PageAnchor", + ) + + id = proto.Field(proto.STRING, number=7) + + normalized_value = proto.Field( + proto.MESSAGE, number=9, message="Document.Entity.NormalizedValue", + ) + + properties = proto.RepeatedField( + proto.MESSAGE, number=10, message="Document.Entity", + ) + + provenance = proto.Field( + proto.MESSAGE, number=11, message="Document.Provenance", + ) + + redacted = proto.Field(proto.BOOL, number=12) + + class EntityRelation(proto.Message): + r"""Relationship between + [Entities][google.cloud.documentai.v1.Document.Entity]. 
+ + Attributes: + subject_id (str): + Subject entity id. + object_id (str): + Object entity id. + relation (str): + Relationship description. + """ + + subject_id = proto.Field(proto.STRING, number=1) + + object_id = proto.Field(proto.STRING, number=2) + + relation = proto.Field(proto.STRING, number=3) + + class TextAnchor(proto.Message): + r"""Text reference indexing into the + [Document.text][google.cloud.documentai.v1.Document.text]. + + Attributes: + text_segments (Sequence[google.cloud.documentai_v1.types.Document.TextAnchor.TextSegment]): + The text segments from the + [Document.text][google.cloud.documentai.v1.Document.text]. + content (str): + Contains the content of the text span so that users do not + have to look it up in the text_segments. + """ + + class TextSegment(proto.Message): + r"""A text segment in the + [Document.text][google.cloud.documentai.v1.Document.text]. The + indices may be out of bounds which indicate that the text extends + into another document shard for large sharded documents. See + [ShardInfo.text_offset][google.cloud.documentai.v1.Document.ShardInfo.text_offset] + + Attributes: + start_index (int): + [TextSegment][google.cloud.documentai.v1.Document.TextAnchor.TextSegment] + start UTF-8 char index in the + [Document.text][google.cloud.documentai.v1.Document.text]. + end_index (int): + [TextSegment][google.cloud.documentai.v1.Document.TextAnchor.TextSegment] + half open end UTF-8 char index in the + [Document.text][google.cloud.documentai.v1.Document.text]. + """ + + start_index = proto.Field(proto.INT64, number=1) + + end_index = proto.Field(proto.INT64, number=2) + + text_segments = proto.RepeatedField( + proto.MESSAGE, number=1, message="Document.TextAnchor.TextSegment", + ) + + content = proto.Field(proto.STRING, number=2) + + class PageAnchor(proto.Message): + r"""Referencing the visual context of the entity in the + [Document.pages][google.cloud.documentai.v1.Document.pages]. 
Page + anchors can be cross-page, consist of multiple bounding polygons and + optionally reference specific layout element types. + + Attributes: + page_refs (Sequence[google.cloud.documentai_v1.types.Document.PageAnchor.PageRef]): + One or more references to visual page + elements + """ + + class PageRef(proto.Message): + r"""Represents a weak reference to a page element within a + document. + + Attributes: + page (int): + Required. Index into the + [Document.pages][google.cloud.documentai.v1.Document.pages] + element, for example using [Document.pages][page_refs.page] + to locate the related page element. + layout_type (google.cloud.documentai_v1.types.Document.PageAnchor.PageRef.LayoutType): + Optional. The type of the layout element that + is being referenced if any. + layout_id (str): + Optional. Deprecated. Use + [PageRef.bounding_poly][google.cloud.documentai.v1.Document.PageAnchor.PageRef.bounding_poly] + instead. + bounding_poly (google.cloud.documentai_v1.types.BoundingPoly): + Optional. Identifies the bounding polygon of + a layout element on the page. + """ + + class LayoutType(proto.Enum): + r"""The type of layout that is being referenced.""" + LAYOUT_TYPE_UNSPECIFIED = 0 + BLOCK = 1 + PARAGRAPH = 2 + LINE = 3 + TOKEN = 4 + VISUAL_ELEMENT = 5 + TABLE = 6 + FORM_FIELD = 7 + + page = proto.Field(proto.INT64, number=1) + + layout_type = proto.Field( + proto.ENUM, number=2, enum="Document.PageAnchor.PageRef.LayoutType", + ) + + layout_id = proto.Field(proto.STRING, number=3) + + bounding_poly = proto.Field( + proto.MESSAGE, number=4, message=geometry.BoundingPoly, + ) + + page_refs = proto.RepeatedField( + proto.MESSAGE, number=1, message="Document.PageAnchor.PageRef", + ) + + class Provenance(proto.Message): + r"""Structure to identify provenance relationships between + annotations in different revisions. + + Attributes: + revision (int): + The index of the revision that produced this + element. + id (int): + The Id of this operation. 
Needs to be unique + within the scope of the revision. + parents (Sequence[google.cloud.documentai_v1.types.Document.Provenance.Parent]): + References to the original elements that are + replaced. + type_ (google.cloud.documentai_v1.types.Document.Provenance.OperationType): + The type of provenance operation. + """ + + class OperationType(proto.Enum): + r"""If a processor or agent does an explicit operation on + existing elements. + """ + OPERATION_TYPE_UNSPECIFIED = 0 + ADD = 1 + REMOVE = 2 + REPLACE = 3 + EVAL_REQUESTED = 4 + EVAL_APPROVED = 5 + EVAL_SKIPPED = 6 + + class Parent(proto.Message): + r"""Structure for referencing parent provenances. When an + element replaces one of more other elements parent references + identify the elements that are replaced. + + Attributes: + revision (int): + The index of the [Document.revisions] identifying the parent + revision. + id (int): + The id of the parent provenance. + """ + + revision = proto.Field(proto.INT32, number=1) + + id = proto.Field(proto.INT32, number=2) + + revision = proto.Field(proto.INT32, number=1) + + id = proto.Field(proto.INT32, number=2) + + parents = proto.RepeatedField( + proto.MESSAGE, number=3, message="Document.Provenance.Parent", + ) + + type_ = proto.Field( + proto.ENUM, number=4, enum="Document.Provenance.OperationType", + ) + + class Revision(proto.Message): + r"""Contains past or forward revisions of this document. + + Attributes: + agent (str): + If the change was made by a person specify + the name or id of that person. + processor (str): + If the annotation was made by processor + identify the processor by its resource name. + id (str): + Id of the revision. Unique within the + context of the document. + parent (Sequence[int]): + The revisions that this revision is based on. This can + include one or more parent (when documents are merged.) This + field represents the index into the ``revisions`` field. 
+ create_time (google.protobuf.timestamp_pb2.Timestamp): + The time that the revision was created. + human_review (google.cloud.documentai_v1.types.Document.Revision.HumanReview): + Human Review information of this revision. + """ + + class HumanReview(proto.Message): + r"""Human Review information of the document. + + Attributes: + state (str): + Human review state. e.g. ``requested``, ``succeeded``, + ``rejected``. + state_message (str): + A message providing more details about the current state of + processing. For example, the rejection reason when the state + is ``rejected``. + """ + + state = proto.Field(proto.STRING, number=1) + + state_message = proto.Field(proto.STRING, number=2) + + agent = proto.Field(proto.STRING, number=4, oneof="source") + + processor = proto.Field(proto.STRING, number=5, oneof="source") + + id = proto.Field(proto.STRING, number=1) + + parent = proto.RepeatedField(proto.INT32, number=2) + + create_time = proto.Field(proto.MESSAGE, number=3, message=timestamp.Timestamp,) + + human_review = proto.Field( + proto.MESSAGE, number=6, message="Document.Revision.HumanReview", + ) + + class TextChange(proto.Message): + r"""This message is used for text changes aka. OCR corrections. + + Attributes: + text_anchor (google.cloud.documentai_v1.types.Document.TextAnchor): + Provenance of the correction. Text anchor indexing into the + [Document.text][google.cloud.documentai.v1.Document.text]. + There can only be a single ``TextAnchor.text_segments`` + element. If the start and end index of the text segment are + the same, the text change is inserted before that index. + changed_text (str): + The text that replaces the text identified in the + ``text_anchor``. + provenance (Sequence[google.cloud.documentai_v1.types.Document.Provenance]): + The history of this annotation. 
+ """ + + text_anchor = proto.Field( + proto.MESSAGE, number=1, message="Document.TextAnchor", + ) + + changed_text = proto.Field(proto.STRING, number=2) + + provenance = proto.RepeatedField( + proto.MESSAGE, number=3, message="Document.Provenance", + ) + + uri = proto.Field(proto.STRING, number=1, oneof="source") + + content = proto.Field(proto.BYTES, number=2, oneof="source") + + mime_type = proto.Field(proto.STRING, number=3) + + text = proto.Field(proto.STRING, number=4) + + text_styles = proto.RepeatedField(proto.MESSAGE, number=5, message=Style,) + + pages = proto.RepeatedField(proto.MESSAGE, number=6, message=Page,) + + entities = proto.RepeatedField(proto.MESSAGE, number=7, message=Entity,) + + entity_relations = proto.RepeatedField( + proto.MESSAGE, number=8, message=EntityRelation, + ) + + text_changes = proto.RepeatedField(proto.MESSAGE, number=14, message=TextChange,) + + shard_info = proto.Field(proto.MESSAGE, number=9, message=ShardInfo,) + + error = proto.Field(proto.MESSAGE, number=10, message=status.Status,) + + revisions = proto.RepeatedField(proto.MESSAGE, number=13, message=Revision,) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/documentai_v1/types/document_io.py b/google/cloud/documentai_v1/types/document_io.py new file mode 100644 index 00000000..50196830 --- /dev/null +++ b/google/cloud/documentai_v1/types/document_io.py @@ -0,0 +1,136 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.documentai.v1", + manifest={ + "RawDocument", + "GcsDocument", + "GcsDocuments", + "GcsPrefix", + "BatchDocumentsInputConfig", + "DocumentOutputConfig", + }, +) + + +class RawDocument(proto.Message): + r"""Payload message of raw document content (bytes). + + Attributes: + content (bytes): + Inline document content. + mime_type (str): + An IANA MIME type (RFC6838) indicating the nature and format + of the [content]. + """ + + content = proto.Field(proto.BYTES, number=1) + + mime_type = proto.Field(proto.STRING, number=2) + + +class GcsDocument(proto.Message): + r"""Specifies a document stored on Cloud Storage. + + Attributes: + gcs_uri (str): + The Cloud Storage object uri. + mime_type (str): + An IANA MIME type (RFC6838) of the content. + """ + + gcs_uri = proto.Field(proto.STRING, number=1) + + mime_type = proto.Field(proto.STRING, number=2) + + +class GcsDocuments(proto.Message): + r"""Specifies a set of documents on Cloud Storage. + + Attributes: + documents (Sequence[google.cloud.documentai_v1.types.GcsDocument]): + The list of documents. + """ + + documents = proto.RepeatedField(proto.MESSAGE, number=1, message="GcsDocument",) + + +class GcsPrefix(proto.Message): + r"""Specifies all documents on Cloud Storage with a common + prefix. + + Attributes: + gcs_uri_prefix (str): + The URI prefix. + """ + + gcs_uri_prefix = proto.Field(proto.STRING, number=1) + + +class BatchDocumentsInputConfig(proto.Message): + r"""The common config to specify a set of documents used as + input. + + Attributes: + gcs_prefix (google.cloud.documentai_v1.types.GcsPrefix): + The set of documents that match the specified Cloud Storage + [gcs_prefix]. + gcs_documents (google.cloud.documentai_v1.types.GcsDocuments): + The set of documents individually specified + on Cloud Storage. 
+ """ + + gcs_prefix = proto.Field( + proto.MESSAGE, number=1, oneof="source", message="GcsPrefix", + ) + + gcs_documents = proto.Field( + proto.MESSAGE, number=2, oneof="source", message="GcsDocuments", + ) + + +class DocumentOutputConfig(proto.Message): + r"""Config that controls the output of documents. All documents + will be written as a JSON file. + + Attributes: + gcs_output_config (google.cloud.documentai_v1.types.DocumentOutputConfig.GcsOutputConfig): + Output config to write the results to Cloud + Storage. + """ + + class GcsOutputConfig(proto.Message): + r"""The configuration used when outputting documents. + + Attributes: + gcs_uri (str): + The Cloud Storage uri (a directory) of the + output. + """ + + gcs_uri = proto.Field(proto.STRING, number=1) + + gcs_output_config = proto.Field( + proto.MESSAGE, number=1, oneof="destination", message=GcsOutputConfig, + ) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/documentai_v1/types/document_processor_service.py b/google/cloud/documentai_v1/types/document_processor_service.py new file mode 100644 index 00000000..cfdcc7f5 --- /dev/null +++ b/google/cloud/documentai_v1/types/document_processor_service.py @@ -0,0 +1,309 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import proto # type: ignore + + +from google.cloud.documentai_v1.types import document as gcd_document +from google.cloud.documentai_v1.types import document_io +from google.protobuf import timestamp_pb2 as timestamp # type: ignore +from google.rpc import status_pb2 as gr_status # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.documentai.v1", + manifest={ + "ProcessRequest", + "HumanReviewStatus", + "ProcessResponse", + "BatchProcessRequest", + "BatchProcessResponse", + "BatchProcessMetadata", + "ReviewDocumentRequest", + "ReviewDocumentResponse", + "ReviewDocumentOperationMetadata", + "CommonOperationMetadata", + }, +) + + +class ProcessRequest(proto.Message): + r"""Request message for the process document method. + + Attributes: + inline_document (google.cloud.documentai_v1.types.Document): + An inline document proto. + raw_document (google.cloud.documentai_v1.types.RawDocument): + A raw document content (bytes). + name (str): + Required. The processor resource name. + skip_human_review (bool): + Whether Human Review feature should be + skipped for this request. Default to false. + """ + + inline_document = proto.Field( + proto.MESSAGE, number=4, oneof="source", message=gcd_document.Document, + ) + + raw_document = proto.Field( + proto.MESSAGE, number=5, oneof="source", message=document_io.RawDocument, + ) + + name = proto.Field(proto.STRING, number=1) + + skip_human_review = proto.Field(proto.BOOL, number=3) + + +class HumanReviewStatus(proto.Message): + r"""The status of human review on a processed document. + + Attributes: + state (google.cloud.documentai_v1.types.HumanReviewStatus.State): + The state of human review on the processing + request. + state_message (str): + A message providing more details about the + human review state. + human_review_operation (str): + The name of the operation triggered by the processed + document. This field is populated only when the [state] is + [HUMAN_REVIEW_IN_PROGRESS]. 
It has the same response type + and metadata as the long running operation returned by + [ReviewDocument] method. + """ + + class State(proto.Enum): + r"""The final state of human review on a processed document.""" + STATE_UNSPECIFIED = 0 + SKIPPED = 1 + VALIDATION_PASSED = 2 + IN_PROGRESS = 3 + ERROR = 4 + + state = proto.Field(proto.ENUM, number=1, enum=State,) + + state_message = proto.Field(proto.STRING, number=2) + + human_review_operation = proto.Field(proto.STRING, number=3) + + +class ProcessResponse(proto.Message): + r"""Response message for the process document method. + + Attributes: + document (google.cloud.documentai_v1.types.Document): + The document payload, will populate fields + based on the processor's behavior. + human_review_status (google.cloud.documentai_v1.types.HumanReviewStatus): + The status of human review on the processed + document. + """ + + document = proto.Field(proto.MESSAGE, number=1, message=gcd_document.Document,) + + human_review_status = proto.Field( + proto.MESSAGE, number=3, message="HumanReviewStatus", + ) + + +class BatchProcessRequest(proto.Message): + r"""Request message for batch process document method. + + Attributes: + name (str): + Required. The processor resource name. + input_documents (google.cloud.documentai_v1.types.BatchDocumentsInputConfig): + The input documents for batch process. + document_output_config (google.cloud.documentai_v1.types.DocumentOutputConfig): + The overall output config for batch process. + skip_human_review (bool): + Whether Human Review feature should be + skipped for this request. Default to false. 
+ """ + + name = proto.Field(proto.STRING, number=1) + + input_documents = proto.Field( + proto.MESSAGE, number=5, message=document_io.BatchDocumentsInputConfig, + ) + + document_output_config = proto.Field( + proto.MESSAGE, number=6, message=document_io.DocumentOutputConfig, + ) + + skip_human_review = proto.Field(proto.BOOL, number=4) + + +class BatchProcessResponse(proto.Message): + r"""Response message for batch process document method.""" + + +class BatchProcessMetadata(proto.Message): + r"""The long running operation metadata for batch process method. + + Attributes: + state (google.cloud.documentai_v1.types.BatchProcessMetadata.State): + The state of the current batch processing. + state_message (str): + A message providing more details about the + current state of processing. For example, the + error message if the operation is failed. + create_time (google.protobuf.timestamp_pb2.Timestamp): + The creation time of the operation. + update_time (google.protobuf.timestamp_pb2.Timestamp): + The last update time of the operation. + individual_process_statuses (Sequence[google.cloud.documentai_v1.types.BatchProcessMetadata.IndividualProcessStatus]): + The list of response details of each + document. + """ + + class State(proto.Enum): + r"""Possible states of the batch processing operation.""" + STATE_UNSPECIFIED = 0 + WAITING = 1 + RUNNING = 2 + SUCCEEDED = 3 + CANCELLING = 4 + CANCELLED = 5 + FAILED = 6 + + class IndividualProcessStatus(proto.Message): + r"""The status of a each individual document in the batch + process. + + Attributes: + input_gcs_source (str): + The source of the document, same as the [input_gcs_source] + field in the request when the batch process started. The + batch process is started by take snapshot of that document, + since a user can move or change that document during the + process. + status (google.rpc.status_pb2.Status): + The status of the processing of the document. 
+ output_gcs_destination (str): + The output_gcs_destination (in the request as + 'output_gcs_destination') of the processed document if it + was successful, otherwise empty. + human_review_status (google.cloud.documentai_v1.types.HumanReviewStatus): + The status of human review on the processed + document. + """ + + input_gcs_source = proto.Field(proto.STRING, number=1) + + status = proto.Field(proto.MESSAGE, number=2, message=gr_status.Status,) + + output_gcs_destination = proto.Field(proto.STRING, number=3) + + human_review_status = proto.Field( + proto.MESSAGE, number=5, message="HumanReviewStatus", + ) + + state = proto.Field(proto.ENUM, number=1, enum=State,) + + state_message = proto.Field(proto.STRING, number=2) + + create_time = proto.Field(proto.MESSAGE, number=3, message=timestamp.Timestamp,) + + update_time = proto.Field(proto.MESSAGE, number=4, message=timestamp.Timestamp,) + + individual_process_statuses = proto.RepeatedField( + proto.MESSAGE, number=5, message=IndividualProcessStatus, + ) + + +class ReviewDocumentRequest(proto.Message): + r"""Request message for review document method. + + Attributes: + inline_document (google.cloud.documentai_v1.types.Document): + An inline document proto. + human_review_config (str): + Required. The resource name of the + HumanReviewConfig that the document will be + reviewed with. + """ + + inline_document = proto.Field( + proto.MESSAGE, number=4, oneof="source", message=gcd_document.Document, + ) + + human_review_config = proto.Field(proto.STRING, number=1) + + +class ReviewDocumentResponse(proto.Message): + r"""Response message for review document method. + + Attributes: + gcs_destination (str): + The Cloud Storage uri for the human reviewed + document. + """ + + gcs_destination = proto.Field(proto.STRING, number=1) + + +class ReviewDocumentOperationMetadata(proto.Message): + r"""The long running operation metadata for review document + method. 
+ + Attributes: + common_metadata (google.cloud.documentai_v1.types.CommonOperationMetadata): + The basic metadata of the long running + operation. + """ + + common_metadata = proto.Field( + proto.MESSAGE, number=5, message="CommonOperationMetadata", + ) + + +class CommonOperationMetadata(proto.Message): + r"""The common metadata for long running operations. + + Attributes: + state (google.cloud.documentai_v1.types.CommonOperationMetadata.State): + The state of the operation. + state_message (str): + A message providing more details about the + current state of processing. + create_time (google.protobuf.timestamp_pb2.Timestamp): + The creation time of the operation. + update_time (google.protobuf.timestamp_pb2.Timestamp): + The last update time of the operation. + """ + + class State(proto.Enum): + r"""State of the longrunning operation.""" + STATE_UNSPECIFIED = 0 + RUNNING = 1 + CANCELLING = 2 + SUCCEEDED = 3 + FAILED = 4 + CANCELLED = 5 + + state = proto.Field(proto.ENUM, number=1, enum=State,) + + state_message = proto.Field(proto.STRING, number=2) + + create_time = proto.Field(proto.MESSAGE, number=3, message=timestamp.Timestamp,) + + update_time = proto.Field(proto.MESSAGE, number=4, message=timestamp.Timestamp,) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/documentai_v1/types/geometry.py b/google/cloud/documentai_v1/types/geometry.py new file mode 100644 index 00000000..3b3258ca --- /dev/null +++ b/google/cloud/documentai_v1/types/geometry.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.documentai.v1", + manifest={"Vertex", "NormalizedVertex", "BoundingPoly",}, +) + + +class Vertex(proto.Message): + r"""A vertex represents a 2D point in the image. + NOTE: the vertex coordinates are in the same scale as the + original image. + + Attributes: + x (int): + X coordinate. + y (int): + Y coordinate. + """ + + x = proto.Field(proto.INT32, number=1) + + y = proto.Field(proto.INT32, number=2) + + +class NormalizedVertex(proto.Message): + r"""A vertex represents a 2D point in the image. + NOTE: the normalized vertex coordinates are relative to the + original image and range from 0 to 1. + + Attributes: + x (float): + X coordinate. + y (float): + Y coordinate. + """ + + x = proto.Field(proto.FLOAT, number=1) + + y = proto.Field(proto.FLOAT, number=2) + + +class BoundingPoly(proto.Message): + r"""A bounding polygon for the detected image annotation. + + Attributes: + vertices (Sequence[google.cloud.documentai_v1.types.Vertex]): + The bounding polygon vertices. + normalized_vertices (Sequence[google.cloud.documentai_v1.types.NormalizedVertex]): + The bounding polygon normalized vertices. 
+ """ + + vertices = proto.RepeatedField(proto.MESSAGE, number=1, message="Vertex",) + + normalized_vertices = proto.RepeatedField( + proto.MESSAGE, number=2, message="NormalizedVertex", + ) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/documentai_v1beta2/services/document_understanding_service/async_client.py b/google/cloud/documentai_v1beta2/services/document_understanding_service/async_client.py index a293e4b2..05afa3f5 100644 --- a/google/cloud/documentai_v1beta2/services/document_understanding_service/async_client.py +++ b/google/cloud/documentai_v1beta2/services/document_understanding_service/async_client.py @@ -50,11 +50,82 @@ class DocumentUnderstandingServiceAsyncClient: DEFAULT_ENDPOINT = DocumentUnderstandingServiceClient.DEFAULT_ENDPOINT DEFAULT_MTLS_ENDPOINT = DocumentUnderstandingServiceClient.DEFAULT_MTLS_ENDPOINT - from_service_account_file = ( - DocumentUnderstandingServiceClient.from_service_account_file + common_billing_account_path = staticmethod( + DocumentUnderstandingServiceClient.common_billing_account_path ) + parse_common_billing_account_path = staticmethod( + DocumentUnderstandingServiceClient.parse_common_billing_account_path + ) + + common_folder_path = staticmethod( + DocumentUnderstandingServiceClient.common_folder_path + ) + parse_common_folder_path = staticmethod( + DocumentUnderstandingServiceClient.parse_common_folder_path + ) + + common_organization_path = staticmethod( + DocumentUnderstandingServiceClient.common_organization_path + ) + parse_common_organization_path = staticmethod( + DocumentUnderstandingServiceClient.parse_common_organization_path + ) + + common_project_path = staticmethod( + DocumentUnderstandingServiceClient.common_project_path + ) + parse_common_project_path = staticmethod( + DocumentUnderstandingServiceClient.parse_common_project_path + ) + + common_location_path = staticmethod( + DocumentUnderstandingServiceClient.common_location_path + ) + parse_common_location_path = 
staticmethod( + DocumentUnderstandingServiceClient.parse_common_location_path + ) + + @classmethod + def from_service_account_info(cls, info: dict, *args, **kwargs): + """Creates an instance of this client using the provided credentials info. + + Args: + info (dict): The service account private key info. + args: Additional arguments to pass to the constructor. + kwargs: Additional arguments to pass to the constructor. + + Returns: + DocumentUnderstandingServiceAsyncClient: The constructed client. + """ + return DocumentUnderstandingServiceClient.from_service_account_info.__func__(DocumentUnderstandingServiceAsyncClient, info, *args, **kwargs) # type: ignore + + @classmethod + def from_service_account_file(cls, filename: str, *args, **kwargs): + """Creates an instance of this client using the provided credentials + file. + + Args: + filename (str): The path to the service account private key json + file. + args: Additional arguments to pass to the constructor. + kwargs: Additional arguments to pass to the constructor. + + Returns: + DocumentUnderstandingServiceAsyncClient: The constructed client. + """ + return DocumentUnderstandingServiceClient.from_service_account_file.__func__(DocumentUnderstandingServiceAsyncClient, filename, *args, **kwargs) # type: ignore + from_service_account_json = from_service_account_file + @property + def transport(self) -> DocumentUnderstandingServiceTransport: + """Return the transport used by the client instance. + + Returns: + DocumentUnderstandingServiceTransport: The transport used by the client instance. + """ + return self._client.transport + get_transport_class = functools.partial( type(DocumentUnderstandingServiceClient).get_transport_class, type(DocumentUnderstandingServiceClient), @@ -121,13 +192,14 @@ async def batch_process_documents( written to Cloud Storage as JSON in the [Document] format. 
Args: - request (:class:`~.document_understanding.BatchProcessDocumentsRequest`): + request (:class:`google.cloud.documentai_v1beta2.types.BatchProcessDocumentsRequest`): The request object. Request to batch process documents as an asynchronous operation. The output is written to Cloud Storage as JSON in the [Document] format. - requests (:class:`Sequence[~.document_understanding.ProcessDocumentRequest]`): + requests (:class:`Sequence[google.cloud.documentai_v1beta2.types.ProcessDocumentRequest]`): Required. Individual requests for each document. + This corresponds to the ``requests`` field on the ``request`` instance; if ``request`` is provided, this should not be set. @@ -139,20 +211,18 @@ async def batch_process_documents( sent along with the request as metadata. Returns: - ~.operation_async.AsyncOperation: + google.api_core.operation_async.AsyncOperation: An object representing a long-running operation. - The result type for the operation will be - :class:``~.document_understanding.BatchProcessDocumentsResponse``: - Response to an batch document processing request. This - is returned in the LRO Operation after the operation is - complete. + The result type for the operation will be :class:`google.cloud.documentai_v1beta2.types.BatchProcessDocumentsResponse` Response to an batch document processing request. This is returned in + the LRO Operation after the operation is complete. """ # Create or coerce a protobuf request object. # Sanity check: If we got a request object, we should *not* have # gotten any keyword arguments that map to the request. - if request is not None and any([requests]): + has_flattened_params = any([requests]) + if request is not None and has_flattened_params: raise ValueError( "If the `request` argument is set, then none of " "the individual field arguments should be set." @@ -163,8 +233,8 @@ async def batch_process_documents( # If we have keyword arguments corresponding to fields on the # request, apply these. 
- if requests is not None: - request.requests = requests + if requests: + request.requests.extend(requests) # Wrap the RPC method; this adds retry and timeout information, # and friendly error handling. @@ -175,8 +245,9 @@ async def batch_process_documents( maximum=60.0, multiplier=1.3, predicate=retries.if_exception_type( - exceptions.ServiceUnavailable, exceptions.DeadlineExceeded, + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, ), + deadline=120.0, ), default_timeout=120.0, client_info=DEFAULT_CLIENT_INFO, @@ -213,7 +284,7 @@ async def process_document( r"""Processes a single document. Args: - request (:class:`~.document_understanding.ProcessDocumentRequest`): + request (:class:`google.cloud.documentai_v1beta2.types.ProcessDocumentRequest`): The request object. Request to process one document. retry (google.api_core.retry.Retry): Designation of what errors, if any, @@ -223,7 +294,7 @@ async def process_document( sent along with the request as metadata. Returns: - ~.document.Document: + google.cloud.documentai_v1beta2.types.Document: Document represents the canonical document resource in Document Understanding AI. 
It is an interchange @@ -246,8 +317,9 @@ async def process_document( maximum=60.0, multiplier=1.3, predicate=retries.if_exception_type( - exceptions.ServiceUnavailable, exceptions.DeadlineExceeded, + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, ), + deadline=120.0, ), default_timeout=120.0, client_info=DEFAULT_CLIENT_INFO, diff --git a/google/cloud/documentai_v1beta2/services/document_understanding_service/client.py b/google/cloud/documentai_v1beta2/services/document_understanding_service/client.py index 433de5c1..9b8a6edd 100644 --- a/google/cloud/documentai_v1beta2/services/document_understanding_service/client.py +++ b/google/cloud/documentai_v1beta2/services/document_understanding_service/client.py @@ -122,6 +122,22 @@ def _get_default_mtls_endpoint(api_endpoint): DEFAULT_ENDPOINT ) + @classmethod + def from_service_account_info(cls, info: dict, *args, **kwargs): + """Creates an instance of this client using the provided credentials info. + + Args: + info (dict): The service account private key info. + args: Additional arguments to pass to the constructor. + kwargs: Additional arguments to pass to the constructor. + + Returns: + DocumentUnderstandingServiceClient: The constructed client. + """ + credentials = service_account.Credentials.from_service_account_info(info) + kwargs["credentials"] = credentials + return cls(*args, **kwargs) + @classmethod def from_service_account_file(cls, filename: str, *args, **kwargs): """Creates an instance of this client using the provided credentials @@ -134,7 +150,7 @@ def from_service_account_file(cls, filename: str, *args, **kwargs): kwargs: Additional arguments to pass to the constructor. Returns: - {@api.name}: The constructed client. + DocumentUnderstandingServiceClient: The constructed client.
""" credentials = service_account.Credentials.from_service_account_file(filename) kwargs["credentials"] = credentials @@ -142,6 +158,74 @@ def from_service_account_file(cls, filename: str, *args, **kwargs): from_service_account_json = from_service_account_file + @property + def transport(self) -> DocumentUnderstandingServiceTransport: + """Return the transport used by the client instance. + + Returns: + DocumentUnderstandingServiceTransport: The transport used by the client instance. + """ + return self._transport + + @staticmethod + def common_billing_account_path(billing_account: str,) -> str: + """Return a fully-qualified billing_account string.""" + return "billingAccounts/{billing_account}".format( + billing_account=billing_account, + ) + + @staticmethod + def parse_common_billing_account_path(path: str) -> Dict[str, str]: + """Parse a billing_account path into its component segments.""" + m = re.match(r"^billingAccounts/(?P<billing_account>.+?)$", path) + return m.groupdict() if m else {} + + @staticmethod + def common_folder_path(folder: str,) -> str: + """Return a fully-qualified folder string.""" + return "folders/{folder}".format(folder=folder,) + + @staticmethod + def parse_common_folder_path(path: str) -> Dict[str, str]: + """Parse a folder path into its component segments.""" + m = re.match(r"^folders/(?P<folder>.+?)$", path) + return m.groupdict() if m else {} + + @staticmethod + def common_organization_path(organization: str,) -> str: + """Return a fully-qualified organization string.""" + return "organizations/{organization}".format(organization=organization,) + + @staticmethod + def parse_common_organization_path(path: str) -> Dict[str, str]: + """Parse an organization path into its component segments.""" + m = re.match(r"^organizations/(?P<organization>.+?)$", path) + return m.groupdict() if m else {} + + @staticmethod + def common_project_path(project: str,) -> str: + """Return a fully-qualified project string.""" + return "projects/{project}".format(project=project,) + + @staticmethod
+ def parse_common_project_path(path: str) -> Dict[str, str]: + """Parse a project path into its component segments.""" + m = re.match(r"^projects/(?P<project>.+?)$", path) + return m.groupdict() if m else {} + + @staticmethod + def common_location_path(project: str, location: str,) -> str: + """Return a fully-qualified location string.""" + return "projects/{project}/locations/{location}".format( + project=project, location=location, + ) + + @staticmethod + def parse_common_location_path(path: str) -> Dict[str, str]: + """Parse a location path into its component segments.""" + m = re.match(r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)$", path) + return m.groupdict() if m else {} + def __init__( self, *, @@ -158,10 +242,10 @@ def __init__( credentials identify the application to the service; if none are specified, the client will attempt to ascertain the credentials from the environment. - transport (Union[str, ~.DocumentUnderstandingServiceTransport]): The + transport (Union[str, DocumentUnderstandingServiceTransport]): The transport to use. If set to None, a transport is chosen automatically. - client_options (client_options_lib.ClientOptions): Custom options for the + client_options (google.api_core.client_options.ClientOptions): Custom options for the client. It won't take effect if a ``transport`` instance is provided. (1) The ``api_endpoint`` property can be used to override the default endpoint provided by the client. GOOGLE_API_USE_MTLS_ENDPOINT @@ -177,10 +261,10 @@ def __init__( not provided, the default SSL client certificate will be used if present. If GOOGLE_API_USE_CLIENT_CERTIFICATE is "false" or not set, no client certificate will be used. - client_info (google.api_core.gapic_v1.client_info.ClientInfo): - The client info used to send a user-agent string along with - API requests. If ``None``, then default info will be used.
- Generally, you only need to set this if you're developing + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing your own client library. Raises: @@ -197,21 +281,17 @@ def __init__( util.strtobool(os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE", "false")) ) - ssl_credentials = None + client_cert_source_func = None is_mtls = False if use_client_cert: if client_options.client_cert_source: - import grpc # type: ignore - - cert, key = client_options.client_cert_source() - ssl_credentials = grpc.ssl_channel_credentials( - certificate_chain=cert, private_key=key - ) is_mtls = True + client_cert_source_func = client_options.client_cert_source else: - creds = SslCredentials() - is_mtls = creds.is_mtls - ssl_credentials = creds.ssl_credentials if is_mtls else None + is_mtls = mtls.has_default_client_cert_source() + client_cert_source_func = ( + mtls.default_client_cert_source() if is_mtls else None + ) # Figure out which api endpoint to use. if client_options.api_endpoint is not None: @@ -254,7 +334,7 @@ def __init__( credentials_file=client_options.credentials_file, host=api_endpoint, scopes=client_options.scopes, - ssl_channel_credentials=ssl_credentials, + client_cert_source_for_mtls=client_cert_source_func, quota_project_id=client_options.quota_project_id, client_info=client_info, ) @@ -272,13 +352,14 @@ def batch_process_documents( written to Cloud Storage as JSON in the [Document] format. Args: - request (:class:`~.document_understanding.BatchProcessDocumentsRequest`): + request (google.cloud.documentai_v1beta2.types.BatchProcessDocumentsRequest): The request object. Request to batch process documents as an asynchronous operation. The output is written to Cloud Storage as JSON in the [Document] format. 
- requests (:class:`Sequence[~.document_understanding.ProcessDocumentRequest]`): + requests (Sequence[google.cloud.documentai_v1beta2.types.ProcessDocumentRequest]): Required. Individual requests for each document. + This corresponds to the ``requests`` field on the ``request`` instance; if ``request`` is provided, this should not be set. @@ -290,14 +371,11 @@ def batch_process_documents( sent along with the request as metadata. Returns: - ~.operation.Operation: + google.api_core.operation.Operation: An object representing a long-running operation. - The result type for the operation will be - :class:``~.document_understanding.BatchProcessDocumentsResponse``: - Response to an batch document processing request. This - is returned in the LRO Operation after the operation is - complete. + The result type for the operation will be :class:`google.cloud.documentai_v1beta2.types.BatchProcessDocumentsResponse` Response to an batch document processing request. This is returned in + the LRO Operation after the operation is complete. """ # Create or coerce a protobuf request object. @@ -358,7 +436,7 @@ def process_document( r"""Processes a single document. Args: - request (:class:`~.document_understanding.ProcessDocumentRequest`): + request (google.cloud.documentai_v1beta2.types.ProcessDocumentRequest): The request object. Request to process one document. retry (google.api_core.retry.Retry): Designation of what errors, if any, @@ -368,7 +446,7 @@ def process_document( sent along with the request as metadata. Returns: - ~.document.Document: + google.cloud.documentai_v1beta2.types.Document: Document represents the canonical document resource in Document Understanding AI. 
It is an interchange diff --git a/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/__init__.py b/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/__init__.py index ce42f2ab..d296b9d5 100644 --- a/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/__init__.py +++ b/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/__init__.py @@ -30,7 +30,6 @@ _transport_registry["grpc"] = DocumentUnderstandingServiceGrpcTransport _transport_registry["grpc_asyncio"] = DocumentUnderstandingServiceGrpcAsyncIOTransport - __all__ = ( "DocumentUnderstandingServiceTransport", "DocumentUnderstandingServiceGrpcTransport", diff --git a/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/base.py b/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/base.py index df52dbcc..38db3690 100644 --- a/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/base.py +++ b/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/base.py @@ -72,10 +72,10 @@ def __init__( scope (Optional[Sequence[str]]): A list of scopes. quota_project_id (Optional[str]): An optional project to use for billing and quota. - client_info (google.api_core.gapic_v1.client_info.ClientInfo): - The client info used to send a user-agent string along with - API requests. If ``None``, then default info will be used. - Generally, you only need to set this if you're developing + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing your own client library. """ # Save the hostname. Default to port 443 (HTTPS) if none is specified. 
@@ -83,6 +83,9 @@ def __init__( host += ":443" self._host = host + # Save the scopes. + self._scopes = scopes or self.AUTH_SCOPES + # If no credentials are provided, then determine the appropriate # defaults. if credentials and credentials_file: @@ -92,20 +95,17 @@ def __init__( if credentials_file is not None: credentials, _ = auth.load_credentials_from_file( - credentials_file, scopes=scopes, quota_project_id=quota_project_id + credentials_file, scopes=self._scopes, quota_project_id=quota_project_id ) elif credentials is None: credentials, _ = auth.default( - scopes=scopes, quota_project_id=quota_project_id + scopes=self._scopes, quota_project_id=quota_project_id ) # Save the credentials. self._credentials = credentials - # Lifted into its own function so it can be stubbed out during tests. - self._prep_wrapped_messages(client_info) - def _prep_wrapped_messages(self, client_info): # Precompute the wrapped methods. self._wrapped_methods = { @@ -116,8 +116,9 @@ def _prep_wrapped_messages(self, client_info): maximum=60.0, multiplier=1.3, predicate=retries.if_exception_type( - exceptions.ServiceUnavailable, exceptions.DeadlineExceeded, + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, ), + deadline=120.0, ), default_timeout=120.0, client_info=client_info, @@ -129,8 +130,9 @@ def _prep_wrapped_messages(self, client_info): maximum=60.0, multiplier=1.3, predicate=retries.if_exception_type( - exceptions.ServiceUnavailable, exceptions.DeadlineExceeded, + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, ), + deadline=120.0, ), default_timeout=120.0, client_info=client_info, diff --git a/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/grpc.py b/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/grpc.py index 60f3e8b8..391fb597 100644 --- a/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/grpc.py +++ 
b/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/grpc.py @@ -62,6 +62,7 @@ def __init__( api_mtls_endpoint: str = None, client_cert_source: Callable[[], Tuple[bytes, bytes]] = None, ssl_channel_credentials: grpc.ChannelCredentials = None, + client_cert_source_for_mtls: Callable[[], Tuple[bytes, bytes]] = None, quota_project_id: Optional[str] = None, client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, ) -> None: @@ -92,12 +93,16 @@ def __init__( ``api_mtls_endpoint`` is None. ssl_channel_credentials (grpc.ChannelCredentials): SSL credentials for grpc channel. It is ignored if ``channel`` is provided. + client_cert_source_for_mtls (Optional[Callable[[], Tuple[bytes, bytes]]]): + A callback to provide client certificate bytes and private key bytes, + both in PEM format. It is used to configure mutual TLS channel. It is + ignored if ``channel`` or ``ssl_channel_credentials`` is provided. quota_project_id (Optional[str]): An optional project to use for billing and quota. - client_info (google.api_core.gapic_v1.client_info.ClientInfo): - The client info used to send a user-agent string along with - API requests. If ``None``, then default info will be used. - Generally, you only need to set this if you're developing + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing your own client library. Raises: @@ -106,79 +111,71 @@ def __init__( google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` and ``credentials_file`` are passed. 
""" + self._grpc_channel = None + self._ssl_channel_credentials = ssl_channel_credentials + self._stubs: Dict[str, Callable] = {} + self._operations_client = None + + if api_mtls_endpoint: + warnings.warn("api_mtls_endpoint is deprecated", DeprecationWarning) + if client_cert_source: + warnings.warn("client_cert_source is deprecated", DeprecationWarning) + if channel: - # Sanity check: Ensure that channel and credentials are not both - # provided. + # Ignore credentials if a channel was passed. credentials = False - # If a channel was explicitly provided, set it. self._grpc_channel = channel - elif api_mtls_endpoint: - warnings.warn( - "api_mtls_endpoint and client_cert_source are deprecated", - DeprecationWarning, - ) - - host = ( - api_mtls_endpoint - if ":" in api_mtls_endpoint - else api_mtls_endpoint + ":443" - ) - - if credentials is None: - credentials, _ = auth.default( - scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id - ) - - # Create SSL credentials with client_cert_source or application - # default SSL credentials. - if client_cert_source: - cert, key = client_cert_source() - ssl_credentials = grpc.ssl_channel_credentials( - certificate_chain=cert, private_key=key - ) - else: - ssl_credentials = SslCredentials().ssl_credentials + self._ssl_channel_credentials = None - # create a new channel. The provided one is ignored. - self._grpc_channel = type(self).create_channel( - host, - credentials=credentials, - credentials_file=credentials_file, - ssl_credentials=ssl_credentials, - scopes=scopes or self.AUTH_SCOPES, - quota_project_id=quota_project_id, - ) else: - host = host if ":" in host else host + ":443" - - if credentials is None: - credentials, _ = auth.default( - scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id - ) - - # create a new channel. The provided one is ignored. 
- self._grpc_channel = type(self).create_channel( - host, - credentials=credentials, - credentials_file=credentials_file, - ssl_credentials=ssl_channel_credentials, - scopes=scopes or self.AUTH_SCOPES, - quota_project_id=quota_project_id, - ) + if api_mtls_endpoint: + host = api_mtls_endpoint + + # Create SSL credentials with client_cert_source or application + # default SSL credentials. + if client_cert_source: + cert, key = client_cert_source() + self._ssl_channel_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + else: + self._ssl_channel_credentials = SslCredentials().ssl_credentials - self._stubs = {} # type: Dict[str, Callable] + else: + if client_cert_source_for_mtls and not ssl_channel_credentials: + cert, key = client_cert_source_for_mtls() + self._ssl_channel_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) - # Run the base constructor. + # The base transport sets the host, credentials and scopes super().__init__( host=host, credentials=credentials, credentials_file=credentials_file, - scopes=scopes or self.AUTH_SCOPES, + scopes=scopes, quota_project_id=quota_project_id, client_info=client_info, ) + if not self._grpc_channel: + self._grpc_channel = type(self).create_channel( + self._host, + credentials=self._credentials, + credentials_file=credentials_file, + scopes=self._scopes, + ssl_credentials=self._ssl_channel_credentials, + quota_project_id=quota_project_id, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + + # Wrap messages. This must be done after self._grpc_channel exists + self._prep_wrapped_messages(client_info) + @classmethod def create_channel( cls, @@ -191,7 +188,7 @@ def create_channel( ) -> grpc.Channel: """Create and return a gRPC channel object. Args: - address (Optionsl[str]): The host for the channel to use. + host (Optional[str]): The host for the channel to use. 
credentials (Optional[~.Credentials]): The authorization credentials to attach to requests. These credentials identify this application to the service. If @@ -226,12 +223,8 @@ def create_channel( @property def grpc_channel(self) -> grpc.Channel: - """Create the channel designed to connect to this service. - - This property caches on the instance; repeated calls return - the same channel. + """Return the channel designed to connect to this service. """ - # Return the channel from cache. return self._grpc_channel @property @@ -242,13 +235,11 @@ def operations_client(self) -> operations_v1.OperationsClient: client. """ # Sanity check: Only create a new client if we do not already have one. - if "operations_client" not in self.__dict__: - self.__dict__["operations_client"] = operations_v1.OperationsClient( - self.grpc_channel - ) + if self._operations_client is None: + self._operations_client = operations_v1.OperationsClient(self.grpc_channel) # Return the client from cache. - return self.__dict__["operations_client"] + return self._operations_client @property def batch_process_documents( diff --git a/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/grpc_asyncio.py b/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/grpc_asyncio.py index 315795e5..76cf0816 100644 --- a/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/grpc_asyncio.py +++ b/google/cloud/documentai_v1beta2/services/document_understanding_service/transports/grpc_asyncio.py @@ -68,7 +68,7 @@ def create_channel( ) -> aio.Channel: """Create and return a gRPC AsyncIO channel object. Args: - address (Optional[str]): The host for the channel to use. + host (Optional[str]): The host for the channel to use. credentials (Optional[~.Credentials]): The authorization credentials to attach to requests. These credentials identify this application to the service. 
If @@ -108,6 +108,7 @@ def __init__( api_mtls_endpoint: str = None, client_cert_source: Callable[[], Tuple[bytes, bytes]] = None, ssl_channel_credentials: grpc.ChannelCredentials = None, + client_cert_source_for_mtls: Callable[[], Tuple[bytes, bytes]] = None, quota_project_id=None, client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, ) -> None: @@ -139,12 +140,16 @@ def __init__( ``api_mtls_endpoint`` is None. ssl_channel_credentials (grpc.ChannelCredentials): SSL credentials for grpc channel. It is ignored if ``channel`` is provided. + client_cert_source_for_mtls (Optional[Callable[[], Tuple[bytes, bytes]]]): + A callback to provide client certificate bytes and private key bytes, + both in PEM format. It is used to configure mutual TLS channel. It is + ignored if ``channel`` or ``ssl_channel_credentials`` is provided. quota_project_id (Optional[str]): An optional project to use for billing and quota. - client_info (google.api_core.gapic_v1.client_info.ClientInfo): - The client info used to send a user-agent string along with - API requests. If ``None``, then default info will be used. - Generally, you only need to set this if you're developing + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing your own client library. Raises: @@ -153,78 +158,70 @@ def __init__( google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` and ``credentials_file`` are passed. 
""" + self._grpc_channel = None + self._ssl_channel_credentials = ssl_channel_credentials + self._stubs: Dict[str, Callable] = {} + self._operations_client = None + + if api_mtls_endpoint: + warnings.warn("api_mtls_endpoint is deprecated", DeprecationWarning) + if client_cert_source: + warnings.warn("client_cert_source is deprecated", DeprecationWarning) + if channel: - # Sanity check: Ensure that channel and credentials are not both - # provided. + # Ignore credentials if a channel was passed. credentials = False - # If a channel was explicitly provided, set it. self._grpc_channel = channel - elif api_mtls_endpoint: - warnings.warn( - "api_mtls_endpoint and client_cert_source are deprecated", - DeprecationWarning, - ) - - host = ( - api_mtls_endpoint - if ":" in api_mtls_endpoint - else api_mtls_endpoint + ":443" - ) + self._ssl_channel_credentials = None - if credentials is None: - credentials, _ = auth.default( - scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id - ) - - # Create SSL credentials with client_cert_source or application - # default SSL credentials. - if client_cert_source: - cert, key = client_cert_source() - ssl_credentials = grpc.ssl_channel_credentials( - certificate_chain=cert, private_key=key - ) - else: - ssl_credentials = SslCredentials().ssl_credentials - - # create a new channel. The provided one is ignored. - self._grpc_channel = type(self).create_channel( - host, - credentials=credentials, - credentials_file=credentials_file, - ssl_credentials=ssl_credentials, - scopes=scopes or self.AUTH_SCOPES, - quota_project_id=quota_project_id, - ) else: - host = host if ":" in host else host + ":443" + if api_mtls_endpoint: + host = api_mtls_endpoint + + # Create SSL credentials with client_cert_source or application + # default SSL credentials. 
+ if client_cert_source: + cert, key = client_cert_source() + self._ssl_channel_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + else: + self._ssl_channel_credentials = SslCredentials().ssl_credentials - if credentials is None: - credentials, _ = auth.default( - scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id - ) - - # create a new channel. The provided one is ignored. - self._grpc_channel = type(self).create_channel( - host, - credentials=credentials, - credentials_file=credentials_file, - ssl_credentials=ssl_channel_credentials, - scopes=scopes or self.AUTH_SCOPES, - quota_project_id=quota_project_id, - ) + else: + if client_cert_source_for_mtls and not ssl_channel_credentials: + cert, key = client_cert_source_for_mtls() + self._ssl_channel_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) - # Run the base constructor. + # The base transport sets the host, credentials and scopes super().__init__( host=host, credentials=credentials, credentials_file=credentials_file, - scopes=scopes or self.AUTH_SCOPES, + scopes=scopes, quota_project_id=quota_project_id, client_info=client_info, ) - self._stubs = {} + if not self._grpc_channel: + self._grpc_channel = type(self).create_channel( + self._host, + credentials=self._credentials, + credentials_file=credentials_file, + scopes=self._scopes, + ssl_credentials=self._ssl_channel_credentials, + quota_project_id=quota_project_id, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + + # Wrap messages. This must be done after self._grpc_channel exists + self._prep_wrapped_messages(client_info) @property def grpc_channel(self) -> aio.Channel: @@ -244,13 +241,13 @@ def operations_client(self) -> operations_v1.OperationsAsyncClient: client. """ # Sanity check: Only create a new client if we do not already have one. 
- if "operations_client" not in self.__dict__: - self.__dict__["operations_client"] = operations_v1.OperationsAsyncClient( + if self._operations_client is None: + self._operations_client = operations_v1.OperationsAsyncClient( self.grpc_channel ) # Return the client from cache. - return self.__dict__["operations_client"] + return self._operations_client @property def batch_process_documents( diff --git a/google/cloud/documentai_v1beta2/types/__init__.py b/google/cloud/documentai_v1beta2/types/__init__.py index 5d05c6b1..e5578fac 100644 --- a/google/cloud/documentai_v1beta2/types/__init__.py +++ b/google/cloud/documentai_v1beta2/types/__init__.py @@ -15,51 +15,50 @@ # limitations under the License. # -from .geometry import ( - Vertex, - NormalizedVertex, - BoundingPoly, -) from .document import Document from .document_understanding import ( + AutoMlParams, BatchProcessDocumentsRequest, - ProcessDocumentRequest, BatchProcessDocumentsResponse, - ProcessDocumentResponse, - OcrParams, - TableExtractionParams, - TableBoundHint, - FormExtractionParams, - KeyValuePairHint, EntityExtractionParams, - AutoMlParams, - InputConfig, - OutputConfig, - GcsSource, + FormExtractionParams, GcsDestination, + GcsSource, + InputConfig, + KeyValuePairHint, + OcrParams, OperationMetadata, + OutputConfig, + ProcessDocumentRequest, + ProcessDocumentResponse, + TableBoundHint, + TableExtractionParams, +) +from .geometry import ( + BoundingPoly, + NormalizedVertex, + Vertex, ) - __all__ = ( - "Vertex", - "NormalizedVertex", - "BoundingPoly", "Document", + "AutoMlParams", "BatchProcessDocumentsRequest", - "ProcessDocumentRequest", "BatchProcessDocumentsResponse", - "ProcessDocumentResponse", - "OcrParams", - "TableExtractionParams", - "TableBoundHint", - "FormExtractionParams", - "KeyValuePairHint", "EntityExtractionParams", - "AutoMlParams", - "InputConfig", - "OutputConfig", - "GcsSource", + "FormExtractionParams", "GcsDestination", + "GcsSource", + "InputConfig", + "KeyValuePairHint", + 
"OcrParams", "OperationMetadata", + "OutputConfig", + "ProcessDocumentRequest", + "ProcessDocumentResponse", + "TableBoundHint", + "TableExtractionParams", + "BoundingPoly", + "NormalizedVertex", + "Vertex", ) diff --git a/google/cloud/documentai_v1beta2/types/document.py b/google/cloud/documentai_v1beta2/types/document.py index 7b7d15af..e2411002 100644 --- a/google/cloud/documentai_v1beta2/types/document.py +++ b/google/cloud/documentai_v1beta2/types/document.py @@ -55,29 +55,29 @@ class Document(proto.Message): text (str): UTF-8 encoded text in reading order from the document. - text_styles (Sequence[~.document.Document.Style]): + text_styles (Sequence[google.cloud.documentai_v1beta2.types.Document.Style]): Styles for the [Document.text][google.cloud.documentai.v1beta2.Document.text]. - pages (Sequence[~.document.Document.Page]): + pages (Sequence[google.cloud.documentai_v1beta2.types.Document.Page]): Visual page layout for the [Document][google.cloud.documentai.v1beta2.Document]. - entities (Sequence[~.document.Document.Entity]): + entities (Sequence[google.cloud.documentai_v1beta2.types.Document.Entity]): A list of entities detected on [Document.text][google.cloud.documentai.v1beta2.Document.text]. For document shards, entities in this list may cross shard boundaries. - entity_relations (Sequence[~.document.Document.EntityRelation]): + entity_relations (Sequence[google.cloud.documentai_v1beta2.types.Document.EntityRelation]): Relationship among [Document.entities][google.cloud.documentai.v1beta2.Document.entities]. - shard_info (~.document.Document.ShardInfo): + shard_info (google.cloud.documentai_v1beta2.types.Document.ShardInfo): Information about the sharding if this document is sharded part of a larger document. If the document is not sharded, this message is not specified. 
- labels (Sequence[~.document.Document.Label]): + labels (Sequence[google.cloud.documentai_v1beta2.types.Document.Label]): [Label][google.cloud.documentai.v1beta2.Document.Label]s for this document. - error (~.status.Status): + error (google.rpc.status_pb2.Status): Any error that occurred while processing this document. """ @@ -140,12 +140,12 @@ class Style(proto.Message): CSS conventions as much as possible. Attributes: - text_anchor (~.document.Document.TextAnchor): + text_anchor (google.cloud.documentai_v1beta2.types.Document.TextAnchor): Text anchor indexing into the [Document.text][google.cloud.documentai.v1beta2.Document.text]. - color (~.gt_color.Color): + color (google.type.color_pb2.Color): Text color. - background_color (~.gt_color.Color): + background_color (google.type.color_pb2.Color): Text background color. font_weight (str): Font weight. Possible values are normal, bold, bolder, and @@ -156,7 +156,7 @@ class Style(proto.Message): text_decoration (str): Text decoration. Follows CSS standard. https://www.w3schools.com/cssref/pr_text_text-decoration.asp - font_size (~.document.Document.Style.FontSize): + font_size (google.cloud.documentai_v1beta2.types.Document.Style.FontSize): Font size. """ @@ -204,37 +204,37 @@ class Page(proto.Message): Useful when a page is taken out of a [Document][google.cloud.documentai.v1beta2.Document] for individual processing. - dimension (~.document.Document.Page.Dimension): + dimension (google.cloud.documentai_v1beta2.types.Document.Page.Dimension): Physical dimension of the page. - layout (~.document.Document.Page.Layout): + layout (google.cloud.documentai_v1beta2.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for the page. - detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + detected_languages (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.DetectedLanguage]): A list of detected languages together with confidence. 
- blocks (Sequence[~.document.Document.Page.Block]): + blocks (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.Block]): A list of visually detected text blocks on the page. A block has a set of lines (collected into paragraphs) that have a common line-spacing and orientation. - paragraphs (Sequence[~.document.Document.Page.Paragraph]): + paragraphs (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.Paragraph]): A list of visually detected text paragraphs on the page. A collection of lines that a human would perceive as a paragraph. - lines (Sequence[~.document.Document.Page.Line]): + lines (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.Line]): A list of visually detected text lines on the page. A collection of tokens that a human would perceive as a line. - tokens (Sequence[~.document.Document.Page.Token]): + tokens (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.Token]): A list of visually detected tokens on the page. - visual_elements (Sequence[~.document.Document.Page.VisualElement]): + visual_elements (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.VisualElement]): A list of detected non-text visual elements e.g. checkbox, signature etc. on the page. - tables (Sequence[~.document.Document.Page.Table]): + tables (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.Table]): A list of visually detected tables on the page. - form_fields (Sequence[~.document.Document.Page.FormField]): + form_fields (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.FormField]): A list of visually detected form fields on the page. """ @@ -261,7 +261,7 @@ class Layout(proto.Message): r"""Visual element describing a layout unit on a page. Attributes: - text_anchor (~.document.Document.TextAnchor): + text_anchor (google.cloud.documentai_v1beta2.types.Document.TextAnchor): Text anchor indexing into the [Document.text][google.cloud.documentai.v1beta2.Document.text]. 
confidence (float): @@ -270,10 +270,10 @@ class Layout(proto.Message): within context of the object this layout is for. e.g. confidence can be for a single token, a table, a visual element, etc. depending on context. Range [0, 1]. - bounding_poly (~.geometry.BoundingPoly): + bounding_poly (google.cloud.documentai_v1beta2.types.BoundingPoly): The bounding polygon for the [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout]. - orientation (~.document.Document.Page.Layout.Orientation): + orientation (google.cloud.documentai_v1beta2.types.Document.Page.Layout.Orientation): Detected orientation for the [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout]. id (str): @@ -310,11 +310,11 @@ class Block(proto.Message): have a common line-spacing and orientation. Attributes: - layout (~.document.Document.Page.Layout): + layout (google.cloud.documentai_v1beta2.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Block][google.cloud.documentai.v1beta2.Document.Page.Block]. - detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + detected_languages (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.DetectedLanguage]): A list of detected languages together with confidence. """ @@ -332,11 +332,11 @@ class Paragraph(proto.Message): paragraph. Attributes: - layout (~.document.Document.Page.Layout): + layout (google.cloud.documentai_v1beta2.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Paragraph][google.cloud.documentai.v1beta2.Document.Page.Paragraph]. - detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + detected_languages (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.DetectedLanguage]): A list of detected languages together with confidence. """ @@ -355,11 +355,11 @@ class Line(proto.Message): etc. 
Attributes: - layout (~.document.Document.Page.Layout): + layout (google.cloud.documentai_v1beta2.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Line][google.cloud.documentai.v1beta2.Document.Page.Line]. - detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + detected_languages (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.DetectedLanguage]): A list of detected languages together with confidence. """ @@ -376,14 +376,14 @@ class Token(proto.Message): r"""A detected token. Attributes: - layout (~.document.Document.Page.Layout): + layout (google.cloud.documentai_v1beta2.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Token][google.cloud.documentai.v1beta2.Document.Page.Token]. - detected_break (~.document.Document.Page.Token.DetectedBreak): + detected_break (google.cloud.documentai_v1beta2.types.Document.Page.Token.DetectedBreak): Detected break at the end of a [Token][google.cloud.documentai.v1beta2.Document.Page.Token]. - detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + detected_languages (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.DetectedLanguage]): A list of detected languages together with confidence. """ @@ -393,7 +393,7 @@ class DetectedBreak(proto.Message): [Token][google.cloud.documentai.v1beta2.Document.Page.Token]. Attributes: - type_ (~.document.Document.Page.Token.DetectedBreak.Type): + type_ (google.cloud.documentai_v1beta2.types.Document.Page.Token.DetectedBreak.Type): Detected break type. """ @@ -425,14 +425,14 @@ class VisualElement(proto.Message): etc. on the page. Attributes: - layout (~.document.Document.Page.Layout): + layout (google.cloud.documentai_v1beta2.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [VisualElement][google.cloud.documentai.v1beta2.Document.Page.VisualElement]. 
type_ (str): Type of the [VisualElement][google.cloud.documentai.v1beta2.Document.Page.VisualElement]. - detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + detected_languages (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.DetectedLanguage]): A list of detected languages together with confidence. """ @@ -451,15 +451,15 @@ class Table(proto.Message): r"""A table representation similar to HTML table structure. Attributes: - layout (~.document.Document.Page.Layout): + layout (google.cloud.documentai_v1beta2.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Table][google.cloud.documentai.v1beta2.Document.Page.Table]. - header_rows (Sequence[~.document.Document.Page.Table.TableRow]): + header_rows (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.Table.TableRow]): Header rows of the table. - body_rows (Sequence[~.document.Document.Page.Table.TableRow]): + body_rows (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.Table.TableRow]): Body rows of the table. - detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + detected_languages (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.DetectedLanguage]): A list of detected languages together with confidence. """ @@ -468,7 +468,7 @@ class TableRow(proto.Message): r"""A row of table cells. Attributes: - cells (Sequence[~.document.Document.Page.Table.TableCell]): + cells (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.Table.TableCell]): Cells that make up this row. """ @@ -480,7 +480,7 @@ class TableCell(proto.Message): r"""A cell representation inside the table. Attributes: - layout (~.document.Document.Page.Layout): + layout (google.cloud.documentai_v1beta2.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [TableCell][google.cloud.documentai.v1beta2.Document.Page.Table.TableCell]. 
@@ -488,7 +488,7 @@ class TableCell(proto.Message): How many rows this cell spans. col_span (int): How many columns this cell spans. - detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + detected_languages (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.DetectedLanguage]): A list of detected languages together with confidence. """ @@ -525,21 +525,21 @@ class FormField(proto.Message): r"""A form field detected on the page. Attributes: - field_name (~.document.Document.Page.Layout): + field_name (google.cloud.documentai_v1beta2.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for the [FormField][google.cloud.documentai.v1beta2.Document.Page.FormField] name. e.g. ``Address``, ``Email``, ``Grand total``, ``Phone number``, etc. - field_value (~.document.Document.Page.Layout): + field_value (google.cloud.documentai_v1beta2.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for the [FormField][google.cloud.documentai.v1beta2.Document.Page.FormField] value. - name_detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + name_detected_languages (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.DetectedLanguage]): A list of detected languages for name together with confidence. - value_detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + value_detected_languages (Sequence[google.cloud.documentai_v1beta2.types.Document.Page.DetectedLanguage]): A list of detected languages for value together with confidence. value_type (str): @@ -640,7 +640,7 @@ class Entity(proto.Message): person, an organization, or location. Attributes: - text_anchor (~.document.Document.TextAnchor): + text_anchor (google.cloud.documentai_v1beta2.types.Document.TextAnchor): Provenance of the entity. Text anchor indexing into the [Document.text][google.cloud.documentai.v1beta2.Document.text]. 
type_ (str): @@ -652,14 +652,14 @@ class Entity(proto.Message): confidence (float): Optional. Confidence of detected Schema entity. Range [0, 1]. - page_anchor (~.document.Document.PageAnchor): + page_anchor (google.cloud.documentai_v1beta2.types.Document.PageAnchor): Optional. Represents the provenance of this entity wrt. the location on the page where it was found. id (str): Optional. Canonical id. This will be a unique value in the entity list for this document. - bounding_poly_for_demo_frontend (~.geometry.BoundingPoly): + bounding_poly_for_demo_frontend (google.cloud.documentai_v1beta2.types.BoundingPoly): Optional. Temporary field to store the bounding poly for short-term POCs. Used by the frontend only. Do not use before you talk to @@ -712,7 +712,7 @@ class TextAnchor(proto.Message): [Document.text][google.cloud.documentai.v1beta2.Document.text]. Attributes: - text_segments (Sequence[~.document.Document.TextAnchor.TextSegment]): + text_segments (Sequence[google.cloud.documentai_v1beta2.types.Document.TextAnchor.TextSegment]): The text segments from the [Document.text][google.cloud.documentai.v1beta2.Document.text]. """ @@ -748,7 +748,7 @@ class PageAnchor(proto.Message): [Document.pages][google.cloud.documentai.v1beta2.Document.pages]. Attributes: - page_refs (Sequence[~.document.Document.PageAnchor.PageRef]): + page_refs (Sequence[google.cloud.documentai_v1beta2.types.Document.PageAnchor.PageRef]): One or more references to visual page elements """ @@ -762,7 +762,7 @@ class PageRef(proto.Message): Required. Index into the [Document.pages][google.cloud.documentai.v1beta2.Document.pages] element - layout_type (~.document.Document.PageAnchor.PageRef.LayoutType): + layout_type (google.cloud.documentai_v1beta2.types.Document.PageAnchor.PageRef.LayoutType): Optional. The type of the layout element that is being referenced. If not specified the whole page is assumed to be referenced. 
diff --git a/google/cloud/documentai_v1beta2/types/document_understanding.py b/google/cloud/documentai_v1beta2/types/document_understanding.py index bdf2299f..dd80f269 100644 --- a/google/cloud/documentai_v1beta2/types/document_understanding.py +++ b/google/cloud/documentai_v1beta2/types/document_understanding.py @@ -50,7 +50,7 @@ class BatchProcessDocumentsRequest(proto.Message): output is written to Cloud Storage as JSON in the [Document] format. Attributes: - requests (Sequence[~.document_understanding.ProcessDocumentRequest]): + requests (Sequence[google.cloud.documentai_v1beta2.types.ProcessDocumentRequest]): Required. Individual requests for each document. parent (str): @@ -81,9 +81,9 @@ class ProcessDocumentRequest(proto.Message): If no location is specified, a region will be chosen automatically. This field is only populated when used in ProcessDocument method. - input_config (~.document_understanding.InputConfig): + input_config (google.cloud.documentai_v1beta2.types.InputConfig): Required. Information about the input file. - output_config (~.document_understanding.OutputConfig): + output_config (google.cloud.documentai_v1beta2.types.OutputConfig): Optional. The desired output location. This field is only needed in BatchProcessDocumentsRequest. @@ -93,22 +93,22 @@ class ProcessDocumentRequest(proto.Message): "general" and "invoice". If not provided, "general"\ is used as default. If any other value is given, the request is rejected. - table_extraction_params (~.document_understanding.TableExtractionParams): + table_extraction_params (google.cloud.documentai_v1beta2.types.TableExtractionParams): Controls table extraction behavior. If not specified, the system will decide reasonable defaults. - form_extraction_params (~.document_understanding.FormExtractionParams): + form_extraction_params (google.cloud.documentai_v1beta2.types.FormExtractionParams): Controls form extraction behavior. If not specified, the system will decide reasonable defaults. 
- entity_extraction_params (~.document_understanding.EntityExtractionParams): + entity_extraction_params (google.cloud.documentai_v1beta2.types.EntityExtractionParams): Controls entity extraction behavior. If not specified, the system will decide reasonable defaults. - ocr_params (~.document_understanding.OcrParams): + ocr_params (google.cloud.documentai_v1beta2.types.OcrParams): Controls OCR behavior. If not specified, the system will decide reasonable defaults. - automl_params (~.document_understanding.AutoMlParams): + automl_params (google.cloud.documentai_v1beta2.types.AutoMlParams): Controls AutoML model prediction behavior. AutoMlParams cannot be used together with other Params. @@ -144,7 +144,7 @@ class BatchProcessDocumentsResponse(proto.Message): returned in the LRO Operation after the operation is complete. Attributes: - responses (Sequence[~.document_understanding.ProcessDocumentResponse]): + responses (Sequence[google.cloud.documentai_v1beta2.types.ProcessDocumentResponse]): Responses for each individual document. """ @@ -157,11 +157,11 @@ class ProcessDocumentResponse(proto.Message): r"""Response to a single document processing request. Attributes: - input_config (~.document_understanding.InputConfig): + input_config (google.cloud.documentai_v1beta2.types.InputConfig): Information about the input file. This is the same as the corresponding input config in the request. - output_config (~.document_understanding.OutputConfig): + output_config (google.cloud.documentai_v1beta2.types.OutputConfig): The output location of the parsed responses. The responses are written to this location as JSON-serialized ``Document`` objects. @@ -198,7 +198,7 @@ class TableExtractionParams(proto.Message): Attributes: enabled (bool): Whether to enable table extraction. - table_bound_hints (Sequence[~.document_understanding.TableBoundHint]): + table_bound_hints (Sequence[google.cloud.documentai_v1beta2.types.TableBoundHint]): Optional. 
Table bounding box hints that can be provided to complex cases which our algorithm cannot locate the table(s) in. @@ -233,7 +233,7 @@ class TableBoundHint(proto.Message): this hint applies to. If not provided, this hint will apply to all pages by default. This value is 1-based. - bounding_box (~.geometry.BoundingPoly): + bounding_box (google.cloud.documentai_v1beta2.types.BoundingPoly): Bounding box hint for a table on this page. The coordinates must be normalized to [0,1] and the bounding box must be an axis-aligned rectangle. @@ -250,7 +250,7 @@ class FormExtractionParams(proto.Message): Attributes: enabled (bool): Whether to enable form extraction. - key_value_pair_hints (Sequence[~.document_understanding.KeyValuePairHint]): + key_value_pair_hints (Sequence[google.cloud.documentai_v1beta2.types.KeyValuePairHint]): User can provide pairs of (key text, value type) to improve the parsing result. @@ -336,7 +336,7 @@ class InputConfig(proto.Message): r"""The desired input location and metadata. Attributes: - gcs_source (~.document_understanding.GcsSource): + gcs_source (google.cloud.documentai_v1beta2.types.GcsSource): The Google Cloud Storage location to read the input from. This must be a single file. contents (bytes): @@ -369,7 +369,7 @@ class OutputConfig(proto.Message): r"""The desired output location and metadata. Attributes: - gcs_destination (~.document_understanding.GcsDestination): + gcs_destination (google.cloud.documentai_v1beta2.types.GcsDestination): The Google Cloud Storage location to write the output to. pages_per_shard (int): @@ -427,14 +427,14 @@ class OperationMetadata(proto.Message): r"""Contains metadata for the BatchProcessDocuments operation. Attributes: - state (~.document_understanding.OperationMetadata.State): + state (google.cloud.documentai_v1beta2.types.OperationMetadata.State): The state of the current batch processing. state_message (str): A message providing more details about the current state of processing. 
- create_time (~.timestamp.Timestamp): + create_time (google.protobuf.timestamp_pb2.Timestamp): The creation time of the operation. - update_time (~.timestamp.Timestamp): + update_time (google.protobuf.timestamp_pb2.Timestamp): The last update time of the operation. """ diff --git a/google/cloud/documentai_v1beta2/types/geometry.py b/google/cloud/documentai_v1beta2/types/geometry.py index 12d63f90..38ae138a 100644 --- a/google/cloud/documentai_v1beta2/types/geometry.py +++ b/google/cloud/documentai_v1beta2/types/geometry.py @@ -62,16 +62,16 @@ class BoundingPoly(proto.Message): r"""A bounding polygon for the detected image annotation. Attributes: - vertices (Sequence[~.geometry.Vertex]): + vertices (Sequence[google.cloud.documentai_v1beta2.types.Vertex]): The bounding polygon vertices. - normalized_vertices (Sequence[~.geometry.NormalizedVertex]): + normalized_vertices (Sequence[google.cloud.documentai_v1beta2.types.NormalizedVertex]): The bounding polygon normalized vertices. """ - vertices = proto.RepeatedField(proto.MESSAGE, number=1, message=Vertex,) + vertices = proto.RepeatedField(proto.MESSAGE, number=1, message="Vertex",) normalized_vertices = proto.RepeatedField( - proto.MESSAGE, number=2, message=NormalizedVertex, + proto.MESSAGE, number=2, message="NormalizedVertex", ) diff --git a/google/cloud/documentai_v1beta3/__init__.py b/google/cloud/documentai_v1beta3/__init__.py index c93f255b..84d917be 100644 --- a/google/cloud/documentai_v1beta3/__init__.py +++ b/google/cloud/documentai_v1beta3/__init__.py @@ -17,9 +17,17 @@ from .services.document_processor_service import DocumentProcessorServiceClient from .types.document import Document +from .types.document_io import BatchDocumentsInputConfig +from .types.document_io import DocumentOutputConfig +from .types.document_io import GcsDocument +from .types.document_io import GcsDocuments +from .types.document_io import GcsPrefix +from .types.document_io import RawDocument from .types.document_processor_service 
import BatchProcessMetadata from .types.document_processor_service import BatchProcessRequest from .types.document_processor_service import BatchProcessResponse +from .types.document_processor_service import CommonOperationMetadata +from .types.document_processor_service import HumanReviewStatus from .types.document_processor_service import ProcessRequest from .types.document_processor_service import ProcessResponse from .types.document_processor_service import ReviewDocumentOperationMetadata @@ -31,14 +39,22 @@ __all__ = ( + "BatchDocumentsInputConfig", "BatchProcessMetadata", "BatchProcessRequest", "BatchProcessResponse", "BoundingPoly", + "CommonOperationMetadata", "Document", + "DocumentOutputConfig", + "GcsDocument", + "GcsDocuments", + "GcsPrefix", + "HumanReviewStatus", "NormalizedVertex", "ProcessRequest", "ProcessResponse", + "RawDocument", "ReviewDocumentOperationMetadata", "ReviewDocumentRequest", "ReviewDocumentResponse", diff --git a/google/cloud/documentai_v1beta3/services/document_processor_service/async_client.py b/google/cloud/documentai_v1beta3/services/document_processor_service/async_client.py index 7ba80ac2..e6c4f780 100644 --- a/google/cloud/documentai_v1beta3/services/document_processor_service/async_client.py +++ b/google/cloud/documentai_v1beta3/services/document_processor_service/async_client.py @@ -51,9 +51,91 @@ class DocumentProcessorServiceAsyncClient: DEFAULT_ENDPOINT = DocumentProcessorServiceClient.DEFAULT_ENDPOINT DEFAULT_MTLS_ENDPOINT = DocumentProcessorServiceClient.DEFAULT_MTLS_ENDPOINT - from_service_account_file = DocumentProcessorServiceClient.from_service_account_file + human_review_config_path = staticmethod( + DocumentProcessorServiceClient.human_review_config_path + ) + parse_human_review_config_path = staticmethod( + DocumentProcessorServiceClient.parse_human_review_config_path + ) + processor_path = staticmethod(DocumentProcessorServiceClient.processor_path) + parse_processor_path = staticmethod( + 
DocumentProcessorServiceClient.parse_processor_path + ) + + common_billing_account_path = staticmethod( + DocumentProcessorServiceClient.common_billing_account_path + ) + parse_common_billing_account_path = staticmethod( + DocumentProcessorServiceClient.parse_common_billing_account_path + ) + + common_folder_path = staticmethod(DocumentProcessorServiceClient.common_folder_path) + parse_common_folder_path = staticmethod( + DocumentProcessorServiceClient.parse_common_folder_path + ) + + common_organization_path = staticmethod( + DocumentProcessorServiceClient.common_organization_path + ) + parse_common_organization_path = staticmethod( + DocumentProcessorServiceClient.parse_common_organization_path + ) + + common_project_path = staticmethod( + DocumentProcessorServiceClient.common_project_path + ) + parse_common_project_path = staticmethod( + DocumentProcessorServiceClient.parse_common_project_path + ) + + common_location_path = staticmethod( + DocumentProcessorServiceClient.common_location_path + ) + parse_common_location_path = staticmethod( + DocumentProcessorServiceClient.parse_common_location_path + ) + + @classmethod + def from_service_account_info(cls, info: dict, *args, **kwargs): + """Creates an instance of this client using the provided credentials info. + + Args: + info (dict): The service account private key info. + args: Additional arguments to pass to the constructor. + kwargs: Additional arguments to pass to the constructor. + + Returns: + DocumentProcessorServiceAsyncClient: The constructed client. + """ + return DocumentProcessorServiceClient.from_service_account_info.__func__(DocumentProcessorServiceAsyncClient, info, *args, **kwargs) # type: ignore + + @classmethod + def from_service_account_file(cls, filename: str, *args, **kwargs): + """Creates an instance of this client using the provided credentials + file. + + Args: + filename (str): The path to the service account private key json + file. 
+ args: Additional arguments to pass to the constructor. + kwargs: Additional arguments to pass to the constructor. + + Returns: + DocumentProcessorServiceAsyncClient: The constructed client. + """ + return DocumentProcessorServiceClient.from_service_account_file.__func__(DocumentProcessorServiceAsyncClient, filename, *args, **kwargs) # type: ignore + from_service_account_json = from_service_account_file + @property + def transport(self) -> DocumentProcessorServiceTransport: + """Return the transport used by the client instance. + + Returns: + DocumentProcessorServiceTransport: The transport used by the client instance. + """ + return self._client.transport + get_transport_class = functools.partial( type(DocumentProcessorServiceClient).get_transport_class, type(DocumentProcessorServiceClient), @@ -119,12 +201,13 @@ async def process_document( r"""Processes a single document. Args: - request (:class:`~.document_processor_service.ProcessRequest`): + request (:class:`google.cloud.documentai_v1beta3.types.ProcessRequest`): The request object. Request message for the process document method. name (:class:`str`): Required. The processor resource name. + This corresponds to the ``name`` field on the ``request`` instance; if ``request`` is provided, this should not be set. @@ -136,7 +219,7 @@ async def process_document( sent along with the request as metadata. Returns: - ~.document_processor_service.ProcessResponse: + google.cloud.documentai_v1beta3.types.ProcessResponse: Response message for the process document method. @@ -144,7 +227,8 @@ async def process_document( # Create or coerce a protobuf request object. # Sanity check: If we got a request object, we should *not* have # gotten any keyword arguments that map to the request. - if request is not None and any([name]): + has_flattened_params = any([name]) + if request is not None and has_flattened_params: raise ValueError( "If the `request` argument is set, then none of " "the individual field arguments should be set." 
@@ -167,8 +251,9 @@ async def process_document( maximum=60.0, multiplier=1.3, predicate=retries.if_exception_type( - exceptions.ServiceUnavailable, exceptions.DeadlineExceeded, + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, ), + deadline=120.0, ), default_timeout=120.0, client_info=DEFAULT_CLIENT_INFO, @@ -199,12 +284,13 @@ async def batch_process_documents( written to Cloud Storage as JSON in the [Document] format. Args: - request (:class:`~.document_processor_service.BatchProcessRequest`): + request (:class:`google.cloud.documentai_v1beta3.types.BatchProcessRequest`): The request object. Request message for batch process document method. name (:class:`str`): Required. The processor resource name. + This corresponds to the ``name`` field on the ``request`` instance; if ``request`` is provided, this should not be set. @@ -216,18 +302,19 @@ async def batch_process_documents( sent along with the request as metadata. Returns: - ~.operation_async.AsyncOperation: + google.api_core.operation_async.AsyncOperation: An object representing a long-running operation. The result type for the operation will be - :class:``~.document_processor_service.BatchProcessResponse``: + :class:`google.cloud.documentai_v1beta3.types.BatchProcessResponse` Response message for batch process document method. """ # Create or coerce a protobuf request object. # Sanity check: If we got a request object, we should *not* have # gotten any keyword arguments that map to the request. - if request is not None and any([name]): + has_flattened_params = any([name]) + if request is not None and has_flattened_params: raise ValueError( "If the `request` argument is set, then none of " "the individual field arguments should be set." 
@@ -250,8 +337,9 @@ async def batch_process_documents( maximum=60.0, multiplier=1.3, predicate=retries.if_exception_type( - exceptions.ServiceUnavailable, exceptions.DeadlineExceeded, + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, ), + deadline=120.0, ), default_timeout=120.0, client_info=DEFAULT_CLIENT_INFO, @@ -290,13 +378,14 @@ async def review_document( should be processed by the specified processor. Args: - request (:class:`~.document_processor_service.ReviewDocumentRequest`): + request (:class:`google.cloud.documentai_v1beta3.types.ReviewDocumentRequest`): The request object. Request message for review document method. human_review_config (:class:`str`): Required. The resource name of the HumanReviewConfig that the document will be reviewed with. + This corresponds to the ``human_review_config`` field on the ``request`` instance; if ``request`` is provided, this should not be set. @@ -308,18 +397,19 @@ async def review_document( sent along with the request as metadata. Returns: - ~.operation_async.AsyncOperation: + google.api_core.operation_async.AsyncOperation: An object representing a long-running operation. The result type for the operation will be - :class:``~.document_processor_service.ReviewDocumentResponse``: + :class:`google.cloud.documentai_v1beta3.types.ReviewDocumentResponse` Response message for review document method. """ # Create or coerce a protobuf request object. # Sanity check: If we got a request object, we should *not* have # gotten any keyword arguments that map to the request. - if request is not None and any([human_review_config]): + has_flattened_params = any([human_review_config]) + if request is not None and has_flattened_params: raise ValueError( "If the `request` argument is set, then none of " "the individual field arguments should be set." 
@@ -342,8 +432,9 @@ async def review_document( maximum=60.0, multiplier=1.3, predicate=retries.if_exception_type( - exceptions.ServiceUnavailable, exceptions.DeadlineExceeded, + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, ), + deadline=120.0, ), default_timeout=120.0, client_info=DEFAULT_CLIENT_INFO, diff --git a/google/cloud/documentai_v1beta3/services/document_processor_service/client.py b/google/cloud/documentai_v1beta3/services/document_processor_service/client.py index 84b57f36..9064c7d5 100644 --- a/google/cloud/documentai_v1beta3/services/document_processor_service/client.py +++ b/google/cloud/documentai_v1beta3/services/document_processor_service/client.py @@ -119,6 +119,22 @@ def _get_default_mtls_endpoint(api_endpoint): DEFAULT_ENDPOINT ) + @classmethod + def from_service_account_info(cls, info: dict, *args, **kwargs): + """Creates an instance of this client using the provided credentials info. + + Args: + info (dict): The service account private key info. + args: Additional arguments to pass to the constructor. + kwargs: Additional arguments to pass to the constructor. + + Returns: + DocumentProcessorServiceClient: The constructed client. + """ + credentials = service_account.Credentials.from_service_account_info(info) + kwargs["credentials"] = credentials + return cls(*args, **kwargs) + @classmethod def from_service_account_file(cls, filename: str, *args, **kwargs): """Creates an instance of this client using the provided credentials @@ -131,7 +147,7 @@ def from_service_account_file(cls, filename: str, *args, **kwargs): kwargs: Additional arguments to pass to the constructor. Returns: - {@api.name}: The constructed client. + DocumentProcessorServiceClient: The constructed client.
""" credentials = service_account.Credentials.from_service_account_file(filename) kwargs["credentials"] = credentials @@ -139,6 +155,106 @@ def from_service_account_file(cls, filename: str, *args, **kwargs): from_service_account_json = from_service_account_file + @property + def transport(self) -> DocumentProcessorServiceTransport: + """Return the transport used by the client instance. + + Returns: + DocumentProcessorServiceTransport: The transport used by the client instance. + """ + return self._transport + + @staticmethod + def human_review_config_path(project: str, location: str, processor: str,) -> str: + """Return a fully-qualified human_review_config string.""" + return "projects/{project}/locations/{location}/processors/{processor}/humanReviewConfig".format( + project=project, location=location, processor=processor, + ) + + @staticmethod + def parse_human_review_config_path(path: str) -> Dict[str, str]: + """Parse a human_review_config path into its component segments.""" + m = re.match( + r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)/processors/(?P<processor>.+?)/humanReviewConfig$", + path, + ) + return m.groupdict() if m else {} + + @staticmethod + def processor_path(project: str, location: str, processor: str,) -> str: + """Return a fully-qualified processor string.""" + return "projects/{project}/locations/{location}/processors/{processor}".format( + project=project, location=location, processor=processor, + ) + + @staticmethod + def parse_processor_path(path: str) -> Dict[str, str]: + """Parse a processor path into its component segments.""" + m = re.match( + r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)/processors/(?P<processor>.+?)$", + path, + ) + return m.groupdict() if m else {} + + @staticmethod + def common_billing_account_path(billing_account: str,) -> str: + """Return a fully-qualified billing_account string.""" + return "billingAccounts/{billing_account}".format( + billing_account=billing_account, + ) + + @staticmethod + def parse_common_billing_account_path(path: str) ->
Dict[str, str]: + """Parse a billing_account path into its component segments.""" + m = re.match(r"^billingAccounts/(?P<billing_account>.+?)$", path) + return m.groupdict() if m else {} + + @staticmethod + def common_folder_path(folder: str,) -> str: + """Return a fully-qualified folder string.""" + return "folders/{folder}".format(folder=folder,) + + @staticmethod + def parse_common_folder_path(path: str) -> Dict[str, str]: + """Parse a folder path into its component segments.""" + m = re.match(r"^folders/(?P<folder>.+?)$", path) + return m.groupdict() if m else {} + + @staticmethod + def common_organization_path(organization: str,) -> str: + """Return a fully-qualified organization string.""" + return "organizations/{organization}".format(organization=organization,) + + @staticmethod + def parse_common_organization_path(path: str) -> Dict[str, str]: + """Parse a organization path into its component segments.""" + m = re.match(r"^organizations/(?P<organization>.+?)$", path) + return m.groupdict() if m else {} + + @staticmethod + def common_project_path(project: str,) -> str: + """Return a fully-qualified project string.""" + return "projects/{project}".format(project=project,) + + @staticmethod + def parse_common_project_path(path: str) -> Dict[str, str]: + """Parse a project path into its component segments.""" + m = re.match(r"^projects/(?P<project>.+?)$", path) + return m.groupdict() if m else {} + + @staticmethod + def common_location_path(project: str, location: str,) -> str: + """Return a fully-qualified location string.""" + return "projects/{project}/locations/{location}".format( + project=project, location=location, + ) + + @staticmethod + def parse_common_location_path(path: str) -> Dict[str, str]: + """Parse a location path into its component segments.""" + m = re.match(r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)$", path) + return m.groupdict() if m else {} + def __init__( self, *, @@ -155,10 +271,10 @@ def __init__( credentials identify the application to the service; if none are specified, the client
will attempt to ascertain the credentials from the environment. - transport (Union[str, ~.DocumentProcessorServiceTransport]): The + transport (Union[str, DocumentProcessorServiceTransport]): The transport to use. If set to None, a transport is chosen automatically. - client_options (client_options_lib.ClientOptions): Custom options for the + client_options (google.api_core.client_options.ClientOptions): Custom options for the client. It won't take effect if a ``transport`` instance is provided. (1) The ``api_endpoint`` property can be used to override the default endpoint provided by the client. GOOGLE_API_USE_MTLS_ENDPOINT @@ -174,10 +290,10 @@ def __init__( not provided, the default SSL client certificate will be used if present. If GOOGLE_API_USE_CLIENT_CERTIFICATE is "false" or not set, no client certificate will be used. - client_info (google.api_core.gapic_v1.client_info.ClientInfo): - The client info used to send a user-agent string along with - API requests. If ``None``, then default info will be used. - Generally, you only need to set this if you're developing + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing your own client library. 
Raises: @@ -194,21 +310,17 @@ def __init__( util.strtobool(os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE", "false")) ) - ssl_credentials = None + client_cert_source_func = None is_mtls = False if use_client_cert: if client_options.client_cert_source: - import grpc # type: ignore - - cert, key = client_options.client_cert_source() - ssl_credentials = grpc.ssl_channel_credentials( - certificate_chain=cert, private_key=key - ) is_mtls = True + client_cert_source_func = client_options.client_cert_source else: - creds = SslCredentials() - is_mtls = creds.is_mtls - ssl_credentials = creds.ssl_credentials if is_mtls else None + is_mtls = mtls.has_default_client_cert_source() + client_cert_source_func = ( + mtls.default_client_cert_source() if is_mtls else None + ) # Figure out which api endpoint to use. if client_options.api_endpoint is not None: @@ -251,7 +363,7 @@ def __init__( credentials_file=client_options.credentials_file, host=api_endpoint, scopes=client_options.scopes, - ssl_channel_credentials=ssl_credentials, + client_cert_source_for_mtls=client_cert_source_func, quota_project_id=client_options.quota_project_id, client_info=client_info, ) @@ -268,12 +380,13 @@ def process_document( r"""Processes a single document. Args: - request (:class:`~.document_processor_service.ProcessRequest`): + request (google.cloud.documentai_v1beta3.types.ProcessRequest): The request object. Request message for the process document method. - name (:class:`str`): + name (str): Required. The processor resource name. + This corresponds to the ``name`` field on the ``request`` instance; if ``request`` is provided, this should not be set. @@ -285,7 +398,7 @@ def process_document( sent along with the request as metadata. Returns: - ~.document_processor_service.ProcessResponse: + google.cloud.documentai_v1beta3.types.ProcessResponse: Response message for the process document method. @@ -342,12 +455,13 @@ def batch_process_documents( written to Cloud Storage as JSON in the [Document] format. 
Args: - request (:class:`~.document_processor_service.BatchProcessRequest`): + request (google.cloud.documentai_v1beta3.types.BatchProcessRequest): The request object. Request message for batch process document method. - name (:class:`str`): + name (str): Required. The processor resource name. + This corresponds to the ``name`` field on the ``request`` instance; if ``request`` is provided, this should not be set. @@ -359,11 +473,11 @@ def batch_process_documents( sent along with the request as metadata. Returns: - ~.operation.Operation: + google.api_core.operation.Operation: An object representing a long-running operation. The result type for the operation will be - :class:``~.document_processor_service.BatchProcessResponse``: + :class:`google.cloud.documentai_v1beta3.types.BatchProcessResponse` Response message for batch process document method. """ @@ -427,13 +541,14 @@ def review_document( should be processed by the specified processor. Args: - request (:class:`~.document_processor_service.ReviewDocumentRequest`): + request (google.cloud.documentai_v1beta3.types.ReviewDocumentRequest): The request object. Request message for review document method. - human_review_config (:class:`str`): + human_review_config (str): Required. The resource name of the HumanReviewConfig that the document will be reviewed with. + This corresponds to the ``human_review_config`` field on the ``request`` instance; if ``request`` is provided, this should not be set. @@ -445,11 +560,11 @@ def review_document( sent along with the request as metadata. Returns: - ~.operation.Operation: + google.api_core.operation.Operation: An object representing a long-running operation. The result type for the operation will be - :class:``~.document_processor_service.ReviewDocumentResponse``: + :class:`google.cloud.documentai_v1beta3.types.ReviewDocumentResponse` Response message for review document method. 
""" diff --git a/google/cloud/documentai_v1beta3/services/document_processor_service/transports/__init__.py b/google/cloud/documentai_v1beta3/services/document_processor_service/transports/__init__.py index a613297c..e3e820b3 100644 --- a/google/cloud/documentai_v1beta3/services/document_processor_service/transports/__init__.py +++ b/google/cloud/documentai_v1beta3/services/document_processor_service/transports/__init__.py @@ -30,7 +30,6 @@ _transport_registry["grpc"] = DocumentProcessorServiceGrpcTransport _transport_registry["grpc_asyncio"] = DocumentProcessorServiceGrpcAsyncIOTransport - __all__ = ( "DocumentProcessorServiceTransport", "DocumentProcessorServiceGrpcTransport", diff --git a/google/cloud/documentai_v1beta3/services/document_processor_service/transports/base.py b/google/cloud/documentai_v1beta3/services/document_processor_service/transports/base.py index dfa32e24..ebcbc6a9 100644 --- a/google/cloud/documentai_v1beta3/services/document_processor_service/transports/base.py +++ b/google/cloud/documentai_v1beta3/services/document_processor_service/transports/base.py @@ -71,10 +71,10 @@ def __init__( scope (Optional[Sequence[str]]): A list of scopes. quota_project_id (Optional[str]): An optional project to use for billing and quota. - client_info (google.api_core.gapic_v1.client_info.ClientInfo): - The client info used to send a user-agent string along with - API requests. If ``None``, then default info will be used. - Generally, you only need to set this if you're developing + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing your own client library. """ # Save the hostname. Default to port 443 (HTTPS) if none is specified. @@ -82,6 +82,9 @@ def __init__( host += ":443" self._host = host + # Save the scopes. 
+ self._scopes = scopes or self.AUTH_SCOPES + # If no credentials are provided, then determine the appropriate # defaults. if credentials and credentials_file: @@ -91,20 +94,17 @@ def __init__( if credentials_file is not None: credentials, _ = auth.load_credentials_from_file( - credentials_file, scopes=scopes, quota_project_id=quota_project_id + credentials_file, scopes=self._scopes, quota_project_id=quota_project_id ) elif credentials is None: credentials, _ = auth.default( - scopes=scopes, quota_project_id=quota_project_id + scopes=self._scopes, quota_project_id=quota_project_id ) # Save the credentials. self._credentials = credentials - # Lifted into its own function so it can be stubbed out during tests. - self._prep_wrapped_messages(client_info) - def _prep_wrapped_messages(self, client_info): # Precompute the wrapped methods. self._wrapped_methods = { @@ -115,8 +115,9 @@ def _prep_wrapped_messages(self, client_info): maximum=60.0, multiplier=1.3, predicate=retries.if_exception_type( - exceptions.ServiceUnavailable, exceptions.DeadlineExceeded, + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, ), + deadline=120.0, ), default_timeout=120.0, client_info=client_info, @@ -128,8 +129,9 @@ def _prep_wrapped_messages(self, client_info): maximum=60.0, multiplier=1.3, predicate=retries.if_exception_type( - exceptions.ServiceUnavailable, exceptions.DeadlineExceeded, + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, ), + deadline=120.0, ), default_timeout=120.0, client_info=client_info, @@ -141,8 +143,9 @@ def _prep_wrapped_messages(self, client_info): maximum=60.0, multiplier=1.3, predicate=retries.if_exception_type( - exceptions.ServiceUnavailable, exceptions.DeadlineExceeded, + exceptions.DeadlineExceeded, exceptions.ServiceUnavailable, ), + deadline=120.0, ), default_timeout=120.0, client_info=client_info, diff --git a/google/cloud/documentai_v1beta3/services/document_processor_service/transports/grpc.py 
b/google/cloud/documentai_v1beta3/services/document_processor_service/transports/grpc.py index d7220126..24a3a7d4 100644 --- a/google/cloud/documentai_v1beta3/services/document_processor_service/transports/grpc.py +++ b/google/cloud/documentai_v1beta3/services/document_processor_service/transports/grpc.py @@ -63,6 +63,7 @@ def __init__( api_mtls_endpoint: str = None, client_cert_source: Callable[[], Tuple[bytes, bytes]] = None, ssl_channel_credentials: grpc.ChannelCredentials = None, + client_cert_source_for_mtls: Callable[[], Tuple[bytes, bytes]] = None, quota_project_id: Optional[str] = None, client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, ) -> None: @@ -93,12 +94,16 @@ def __init__( ``api_mtls_endpoint`` is None. ssl_channel_credentials (grpc.ChannelCredentials): SSL credentials for grpc channel. It is ignored if ``channel`` is provided. + client_cert_source_for_mtls (Optional[Callable[[], Tuple[bytes, bytes]]]): + A callback to provide client certificate bytes and private key bytes, + both in PEM format. It is used to configure mutual TLS channel. It is + ignored if ``channel`` or ``ssl_channel_credentials`` is provided. quota_project_id (Optional[str]): An optional project to use for billing and quota. - client_info (google.api_core.gapic_v1.client_info.ClientInfo): - The client info used to send a user-agent string along with - API requests. If ``None``, then default info will be used. - Generally, you only need to set this if you're developing + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing your own client library. Raises: @@ -107,79 +112,71 @@ def __init__( google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` and ``credentials_file`` are passed. 
""" + self._grpc_channel = None + self._ssl_channel_credentials = ssl_channel_credentials + self._stubs: Dict[str, Callable] = {} + self._operations_client = None + + if api_mtls_endpoint: + warnings.warn("api_mtls_endpoint is deprecated", DeprecationWarning) + if client_cert_source: + warnings.warn("client_cert_source is deprecated", DeprecationWarning) + if channel: - # Sanity check: Ensure that channel and credentials are not both - # provided. + # Ignore credentials if a channel was passed. credentials = False - # If a channel was explicitly provided, set it. self._grpc_channel = channel - elif api_mtls_endpoint: - warnings.warn( - "api_mtls_endpoint and client_cert_source are deprecated", - DeprecationWarning, - ) - - host = ( - api_mtls_endpoint - if ":" in api_mtls_endpoint - else api_mtls_endpoint + ":443" - ) - - if credentials is None: - credentials, _ = auth.default( - scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id - ) - - # Create SSL credentials with client_cert_source or application - # default SSL credentials. - if client_cert_source: - cert, key = client_cert_source() - ssl_credentials = grpc.ssl_channel_credentials( - certificate_chain=cert, private_key=key - ) - else: - ssl_credentials = SslCredentials().ssl_credentials + self._ssl_channel_credentials = None - # create a new channel. The provided one is ignored. - self._grpc_channel = type(self).create_channel( - host, - credentials=credentials, - credentials_file=credentials_file, - ssl_credentials=ssl_credentials, - scopes=scopes or self.AUTH_SCOPES, - quota_project_id=quota_project_id, - ) else: - host = host if ":" in host else host + ":443" - - if credentials is None: - credentials, _ = auth.default( - scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id - ) - - # create a new channel. The provided one is ignored. 
- self._grpc_channel = type(self).create_channel( - host, - credentials=credentials, - credentials_file=credentials_file, - ssl_credentials=ssl_channel_credentials, - scopes=scopes or self.AUTH_SCOPES, - quota_project_id=quota_project_id, - ) + if api_mtls_endpoint: + host = api_mtls_endpoint + + # Create SSL credentials with client_cert_source or application + # default SSL credentials. + if client_cert_source: + cert, key = client_cert_source() + self._ssl_channel_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + else: + self._ssl_channel_credentials = SslCredentials().ssl_credentials - self._stubs = {} # type: Dict[str, Callable] + else: + if client_cert_source_for_mtls and not ssl_channel_credentials: + cert, key = client_cert_source_for_mtls() + self._ssl_channel_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) - # Run the base constructor. + # The base transport sets the host, credentials and scopes super().__init__( host=host, credentials=credentials, credentials_file=credentials_file, - scopes=scopes or self.AUTH_SCOPES, + scopes=scopes, quota_project_id=quota_project_id, client_info=client_info, ) + if not self._grpc_channel: + self._grpc_channel = type(self).create_channel( + self._host, + credentials=self._credentials, + credentials_file=credentials_file, + scopes=self._scopes, + ssl_credentials=self._ssl_channel_credentials, + quota_project_id=quota_project_id, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + + # Wrap messages. This must be done after self._grpc_channel exists + self._prep_wrapped_messages(client_info) + @classmethod def create_channel( cls, @@ -192,7 +189,7 @@ def create_channel( ) -> grpc.Channel: """Create and return a gRPC channel object. Args: - address (Optionsl[str]): The host for the channel to use. + host (Optional[str]): The host for the channel to use. 
credentials (Optional[~.Credentials]): The authorization credentials to attach to requests. These credentials identify this application to the service. If @@ -227,12 +224,8 @@ def create_channel( @property def grpc_channel(self) -> grpc.Channel: - """Create the channel designed to connect to this service. - - This property caches on the instance; repeated calls return - the same channel. + """Return the channel designed to connect to this service. """ - # Return the channel from cache. return self._grpc_channel @property @@ -243,13 +236,11 @@ def operations_client(self) -> operations_v1.OperationsClient: client. """ # Sanity check: Only create a new client if we do not already have one. - if "operations_client" not in self.__dict__: - self.__dict__["operations_client"] = operations_v1.OperationsClient( - self.grpc_channel - ) + if self._operations_client is None: + self._operations_client = operations_v1.OperationsClient(self.grpc_channel) # Return the client from cache. - return self.__dict__["operations_client"] + return self._operations_client @property def process_document( diff --git a/google/cloud/documentai_v1beta3/services/document_processor_service/transports/grpc_asyncio.py b/google/cloud/documentai_v1beta3/services/document_processor_service/transports/grpc_asyncio.py index 391819cf..99249da3 100644 --- a/google/cloud/documentai_v1beta3/services/document_processor_service/transports/grpc_asyncio.py +++ b/google/cloud/documentai_v1beta3/services/document_processor_service/transports/grpc_asyncio.py @@ -67,7 +67,7 @@ def create_channel( ) -> aio.Channel: """Create and return a gRPC AsyncIO channel object. Args: - address (Optional[str]): The host for the channel to use. + host (Optional[str]): The host for the channel to use. credentials (Optional[~.Credentials]): The authorization credentials to attach to requests. These credentials identify this application to the service. 
If @@ -107,6 +107,7 @@ def __init__( api_mtls_endpoint: str = None, client_cert_source: Callable[[], Tuple[bytes, bytes]] = None, ssl_channel_credentials: grpc.ChannelCredentials = None, + client_cert_source_for_mtls: Callable[[], Tuple[bytes, bytes]] = None, quota_project_id=None, client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO, ) -> None: @@ -138,12 +139,16 @@ def __init__( ``api_mtls_endpoint`` is None. ssl_channel_credentials (grpc.ChannelCredentials): SSL credentials for grpc channel. It is ignored if ``channel`` is provided. + client_cert_source_for_mtls (Optional[Callable[[], Tuple[bytes, bytes]]]): + A callback to provide client certificate bytes and private key bytes, + both in PEM format. It is used to configure mutual TLS channel. It is + ignored if ``channel`` or ``ssl_channel_credentials`` is provided. quota_project_id (Optional[str]): An optional project to use for billing and quota. - client_info (google.api_core.gapic_v1.client_info.ClientInfo): - The client info used to send a user-agent string along with - API requests. If ``None``, then default info will be used. - Generally, you only need to set this if you're developing + client_info (google.api_core.gapic_v1.client_info.ClientInfo): + The client info used to send a user-agent string along with + API requests. If ``None``, then default info will be used. + Generally, you only need to set this if you're developing your own client library. Raises: @@ -152,78 +157,70 @@ def __init__( google.api_core.exceptions.DuplicateCredentialArgs: If both ``credentials`` and ``credentials_file`` are passed. 
""" + self._grpc_channel = None + self._ssl_channel_credentials = ssl_channel_credentials + self._stubs: Dict[str, Callable] = {} + self._operations_client = None + + if api_mtls_endpoint: + warnings.warn("api_mtls_endpoint is deprecated", DeprecationWarning) + if client_cert_source: + warnings.warn("client_cert_source is deprecated", DeprecationWarning) + if channel: - # Sanity check: Ensure that channel and credentials are not both - # provided. + # Ignore credentials if a channel was passed. credentials = False - # If a channel was explicitly provided, set it. self._grpc_channel = channel - elif api_mtls_endpoint: - warnings.warn( - "api_mtls_endpoint and client_cert_source are deprecated", - DeprecationWarning, - ) - - host = ( - api_mtls_endpoint - if ":" in api_mtls_endpoint - else api_mtls_endpoint + ":443" - ) + self._ssl_channel_credentials = None - if credentials is None: - credentials, _ = auth.default( - scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id - ) - - # Create SSL credentials with client_cert_source or application - # default SSL credentials. - if client_cert_source: - cert, key = client_cert_source() - ssl_credentials = grpc.ssl_channel_credentials( - certificate_chain=cert, private_key=key - ) - else: - ssl_credentials = SslCredentials().ssl_credentials - - # create a new channel. The provided one is ignored. - self._grpc_channel = type(self).create_channel( - host, - credentials=credentials, - credentials_file=credentials_file, - ssl_credentials=ssl_credentials, - scopes=scopes or self.AUTH_SCOPES, - quota_project_id=quota_project_id, - ) else: - host = host if ":" in host else host + ":443" + if api_mtls_endpoint: + host = api_mtls_endpoint + + # Create SSL credentials with client_cert_source or application + # default SSL credentials. 
+ if client_cert_source: + cert, key = client_cert_source() + self._ssl_channel_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) + else: + self._ssl_channel_credentials = SslCredentials().ssl_credentials - if credentials is None: - credentials, _ = auth.default( - scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id - ) - - # create a new channel. The provided one is ignored. - self._grpc_channel = type(self).create_channel( - host, - credentials=credentials, - credentials_file=credentials_file, - ssl_credentials=ssl_channel_credentials, - scopes=scopes or self.AUTH_SCOPES, - quota_project_id=quota_project_id, - ) + else: + if client_cert_source_for_mtls and not ssl_channel_credentials: + cert, key = client_cert_source_for_mtls() + self._ssl_channel_credentials = grpc.ssl_channel_credentials( + certificate_chain=cert, private_key=key + ) - # Run the base constructor. + # The base transport sets the host, credentials and scopes super().__init__( host=host, credentials=credentials, credentials_file=credentials_file, - scopes=scopes or self.AUTH_SCOPES, + scopes=scopes, quota_project_id=quota_project_id, client_info=client_info, ) - self._stubs = {} + if not self._grpc_channel: + self._grpc_channel = type(self).create_channel( + self._host, + credentials=self._credentials, + credentials_file=credentials_file, + scopes=self._scopes, + ssl_credentials=self._ssl_channel_credentials, + quota_project_id=quota_project_id, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + + # Wrap messages. This must be done after self._grpc_channel exists + self._prep_wrapped_messages(client_info) @property def grpc_channel(self) -> aio.Channel: @@ -243,13 +240,13 @@ def operations_client(self) -> operations_v1.OperationsAsyncClient: client. """ # Sanity check: Only create a new client if we do not already have one. 
- if "operations_client" not in self.__dict__: - self.__dict__["operations_client"] = operations_v1.OperationsAsyncClient( + if self._operations_client is None: + self._operations_client = operations_v1.OperationsAsyncClient( self.grpc_channel ) # Return the client from cache. - return self.__dict__["operations_client"] + return self._operations_client @property def process_document( diff --git a/google/cloud/documentai_v1beta3/types/__init__.py b/google/cloud/documentai_v1beta3/types/__init__.py index 4b5768f7..0d60bd37 100644 --- a/google/cloud/documentai_v1beta3/types/__init__.py +++ b/google/cloud/documentai_v1beta3/types/__init__.py @@ -15,35 +15,52 @@ # limitations under the License. # -from .geometry import ( - Vertex, - NormalizedVertex, - BoundingPoly, -) from .document import Document +from .document_io import ( + BatchDocumentsInputConfig, + DocumentOutputConfig, + GcsDocument, + GcsDocuments, + GcsPrefix, + RawDocument, +) from .document_processor_service import ( - ProcessRequest, - ProcessResponse, + BatchProcessMetadata, BatchProcessRequest, BatchProcessResponse, - BatchProcessMetadata, + CommonOperationMetadata, + HumanReviewStatus, + ProcessRequest, + ProcessResponse, + ReviewDocumentOperationMetadata, ReviewDocumentRequest, ReviewDocumentResponse, - ReviewDocumentOperationMetadata, ) - +from .geometry import ( + BoundingPoly, + NormalizedVertex, + Vertex, +) __all__ = ( - "Vertex", - "NormalizedVertex", - "BoundingPoly", "Document", - "ProcessRequest", - "ProcessResponse", + "BatchDocumentsInputConfig", + "DocumentOutputConfig", + "GcsDocument", + "GcsDocuments", + "GcsPrefix", + "RawDocument", + "BatchProcessMetadata", "BatchProcessRequest", "BatchProcessResponse", - "BatchProcessMetadata", + "CommonOperationMetadata", + "HumanReviewStatus", + "ProcessRequest", + "ProcessResponse", + "ReviewDocumentOperationMetadata", "ReviewDocumentRequest", "ReviewDocumentResponse", - "ReviewDocumentOperationMetadata", + "BoundingPoly", + "NormalizedVertex", + 
"Vertex", ) diff --git a/google/cloud/documentai_v1beta3/types/document.py b/google/cloud/documentai_v1beta3/types/document.py index 104a366c..3290e2dc 100644 --- a/google/cloud/documentai_v1beta3/types/document.py +++ b/google/cloud/documentai_v1beta3/types/document.py @@ -42,57 +42,52 @@ class Document(proto.Message): Attributes: uri (str): - Currently supports Google Cloud Storage URI of the form - ``gs://bucket_name/object_name``. Object versioning is not - supported. See `Google Cloud Storage Request + Optional. Currently supports Google Cloud Storage URI of the + form ``gs://bucket_name/object_name``. Object versioning is + not supported. See `Google Cloud Storage Request URIs `__ for more info. content (bytes): - Inline document content, represented as a stream of bytes. - Note: As with all ``bytes`` fields, protobuffers use a pure - binary representation, whereas JSON representations use - base64. + Optional. Inline document content, represented as a stream + of bytes. Note: As with all ``bytes`` fields, protobuffers + use a pure binary representation, whereas JSON + representations use base64. mime_type (str): An IANA published MIME type (also referred to as media type). For more information, see https://www.iana.org/assignments/media- types/media-types.xhtml. text (str): - UTF-8 encoded text in reading order from the - document. - text_styles (Sequence[~.document.Document.Style]): + Optional. UTF-8 encoded text in reading order + from the document. + text_styles (Sequence[google.cloud.documentai_v1beta3.types.Document.Style]): Styles for the [Document.text][google.cloud.documentai.v1beta3.Document.text]. - pages (Sequence[~.document.Document.Page]): + pages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page]): Visual page layout for the [Document][google.cloud.documentai.v1beta3.Document]. 
- entities (Sequence[~.document.Document.Entity]): + entities (Sequence[google.cloud.documentai_v1beta3.types.Document.Entity]): A list of entities detected on [Document.text][google.cloud.documentai.v1beta3.Document.text]. For document shards, entities in this list may cross shard boundaries. - entity_relations (Sequence[~.document.Document.EntityRelation]): + entity_relations (Sequence[google.cloud.documentai_v1beta3.types.Document.EntityRelation]): Relationship among [Document.entities][google.cloud.documentai.v1beta3.Document.entities]. - translations (Sequence[~.document.Document.Translation]): - A list of translations on - [Document.text][google.cloud.documentai.v1beta3.Document.text]. - For document shards, translations in this list may cross - shard boundaries. - text_changes (Sequence[~.document.Document.TextChange]): + text_changes (Sequence[google.cloud.documentai_v1beta3.types.Document.TextChange]): A list of text corrections made to [Document.text]. This is usually used for annotating corrections to OCR mistakes. Text changes for a given revision may not overlap with each other. - shard_info (~.document.Document.ShardInfo): + shard_info (google.cloud.documentai_v1beta3.types.Document.ShardInfo): Information about the sharding if this document is sharded part of a larger document. If the document is not sharded, this message is not specified. - error (~.status.Status): + error (google.rpc.status_pb2.Status): Any error that occurred while processing this document. - revisions (Sequence[~.document.Document.Revision]): + revisions (Sequence[google.cloud.documentai_v1beta3.types.Document.Revision]): Revision history of this document. """ @@ -123,12 +118,12 @@ class Style(proto.Message): CSS conventions as much as possible. Attributes: - text_anchor (~.document.Document.TextAnchor): + text_anchor (google.cloud.documentai_v1beta3.types.Document.TextAnchor): Text anchor indexing into the [Document.text][google.cloud.documentai.v1beta3.Document.text]. 
- color (~.gt_color.Color): + color (google.type.color_pb2.Color): Text color. - background_color (~.gt_color.Color): + background_color (google.type.color_pb2.Color): Text background color. font_weight (str): Font weight. Possible values are normal, bold, bolder, and @@ -139,7 +134,7 @@ class Style(proto.Message): text_decoration (str): Text decoration. Follows CSS standard. https://www.w3schools.com/cssref/pr_text_text-decoration.asp - font_size (~.document.Document.Style.FontSize): + font_size (google.cloud.documentai_v1beta3.types.Document.Style.FontSize): Font size. """ @@ -187,46 +182,46 @@ class Page(proto.Message): Useful when a page is taken out of a [Document][google.cloud.documentai.v1beta3.Document] for individual processing. - image (~.document.Document.Page.Image): + image (google.cloud.documentai_v1beta3.types.Document.Page.Image): Rendered image for this page. This image is preprocessed to remove any skew, rotation, and distortions such that the annotation bounding boxes can be upright and axis-aligned. - transforms (Sequence[~.document.Document.Page.Matrix]): + transforms (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.Matrix]): Transformation matrices that were applied to the original document image to produce [Page.image][google.cloud.documentai.v1beta3.Document.Page.image]. - dimension (~.document.Document.Page.Dimension): + dimension (google.cloud.documentai_v1beta3.types.Document.Page.Dimension): Physical dimension of the page. - layout (~.document.Document.Page.Layout): + layout (google.cloud.documentai_v1beta3.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the page. - detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + detected_languages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.DetectedLanguage]): A list of detected languages together with confidence. 
- blocks (Sequence[~.document.Document.Page.Block]): + blocks (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.Block]): A list of visually detected text blocks on the page. A block has a set of lines (collected into paragraphs) that have a common line-spacing and orientation. - paragraphs (Sequence[~.document.Document.Page.Paragraph]): + paragraphs (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.Paragraph]): A list of visually detected text paragraphs on the page. A collection of lines that a human would perceive as a paragraph. - lines (Sequence[~.document.Document.Page.Line]): + lines (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.Line]): A list of visually detected text lines on the page. A collection of tokens that a human would perceive as a line. - tokens (Sequence[~.document.Document.Page.Token]): + tokens (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.Token]): A list of visually detected tokens on the page. - visual_elements (Sequence[~.document.Document.Page.VisualElement]): + visual_elements (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.VisualElement]): A list of detected non-text visual elements e.g. checkbox, signature etc. on the page. - tables (Sequence[~.document.Document.Page.Table]): + tables (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.Table]): A list of visually detected tables on the page. - form_fields (Sequence[~.document.Document.Page.FormField]): + form_fields (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.FormField]): A list of visually detected form fields on the page. """ @@ -302,7 +297,7 @@ class Layout(proto.Message): r"""Visual element describing a layout unit on a page. Attributes: - text_anchor (~.document.Document.TextAnchor): + text_anchor (google.cloud.documentai_v1beta3.types.Document.TextAnchor): Text anchor indexing into the [Document.text][google.cloud.documentai.v1beta3.Document.text]. 
confidence (float): @@ -311,10 +306,10 @@ class Layout(proto.Message): within context of the object this layout is for. e.g. confidence can be for a single token, a table, a visual element, etc. depending on context. Range [0, 1]. - bounding_poly (~.geometry.BoundingPoly): + bounding_poly (google.cloud.documentai_v1beta3.types.BoundingPoly): The bounding polygon for the [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout]. - orientation (~.document.Document.Page.Layout.Orientation): + orientation (google.cloud.documentai_v1beta3.types.Document.Page.Layout.Orientation): Detected orientation for the [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout]. """ @@ -346,14 +341,14 @@ class Block(proto.Message): have a common line-spacing and orientation. Attributes: - layout (~.document.Document.Page.Layout): + layout (google.cloud.documentai_v1beta3.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for [Block][google.cloud.documentai.v1beta3.Document.Page.Block]. - detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + detected_languages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.DetectedLanguage]): A list of detected languages together with confidence. - provenance (~.document.Document.Provenance): + provenance (google.cloud.documentai_v1beta3.types.Document.Provenance): The history of this annotation. """ @@ -374,14 +369,14 @@ class Paragraph(proto.Message): paragraph. Attributes: - layout (~.document.Document.Page.Layout): + layout (google.cloud.documentai_v1beta3.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for [Paragraph][google.cloud.documentai.v1beta3.Document.Page.Paragraph]. - detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + detected_languages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.DetectedLanguage]): A list of detected languages together with confidence. 
- provenance (~.document.Document.Provenance): + provenance (google.cloud.documentai_v1beta3.types.Document.Provenance): The history of this annotation. """ @@ -403,14 +398,14 @@ class Line(proto.Message): etc. Attributes: - layout (~.document.Document.Page.Layout): + layout (google.cloud.documentai_v1beta3.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for [Line][google.cloud.documentai.v1beta3.Document.Page.Line]. - detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + detected_languages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.DetectedLanguage]): A list of detected languages together with confidence. - provenance (~.document.Document.Provenance): + provenance (google.cloud.documentai_v1beta3.types.Document.Provenance): The history of this annotation. """ @@ -430,17 +425,17 @@ class Token(proto.Message): r"""A detected token. Attributes: - layout (~.document.Document.Page.Layout): + layout (google.cloud.documentai_v1beta3.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for [Token][google.cloud.documentai.v1beta3.Document.Page.Token]. - detected_break (~.document.Document.Page.Token.DetectedBreak): + detected_break (google.cloud.documentai_v1beta3.types.Document.Page.Token.DetectedBreak): Detected break at the end of a [Token][google.cloud.documentai.v1beta3.Document.Page.Token]. - detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + detected_languages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.DetectedLanguage]): A list of detected languages together with confidence. - provenance (~.document.Document.Provenance): + provenance (google.cloud.documentai_v1beta3.types.Document.Provenance): The history of this annotation. """ @@ -449,7 +444,7 @@ class DetectedBreak(proto.Message): [Token][google.cloud.documentai.v1beta3.Document.Page.Token]. 
Attributes: - type_ (~.document.Document.Page.Token.DetectedBreak.Type): + type_ (google.cloud.documentai_v1beta3.types.Document.Page.Token.DetectedBreak.Type): Detected break type. """ @@ -485,14 +480,14 @@ class VisualElement(proto.Message): etc. on the page. Attributes: - layout (~.document.Document.Page.Layout): + layout (google.cloud.documentai_v1beta3.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for [VisualElement][google.cloud.documentai.v1beta3.Document.Page.VisualElement]. type_ (str): Type of the [VisualElement][google.cloud.documentai.v1beta3.Document.Page.VisualElement]. - detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + detected_languages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.DetectedLanguage]): A list of detected languages together with confidence. """ @@ -511,15 +506,15 @@ class Table(proto.Message): r"""A table representation similar to HTML table structure. Attributes: - layout (~.document.Document.Page.Layout): + layout (google.cloud.documentai_v1beta3.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for [Table][google.cloud.documentai.v1beta3.Document.Page.Table]. - header_rows (Sequence[~.document.Document.Page.Table.TableRow]): + header_rows (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.Table.TableRow]): Header rows of the table. - body_rows (Sequence[~.document.Document.Page.Table.TableRow]): + body_rows (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.Table.TableRow]): Body rows of the table. - detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + detected_languages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.DetectedLanguage]): A list of detected languages together with confidence. """ @@ -528,7 +523,7 @@ class TableRow(proto.Message): r"""A row of table cells. 
Attributes: - cells (Sequence[~.document.Document.Page.Table.TableCell]): + cells (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.Table.TableCell]): Cells that make up this row. """ @@ -540,7 +535,7 @@ class TableCell(proto.Message): r"""A cell representation inside the table. Attributes: - layout (~.document.Document.Page.Layout): + layout (google.cloud.documentai_v1beta3.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for [TableCell][google.cloud.documentai.v1beta3.Document.Page.Table.TableCell]. @@ -548,7 +543,7 @@ class TableCell(proto.Message): How many rows this cell spans. col_span (int): How many columns this cell spans. - detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + detected_languages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.DetectedLanguage]): A list of detected languages together with confidence. """ @@ -585,21 +580,21 @@ class FormField(proto.Message): r"""A form field detected on the page. Attributes: - field_name (~.document.Document.Page.Layout): + field_name (google.cloud.documentai_v1beta3.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the [FormField][google.cloud.documentai.v1beta3.Document.Page.FormField] name. e.g. ``Address``, ``Email``, ``Grand total``, ``Phone number``, etc. - field_value (~.document.Document.Page.Layout): + field_value (google.cloud.documentai_v1beta3.types.Document.Page.Layout): [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout] for the [FormField][google.cloud.documentai.v1beta3.Document.Page.FormField] value. - name_detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + name_detected_languages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.DetectedLanguage]): A list of detected languages for name together with confidence. 
- value_detected_languages (Sequence[~.document.Document.Page.DetectedLanguage]): + value_detected_languages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.DetectedLanguage]): A list of detected languages for value together with confidence. value_type (str): @@ -696,36 +691,38 @@ class Entity(proto.Message): person, an organization, or location. Attributes: - text_anchor (~.document.Document.TextAnchor): - Provenance of the entity. Text anchor indexing into the + text_anchor (google.cloud.documentai_v1beta3.types.Document.TextAnchor): + Optional. Provenance of the entity. Text anchor indexing + into the [Document.text][google.cloud.documentai.v1beta3.Document.text]. type_ (str): Entity type from a schema e.g. ``Address``. mention_text (str): - Text value in the document e.g. ``1600 Amphitheatre Pkwy``. + Optional. Text value in the document e.g. + ``1600 Amphitheatre Pkwy``. mention_id (str): - Deprecated. Use ``id`` field instead. + Optional. Deprecated. Use ``id`` field instead. confidence (float): Optional. Confidence of detected Schema entity. Range [0, 1]. - page_anchor (~.document.Document.PageAnchor): + page_anchor (google.cloud.documentai_v1beta3.types.Document.PageAnchor): Optional. Represents the provenance of this entity wrt. the location on the page where it was found. id (str): - Canonical id. This will be a unique value in - the entity list for this document. - normalized_value (~.document.Document.Entity.NormalizedValue): + Optional. Canonical id. This will be a unique + value in the entity list for this document. + normalized_value (google.cloud.documentai_v1beta3.types.Document.Entity.NormalizedValue): Optional. Normalized entity value. Absent if the extracted value could not be converted or the type (e.g. address) is not supported for certain parsers. This field is also only populated for certain supported document types. 
- properties (Sequence[~.document.Document.Entity]): + properties (Sequence[google.cloud.documentai_v1beta3.types.Document.Entity]): Optional. Entities can be nested to form a hierarchical data structure representing the content in the document. - provenance (~.document.Document.Provenance): + provenance (google.cloud.documentai_v1beta3.types.Document.Provenance): Optional. The history of this annotation. redacted (bool): Optional. Whether the entity will be redacted @@ -736,25 +733,23 @@ class NormalizedValue(proto.Message): r"""Parsed and normalized entity value. Attributes: - money_value (~.money.Money): + money_value (google.type.money_pb2.Money): Money value. See also: - https: - github.com/googleapis/googleapis/blob/master/google/type/money.proto - date_value (~.date.Date): + https://github.com/googleapis/googleapis/blob/master/google/type/money.proto + date_value (google.type.date_pb2.Date): Date value. Includes year, month, day. See also: - https: - github.com/googleapis/googleapis/blob/master/google/type/date.proto - datetime_value (~.datetime.DateTime): + https://github.com/googleapis/googleapis/blob/master/google/type/date.proto + datetime_value (google.type.datetime_pb2.DateTime): DateTime value. Includes date, time, and timezone. See also: - https: - github.com/googleapis/googleapis/blob/master/google/type/datetime.proto - address_value (~.postal_address.PostalAddress): + https://github.com/googleapis/googleapis/blob/master/google/type/datetime.proto + address_value (google.type.postal_address_pb2.PostalAddress): Postal address. See also: - - https: - github.com/googleapis/googleapis/blob/master/google/type/postal_address.proto + https://github.com/googleapis/googleapis/blob/master/google/type/postal_address.proto + boolean_value (bool): + Boolean value. Can be used for entities with + binary values, or for checkboxes. text (str): Required. Normalized entity value stored as a string. This field is populated for supported document type (e.g. 
@@ -791,6 +786,8 @@ class NormalizedValue(proto.Message): message=postal_address.PostalAddress, ) + boolean_value = proto.Field(proto.BOOL, number=6, oneof="structured_value") + text = proto.Field(proto.STRING, number=1) text_anchor = proto.Field( @@ -844,44 +841,12 @@ class EntityRelation(proto.Message): relation = proto.Field(proto.STRING, number=3) - class Translation(proto.Message): - r"""A translation of the text segment. - - Attributes: - text_anchor (~.document.Document.TextAnchor): - Provenance of the translation. Text anchor indexing into the - [Document.text][google.cloud.documentai.v1beta3.Document.text]. - There can only be a single ``TextAnchor.text_segments`` - element. If the start and end index of the text segment are - the same, the text change is inserted before that index. - language_code (str): - The BCP-47 language code, such as "en-US" or "sr-Latn". For - more information, see - http://www.unicode.org/reports/tr35/#Unicode_locale_identifier. - translated_text (str): - Text translated into the target language. - provenance (Sequence[~.document.Document.Provenance]): - The history of this annotation. - """ - - text_anchor = proto.Field( - proto.MESSAGE, number=1, message="Document.TextAnchor", - ) - - language_code = proto.Field(proto.STRING, number=2) - - translated_text = proto.Field(proto.STRING, number=3) - - provenance = proto.RepeatedField( - proto.MESSAGE, number=4, message="Document.Provenance", - ) - class TextAnchor(proto.Message): r"""Text reference indexing into the [Document.text][google.cloud.documentai.v1beta3.Document.text]. Attributes: - text_segments (Sequence[~.document.Document.TextAnchor.TextSegment]): + text_segments (Sequence[google.cloud.documentai_v1beta3.types.Document.TextAnchor.TextSegment]): The text segments from the [Document.text][google.cloud.documentai.v1beta3.Document.text]. content (str): @@ -924,7 +889,7 @@ class PageAnchor(proto.Message): polygons and optionally reference specific layout element types. 
Attributes: - page_refs (Sequence[~.document.Document.PageAnchor.PageRef]): + page_refs (Sequence[google.cloud.documentai_v1beta3.types.Document.PageAnchor.PageRef]): One or more references to visual page elements """ @@ -937,15 +902,16 @@ class PageRef(proto.Message): page (int): Required. Index into the [Document.pages][google.cloud.documentai.v1beta3.Document.pages] - element - layout_type (~.document.Document.PageAnchor.PageRef.LayoutType): + element, for example using [Document.pages][page_refs.page] + to locate the related page element. + layout_type (google.cloud.documentai_v1beta3.types.Document.PageAnchor.PageRef.LayoutType): Optional. The type of the layout element that is being referenced if any. layout_id (str): Optional. Deprecated. Use [PageRef.bounding_poly][google.cloud.documentai.v1beta3.Document.PageAnchor.PageRef.bounding_poly] instead. - bounding_poly (~.geometry.BoundingPoly): + bounding_poly (google.cloud.documentai_v1beta3.types.BoundingPoly): Optional. Identifies the bounding polygon of a layout element on the page. """ @@ -988,10 +954,10 @@ class Provenance(proto.Message): id (int): The Id of this operation. Needs to be unique within the scope of the revision. - parents (Sequence[~.document.Document.Provenance.Parent]): + parents (Sequence[google.cloud.documentai_v1beta3.types.Document.Provenance.Parent]): References to the original elements that are replaced. - type_ (~.document.Document.Provenance.OperationType): + type_ (google.cloud.documentai_v1beta3.types.Document.Provenance.OperationType): The type of provenance operation. """ @@ -1005,6 +971,7 @@ class OperationType(proto.Enum): REPLACE = 3 EVAL_REQUESTED = 4 EVAL_APPROVED = 5 + EVAL_SKIPPED = 6 class Parent(proto.Message): r"""Structure for referencing parent provenances. When an @@ -1052,9 +1019,9 @@ class Revision(proto.Message): The revisions that this revision is based on. This can include one or more parent (when documents are merged.) 
This field represents the index into the ``revisions`` field. - create_time (~.timestamp.Timestamp): + create_time (google.protobuf.timestamp_pb2.Timestamp): The time that the revision was created. - human_review (~.document.Document.Revision.HumanReview): + human_review (google.cloud.documentai_v1beta3.types.Document.Revision.HumanReview): Human Review information of this revision. """ @@ -1093,7 +1060,7 @@ class TextChange(proto.Message): r"""This message is used for text changes aka. OCR corrections. Attributes: - text_anchor (~.document.Document.TextAnchor): + text_anchor (google.cloud.documentai_v1beta3.types.Document.TextAnchor): Provenance of the correction. Text anchor indexing into the [Document.text][google.cloud.documentai.v1beta3.Document.text]. There can only be a single ``TextAnchor.text_segments`` @@ -1102,7 +1069,7 @@ class TextChange(proto.Message): changed_text (str): The text that replaces the text identified in the ``text_anchor``. - provenance (Sequence[~.document.Document.Provenance]): + provenance (Sequence[google.cloud.documentai_v1beta3.types.Document.Provenance]): The history of this annotation. """ @@ -1134,8 +1101,6 @@ class TextChange(proto.Message): proto.MESSAGE, number=8, message=EntityRelation, ) - translations = proto.RepeatedField(proto.MESSAGE, number=12, message=Translation,) - text_changes = proto.RepeatedField(proto.MESSAGE, number=14, message=TextChange,) shard_info = proto.Field(proto.MESSAGE, number=9, message=ShardInfo,) diff --git a/google/cloud/documentai_v1beta3/types/document_io.py b/google/cloud/documentai_v1beta3/types/document_io.py new file mode 100644 index 00000000..37928c86 --- /dev/null +++ b/google/cloud/documentai_v1beta3/types/document_io.py @@ -0,0 +1,136 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.documentai.v1beta3", + manifest={ + "RawDocument", + "GcsDocument", + "GcsDocuments", + "GcsPrefix", + "BatchDocumentsInputConfig", + "DocumentOutputConfig", + }, +) + + +class RawDocument(proto.Message): + r"""Payload message of raw document content (bytes). + + Attributes: + content (bytes): + Inline document content. + mime_type (str): + An IANA MIME type (RFC6838) indicating the nature and format + of the [content]. + """ + + content = proto.Field(proto.BYTES, number=1) + + mime_type = proto.Field(proto.STRING, number=2) + + +class GcsDocument(proto.Message): + r"""Specifies a document stored on Cloud Storage. + + Attributes: + gcs_uri (str): + The Cloud Storage object uri. + mime_type (str): + An IANA MIME type (RFC6838) of the content. + """ + + gcs_uri = proto.Field(proto.STRING, number=1) + + mime_type = proto.Field(proto.STRING, number=2) + + +class GcsDocuments(proto.Message): + r"""Specifies a set of documents on Cloud Storage. + + Attributes: + documents (Sequence[google.cloud.documentai_v1beta3.types.GcsDocument]): + The list of documents. + """ + + documents = proto.RepeatedField(proto.MESSAGE, number=1, message="GcsDocument",) + + +class GcsPrefix(proto.Message): + r"""Specifies all documents on Cloud Storage with a common + prefix. + + Attributes: + gcs_uri_prefix (str): + The URI prefix. 
+ """ + + gcs_uri_prefix = proto.Field(proto.STRING, number=1) + + +class BatchDocumentsInputConfig(proto.Message): + r"""The common config to specify a set of documents used as + input. + + Attributes: + gcs_prefix (google.cloud.documentai_v1beta3.types.GcsPrefix): + The set of documents that match the specified Cloud Storage + [gcs_prefix]. + gcs_documents (google.cloud.documentai_v1beta3.types.GcsDocuments): + The set of documents individually specified + on Cloud Storage. + """ + + gcs_prefix = proto.Field( + proto.MESSAGE, number=1, oneof="source", message="GcsPrefix", + ) + + gcs_documents = proto.Field( + proto.MESSAGE, number=2, oneof="source", message="GcsDocuments", + ) + + +class DocumentOutputConfig(proto.Message): + r"""Config that controls the output of documents. All documents + will be written as a JSON file. + + Attributes: + gcs_output_config (google.cloud.documentai_v1beta3.types.DocumentOutputConfig.GcsOutputConfig): + Output config to write the results to Cloud + Storage. + """ + + class GcsOutputConfig(proto.Message): + r"""The configuration used when outputting documents. + + Attributes: + gcs_uri (str): + The Cloud Storage uri (a directory) of the + output. 
+ """ + + gcs_uri = proto.Field(proto.STRING, number=1) + + gcs_output_config = proto.Field( + proto.MESSAGE, number=1, oneof="destination", message=GcsOutputConfig, + ) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/documentai_v1beta3/types/document_processor_service.py b/google/cloud/documentai_v1beta3/types/document_processor_service.py index 7d235c25..3e025e8a 100644 --- a/google/cloud/documentai_v1beta3/types/document_processor_service.py +++ b/google/cloud/documentai_v1beta3/types/document_processor_service.py @@ -19,6 +19,7 @@ from google.cloud.documentai_v1beta3.types import document as gcd_document +from google.cloud.documentai_v1beta3.types import document_io from google.protobuf import timestamp_pb2 as timestamp # type: ignore from google.rpc import status_pb2 as gr_status # type: ignore @@ -27,6 +28,7 @@ package="google.cloud.documentai.v1beta3", manifest={ "ProcessRequest", + "HumanReviewStatus", "ProcessResponse", "BatchProcessRequest", "BatchProcessResponse", @@ -34,6 +36,7 @@ "ReviewDocumentRequest", "ReviewDocumentResponse", "ReviewDocumentOperationMetadata", + "CommonOperationMetadata", }, ) @@ -42,9 +45,13 @@ class ProcessRequest(proto.Message): r"""Request message for the process document method. Attributes: + inline_document (google.cloud.documentai_v1beta3.types.Document): + An inline document proto. + raw_document (google.cloud.documentai_v1beta3.types.RawDocument): + A raw document content (bytes). name (str): Required. The processor resource name. - document (~.gcd_document.Document): + document (google.cloud.documentai_v1beta3.types.Document): The document payload, the [content] and [mime_type] fields must be set. skip_human_review (bool): @@ -52,6 +59,14 @@ class ProcessRequest(proto.Message): skipped for this request. Default to false. 
""" + inline_document = proto.Field( + proto.MESSAGE, number=4, oneof="source", message=gcd_document.Document, + ) + + raw_document = proto.Field( + proto.MESSAGE, number=5, oneof="source", message=document_io.RawDocument, + ) + name = proto.Field(proto.STRING, number=1) document = proto.Field(proto.MESSAGE, number=2, message=gcd_document.Document,) @@ -59,11 +74,44 @@ class ProcessRequest(proto.Message): skip_human_review = proto.Field(proto.BOOL, number=3) +class HumanReviewStatus(proto.Message): + r"""The status of human review on a processed document. + + Attributes: + state (google.cloud.documentai_v1beta3.types.HumanReviewStatus.State): + The state of human review on the processing + request. + state_message (str): + A message providing more details about the + human review state. + human_review_operation (str): + The name of the operation triggered by the processed + document. This field is populated only when the [state] is + [HUMAN_REVIEW_IN_PROGRESS]. It has the same response type + and metadata as the long running operation returned by + [ReviewDocument] method. + """ + + class State(proto.Enum): + r"""The final state of human review on a processed document.""" + STATE_UNSPECIFIED = 0 + SKIPPED = 1 + VALIDATION_PASSED = 2 + IN_PROGRESS = 3 + ERROR = 4 + + state = proto.Field(proto.ENUM, number=1, enum=State,) + + state_message = proto.Field(proto.STRING, number=2) + + human_review_operation = proto.Field(proto.STRING, number=3) + + class ProcessResponse(proto.Message): r"""Response message for the process document method. Attributes: - document (~.gcd_document.Document): + document (google.cloud.documentai_v1beta3.types.Document): The document payload, will populate fields based on the processor's behavior. human_review_operation (str): @@ -73,12 +121,19 @@ class ProcessResponse(proto.Message): has the same response type and metadata as the long running operation returned by ReviewDocument method. 
+ human_review_status (google.cloud.documentai_v1beta3.types.HumanReviewStatus): + The status of human review on the processed + document. """ document = proto.Field(proto.MESSAGE, number=1, message=gcd_document.Document,) human_review_operation = proto.Field(proto.STRING, number=2) + human_review_status = proto.Field( + proto.MESSAGE, number=3, message="HumanReviewStatus", + ) + class BatchProcessRequest(proto.Message): r"""Request message for batch process document method. @@ -86,11 +141,18 @@ class BatchProcessRequest(proto.Message): Attributes: name (str): Required. The processor resource name. - input_configs (Sequence[~.document_processor_service.BatchProcessRequest.BatchInputConfig]): + input_configs (Sequence[google.cloud.documentai_v1beta3.types.BatchProcessRequest.BatchInputConfig]): The input config for each single document in the batch process. - output_config (~.document_processor_service.BatchProcessRequest.BatchOutputConfig): + output_config (google.cloud.documentai_v1beta3.types.BatchProcessRequest.BatchOutputConfig): + The overall output config for batch process. + input_documents (google.cloud.documentai_v1beta3.types.BatchDocumentsInputConfig): + The input documents for batch process. + document_output_config (google.cloud.documentai_v1beta3.types.DocumentOutputConfig): The overall output config for batch process. + skip_human_review (bool): + Whether Human Review feature should be + skipped for this request. Default to false. 
""" class BatchInputConfig(proto.Message): @@ -130,6 +192,16 @@ class BatchOutputConfig(proto.Message): output_config = proto.Field(proto.MESSAGE, number=3, message=BatchOutputConfig,) + input_documents = proto.Field( + proto.MESSAGE, number=5, message=document_io.BatchDocumentsInputConfig, + ) + + document_output_config = proto.Field( + proto.MESSAGE, number=6, message=document_io.DocumentOutputConfig, + ) + + skip_human_review = proto.Field(proto.BOOL, number=4) + class BatchProcessResponse(proto.Message): r"""Response message for batch process document method.""" @@ -139,17 +211,17 @@ class BatchProcessMetadata(proto.Message): r"""The long running operation metadata for batch process method. Attributes: - state (~.document_processor_service.BatchProcessMetadata.State): + state (google.cloud.documentai_v1beta3.types.BatchProcessMetadata.State): The state of the current batch processing. state_message (str): A message providing more details about the current state of processing. For example, the error message if the operation is failed. - create_time (~.timestamp.Timestamp): + create_time (google.protobuf.timestamp_pb2.Timestamp): The creation time of the operation. - update_time (~.timestamp.Timestamp): + update_time (google.protobuf.timestamp_pb2.Timestamp): The last update time of the operation. - individual_process_statuses (Sequence[~.document_processor_service.BatchProcessMetadata.IndividualProcessStatus]): + individual_process_statuses (Sequence[google.cloud.documentai_v1beta3.types.BatchProcessMetadata.IndividualProcessStatus]): The list of response details of each document. """ @@ -175,7 +247,7 @@ class IndividualProcessStatus(proto.Message): batch process is started by take snapshot of that document, since a user can move or change that document during the process. - status (~.gr_status.Status): + status (google.rpc.status_pb2.Status): The status of the processing of the document. 
output_gcs_destination (str): The output_gcs_destination (in the request as @@ -188,6 +260,9 @@ class IndividualProcessStatus(proto.Message): has the same response type and metadata as the long running operation returned by ReviewDocument method. + human_review_status (google.cloud.documentai_v1beta3.types.HumanReviewStatus): + The status of human review on the processed + document. """ input_gcs_source = proto.Field(proto.STRING, number=1) @@ -198,6 +273,10 @@ class IndividualProcessStatus(proto.Message): human_review_operation = proto.Field(proto.STRING, number=4) + human_review_status = proto.Field( + proto.MESSAGE, number=5, message="HumanReviewStatus", + ) + state = proto.Field(proto.ENUM, number=1, enum=State,) state_message = proto.Field(proto.STRING, number=2) @@ -215,14 +294,20 @@ class ReviewDocumentRequest(proto.Message): r"""Request message for review document method. Attributes: + inline_document (google.cloud.documentai_v1beta3.types.Document): + An inline document proto. human_review_config (str): Required. The resource name of the HumanReviewConfig that the document will be reviewed with. - document (~.gcd_document.Document): + document (google.cloud.documentai_v1beta3.types.Document): The document that needs human review. """ + inline_document = proto.Field( + proto.MESSAGE, number=4, oneof="source", message=gcd_document.Document, + ) + human_review_config = proto.Field(proto.STRING, number=1) document = proto.Field(proto.MESSAGE, number=2, message=gcd_document.Document,) @@ -245,15 +330,55 @@ class ReviewDocumentOperationMetadata(proto.Message): method. Attributes: - state (~.document_processor_service.ReviewDocumentOperationMetadata.State): + state (google.cloud.documentai_v1beta3.types.ReviewDocumentOperationMetadata.State): Used only when Operation.done is false. state_message (str): A message providing more details about the current state of processing. For example, the error message if the operation is failed. 
- create_time (~.timestamp.Timestamp): + create_time (google.protobuf.timestamp_pb2.Timestamp): + The creation time of the operation. + update_time (google.protobuf.timestamp_pb2.Timestamp): + The last update time of the operation. + common_metadata (google.cloud.documentai_v1beta3.types.CommonOperationMetadata): + The basic metadata of the long running + operation. + """ + + class State(proto.Enum): + r"""State of the longrunning operation.""" + STATE_UNSPECIFIED = 0 + RUNNING = 1 + CANCELLING = 2 + SUCCEEDED = 3 + FAILED = 4 + CANCELLED = 5 + + state = proto.Field(proto.ENUM, number=1, enum=State,) + + state_message = proto.Field(proto.STRING, number=2) + + create_time = proto.Field(proto.MESSAGE, number=3, message=timestamp.Timestamp,) + + update_time = proto.Field(proto.MESSAGE, number=4, message=timestamp.Timestamp,) + + common_metadata = proto.Field( + proto.MESSAGE, number=5, message="CommonOperationMetadata", + ) + + +class CommonOperationMetadata(proto.Message): + r"""The common metadata for long running operations. + + Attributes: + state (google.cloud.documentai_v1beta3.types.CommonOperationMetadata.State): + The state of the operation. + state_message (str): + A message providing more details about the + current state of processing. + create_time (google.protobuf.timestamp_pb2.Timestamp): The creation time of the operation. - update_time (~.timestamp.Timestamp): + update_time (google.protobuf.timestamp_pb2.Timestamp): The last update time of the operation. """ diff --git a/google/cloud/documentai_v1beta3/types/geometry.py b/google/cloud/documentai_v1beta3/types/geometry.py index e87b87c7..53c3b9b0 100644 --- a/google/cloud/documentai_v1beta3/types/geometry.py +++ b/google/cloud/documentai_v1beta3/types/geometry.py @@ -62,16 +62,16 @@ class BoundingPoly(proto.Message): r"""A bounding polygon for the detected image annotation. 
Attributes: - vertices (Sequence[~.geometry.Vertex]): + vertices (Sequence[google.cloud.documentai_v1beta3.types.Vertex]): The bounding polygon vertices. - normalized_vertices (Sequence[~.geometry.NormalizedVertex]): + normalized_vertices (Sequence[google.cloud.documentai_v1beta3.types.NormalizedVertex]): The bounding polygon normalized vertices. """ - vertices = proto.RepeatedField(proto.MESSAGE, number=1, message=Vertex,) + vertices = proto.RepeatedField(proto.MESSAGE, number=1, message="Vertex",) normalized_vertices = proto.RepeatedField( - proto.MESSAGE, number=2, message=NormalizedVertex, + proto.MESSAGE, number=2, message="NormalizedVertex", ) diff --git a/noxfile.py b/noxfile.py index e446dd8d..ae8392be 100644 --- a/noxfile.py +++ b/noxfile.py @@ -18,6 +18,7 @@ from __future__ import absolute_import import os +import pathlib import shutil import nox @@ -28,7 +29,23 @@ DEFAULT_PYTHON_VERSION = "3.8" SYSTEM_TEST_PYTHON_VERSIONS = ["3.8"] -UNIT_TEST_PYTHON_VERSIONS = ["3.6", "3.7", "3.8"] +UNIT_TEST_PYTHON_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] + +CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() + +# 'docfx' is excluded since it only needs to run in 'docs-presubmit' +nox.options.sessions = [ + "unit", + "system", + "cover", + "lint", + "lint_setup_py", + "blacken", + "docs", +] + +# Error if a python version is missing +nox.options.error_on_missing_interpreters = True @nox.session(python=DEFAULT_PYTHON_VERSION) @@ -70,18 +87,23 @@ def lint_setup_py(session): def default(session): # Install all test dependencies, then install this package in-place. 
- session.install("asyncmock", "pytest-asyncio") - session.install("mock", "pytest", "pytest-cov") - session.install("-e", ".") + constraints_path = str( + CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" + ) + session.install("asyncmock", "pytest-asyncio", "-c", constraints_path) + + session.install("mock", "pytest", "pytest-cov", "-c", constraints_path) + + session.install("-e", ".", "-c", constraints_path) # Run py.test against the unit tests. session.run( "py.test", "--quiet", - "--cov=google.cloud.documentai", - "--cov=google.cloud", - "--cov=tests.unit", + f"--junitxml=unit_{session.python}_sponge_log.xml", + "--cov=google/cloud", + "--cov=tests/unit", "--cov-append", "--cov-config=.coveragerc", "--cov-report=", @@ -100,6 +122,9 @@ def unit(session): @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def system(session): """Run the system test suite.""" + constraints_path = str( + CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" + ) system_test_path = os.path.join("tests", "system.py") system_test_folder_path = os.path.join("tests", "system") @@ -109,6 +134,9 @@ def system(session): # Sanity check: Only run tests if the environment variable is set. if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): session.skip("Credentials must be set via environment variable") + # Install pyopenssl for mTLS testing. + if os.environ.get("GOOGLE_API_USE_CLIENT_CERTIFICATE", "false") == "true": + session.install("pyopenssl") system_test_exists = os.path.exists(system_test_path) system_test_folder_exists = os.path.exists(system_test_folder_path) @@ -121,16 +149,26 @@ def system(session): # Install all test dependencies, then install this package into the # virtualenv's dist-packages. 
- session.install( - "mock", "pytest", "google-cloud-testutils", - ) - session.install("-e", ".") + session.install("mock", "pytest", "google-cloud-testutils", "-c", constraints_path) + session.install("-e", ".", "-c", constraints_path) # Run py.test against the system tests. if system_test_exists: - session.run("py.test", "--quiet", system_test_path, *session.posargs) + session.run( + "py.test", + "--quiet", + f"--junitxml=system_{session.python}_sponge_log.xml", + system_test_path, + *session.posargs, + ) if system_test_folder_exists: - session.run("py.test", "--quiet", system_test_folder_path, *session.posargs) + session.run( + "py.test", + "--quiet", + f"--junitxml=system_{session.python}_sponge_log.xml", + system_test_folder_path, + *session.posargs, + ) @nox.session(python=DEFAULT_PYTHON_VERSION) @@ -141,7 +179,7 @@ def cover(session): test runs (not system test runs), and then erases coverage data. """ session.install("coverage", "pytest-cov") - session.run("coverage", "report", "--show-missing", "--fail-under=100") + session.run("coverage", "report", "--show-missing", "--fail-under=99") session.run("coverage", "erase") diff --git a/renovate.json b/renovate.json index 4fa94931..f08bc22c 100644 --- a/renovate.json +++ b/renovate.json @@ -1,5 +1,6 @@ { "extends": [ "config:base", ":preserveSemverRanges" - ] + ], + "ignorePaths": [".pre-commit-config.yaml"] } diff --git a/samples/__init__.py b/samples/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/samples/snippets/__init__.py b/samples/snippets/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/samples/snippets/batch_parse_form_v1beta2.py b/samples/snippets/batch_parse_form_v1beta2.py new file mode 100644 index 00000000..ae60fd63 --- /dev/null +++ b/samples/snippets/batch_parse_form_v1beta2.py @@ -0,0 +1,100 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the 
License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# [START documentai_batch_parse_form_beta] +import re + +from google.cloud import documentai_v1beta2 as documentai +from google.cloud import storage + + +def batch_parse_form( + project_id="YOUR_PROJECT_ID", + input_uri="gs://cloud-samples-data/documentai/form.pdf", + destination_uri="gs://your-bucket-id/path/to/save/results/", + timeout=90 +): + """Parse a form""" + + client = documentai.DocumentUnderstandingServiceClient() + + gcs_source = documentai.types.GcsSource(uri=input_uri) + + # mime_type can be application/pdf, image/tiff, + # and image/gif, or application/json + input_config = documentai.types.InputConfig( + gcs_source=gcs_source, mime_type="application/pdf" + ) + + # where to write results + output_config = documentai.types.OutputConfig( + gcs_destination=documentai.types.GcsDestination(uri=destination_uri), + pages_per_shard=1, # Map one doc page to one output page + ) + + # Improve form parsing results by providing key-value pair hints. + # For each key hint, key is text that is likely to appear in the + # document as a form field name (i.e. "DOB"). 
+ # Value types are optional, but can be one or more of: + # ADDRESS, LOCATION, ORGANIZATION, PERSON, PHONE_NUMBER, ID, + # NUMBER, EMAIL, PRICE, TERMS, DATE, NAME + key_value_pair_hints = [ + documentai.types.KeyValuePairHint( + key="Emergency Contact", value_types=["NAME"] + ), + documentai.types.KeyValuePairHint(key="Referred By"), + ] + + # Setting enabled=True enables form extraction + form_extraction_params = documentai.types.FormExtractionParams( + enabled=True, key_value_pair_hints=key_value_pair_hints + ) + + # Location can be 'us' or 'eu' + parent = "projects/{}/locations/us".format(project_id) + request = documentai.types.ProcessDocumentRequest( + input_config=input_config, + output_config=output_config, + form_extraction_params=form_extraction_params, + ) + + # Add each ProcessDocumentRequest to the batch request + requests = [] + requests.append(request) + + batch_request = documentai.types.BatchProcessDocumentsRequest( + parent=parent, requests=requests + ) + + operation = client.batch_process_documents(batch_request) + + # Wait for the operation to finish + operation.result(timeout) + + # Results are written to GCS. Use a regex to find + # output files + match = re.match(r"gs://([^/]+)/(.+)", destination_uri) + output_bucket = match.group(1) + prefix = match.group(2) + + storage_client = storage.client.Client() + bucket = storage_client.get_bucket(output_bucket) + blob_list = list(bucket.list_blobs(prefix=prefix)) + print("Output files:") + for blob in blob_list: + print(blob.name) + + +# [END documentai_batch_parse_form_beta] diff --git a/samples/snippets/batch_parse_form_v1beta2_test.py b/samples/snippets/batch_parse_form_v1beta2_test.py new file mode 100644 index 00000000..6abd19a2 --- /dev/null +++ b/samples/snippets/batch_parse_form_v1beta2_test.py @@ -0,0 +1,46 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import uuid + +from google.cloud import storage + +import pytest + +from samples.snippets import batch_parse_form_v1beta2 + + +BUCKET = "document-ai-{}".format(uuid.uuid4()) +OUTPUT_PREFIX = "TEST_OUTPUT_{}".format(uuid.uuid4()) +PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"] +INPUT_URI = "gs://cloud-samples-data/documentai/invoice.pdf" +BATCH_OUTPUT_URI = "gs://{}/{}/".format(BUCKET, OUTPUT_PREFIX) + + +@pytest.fixture(autouse=True) +def setup_teardown(): + """Create a temporary bucket to store annotation output.""" + storage_client = storage.Client() + bucket = storage_client.create_bucket(BUCKET) + + yield + + bucket.delete(force=True) + + +def test_batch_parse_form(capsys): + batch_parse_form_v1beta2.batch_parse_form(PROJECT_ID, INPUT_URI, BATCH_OUTPUT_URI, 120) + out, _ = capsys.readouterr() + assert "Output files" in out diff --git a/samples/snippets/batch_parse_table_v1beta2.py b/samples/snippets/batch_parse_table_v1beta2.py new file mode 100644 index 00000000..f62495b4 --- /dev/null +++ b/samples/snippets/batch_parse_table_v1beta2.py @@ -0,0 +1,108 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + + +# [START documentai_batch_parse_table_beta] +import re + +from google.cloud import documentai_v1beta2 as documentai +from google.cloud import storage + + +def batch_parse_table( + project_id="YOUR_PROJECT_ID", + input_uri="gs://cloud-samples-data/documentai/form.pdf", + destination_uri="gs://your-bucket-id/path/to/save/results/", + timeout=90 +): + """Parse a form""" + + client = documentai.DocumentUnderstandingServiceClient() + + gcs_source = documentai.types.GcsSource(uri=input_uri) + + # mime_type can be application/pdf, image/tiff, + # and image/gif, or application/json + input_config = documentai.types.InputConfig( + gcs_source=gcs_source, mime_type="application/pdf" + ) + + # where to write results + output_config = documentai.types.OutputConfig( + gcs_destination=documentai.types.GcsDestination(uri=destination_uri), + pages_per_shard=1, # Map one doc page to one output page + ) + + # Improve table parsing results by providing bounding boxes + # specifying where the box appears in the document (optional) + table_bound_hints = [ + documentai.types.TableBoundHint( + page_number=1, + bounding_box=documentai.types.BoundingPoly( + # Define a polygon around tables to detect + # Each vertice coordinate must be a number between 0 and 1 + normalized_vertices=[ + # Top left + documentai.types.geometry.NormalizedVertex(x=0, y=0), + # Top right + documentai.types.geometry.NormalizedVertex(x=1, y=0), + # Bottom right + documentai.types.geometry.NormalizedVertex(x=1, y=1), + # Bottom left + documentai.types.geometry.NormalizedVertex(x=0, y=1), + ] + ), + ) + ] + + # Setting enabled=True enables form extraction + table_extraction_params = documentai.types.TableExtractionParams( + enabled=True, table_bound_hints=table_bound_hints + ) + + # Location can be 'us' or 'eu' + parent = "projects/{}/locations/us".format(project_id) + request = 
documentai.types.ProcessDocumentRequest( + input_config=input_config, + output_config=output_config, + table_extraction_params=table_extraction_params, + ) + + requests = [] + requests.append(request) + + batch_request = documentai.types.BatchProcessDocumentsRequest( + parent=parent, requests=requests + ) + + operation = client.batch_process_documents(batch_request) + + # Wait for the operation to finish + operation.result(timeout) + + # Results are written to GCS. Use a regex to find + # output files + match = re.match(r"gs://([^/]+)/(.+)", destination_uri) + output_bucket = match.group(1) + prefix = match.group(2) + + storage_client = storage.client.Client() + bucket = storage_client.get_bucket(output_bucket) + blob_list = list(bucket.list_blobs(prefix=prefix)) + print("Output files:") + for blob in blob_list: + print(blob.name) + + +# [END documentai_batch_parse_table_beta] diff --git a/samples/snippets/batch_parse_table_v1beta2_test.py b/samples/snippets/batch_parse_table_v1beta2_test.py new file mode 100644 index 00000000..aa890520 --- /dev/null +++ b/samples/snippets/batch_parse_table_v1beta2_test.py @@ -0,0 +1,46 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +import os +import uuid + +from google.cloud import storage + +import pytest + +from samples.snippets import batch_parse_table_v1beta2 + + +BUCKET = "document-ai-{}".format(uuid.uuid4()) +OUTPUT_PREFIX = "TEST_OUTPUT_{}".format(uuid.uuid4()) +PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"] +INPUT_URI = "gs://cloud-samples-data/documentai/invoice.pdf" +BATCH_OUTPUT_URI = "gs://{}/{}/".format(BUCKET, OUTPUT_PREFIX) + + +@pytest.fixture(autouse=True) +def setup_teardown(): + """Create a temporary bucket to store annotation output.""" + storage_client = storage.Client() + bucket = storage_client.create_bucket(BUCKET) + + yield + + bucket.delete(force=True) + + +def test_batch_parse_table(capsys): + batch_parse_table_v1beta2.batch_parse_table(PROJECT_ID, INPUT_URI, BATCH_OUTPUT_URI, 120) + out, _ = capsys.readouterr() + assert "Output files:" in out diff --git a/samples/snippets/batch_process_documents_sample_bad_input_v1beta3_test.py b/samples/snippets/batch_process_documents_sample_bad_input_v1beta3_test.py new file mode 100644 index 00000000..e0a7e468 --- /dev/null +++ b/samples/snippets/batch_process_documents_sample_bad_input_v1beta3_test.py @@ -0,0 +1,44 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +from uuid import uuid4 + +from samples.snippets import batch_process_documents_sample_v1beta3 + +location = "us" +project_id = os.getenv("GOOGLE_CLOUD_PROJECT") +processor_id = "90484cfdedb024f6" +gcs_input_uri = "gs://cloud-samples-data/documentai/invoice.pdf" +# following bucket contains .csv file which will cause the sample to fail. +gcs_output_full_uri_with_wrong_type = "gs://documentai-beta-samples" +BUCKET_NAME = f"document-ai-python-{uuid4()}" + + +def test_batch_process_documents_with_bad_input(capsys): + try: + batch_process_documents_sample_v1beta3.batch_process_documents( + project_id=project_id, + location=location, + processor_id=processor_id, + gcs_input_uri=gcs_input_uri, + gcs_output_uri=gcs_output_full_uri_with_wrong_type, + gcs_output_uri_prefix="test", + timeout=450, + ) + out, _ = capsys.readouterr() + assert "Failed to process" in out + except Exception as e: + assert "Failed to process" in e.message diff --git a/samples/snippets/batch_process_documents_sample_v1beta3.py b/samples/snippets/batch_process_documents_sample_v1beta3.py new file mode 100644 index 00000000..b1ed3226 --- /dev/null +++ b/samples/snippets/batch_process_documents_sample_v1beta3.py @@ -0,0 +1,131 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +# [START documentai_batch_process_document] +import re + +from google.cloud import documentai_v1beta3 as documentai +from google.cloud import storage + +# TODO(developer): Uncomment these variables before running the sample. +# project_id= 'YOUR_PROJECT_ID' +# location = 'YOUR_PROJECT_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor in Cloud Console +# gcs_input_uri = "YOUR_INPUT_URI" +# gcs_output_uri = "YOUR_OUTPUT_BUCKET_URI" +# gcs_output_uri_prefix = "YOUR_OUTPUT_URI_PREFIX" + + +def batch_process_documents( + project_id, + location, + processor_id, + gcs_input_uri, + gcs_output_uri, + gcs_output_uri_prefix, + timeout: int = 300, +): + + # You must set the api_endpoint if you use a location other than 'us', e.g.: + opts = {} + if location == "eu": + opts = {"api_endpoint": "eu-documentai.googleapis.com"} + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + destination_uri = f"{gcs_output_uri}/{gcs_output_uri_prefix}/" + + # 'mime_type' can be 'application/pdf', 'image/tiff', + # and 'image/gif', or 'application/json' + input_config = documentai.types.document_processor_service.BatchProcessRequest.BatchInputConfig( + gcs_source=gcs_input_uri, mime_type="application/pdf" + ) + + # Where to write results + output_config = documentai.types.document_processor_service.BatchProcessRequest.BatchOutputConfig( + gcs_destination=destination_uri + ) + + # Location can be 'us' or 'eu' + name = f"projects/{project_id}/locations/{location}/processors/{processor_id}" + request = documentai.types.document_processor_service.BatchProcessRequest( + name=name, + input_configs=[input_config], + output_config=output_config, + ) + + operation = client.batch_process_documents(request) + + # Wait for the operation to finish + operation.result(timeout=timeout) + + # Results are written to GCS. 
Use a regex to find + # output files + match = re.match(r"gs://([^/]+)/(.+)", destination_uri) + output_bucket = match.group(1) + prefix = match.group(2) + + storage_client = storage.Client() + bucket = storage_client.get_bucket(output_bucket) + blob_list = list(bucket.list_blobs(prefix=prefix)) + print("Output files:") + + for i, blob in enumerate(blob_list): + # If JSON file, download the contents of this blob as a bytes object. + if ".json" in blob.name: + blob_as_bytes = blob.download_as_bytes() + + document = documentai.types.Document.from_json(blob_as_bytes) + print(f"Fetched file {i + 1}") + + # For a full list of Document object attributes, please reference this page: + # https://cloud.google.com/document-ai/docs/reference/rpc/google.cloud.documentai.v1beta3#document + + # Read the text recognition output from the processor + for page in document.pages: + for form_field in page.form_fields: + field_name = get_text(form_field.field_name, document) + field_value = get_text(form_field.field_value, document) + print("Extracted key value pair:") + print(f"\t{field_name}, {field_value}") + for paragraph in document.pages: + paragraph_text = get_text(paragraph.layout, document) + print(f"Paragraph text:\n{paragraph_text}") + else: + print(f"Skipping non-supported file type {blob.name}") + + +# Extract shards from the text field +def get_text(doc_element: dict, document: dict): + """ + Document AI identifies form fields by their offsets + in document text. This function converts offsets + to text snippets. + """ + response = "" + # If a text segment spans several lines, it will + # be stored in different text segments. 
+ for segment in doc_element.text_anchor.text_segments: + start_index = ( + int(segment.start_index) + if segment in doc_element.text_anchor.text_segments + else 0 + ) + end_index = int(segment.end_index) + response += document.text[start_index:end_index] + return response + + +# [END documentai_batch_process_document] diff --git a/samples/snippets/batch_process_documents_sample_v1beta3_test.py b/samples/snippets/batch_process_documents_sample_v1beta3_test.py new file mode 100644 index 00000000..dcb63567 --- /dev/null +++ b/samples/snippets/batch_process_documents_sample_v1beta3_test.py @@ -0,0 +1,62 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +from uuid import uuid4 + +from google.cloud import storage +from google.cloud.exceptions import NotFound + +import pytest + +from samples.snippets import batch_process_documents_sample_v1beta3 + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "90484cfdedb024f6" +gcs_input_uri = "gs://cloud-samples-data/documentai/invoice.pdf" +gcs_output_uri_prefix = uuid4() +BUCKET_NAME = f"document-ai-python-{uuid4()}" + + +@pytest.fixture(scope="module") +def test_bucket(): + storage_client = storage.Client() + bucket = storage_client.create_bucket(BUCKET_NAME) + yield bucket.name + + try: + blobs = list(bucket.list_blobs()) + for blob in blobs: + blob.delete() + bucket.delete() + except NotFound: + print("Bucket already deleted.") + + +def test_batch_process_documents(capsys, test_bucket): + batch_process_documents_sample_v1beta3.batch_process_documents( + project_id=project_id, + location=location, + processor_id=processor_id, + gcs_input_uri=gcs_input_uri, + gcs_output_uri=f"gs://{test_bucket}", + gcs_output_uri_prefix=gcs_output_uri_prefix, + ) + out, _ = capsys.readouterr() + + assert "Extracted" in out + assert "Paragraph" in out + assert "Invoice" in out diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py new file mode 100644 index 00000000..97bf7da8 --- /dev/null +++ b/samples/snippets/noxfile.py @@ -0,0 +1,247 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import os +from pathlib import Path +import sys +from typing import Callable, Dict, List, Optional + +import nox + + +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING +# DO NOT EDIT THIS FILE EVER! +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING + +# Copy `noxfile_config.py` to your directory and modify it instead. + + +# `TEST_CONFIG` dict is a configuration hook that allows users to +# modify the test configurations. The values here should be in sync +# with `noxfile_config.py`. Users will copy `noxfile_config.py` into +# their directory and modify it. + +TEST_CONFIG = { + # You can opt out from the test for specific Python versions. + 'ignored_versions': ["2.7"], + + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + 'enforce_type_hints': False, + + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + 'gcloud_project_env': 'GOOGLE_CLOUD_PROJECT', + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + 'envs': {}, +} + + +try: + # Ensure we can import noxfile_config in the project's directory. + sys.path.append('.') + from noxfile_config import TEST_CONFIG_OVERRIDE +except ImportError as e: + print("No user noxfile_config found: detail: {}".format(e)) + TEST_CONFIG_OVERRIDE = {} + +# Update the TEST_CONFIG with the user supplied values. +TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) + + +def get_pytest_env_vars() -> Dict[str, str]: + """Returns a dict for pytest invocation.""" + ret = {} + + # Override the GCLOUD_PROJECT and the alias. 
+ env_key = TEST_CONFIG['gcloud_project_env'] + # This should error out if not set. + ret['GOOGLE_CLOUD_PROJECT'] = os.environ[env_key] + + # Apply user supplied envs. + ret.update(TEST_CONFIG['envs']) + return ret + + +# DO NOT EDIT - automatically generated. +# All versions used to tested samples. +ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] + +# Any default versions that should be ignored. +IGNORED_VERSIONS = TEST_CONFIG['ignored_versions'] + +TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) + +INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False)) +# +# Style Checks +# + + +def _determine_local_import_names(start_dir: str) -> List[str]: + """Determines all import names that should be considered "local". + + This is used when running the linter to insure that import order is + properly checked. + """ + file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)] + return [ + basename + for basename, extension in file_ext_pairs + if extension == ".py" + or os.path.isdir(os.path.join(start_dir, basename)) + and basename not in ("__pycache__") + ] + + +# Linting with flake8. 
+# +# We ignore the following rules: +# E203: whitespace before ‘:’ +# E266: too many leading ‘#’ for block comment +# E501: line too long +# I202: Additional newline in a section of imports +# +# We also need to specify the rules which are ignored by default: +# ['E226', 'W504', 'E126', 'E123', 'W503', 'E24', 'E704', 'E121'] +FLAKE8_COMMON_ARGS = [ + "--show-source", + "--builtin=gettext", + "--max-complexity=20", + "--import-order-style=google", + "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", + "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", + "--max-line-length=88", +] + + +@nox.session +def lint(session: nox.sessions.Session) -> None: + if not TEST_CONFIG['enforce_type_hints']: + session.install("flake8", "flake8-import-order") + else: + session.install("flake8", "flake8-import-order", "flake8-annotations") + + local_names = _determine_local_import_names(".") + args = FLAKE8_COMMON_ARGS + [ + "--application-import-names", + ",".join(local_names), + "." + ] + session.run("flake8", *args) +# +# Black +# + + +@nox.session +def blacken(session: nox.sessions.Session) -> None: + session.install("black") + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + session.run("black", *python_files) + +# +# Sample Tests +# + + +PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] + + +def _session_tests(session: nox.sessions.Session, post_install: Callable = None) -> None: + """Runs py.test for a particular project.""" + if os.path.exists("requirements.txt"): + session.install("-r", "requirements.txt") + + if os.path.exists("requirements-test.txt"): + session.install("-r", "requirements-test.txt") + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs), + # Pytest will return 5 when no tests are collected. 
This can happen + # on travis where slow and flaky tests are excluded. + # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars() + ) + + +@nox.session(python=ALL_VERSIONS) +def py(session: nox.sessions.Session) -> None: + """Runs py.test for a sample using the specified version of Python.""" + if session.python in TESTED_VERSIONS: + _session_tests(session) + else: + session.skip("SKIPPED: {} tests are disabled for this sample.".format( + session.python + )) + + +# +# Readmegen +# + + +def _get_repo_root() -> Optional[str]: + """ Returns the root folder of the project. """ + # Get root of this repository. Assume we don't have directories nested deeper than 10 items. + p = Path(os.getcwd()) + for i in range(10): + if p is None: + break + if Path(p / ".git").exists(): + return str(p) + # .git is not available in repos cloned via Cloud Build + # setup.py is always in the library's root, so use that instead + # https://github.com/googleapis/synthtool/issues/792 + if Path(p / "setup.py").exists(): + return str(p) + p = p.parent + raise Exception("Unable to detect repository root.") + + +GENERATED_READMES = sorted([x for x in Path(".").rglob("*.rst.in")]) + + +@nox.session +@nox.parametrize("path", GENERATED_READMES) +def readmegen(session: nox.sessions.Session, path: str) -> None: + """(Re-)generates the readme for a sample.""" + session.install("jinja2", "pyyaml") + dir_ = os.path.dirname(path) + + if os.path.exists(os.path.join(dir_, "requirements.txt")): + session.install("-r", os.path.join(dir_, "requirements.txt")) + + in_file = os.path.join(dir_, "README.rst.in") + session.run( + "python", _get_repo_root() + "/scripts/readme-gen/readme_gen.py", in_file + ) diff --git a/samples/snippets/parse_form_v1beta2.py b/samples/snippets/parse_form_v1beta2.py new file mode 100644 index 00000000..27c99811 --- /dev/null +++ b/samples/snippets/parse_form_v1beta2.py @@ -0,0 +1,92 @@ +# Copyright 2020 Google LLC +# +# 
Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START documentai_parse_form_beta] +from google.cloud import documentai_v1beta2 as documentai + + +def parse_form( + project_id="YOUR_PROJECT_ID", + input_uri="gs://cloud-samples-data/documentai/form.pdf", +): + """Parse a form""" + + client = documentai.DocumentUnderstandingServiceClient() + + gcs_source = documentai.types.GcsSource(uri=input_uri) + + # mime_type can be application/pdf, image/tiff, + # and image/gif, or application/json + input_config = documentai.types.InputConfig( + gcs_source=gcs_source, mime_type="application/pdf" + ) + + # Improve form parsing results by providing key-value pair hints. + # For each key hint, key is text that is likely to appear in the + # document as a form field name (i.e. "DOB").
+ # Value types are optional, but can be one or more of: + # ADDRESS, LOCATION, ORGANIZATION, PERSON, PHONE_NUMBER, ID, + # NUMBER, EMAIL, PRICE, TERMS, DATE, NAME + key_value_pair_hints = [ + documentai.types.KeyValuePairHint( + key="Emergency Contact", value_types=["NAME"] + ), + documentai.types.KeyValuePairHint(key="Referred By"), + ] + + # Setting enabled=True enables form extraction + form_extraction_params = documentai.types.FormExtractionParams( + enabled=True, key_value_pair_hints=key_value_pair_hints + ) + + # Location can be 'us' or 'eu' + parent = "projects/{}/locations/us".format(project_id) + request = documentai.types.ProcessDocumentRequest( + parent=parent, + input_config=input_config, + form_extraction_params=form_extraction_params, + ) + + document = client.process_document(request=request) + + def _get_text(el): + """Doc AI identifies form fields by their offsets + in document text. This function converts offsets + to text snippets. + """ + response = "" + # If a text segment spans several lines, it will + # be stored in different text segments. 
+ for segment in el.text_anchor.text_segments: + start_index = segment.start_index + end_index = segment.end_index + response += document.text[start_index:end_index] + return response + + for page in document.pages: + print("Page number: {}".format(page.page_number)) + for form_field in page.form_fields: + print( + "Field Name: {}\tConfidence: {}".format( + _get_text(form_field.field_name), form_field.field_name.confidence + ) + ) + print( + "Field Value: {}\tConfidence: {}".format( + _get_text(form_field.field_value), form_field.field_value.confidence + ) + ) + + +# [END documentai_parse_form_beta] diff --git a/samples/snippets/parse_form_v1beta2_test.py b/samples/snippets/parse_form_v1beta2_test.py new file mode 100644 index 00000000..6987612a --- /dev/null +++ b/samples/snippets/parse_form_v1beta2_test.py @@ -0,0 +1,28 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +import os + +from samples.snippets import parse_form_v1beta2 + + +PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"] +INPUT_URI = "gs://cloud-samples-data/documentai/form.pdf" + + +def test_parse_form(capsys): + parse_form_v1beta2.parse_form(PROJECT_ID, INPUT_URI) + out, _ = capsys.readouterr() + assert "Field Name" in out + assert "Field Value" in out diff --git a/samples/snippets/parse_table_v1beta2.py b/samples/snippets/parse_table_v1beta2.py new file mode 100644 index 00000000..ac8f5d11 --- /dev/null +++ b/samples/snippets/parse_table_v1beta2.py @@ -0,0 +1,95 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# [START documentai_parse_table_beta] +from google.cloud import documentai_v1beta2 as documentai + + +def parse_table( + project_id="YOUR_PROJECT_ID", + input_uri="gs://cloud-samples-data/documentai/invoice.pdf", +): + """Parse a form""" + + client = documentai.DocumentUnderstandingServiceClient() + + gcs_source = documentai.types.GcsSource(uri=input_uri) + + # mime_type can be application/pdf, image/tiff, + # and image/gif, or application/json + input_config = documentai.types.InputConfig( + gcs_source=gcs_source, mime_type="application/pdf" + ) + + # Improve table parsing results by providing bounding boxes + # specifying where the box appears in the document (optional) + table_bound_hints = [ + documentai.types.TableBoundHint( + page_number=1, + bounding_box=documentai.types.BoundingPoly( + # Define a polygon around tables to detect + # Each vertice coordinate must be a number between 0 and 1 + normalized_vertices=[ + # Top left + documentai.types.geometry.NormalizedVertex(x=0, y=0), + # Top right + documentai.types.geometry.NormalizedVertex(x=1, y=0), + # Bottom right + documentai.types.geometry.NormalizedVertex(x=1, y=1), + # Bottom left + documentai.types.geometry.NormalizedVertex(x=0, y=1), + ] + ), + ) + ] + + # Setting enabled=True enables form extraction + table_extraction_params = documentai.types.TableExtractionParams( + enabled=True, table_bound_hints=table_bound_hints + ) + + # Location can be 'us' or 'eu' + parent = "projects/{}/locations/us".format(project_id) + request = documentai.types.ProcessDocumentRequest( + parent=parent, + input_config=input_config, + table_extraction_params=table_extraction_params, + ) + + document = client.process_document(request=request) + + def _get_text(el): + """Convert text offset indexes into text snippets.""" + response = "" + # If a text segment spans several lines, it will + # be stored in different text segments. 
+ for segment in el.text_anchor.text_segments: + start_index = segment.start_index + end_index = segment.end_index + response += document.text[start_index:end_index] + return response + + for page in document.pages: + print("Page number: {}".format(page.page_number)) + for table_num, table in enumerate(page.tables): + print("Table {}: ".format(table_num)) + for row_num, row in enumerate(table.header_rows): + cells = "\t".join([_get_text(cell.layout) for cell in row.cells]) + print("Header Row {}: {}".format(row_num, cells)) + for row_num, row in enumerate(table.body_rows): + cells = "\t".join([_get_text(cell.layout) for cell in row.cells]) + print("Row {}: {}".format(row_num, cells)) + + +# [END documentai_parse_table_beta] diff --git a/samples/snippets/parse_table_v1beta2_test.py b/samples/snippets/parse_table_v1beta2_test.py new file mode 100644 index 00000000..4102c926 --- /dev/null +++ b/samples/snippets/parse_table_v1beta2_test.py @@ -0,0 +1,28 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +import os + +from samples.snippets import parse_table_v1beta2 + + +PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"] +INPUT_URI = "gs://cloud-samples-data/documentai/invoice.pdf" + + +def test_parse_table(capsys): + parse_table_v1beta2.parse_table(PROJECT_ID, INPUT_URI) + out, _ = capsys.readouterr() + assert "Table" in out + assert "Header Row" in out diff --git a/samples/snippets/parse_with_model_v1beta2.py b/samples/snippets/parse_with_model_v1beta2.py new file mode 100644 index 00000000..59265c4f --- /dev/null +++ b/samples/snippets/parse_with_model_v1beta2.py @@ -0,0 +1,60 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# [START documentai_parse_with_model_beta] +from google.cloud import documentai_v1beta2 as documentai + + +def parse_with_model( + project_id="YOUR_PROJECT_ID", + input_uri="gs://cloud-samples-data/documentai/invoice.pdf", + automl_model_name="YOUR_AUTOML_MODEL_NAME", +): + """Process a single document with the Document AI API. + + Args: + project_id: your Google Cloud project id + input_uri: the Cloud Storage URI of your input PDF + automl_model_name: the AutoML model name formatted as: + `projects/[PROJECT_ID]/locations/[LOCATION]/models/[MODEL_ID]` + where LOCATION is a Compute Engine region, e.g. 
+ `us-central1` + """ + + client = documentai.DocumentUnderstandingServiceClient() + + gcs_source = documentai.types.GcsSource(uri=input_uri) + + # mime_type can be application/pdf, image/tiff, + # and image/gif, or application/json + input_config = documentai.types.InputConfig( + gcs_source=gcs_source, mime_type="application/pdf" + ) + + automl_params = documentai.types.AutoMlParams(model=automl_model_name) + + # Location can be 'us' or 'eu' + parent = "projects/{}/locations/us".format(project_id) + request = documentai.types.ProcessDocumentRequest( + parent=parent, input_config=input_config, automl_params=automl_params + ) + + document = client.process_document(request=request) + + for label in document.labels: + print("Label detected: {}".format(label.name)) + print("Confidence: {}".format(label.confidence)) + + +# [END documentai_parse_with_model_beta] diff --git a/samples/snippets/parse_with_model_v1beta2_test.py b/samples/snippets/parse_with_model_v1beta2_test.py new file mode 100644 index 00000000..4b5d3ca5 --- /dev/null +++ b/samples/snippets/parse_with_model_v1beta2_test.py @@ -0,0 +1,36 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +from samples.snippets import parse_with_model_v1beta2 + + +PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"] +INPUT_URI = "gs://cloud-samples-data/documentai/invoice.pdf" +AUTOML_NL_MODEL_ID = "TCN3472481026502981088" + +if "AUTOML_NL_MODEL_ID" in os.environ: + AUTOML_NL_MODEL_ID = os.environ["AUTOML_NL_MODEL_ID"] + +MODEL_NAME = "projects/{}/locations/us-central1/models/{}".format( + PROJECT_ID, AUTOML_NL_MODEL_ID +) + + +def test_parse_with_model(capsys): + parse_with_model_v1beta2.parse_with_model(PROJECT_ID, INPUT_URI, MODEL_NAME) + out, _ = capsys.readouterr() + assert "Label detected" in out + assert "Confidence" in out diff --git a/samples/snippets/process_document_sample_v1beta3.py b/samples/snippets/process_document_sample_v1beta3.py new file mode 100644 index 00000000..ab69d073 --- /dev/null +++ b/samples/snippets/process_document_sample_v1beta3.py @@ -0,0 +1,92 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# [START documentai_process_document] + +# TODO(developer): Uncomment these variables before running the sample. 
+# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROJECT_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor in Cloud Console +# file_path = '/path/to/local/pdf' + + +def process_document_sample( + project_id: str, location: str, processor_id: str, file_path: str +): + from google.cloud import documentai_v1beta3 as documentai + + # You must set the api_endpoint if you use a location other than 'us', e.g.: + opts = {} + if location == "eu": + opts = {"api_endpoint": "eu-documentai.googleapis.com"} + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor, e.g.: + # projects/project-id/locations/location/processors/processor-id + # You must create new processors in the Cloud Console first + name = f"projects/{project_id}/locations/{location}/processors/{processor_id}" + + with open(file_path, "rb") as image: + image_content = image.read() + + # Read the file into memory + document = {"content": image_content, "mime_type": "application/pdf"} + + # Configure the process request + request = {"name": name, "document": document} + + # Recognizes text entities in the PDF document + result = client.process_document(request=request) + + document = result.document + + print("Document processing complete.") + + # For a full list of Document object attributes, please reference this page: https://googleapis.dev/python/documentai/latest/_modules/google/cloud/documentai_v1beta3/types/document.html#Document + + document_pages = document.pages + + # Read the text recognition output from the processor + print("The document contains the following paragraphs:") + for page in document_pages: + paragraphs = page.paragraphs + for paragraph in paragraphs: + paragraph_text = get_text(paragraph.layout, document) + print(f"Paragraph text: {paragraph_text}") + + +# Extract shards from the text field +def get_text(doc_element: dict, document: dict): + """ + Document AI identifies form 
fields by their offsets + in document text. This function converts offsets + to text snippets. + """ + response = "" + # If a text segment spans several lines, it will + # be stored in different text segments. + for segment in doc_element.text_anchor.text_segments: + start_index = ( + int(segment.start_index) + if segment in doc_element.text_anchor.text_segments + else 0 + ) + end_index = int(segment.end_index) + response += document.text[start_index:end_index] + return response + + +# [END documentai_process_document] diff --git a/samples/snippets/process_document_sample_v1beta3_test.py b/samples/snippets/process_document_sample_v1beta3_test.py new file mode 100644 index 00000000..58b11b22 --- /dev/null +++ b/samples/snippets/process_document_sample_v1beta3_test.py @@ -0,0 +1,37 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os + +from samples.snippets import process_document_sample_v1beta3 + + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "90484cfdedb024f6" +file_path = "resources/invoice.pdf" + + +def test_process_documents(capsys): + process_document_sample_v1beta3.process_document_sample( + project_id=project_id, + location=location, + processor_id=processor_id, + file_path=file_path, + ) + out, _ = capsys.readouterr() + + assert "Paragraph" in out + assert "Invoice" in out diff --git a/samples/snippets/quickstart_sample_v1beta3.py b/samples/snippets/quickstart_sample_v1beta3.py new file mode 100644 index 00000000..884b412c --- /dev/null +++ b/samples/snippets/quickstart_sample_v1beta3.py @@ -0,0 +1,87 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from google.cloud import documentai_v1beta3 as documentai + +# [START documentai_quickstart] + +# TODO(developer): Uncomment these variables before running the sample. 
+# project_id = 'YOUR_PROJECT_ID' +# location = 'YOUR_PROJECT_LOCATION' # Format is 'us' or 'eu' +# processor_id = 'YOUR_PROCESSOR_ID' # Create processor in Cloud Console +# file_path = '/path/to/local/pdf' + + +def quickstart(project_id: str, location: str, processor_id: str, file_path: str): + + # You must set the api_endpoint if you use a location other than 'us', e.g.: + opts = {} + if location == "eu": + opts = {"api_endpoint": "eu-documentai.googleapis.com"} + + client = documentai.DocumentProcessorServiceClient(client_options=opts) + + # The full resource name of the processor, e.g.: + # projects/project-id/locations/location/processors/processor-id + # You must create new processors in the Cloud Console first + name = f"projects/{project_id}/locations/{location}/processors/{processor_id}" + + # Read the file into memory + with open(file_path, "rb") as image: + image_content = image.read() + + document = {"content": image_content, "mime_type": "application/pdf"} + + # Configure the process request + request = {"name": name, "document": document} + + result = client.process_document(request=request) + document = result.document + + document_pages = document.pages + + # For a full list of Document object attributes, please reference this page: https://googleapis.dev/python/documentai/latest/_modules/google/cloud/documentai_v1beta3/types/document.html#Document + + # Read the text recognition output from the processor + print("The document contains the following paragraphs:") + for page in document_pages: + paragraphs = page.paragraphs + for paragraph in paragraphs: + print(paragraph) + paragraph_text = get_text(paragraph.layout, document) + print(f"Paragraph text: {paragraph_text}") + + +def get_text(doc_element: dict, document: dict): + """ + Document AI identifies form fields by their offsets + in document text. This function converts offsets + to text snippets. 
+ """ + response = "" + # If a text segment spans several lines, it will + # be stored in different text segments. + for segment in doc_element.text_anchor.text_segments: + start_index = ( + int(segment.start_index) + if segment in doc_element.text_anchor.text_segments + else 0 + ) + end_index = int(segment.end_index) + response += document.text[start_index:end_index] + return response + + +# [END documentai_quickstart] diff --git a/samples/snippets/quickstart_sample_v1beta3_test.py b/samples/snippets/quickstart_sample_v1beta3_test.py new file mode 100644 index 00000000..4badc1f7 --- /dev/null +++ b/samples/snippets/quickstart_sample_v1beta3_test.py @@ -0,0 +1,36 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os + +from samples.snippets import quickstart_sample_v1beta3 + +location = "us" +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +processor_id = "90484cfdedb024f6" +file_path = "resources/invoice.pdf" + + +def test_quickstart(capsys): + quickstart_sample_v1beta3.quickstart( + project_id=project_id, + location=location, + processor_id=processor_id, + file_path=file_path, + ) + out, _ = capsys.readouterr() + + assert "Paragraph" in out + assert "Invoice" in out diff --git a/samples/snippets/quickstart_v1beta2.py b/samples/snippets/quickstart_v1beta2.py new file mode 100644 index 00000000..34f58820 --- /dev/null +++ b/samples/snippets/quickstart_v1beta2.py @@ -0,0 +1,65 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +# [START documentai_quickstart_beta] +from google.cloud import documentai_v1beta2 as documentai + + +def main( + project_id="YOUR_PROJECT_ID", + input_uri="gs://cloud-samples-data/documentai/invoice.pdf", +): + """Process a single document with the Document AI API, including + text extraction and entity extraction.""" + + client = documentai.DocumentUnderstandingServiceClient() + + gcs_source = documentai.types.GcsSource(uri=input_uri) + + # mime_type can be application/pdf, image/tiff, + # and image/gif, or application/json + input_config = documentai.types.InputConfig( + gcs_source=gcs_source, mime_type="application/pdf" + ) + + # Location can be 'us' or 'eu' + parent = "projects/{}/locations/us".format(project_id) + request = documentai.types.ProcessDocumentRequest( + parent=parent, input_config=input_config + ) + + document = client.process_document(request=request) + + # All text extracted from the document + print("Document Text: {}".format(document.text)) + + def _get_text(el): + """Convert text offset indexes into text snippets.""" + response = "" + # If a text segment spans several lines, it will + # be stored in different text segments. + for segment in el.text_anchor.text_segments: + start_index = segment.start_index + end_index = segment.end_index + response += document.text[start_index:end_index] + return response + + for entity in document.entities: + print("Entity type: {}".format(entity.type_)) + print("Text: {}".format(_get_text(entity))) + print("Mention text: {}\n".format(entity.mention_text)) + + +# [END documentai_quickstart_beta] diff --git a/samples/snippets/quickstart_v1beta2_test.py b/samples/snippets/quickstart_v1beta2_test.py new file mode 100644 index 00000000..1868788d --- /dev/null +++ b/samples/snippets/quickstart_v1beta2_test.py @@ -0,0 +1,28 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from samples.snippets import quickstart_v1beta2 + + +PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"] +INPUT_URI = "gs://cloud-samples-data/documentai/invoice.pdf" + + +def test_quickstart(capsys): + quickstart_v1beta2.main(PROJECT_ID, INPUT_URI) + out, _ = capsys.readouterr() + assert "Entity type" in out + assert "Mention text" in out diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt new file mode 100644 index 00000000..be53becf --- /dev/null +++ b/samples/snippets/requirements-test.txt @@ -0,0 +1 @@ +pytest==6.1.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt new file mode 100644 index 00000000..ce6670de --- /dev/null +++ b/samples/snippets/requirements.txt @@ -0,0 +1,2 @@ +google-cloud-documentai==0.3.0 +google-cloud-storage==1.36.2 diff --git a/samples/snippets/resources/invoice.pdf b/samples/snippets/resources/invoice.pdf new file mode 100644 index 00000000..7722734a Binary files /dev/null and b/samples/snippets/resources/invoice.pdf differ diff --git a/samples/snippets/set_endpoint_v1beta2.py b/samples/snippets/set_endpoint_v1beta2.py new file mode 100644 index 00000000..0fa9921b --- /dev/null +++ b/samples/snippets/set_endpoint_v1beta2.py @@ -0,0 +1,48 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def set_endpoint( + project_id="YOUR_PROJECT_ID", + input_uri="gs://cloud-samples-data/documentai/invoice.pdf", +): + """Process a single document with the Document AI API, including + text extraction and entity extraction.""" + + # [START documentai_set_endpoint_beta] + from google.cloud import documentai_v1beta2 as documentai + + client = documentai.DocumentUnderstandingServiceClient( + client_options={"api_endpoint": "eu-documentai.googleapis.com"} + ) + # [END documentai_set_endpoint_beta] + + gcs_source = documentai.types.GcsSource(uri=input_uri) + + # mime_type can be application/pdf, image/tiff, + # and image/gif, or application/json + input_config = documentai.types.InputConfig( + gcs_source=gcs_source, mime_type="application/pdf" + ) + + # Location can be 'us' or 'eu' + parent = "projects/{}/locations/eu".format(project_id) + request = documentai.types.ProcessDocumentRequest( + parent=parent, input_config=input_config + ) + + document = client.process_document(request=request) + + # All text extracted from the document + print("Document Text: {}".format(document.text)) diff --git a/samples/snippets/set_endpoint_v1beta2_test.py b/samples/snippets/set_endpoint_v1beta2_test.py new file mode 100644 index 00000000..be535a28 --- /dev/null +++ b/samples/snippets/set_endpoint_v1beta2_test.py @@ -0,0 +1,27 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from samples.snippets import set_endpoint_v1beta2 + + +PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"] +INPUT_URI = "gs://cloud-samples-data/documentai/invoice.pdf" + + +def test_set_endpoint(capsys): + set_endpoint_v1beta2.set_endpoint(PROJECT_ID, INPUT_URI) + out, _ = capsys.readouterr() + assert "Document Text" in out diff --git a/scripts/fixup_documentai_v1beta2_keywords.py b/scripts/fixup_documentai_v1beta2_keywords.py deleted file mode 100644 index 0cb9fcbf..00000000 --- a/scripts/fixup_documentai_v1beta2_keywords.py +++ /dev/null @@ -1,179 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import argparse -import os -import libcst as cst -import pathlib -import sys -from typing import (Any, Callable, Dict, List, Sequence, Tuple) - - -def partition( - predicate: Callable[[Any], bool], - iterator: Sequence[Any] -) -> Tuple[List[Any], List[Any]]: - """A stable, out-of-place partition.""" - results = ([], []) - - for i in iterator: - results[int(predicate(i))].append(i) - - # Returns trueList, falseList - return results[1], results[0] - - -class documentaiCallTransformer(cst.CSTTransformer): - CTRL_PARAMS: Tuple[str] = ('retry', 'timeout', 'metadata') - METHOD_TO_PARAMS: Dict[str, Tuple[str]] = { - 'batch_process_documents': ('requests', 'parent', ), - 'process_document': ('input_config', 'parent', 'output_config', 'document_type', 'table_extraction_params', 'form_extraction_params', 'entity_extraction_params', 'ocr_params', 'automl_params', ), - - } - - def leave_Call(self, original: cst.Call, updated: cst.Call) -> cst.CSTNode: - try: - key = original.func.attr.value - kword_params = self.METHOD_TO_PARAMS[key] - except (AttributeError, KeyError): - # Either not a method from the API or too convoluted to be sure. - return updated - - # If the existing code is valid, keyword args come after positional args. - # Therefore, all positional args must map to the first parameters. - args, kwargs = partition(lambda a: not bool(a.keyword), updated.args) - if any(k.keyword.value == "request" for k in kwargs): - # We've already fixed this file, don't fix it again. 
- return updated - - kwargs, ctrl_kwargs = partition( - lambda a: not a.keyword.value in self.CTRL_PARAMS, - kwargs - ) - - args, ctrl_args = args[:len(kword_params)], args[len(kword_params):] - ctrl_kwargs.extend(cst.Arg(value=a.value, keyword=cst.Name(value=ctrl)) - for a, ctrl in zip(ctrl_args, self.CTRL_PARAMS)) - - request_arg = cst.Arg( - value=cst.Dict([ - cst.DictElement( - cst.SimpleString("'{}'".format(name)), - cst.Element(value=arg.value) - ) - # Note: the args + kwargs looks silly, but keep in mind that - # the control parameters had to be stripped out, and that - # those could have been passed positionally or by keyword. - for name, arg in zip(kword_params, args + kwargs)]), - keyword=cst.Name("request") - ) - - return updated.with_changes( - args=[request_arg] + ctrl_kwargs - ) - - -def fix_files( - in_dir: pathlib.Path, - out_dir: pathlib.Path, - *, - transformer=documentaiCallTransformer(), -): - """Duplicate the input dir to the output dir, fixing file method calls. - - Preconditions: - * in_dir is a real directory - * out_dir is a real, empty directory - """ - pyfile_gen = ( - pathlib.Path(os.path.join(root, f)) - for root, _, files in os.walk(in_dir) - for f in files if os.path.splitext(f)[1] == ".py" - ) - - for fpath in pyfile_gen: - with open(fpath, 'r') as f: - src = f.read() - - # Parse the code and insert method call fixes. - tree = cst.parse_module(src) - updated = tree.visit(transformer) - - # Create the path and directory structure for the new file. - updated_path = out_dir.joinpath(fpath.relative_to(in_dir)) - updated_path.parent.mkdir(parents=True, exist_ok=True) - - # Generate the updated source file at the corresponding path. - with open(updated_path, 'w') as f: - f.write(updated.code) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description="""Fix up source that uses the documentai client library. - -The existing sources are NOT overwritten but are copied to output_dir with changes made. 
- -Note: This tool operates at a best-effort level at converting positional - parameters in client method calls to keyword based parameters. - Cases where it WILL FAIL include - A) * or ** expansion in a method call. - B) Calls via function or method alias (includes free function calls) - C) Indirect or dispatched calls (e.g. the method is looked up dynamically) - - These all constitute false negatives. The tool will also detect false - positives when an API method shares a name with another method. -""") - parser.add_argument( - '-d', - '--input-directory', - required=True, - dest='input_dir', - help='the input directory to walk for python files to fix up', - ) - parser.add_argument( - '-o', - '--output-directory', - required=True, - dest='output_dir', - help='the directory to output files fixed via un-flattening', - ) - args = parser.parse_args() - input_dir = pathlib.Path(args.input_dir) - output_dir = pathlib.Path(args.output_dir) - if not input_dir.is_dir(): - print( - f"input directory '{input_dir}' does not exist or is not a directory", - file=sys.stderr, - ) - sys.exit(-1) - - if not output_dir.is_dir(): - print( - f"output directory '{output_dir}' does not exist or is not a directory", - file=sys.stderr, - ) - sys.exit(-1) - - if os.listdir(output_dir): - print( - f"output directory '{output_dir}' is not empty", - file=sys.stderr, - ) - sys.exit(-1) - - fix_files(input_dir, output_dir) diff --git a/scripts/fixup_documentai_v1beta3_keywords.py b/scripts/fixup_documentai_v1beta3_keywords.py deleted file mode 100644 index 2b689522..00000000 --- a/scripts/fixup_documentai_v1beta3_keywords.py +++ /dev/null @@ -1,180 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import argparse -import os -import libcst as cst -import pathlib -import sys -from typing import (Any, Callable, Dict, List, Sequence, Tuple) - - -def partition( - predicate: Callable[[Any], bool], - iterator: Sequence[Any] -) -> Tuple[List[Any], List[Any]]: - """A stable, out-of-place partition.""" - results = ([], []) - - for i in iterator: - results[int(predicate(i))].append(i) - - # Returns trueList, falseList - return results[1], results[0] - - -class documentaiCallTransformer(cst.CSTTransformer): - CTRL_PARAMS: Tuple[str] = ('retry', 'timeout', 'metadata') - METHOD_TO_PARAMS: Dict[str, Tuple[str]] = { - 'batch_process_documents': ('name', 'input_configs', 'output_config', ), - 'process_document': ('name', 'document', 'skip_human_review', ), - 'review_document': ('human_review_config', 'document', ), - - } - - def leave_Call(self, original: cst.Call, updated: cst.Call) -> cst.CSTNode: - try: - key = original.func.attr.value - kword_params = self.METHOD_TO_PARAMS[key] - except (AttributeError, KeyError): - # Either not a method from the API or too convoluted to be sure. - return updated - - # If the existing code is valid, keyword args come after positional args. - # Therefore, all positional args must map to the first parameters. - args, kwargs = partition(lambda a: not bool(a.keyword), updated.args) - if any(k.keyword.value == "request" for k in kwargs): - # We've already fixed this file, don't fix it again. 
- return updated - - kwargs, ctrl_kwargs = partition( - lambda a: not a.keyword.value in self.CTRL_PARAMS, - kwargs - ) - - args, ctrl_args = args[:len(kword_params)], args[len(kword_params):] - ctrl_kwargs.extend(cst.Arg(value=a.value, keyword=cst.Name(value=ctrl)) - for a, ctrl in zip(ctrl_args, self.CTRL_PARAMS)) - - request_arg = cst.Arg( - value=cst.Dict([ - cst.DictElement( - cst.SimpleString("'{}'".format(name)), - cst.Element(value=arg.value) - ) - # Note: the args + kwargs looks silly, but keep in mind that - # the control parameters had to be stripped out, and that - # those could have been passed positionally or by keyword. - for name, arg in zip(kword_params, args + kwargs)]), - keyword=cst.Name("request") - ) - - return updated.with_changes( - args=[request_arg] + ctrl_kwargs - ) - - -def fix_files( - in_dir: pathlib.Path, - out_dir: pathlib.Path, - *, - transformer=documentaiCallTransformer(), -): - """Duplicate the input dir to the output dir, fixing file method calls. - - Preconditions: - * in_dir is a real directory - * out_dir is a real, empty directory - """ - pyfile_gen = ( - pathlib.Path(os.path.join(root, f)) - for root, _, files in os.walk(in_dir) - for f in files if os.path.splitext(f)[1] == ".py" - ) - - for fpath in pyfile_gen: - with open(fpath, 'r') as f: - src = f.read() - - # Parse the code and insert method call fixes. - tree = cst.parse_module(src) - updated = tree.visit(transformer) - - # Create the path and directory structure for the new file. - updated_path = out_dir.joinpath(fpath.relative_to(in_dir)) - updated_path.parent.mkdir(parents=True, exist_ok=True) - - # Generate the updated source file at the corresponding path. - with open(updated_path, 'w') as f: - f.write(updated.code) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description="""Fix up source that uses the documentai client library. - -The existing sources are NOT overwritten but are copied to output_dir with changes made. 
- -Note: This tool operates at a best-effort level at converting positional - parameters in client method calls to keyword based parameters. - Cases where it WILL FAIL include - A) * or ** expansion in a method call. - B) Calls via function or method alias (includes free function calls) - C) Indirect or dispatched calls (e.g. the method is looked up dynamically) - - These all constitute false negatives. The tool will also detect false - positives when an API method shares a name with another method. -""") - parser.add_argument( - '-d', - '--input-directory', - required=True, - dest='input_dir', - help='the input directory to walk for python files to fix up', - ) - parser.add_argument( - '-o', - '--output-directory', - required=True, - dest='output_dir', - help='the directory to output files fixed via un-flattening', - ) - args = parser.parse_args() - input_dir = pathlib.Path(args.input_dir) - output_dir = pathlib.Path(args.output_dir) - if not input_dir.is_dir(): - print( - f"input directory '{input_dir}' does not exist or is not a directory", - file=sys.stderr, - ) - sys.exit(-1) - - if not output_dir.is_dir(): - print( - f"output directory '{output_dir}' does not exist or is not a directory", - file=sys.stderr, - ) - sys.exit(-1) - - if os.listdir(output_dir): - print( - f"output directory '{output_dir}' is not empty", - file=sys.stderr, - ) - sys.exit(-1) - - fix_files(input_dir, output_dir) diff --git a/scripts/fixup_keywords.py b/scripts/fixup_keywords.py deleted file mode 100644 index 18b62b1c..00000000 --- a/scripts/fixup_keywords.py +++ /dev/null @@ -1,178 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import argparse -import os -import libcst as cst -import pathlib -import sys -from typing import (Any, Callable, Dict, List, Sequence, Tuple) - - -def partition( - predicate: Callable[[Any], bool], - iterator: Sequence[Any] -) -> Tuple[List[Any], List[Any]]: - """A stable, out-of-place partition.""" - results = ([], []) - - for i in iterator: - results[int(predicate(i))].append(i) - - # Returns trueList, falseList - return results[1], results[0] - - -class documentaiCallTransformer(cst.CSTTransformer): - CTRL_PARAMS: Tuple[str] = ('retry', 'timeout', 'metadata') - METHOD_TO_PARAMS: Dict[str, Tuple[str]] = { - 'batch_process_documents': ('requests', 'parent', ), - 'process_document': ('input_config', 'parent', 'output_config', 'document_type', 'table_extraction_params', 'form_extraction_params', 'entity_extraction_params', 'ocr_params', 'automl_params', ), - } - - def leave_Call(self, original: cst.Call, updated: cst.Call) -> cst.CSTNode: - try: - key = original.func.attr.value - kword_params = self.METHOD_TO_PARAMS[key] - except (AttributeError, KeyError): - # Either not a method from the API or too convoluted to be sure. - return updated - - # If the existing code is valid, keyword args come after positional args. - # Therefore, all positional args must map to the first parameters. - args, kwargs = partition(lambda a: not bool(a.keyword), updated.args) - if any(k.keyword.value == "request" for k in kwargs): - # We've already fixed this file, don't fix it again. 
- return updated - - kwargs, ctrl_kwargs = partition( - lambda a: not a.keyword.value in self.CTRL_PARAMS, - kwargs - ) - - args, ctrl_args = args[:len(kword_params)], args[len(kword_params):] - ctrl_kwargs.extend(cst.Arg(value=a.value, keyword=cst.Name(value=ctrl)) - for a, ctrl in zip(ctrl_args, self.CTRL_PARAMS)) - - request_arg = cst.Arg( - value=cst.Dict([ - cst.DictElement( - cst.SimpleString("'{}'".format(name)), - cst.Element(value=arg.value) - ) - # Note: the args + kwargs looks silly, but keep in mind that - # the control parameters had to be stripped out, and that - # those could have been passed positionally or by keyword. - for name, arg in zip(kword_params, args + kwargs)]), - keyword=cst.Name("request") - ) - - return updated.with_changes( - args=[request_arg] + ctrl_kwargs - ) - - -def fix_files( - in_dir: pathlib.Path, - out_dir: pathlib.Path, - *, - transformer=documentaiCallTransformer(), -): - """Duplicate the input dir to the output dir, fixing file method calls. - - Preconditions: - * in_dir is a real directory - * out_dir is a real, empty directory - """ - pyfile_gen = ( - pathlib.Path(os.path.join(root, f)) - for root, _, files in os.walk(in_dir) - for f in files if os.path.splitext(f)[1] == ".py" - ) - - for fpath in pyfile_gen: - with open(fpath, 'r') as f: - src = f.read() - - # Parse the code and insert method call fixes. - tree = cst.parse_module(src) - updated = tree.visit(transformer) - - # Create the path and directory structure for the new file. - updated_path = out_dir.joinpath(fpath.relative_to(in_dir)) - updated_path.parent.mkdir(parents=True, exist_ok=True) - - # Generate the updated source file at the corresponding path. - with open(updated_path, 'w') as f: - f.write(updated.code) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description="""Fix up source that uses the documentai client library. - -The existing sources are NOT overwritten but are copied to output_dir with changes made. 
- -Note: This tool operates at a best-effort level at converting positional - parameters in client method calls to keyword based parameters. - Cases where it WILL FAIL include - A) * or ** expansion in a method call. - B) Calls via function or method alias (includes free function calls) - C) Indirect or dispatched calls (e.g. the method is looked up dynamically) - - These all constitute false negatives. The tool will also detect false - positives when an API method shares a name with another method. -""") - parser.add_argument( - '-d', - '--input-directory', - required=True, - dest='input_dir', - help='the input directory to walk for python files to fix up', - ) - parser.add_argument( - '-o', - '--output-directory', - required=True, - dest='output_dir', - help='the directory to output files fixed via un-flattening', - ) - args = parser.parse_args() - input_dir = pathlib.Path(args.input_dir) - output_dir = pathlib.Path(args.output_dir) - if not input_dir.is_dir(): - print( - f"input directory '{input_dir}' does not exist or is not a directory", - file=sys.stderr, - ) - sys.exit(-1) - - if not output_dir.is_dir(): - print( - f"output directory '{output_dir}' does not exist or is not a directory", - file=sys.stderr, - ) - sys.exit(-1) - - if os.listdir(output_dir): - print( - f"output directory '{output_dir}' is not empty", - file=sys.stderr, - ) - sys.exit(-1) - - fix_files(input_dir, output_dir) diff --git a/setup.py b/setup.py index 90005d59..4cba6749 100644 --- a/setup.py +++ b/setup.py @@ -19,7 +19,7 @@ import os import setuptools # type: ignore -version = "0.3.0" +version = "0.4.0" package_root = os.path.abspath(os.path.dirname(__file__)) @@ -41,12 +41,11 @@ platforms="Posix; MacOS X; Windows", include_package_data=True, install_requires=( - "google-api-core[grpc] >= 1.22.0, < 2.0.0dev", + "google-api-core[grpc] >= 1.22.2, < 2.0.0dev", "proto-plus >= 1.10.0", ), python_requires=">=3.6", setup_requires=["libcst >= 0.2.5"], - scripts=["scripts/fixup_keywords.py"], 
classifiers=[ "Development Status :: 4 - Beta", "Intended Audience :: Developers", diff --git a/synth.metadata b/synth.metadata index 3a7962ec..1d63eab3 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,21 +4,29 @@ "git": { "name": ".", "remote": "git@github.com:googleapis/python-documentai", - "sha": "ec70a8cec0f1fbd0f8ec18189139e632ec28b025" + "sha": "9fd02a6b9ba34a6762a762f12de9948daf1ea9bb" + } + }, + { + "git": { + "name": "googleapis", + "remote": "https://github.com/googleapis/googleapis.git", + "sha": "551ddbb55b96147012c00b66250dd5907556807c", + "internalRef": "364734171" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "e6168630be3e31eede633ba2c6f1cd64248dec1c" + "sha": "7a3df8832c7c64c482874c5dbebfd0a732b4938b" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "e6168630be3e31eede633ba2c6f1cd64248dec1c" + "sha": "7a3df8832c7c64c482874c5dbebfd0a732b4938b" } } ], @@ -40,6 +48,15 @@ "language": "python", "generator": "bazel" } + }, + { + "client": { + "source": "googleapis", + "apiName": "documentai", + "apiVersion": "v1", + "language": "python", + "generator": "bazel" + } } ] } \ No newline at end of file diff --git a/synth.py b/synth.py index 3ae8445c..34c054d6 100644 --- a/synth.py +++ b/synth.py @@ -25,12 +25,11 @@ gapic = gcp.GAPICBazel() common = gcp.CommonTemplates() +# add the highest stable version to the end +versions = ["v1beta2", "v1beta3", "v1"] # ---------------------------------------------------------------------------- # Generate document AI GAPIC layer # ---------------------------------------------------------------------------- - -versions = ["v1beta2", "v1beta3"] - for version in versions: library = gapic.py_library( service="documentai", @@ -38,22 +37,27 @@ bazel_target=f"//google/cloud/documentai/{version}:documentai-{version}-py", ) - excludes = ["README.rst", "nox.py", "docs/index.rst", "setup.py"] + excludes = [ + 
"README.rst", + "nox.py", + "docs/index.rst", + "setup.py", + "scripts/fixup_documentai_v*", # this library was always generated with the microgenerator + ] s.move(library, excludes=excludes) # ---------------------------------------------------------------------------- # Add templated files # ---------------------------------------------------------------------------- templated_files = common.py_library( - cov_level=100, - microgenerator=True, - samples=False, # set to true if there are samples + cov_level=99, microgenerator=True, samples=False, # set to true if there are samples ) + s.move( templated_files, excludes=[".coveragerc"], # microgenerator has a good .coveragerc file -) +) -python.py_samples() +python.py_samples(skip_readmes=True) s.shell.run(["nox", "-s", "blacken"], hide_output=False) diff --git a/testing/constraints-3.10.txt b/testing/constraints-3.10.txt new file mode 100644 index 00000000..e69de29b diff --git a/testing/constraints-3.11.txt b/testing/constraints-3.11.txt new file mode 100644 index 00000000..e69de29b diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt new file mode 100644 index 00000000..69e1c139 --- /dev/null +++ b/testing/constraints-3.6.txt @@ -0,0 +1,9 @@ +# This constraints file is used to check that lower bounds +# are correct in setup.py +# List *all* library dependencies and extras in this file. +# Pin the version to the lower bound. 
+# +# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", +# Then this file should have foo==1.14.0 +google-api-core==1.22.2 +proto-plus==1.10.0 diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt new file mode 100644 index 00000000..e69de29b diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt new file mode 100644 index 00000000..e69de29b diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/gapic/documentai_v1/__init__.py b/tests/unit/gapic/documentai_v1/__init__.py new file mode 100644 index 00000000..42ffdf2b --- /dev/null +++ b/tests/unit/gapic/documentai_v1/__init__.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/tests/unit/gapic/documentai_v1/test_document_processor_service.py b/tests/unit/gapic/documentai_v1/test_document_processor_service.py new file mode 100644 index 00000000..b48bdc60 --- /dev/null +++ b/tests/unit/gapic/documentai_v1/test_document_processor_service.py @@ -0,0 +1,1723 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import mock + +import grpc +from grpc.experimental import aio +import math +import pytest +from proto.marshal.rules.dates import DurationRule, TimestampRule + +from google import auth +from google.api_core import client_options +from google.api_core import exceptions +from google.api_core import future +from google.api_core import gapic_v1 +from google.api_core import grpc_helpers +from google.api_core import grpc_helpers_async +from google.api_core import operation_async # type: ignore +from google.api_core import operations_v1 +from google.auth import credentials +from google.auth.exceptions import MutualTLSChannelError +from google.cloud.documentai_v1.services.document_processor_service import ( + DocumentProcessorServiceAsyncClient, +) +from google.cloud.documentai_v1.services.document_processor_service import ( + DocumentProcessorServiceClient, +) +from google.cloud.documentai_v1.services.document_processor_service import transports +from google.cloud.documentai_v1.types import document +from google.cloud.documentai_v1.types import document_io +from google.cloud.documentai_v1.types import document_processor_service +from google.cloud.documentai_v1.types import geometry +from google.longrunning import operations_pb2 +from google.oauth2 import service_account +from google.protobuf import any_pb2 as gp_any # type: ignore +from google.protobuf import duration_pb2 as duration # type: ignore +from google.protobuf import timestamp_pb2 as timestamp # type: ignore +from google.protobuf import wrappers_pb2 as wrappers # type: ignore +from 
google.rpc import status_pb2 as status # type: ignore +from google.type import color_pb2 as color # type: ignore +from google.type import date_pb2 as date # type: ignore +from google.type import datetime_pb2 as datetime # type: ignore +from google.type import money_pb2 as money # type: ignore +from google.type import postal_address_pb2 as postal_address # type: ignore + + +def client_cert_source_callback(): + return b"cert bytes", b"key bytes" + + +# If default endpoint is localhost, then default mtls endpoint will be the same. +# This method modifies the default endpoint so the client can produce a different +# mtls endpoint for endpoint testing purposes. +def modify_default_endpoint(client): + return ( + "foo.googleapis.com" + if ("localhost" in client.DEFAULT_ENDPOINT) + else client.DEFAULT_ENDPOINT + ) + + +def test__get_default_mtls_endpoint(): + api_endpoint = "example.googleapis.com" + api_mtls_endpoint = "example.mtls.googleapis.com" + sandbox_endpoint = "example.sandbox.googleapis.com" + sandbox_mtls_endpoint = "example.mtls.sandbox.googleapis.com" + non_googleapi = "api.example.com" + + assert DocumentProcessorServiceClient._get_default_mtls_endpoint(None) is None + assert ( + DocumentProcessorServiceClient._get_default_mtls_endpoint(api_endpoint) + == api_mtls_endpoint + ) + assert ( + DocumentProcessorServiceClient._get_default_mtls_endpoint(api_mtls_endpoint) + == api_mtls_endpoint + ) + assert ( + DocumentProcessorServiceClient._get_default_mtls_endpoint(sandbox_endpoint) + == sandbox_mtls_endpoint + ) + assert ( + DocumentProcessorServiceClient._get_default_mtls_endpoint(sandbox_mtls_endpoint) + == sandbox_mtls_endpoint + ) + assert ( + DocumentProcessorServiceClient._get_default_mtls_endpoint(non_googleapi) + == non_googleapi + ) + + +@pytest.mark.parametrize( + "client_class", + [DocumentProcessorServiceClient, DocumentProcessorServiceAsyncClient,], +) +def test_document_processor_service_client_from_service_account_info(client_class): + creds = 
credentials.AnonymousCredentials() + with mock.patch.object( + service_account.Credentials, "from_service_account_info" + ) as factory: + factory.return_value = creds + info = {"valid": True} + client = client_class.from_service_account_info(info) + assert client.transport._credentials == creds + assert isinstance(client, client_class) + + assert client.transport._host == "us-documentai.googleapis.com:443" + + +@pytest.mark.parametrize( + "client_class", + [DocumentProcessorServiceClient, DocumentProcessorServiceAsyncClient,], +) +def test_document_processor_service_client_from_service_account_file(client_class): + creds = credentials.AnonymousCredentials() + with mock.patch.object( + service_account.Credentials, "from_service_account_file" + ) as factory: + factory.return_value = creds + client = client_class.from_service_account_file("dummy/file/path.json") + assert client.transport._credentials == creds + assert isinstance(client, client_class) + + client = client_class.from_service_account_json("dummy/file/path.json") + assert client.transport._credentials == creds + assert isinstance(client, client_class) + + assert client.transport._host == "us-documentai.googleapis.com:443" + + +def test_document_processor_service_client_get_transport_class(): + transport = DocumentProcessorServiceClient.get_transport_class() + available_transports = [ + transports.DocumentProcessorServiceGrpcTransport, + ] + assert transport in available_transports + + transport = DocumentProcessorServiceClient.get_transport_class("grpc") + assert transport == transports.DocumentProcessorServiceGrpcTransport + + +@pytest.mark.parametrize( + "client_class,transport_class,transport_name", + [ + ( + DocumentProcessorServiceClient, + transports.DocumentProcessorServiceGrpcTransport, + "grpc", + ), + ( + DocumentProcessorServiceAsyncClient, + transports.DocumentProcessorServiceGrpcAsyncIOTransport, + "grpc_asyncio", + ), + ], +) +@mock.patch.object( + DocumentProcessorServiceClient, + 
"DEFAULT_ENDPOINT", + modify_default_endpoint(DocumentProcessorServiceClient), +) +@mock.patch.object( + DocumentProcessorServiceAsyncClient, + "DEFAULT_ENDPOINT", + modify_default_endpoint(DocumentProcessorServiceAsyncClient), +) +def test_document_processor_service_client_client_options( + client_class, transport_class, transport_name +): + # Check that if channel is provided we won't create a new one. + with mock.patch.object( + DocumentProcessorServiceClient, "get_transport_class" + ) as gtc: + transport = transport_class(credentials=credentials.AnonymousCredentials()) + client = client_class(transport=transport) + gtc.assert_not_called() + + # Check that if channel is provided via str we will create a new one. + with mock.patch.object( + DocumentProcessorServiceClient, "get_transport_class" + ) as gtc: + client = client_class(transport=transport_name) + gtc.assert_called() + + # Check the case api_endpoint is provided. + options = client_options.ClientOptions(api_endpoint="squid.clam.whelk") + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class(client_options=options) + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host="squid.clam.whelk", + scopes=None, + client_cert_source_for_mtls=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case api_endpoint is not provided and GOOGLE_API_USE_MTLS_ENDPOINT is + # "never". 
+ with mock.patch.dict(os.environ, {"GOOGLE_API_USE_MTLS_ENDPOINT": "never"}): + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class() + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_ENDPOINT, + scopes=None, + client_cert_source_for_mtls=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case api_endpoint is not provided and GOOGLE_API_USE_MTLS_ENDPOINT is + # "always". + with mock.patch.dict(os.environ, {"GOOGLE_API_USE_MTLS_ENDPOINT": "always"}): + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class() + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_MTLS_ENDPOINT, + scopes=None, + client_cert_source_for_mtls=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case api_endpoint is not provided and GOOGLE_API_USE_MTLS_ENDPOINT has + # unsupported value. + with mock.patch.dict(os.environ, {"GOOGLE_API_USE_MTLS_ENDPOINT": "Unsupported"}): + with pytest.raises(MutualTLSChannelError): + client = client_class() + + # Check the case GOOGLE_API_USE_CLIENT_CERTIFICATE has unsupported value. 
+ with mock.patch.dict( + os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": "Unsupported"} + ): + with pytest.raises(ValueError): + client = client_class() + + # Check the case quota_project_id is provided + options = client_options.ClientOptions(quota_project_id="octopus") + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class(client_options=options) + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_ENDPOINT, + scopes=None, + client_cert_source_for_mtls=None, + quota_project_id="octopus", + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +@pytest.mark.parametrize( + "client_class,transport_class,transport_name,use_client_cert_env", + [ + ( + DocumentProcessorServiceClient, + transports.DocumentProcessorServiceGrpcTransport, + "grpc", + "true", + ), + ( + DocumentProcessorServiceAsyncClient, + transports.DocumentProcessorServiceGrpcAsyncIOTransport, + "grpc_asyncio", + "true", + ), + ( + DocumentProcessorServiceClient, + transports.DocumentProcessorServiceGrpcTransport, + "grpc", + "false", + ), + ( + DocumentProcessorServiceAsyncClient, + transports.DocumentProcessorServiceGrpcAsyncIOTransport, + "grpc_asyncio", + "false", + ), + ], +) +@mock.patch.object( + DocumentProcessorServiceClient, + "DEFAULT_ENDPOINT", + modify_default_endpoint(DocumentProcessorServiceClient), +) +@mock.patch.object( + DocumentProcessorServiceAsyncClient, + "DEFAULT_ENDPOINT", + modify_default_endpoint(DocumentProcessorServiceAsyncClient), +) +@mock.patch.dict(os.environ, {"GOOGLE_API_USE_MTLS_ENDPOINT": "auto"}) +def test_document_processor_service_client_mtls_env_auto( + client_class, transport_class, transport_name, use_client_cert_env +): + # This tests the endpoint autoswitch behavior. Endpoint is autoswitched to the default + # mtls endpoint, if GOOGLE_API_USE_CLIENT_CERTIFICATE is "true" and client cert exists. 
+ + # Check the case client_cert_source is provided. Whether client cert is used depends on + # GOOGLE_API_USE_CLIENT_CERTIFICATE value. + with mock.patch.dict( + os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": use_client_cert_env} + ): + options = client_options.ClientOptions( + client_cert_source=client_cert_source_callback + ) + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class(client_options=options) + + if use_client_cert_env == "false": + expected_client_cert_source = None + expected_host = client.DEFAULT_ENDPOINT + else: + expected_client_cert_source = client_cert_source_callback + expected_host = client.DEFAULT_MTLS_ENDPOINT + + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=expected_host, + scopes=None, + client_cert_source_for_mtls=expected_client_cert_source, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case ADC client cert is provided. Whether client cert is used depends on + # GOOGLE_API_USE_CLIENT_CERTIFICATE value. 
+ with mock.patch.dict( + os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": use_client_cert_env} + ): + with mock.patch.object(transport_class, "__init__") as patched: + with mock.patch( + "google.auth.transport.mtls.has_default_client_cert_source", + return_value=True, + ): + with mock.patch( + "google.auth.transport.mtls.default_client_cert_source", + return_value=client_cert_source_callback, + ): + if use_client_cert_env == "false": + expected_host = client.DEFAULT_ENDPOINT + expected_client_cert_source = None + else: + expected_host = client.DEFAULT_MTLS_ENDPOINT + expected_client_cert_source = client_cert_source_callback + + patched.return_value = None + client = client_class() + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=expected_host, + scopes=None, + client_cert_source_for_mtls=expected_client_cert_source, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + # Check the case client_cert_source and ADC client cert are not provided. 
+ with mock.patch.dict( + os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": use_client_cert_env} + ): + with mock.patch.object(transport_class, "__init__") as patched: + with mock.patch( + "google.auth.transport.mtls.has_default_client_cert_source", + return_value=False, + ): + patched.return_value = None + client = client_class() + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_ENDPOINT, + scopes=None, + client_cert_source_for_mtls=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +@pytest.mark.parametrize( + "client_class,transport_class,transport_name", + [ + ( + DocumentProcessorServiceClient, + transports.DocumentProcessorServiceGrpcTransport, + "grpc", + ), + ( + DocumentProcessorServiceAsyncClient, + transports.DocumentProcessorServiceGrpcAsyncIOTransport, + "grpc_asyncio", + ), + ], +) +def test_document_processor_service_client_client_options_scopes( + client_class, transport_class, transport_name +): + # Check the case scopes are provided. 
+ options = client_options.ClientOptions(scopes=["1", "2"],) + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class(client_options=options) + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_ENDPOINT, + scopes=["1", "2"], + client_cert_source_for_mtls=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +@pytest.mark.parametrize( + "client_class,transport_class,transport_name", + [ + ( + DocumentProcessorServiceClient, + transports.DocumentProcessorServiceGrpcTransport, + "grpc", + ), + ( + DocumentProcessorServiceAsyncClient, + transports.DocumentProcessorServiceGrpcAsyncIOTransport, + "grpc_asyncio", + ), + ], +) +def test_document_processor_service_client_client_options_credentials_file( + client_class, transport_class, transport_name +): + # Check the case credentials file is provided. + options = client_options.ClientOptions(credentials_file="credentials.json") + with mock.patch.object(transport_class, "__init__") as patched: + patched.return_value = None + client = client_class(client_options=options) + patched.assert_called_once_with( + credentials=None, + credentials_file="credentials.json", + host=client.DEFAULT_ENDPOINT, + scopes=None, + client_cert_source_for_mtls=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +def test_document_processor_service_client_client_options_from_dict(): + with mock.patch( + "google.cloud.documentai_v1.services.document_processor_service.transports.DocumentProcessorServiceGrpcTransport.__init__" + ) as grpc_transport: + grpc_transport.return_value = None + client = DocumentProcessorServiceClient( + client_options={"api_endpoint": "squid.clam.whelk"} + ) + grpc_transport.assert_called_once_with( + credentials=None, + credentials_file=None, + host="squid.clam.whelk", + scopes=None, + client_cert_source_for_mtls=None, + 
quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + + +def test_process_document( + transport: str = "grpc", request_type=document_processor_service.ProcessRequest +): + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.process_document), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = document_processor_service.ProcessResponse() + + response = client.process_document(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0] == document_processor_service.ProcessRequest() + + # Establish that the response is the type that we expect. + + assert isinstance(response, document_processor_service.ProcessResponse) + + +def test_process_document_from_dict(): + test_process_document(request_type=dict) + + +def test_process_document_empty_call(): + # This test is a coverage failsafe to make sure that totally empty calls, + # i.e. request == None and no flattened fields passed, work. + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), transport="grpc", + ) + + # Mock the actual call within the gRPC stub, and fake the request. 
+ with mock.patch.object(type(client.transport.process_document), "__call__") as call: + client.process_document() + call.assert_called() + _, args, _ = call.mock_calls[0] + + assert args[0] == document_processor_service.ProcessRequest() + + +@pytest.mark.asyncio +async def test_process_document_async( + transport: str = "grpc_asyncio", + request_type=document_processor_service.ProcessRequest, +): + client = DocumentProcessorServiceAsyncClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.process_document), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + document_processor_service.ProcessResponse() + ) + + response = await client.process_document(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0] == document_processor_service.ProcessRequest() + + # Establish that the response is the type that we expect. + assert isinstance(response, document_processor_service.ProcessResponse) + + +@pytest.mark.asyncio +async def test_process_document_async_from_dict(): + await test_process_document_async(request_type=dict) + + +def test_process_document_field_headers(): + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), + ) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = document_processor_service.ProcessRequest() + request.name = "name/value" + + # Mock the actual call within the gRPC stub, and fake the request. 
+ with mock.patch.object(type(client.transport.process_document), "__call__") as call: + call.return_value = document_processor_service.ProcessResponse() + + client.process_document(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "name=name/value",) in kw["metadata"] + + +@pytest.mark.asyncio +async def test_process_document_field_headers_async(): + client = DocumentProcessorServiceAsyncClient( + credentials=credentials.AnonymousCredentials(), + ) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = document_processor_service.ProcessRequest() + request.name = "name/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.process_document), "__call__") as call: + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + document_processor_service.ProcessResponse() + ) + + await client.process_document(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "name=name/value",) in kw["metadata"] + + +def test_process_document_flattened(): + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.process_document), "__call__") as call: + # Designate an appropriate return value for the call. 
+ call.return_value = document_processor_service.ProcessResponse() + + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + client.process_document(name="name_value",) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0].name == "name_value" + + +def test_process_document_flattened_error(): + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), + ) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + client.process_document( + document_processor_service.ProcessRequest(), name="name_value", + ) + + +@pytest.mark.asyncio +async def test_process_document_flattened_async(): + client = DocumentProcessorServiceAsyncClient( + credentials=credentials.AnonymousCredentials(), + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.process_document), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = document_processor_service.ProcessResponse() + + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + document_processor_service.ProcessResponse() + ) + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + response = await client.process_document(name="name_value",) + + # Establish that the underlying call was made with the expected + # request object values. 
+ assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0].name == "name_value" + + +@pytest.mark.asyncio +async def test_process_document_flattened_error_async(): + client = DocumentProcessorServiceAsyncClient( + credentials=credentials.AnonymousCredentials(), + ) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + await client.process_document( + document_processor_service.ProcessRequest(), name="name_value", + ) + + +def test_batch_process_documents( + transport: str = "grpc", request_type=document_processor_service.BatchProcessRequest +): + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.batch_process_documents), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = operations_pb2.Operation(name="operations/spam") + + response = client.batch_process_documents(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0] == document_processor_service.BatchProcessRequest() + + # Establish that the response is the type that we expect. + assert isinstance(response, future.Future) + + +def test_batch_process_documents_from_dict(): + test_batch_process_documents(request_type=dict) + + +def test_batch_process_documents_empty_call(): + # This test is a coverage failsafe to make sure that totally empty calls, + # i.e. request == None and no flattened fields passed, work. 
+ client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), transport="grpc", + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.batch_process_documents), "__call__" + ) as call: + client.batch_process_documents() + call.assert_called() + _, args, _ = call.mock_calls[0] + + assert args[0] == document_processor_service.BatchProcessRequest() + + +@pytest.mark.asyncio +async def test_batch_process_documents_async( + transport: str = "grpc_asyncio", + request_type=document_processor_service.BatchProcessRequest, +): + client = DocumentProcessorServiceAsyncClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.batch_process_documents), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + operations_pb2.Operation(name="operations/spam") + ) + + response = await client.batch_process_documents(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0] == document_processor_service.BatchProcessRequest() + + # Establish that the response is the type that we expect. 
+ assert isinstance(response, future.Future) + + +@pytest.mark.asyncio +async def test_batch_process_documents_async_from_dict(): + await test_batch_process_documents_async(request_type=dict) + + +def test_batch_process_documents_field_headers(): + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), + ) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = document_processor_service.BatchProcessRequest() + request.name = "name/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.batch_process_documents), "__call__" + ) as call: + call.return_value = operations_pb2.Operation(name="operations/op") + + client.batch_process_documents(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "name=name/value",) in kw["metadata"] + + +@pytest.mark.asyncio +async def test_batch_process_documents_field_headers_async(): + client = DocumentProcessorServiceAsyncClient( + credentials=credentials.AnonymousCredentials(), + ) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = document_processor_service.BatchProcessRequest() + request.name = "name/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.batch_process_documents), "__call__" + ) as call: + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + operations_pb2.Operation(name="operations/op") + ) + + await client.batch_process_documents(request) + + # Establish that the underlying gRPC stub method was called. 
+ assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ("x-goog-request-params", "name=name/value",) in kw["metadata"] + + +def test_batch_process_documents_flattened(): + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.batch_process_documents), "__call__" + ) as call: + # Designate an appropriate return value for the call. + call.return_value = operations_pb2.Operation(name="operations/op") + + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + client.batch_process_documents(name="name_value",) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0].name == "name_value" + + +def test_batch_process_documents_flattened_error(): + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), + ) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + client.batch_process_documents( + document_processor_service.BatchProcessRequest(), name="name_value", + ) + + +@pytest.mark.asyncio +async def test_batch_process_documents_flattened_async(): + client = DocumentProcessorServiceAsyncClient( + credentials=credentials.AnonymousCredentials(), + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.batch_process_documents), "__call__" + ) as call: + # Designate an appropriate return value for the call. 
+ call.return_value = operations_pb2.Operation(name="operations/op") + + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + operations_pb2.Operation(name="operations/spam") + ) + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + response = await client.batch_process_documents(name="name_value",) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0].name == "name_value" + + +@pytest.mark.asyncio +async def test_batch_process_documents_flattened_error_async(): + client = DocumentProcessorServiceAsyncClient( + credentials=credentials.AnonymousCredentials(), + ) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + await client.batch_process_documents( + document_processor_service.BatchProcessRequest(), name="name_value", + ) + + +def test_review_document( + transport: str = "grpc", + request_type=document_processor_service.ReviewDocumentRequest, +): + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.review_document), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = operations_pb2.Operation(name="operations/spam") + + response = client.review_document(request) + + # Establish that the underlying gRPC stub method was called. 
+ assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0] == document_processor_service.ReviewDocumentRequest() + + # Establish that the response is the type that we expect. + assert isinstance(response, future.Future) + + +def test_review_document_from_dict(): + test_review_document(request_type=dict) + + +def test_review_document_empty_call(): + # This test is a coverage failsafe to make sure that totally empty calls, + # i.e. request == None and no flattened fields passed, work. + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), transport="grpc", + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.review_document), "__call__") as call: + client.review_document() + call.assert_called() + _, args, _ = call.mock_calls[0] + + assert args[0] == document_processor_service.ReviewDocumentRequest() + + +@pytest.mark.asyncio +async def test_review_document_async( + transport: str = "grpc_asyncio", + request_type=document_processor_service.ReviewDocumentRequest, +): + client = DocumentProcessorServiceAsyncClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # Everything is optional in proto3 as far as the runtime is concerned, + # and we are mocking out the actual API, so just send an empty request. + request = request_type() + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.review_document), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + operations_pb2.Operation(name="operations/spam") + ) + + response = await client.review_document(request) + + # Establish that the underlying gRPC stub method was called. 
+ assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0] == document_processor_service.ReviewDocumentRequest() + + # Establish that the response is the type that we expect. + assert isinstance(response, future.Future) + + +@pytest.mark.asyncio +async def test_review_document_async_from_dict(): + await test_review_document_async(request_type=dict) + + +def test_review_document_field_headers(): + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), + ) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = document_processor_service.ReviewDocumentRequest() + request.human_review_config = "human_review_config/value" + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.review_document), "__call__") as call: + call.return_value = operations_pb2.Operation(name="operations/op") + + client.review_document(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ( + "x-goog-request-params", + "human_review_config=human_review_config/value", + ) in kw["metadata"] + + +@pytest.mark.asyncio +async def test_review_document_field_headers_async(): + client = DocumentProcessorServiceAsyncClient( + credentials=credentials.AnonymousCredentials(), + ) + + # Any value that is part of the HTTP/1.1 URI should be sent as + # a field header. Set these to a non-empty value. + request = document_processor_service.ReviewDocumentRequest() + request.human_review_config = "human_review_config/value" + + # Mock the actual call within the gRPC stub, and fake the request. 
+ with mock.patch.object(type(client.transport.review_document), "__call__") as call: + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + operations_pb2.Operation(name="operations/op") + ) + + await client.review_document(request) + + # Establish that the underlying gRPC stub method was called. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + assert args[0] == request + + # Establish that the field header was sent. + _, _, kw = call.mock_calls[0] + assert ( + "x-goog-request-params", + "human_review_config=human_review_config/value", + ) in kw["metadata"] + + +def test_review_document_flattened(): + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.review_document), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = operations_pb2.Operation(name="operations/op") + + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + client.review_document(human_review_config="human_review_config_value",) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) == 1 + _, args, _ = call.mock_calls[0] + + assert args[0].human_review_config == "human_review_config_value" + + +def test_review_document_flattened_error(): + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), + ) + + # Attempting to call a method with both a request object and flattened + # fields is an error. 
+ with pytest.raises(ValueError): + client.review_document( + document_processor_service.ReviewDocumentRequest(), + human_review_config="human_review_config_value", + ) + + +@pytest.mark.asyncio +async def test_review_document_flattened_async(): + client = DocumentProcessorServiceAsyncClient( + credentials=credentials.AnonymousCredentials(), + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object(type(client.transport.review_document), "__call__") as call: + # Designate an appropriate return value for the call. + call.return_value = operations_pb2.Operation(name="operations/op") + + call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( + operations_pb2.Operation(name="operations/spam") + ) + # Call the method with a truthy value for each flattened field, + # using the keyword arguments to the method. + response = await client.review_document( + human_review_config="human_review_config_value", + ) + + # Establish that the underlying call was made with the expected + # request object values. + assert len(call.mock_calls) + _, args, _ = call.mock_calls[0] + + assert args[0].human_review_config == "human_review_config_value" + + +@pytest.mark.asyncio +async def test_review_document_flattened_error_async(): + client = DocumentProcessorServiceAsyncClient( + credentials=credentials.AnonymousCredentials(), + ) + + # Attempting to call a method with both a request object and flattened + # fields is an error. + with pytest.raises(ValueError): + await client.review_document( + document_processor_service.ReviewDocumentRequest(), + human_review_config="human_review_config_value", + ) + + +def test_credentials_transport_error(): + # It is an error to provide credentials and a transport instance. 
+ transport = transports.DocumentProcessorServiceGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + with pytest.raises(ValueError): + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), transport=transport, + ) + + # It is an error to provide a credentials file and a transport instance. + transport = transports.DocumentProcessorServiceGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + with pytest.raises(ValueError): + client = DocumentProcessorServiceClient( + client_options={"credentials_file": "credentials.json"}, + transport=transport, + ) + + # It is an error to provide scopes and a transport instance. + transport = transports.DocumentProcessorServiceGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + with pytest.raises(ValueError): + client = DocumentProcessorServiceClient( + client_options={"scopes": ["1", "2"]}, transport=transport, + ) + + +def test_transport_instance(): + # A client may be instantiated with a custom transport instance. + transport = transports.DocumentProcessorServiceGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + client = DocumentProcessorServiceClient(transport=transport) + assert client.transport is transport + + +def test_transport_get_channel(): + # A client may be instantiated with a custom transport instance. 
+ transport = transports.DocumentProcessorServiceGrpcTransport( + credentials=credentials.AnonymousCredentials(), + ) + channel = transport.grpc_channel + assert channel + + transport = transports.DocumentProcessorServiceGrpcAsyncIOTransport( + credentials=credentials.AnonymousCredentials(), + ) + channel = transport.grpc_channel + assert channel + + +@pytest.mark.parametrize( + "transport_class", + [ + transports.DocumentProcessorServiceGrpcTransport, + transports.DocumentProcessorServiceGrpcAsyncIOTransport, + ], +) +def test_transport_adc(transport_class): + # Test default credentials are used if not provided. + with mock.patch.object(auth, "default") as adc: + adc.return_value = (credentials.AnonymousCredentials(), None) + transport_class() + adc.assert_called_once() + + +def test_transport_grpc_default(): + # A client should use the gRPC transport by default. + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), + ) + assert isinstance( + client.transport, transports.DocumentProcessorServiceGrpcTransport, + ) + + +def test_document_processor_service_base_transport_error(): + # Passing both a credentials object and credentials_file should raise an error + with pytest.raises(exceptions.DuplicateCredentialArgs): + transport = transports.DocumentProcessorServiceTransport( + credentials=credentials.AnonymousCredentials(), + credentials_file="credentials.json", + ) + + +def test_document_processor_service_base_transport(): + # Instantiate the base transport. + with mock.patch( + "google.cloud.documentai_v1.services.document_processor_service.transports.DocumentProcessorServiceTransport.__init__" + ) as Transport: + Transport.return_value = None + transport = transports.DocumentProcessorServiceTransport( + credentials=credentials.AnonymousCredentials(), + ) + + # Every method on the transport should just blindly + # raise NotImplementedError. 
+ methods = ( + "process_document", + "batch_process_documents", + "review_document", + ) + for method in methods: + with pytest.raises(NotImplementedError): + getattr(transport, method)(request=object()) + + # Additionally, the LRO client (a property) should + # also raise NotImplementedError + with pytest.raises(NotImplementedError): + transport.operations_client + + +def test_document_processor_service_base_transport_with_credentials_file(): + # Instantiate the base transport with a credentials file + with mock.patch.object( + auth, "load_credentials_from_file" + ) as load_creds, mock.patch( + "google.cloud.documentai_v1.services.document_processor_service.transports.DocumentProcessorServiceTransport._prep_wrapped_messages" + ) as Transport: + Transport.return_value = None + load_creds.return_value = (credentials.AnonymousCredentials(), None) + transport = transports.DocumentProcessorServiceTransport( + credentials_file="credentials.json", quota_project_id="octopus", + ) + load_creds.assert_called_once_with( + "credentials.json", + scopes=("https://www.googleapis.com/auth/cloud-platform",), + quota_project_id="octopus", + ) + + +def test_document_processor_service_base_transport_with_adc(): + # Test the default credentials are used if credentials and credentials_file are None. + with mock.patch.object(auth, "default") as adc, mock.patch( + "google.cloud.documentai_v1.services.document_processor_service.transports.DocumentProcessorServiceTransport._prep_wrapped_messages" + ) as Transport: + Transport.return_value = None + adc.return_value = (credentials.AnonymousCredentials(), None) + transport = transports.DocumentProcessorServiceTransport() + adc.assert_called_once() + + +def test_document_processor_service_auth_adc(): + # If no credentials are provided, we should use ADC credentials. 
+ with mock.patch.object(auth, "default") as adc: + adc.return_value = (credentials.AnonymousCredentials(), None) + DocumentProcessorServiceClient() + adc.assert_called_once_with( + scopes=("https://www.googleapis.com/auth/cloud-platform",), + quota_project_id=None, + ) + + +def test_document_processor_service_transport_auth_adc(): + # If credentials and host are not provided, the transport class should use + # ADC credentials. + with mock.patch.object(auth, "default") as adc: + adc.return_value = (credentials.AnonymousCredentials(), None) + transports.DocumentProcessorServiceGrpcTransport( + host="squid.clam.whelk", quota_project_id="octopus" + ) + adc.assert_called_once_with( + scopes=("https://www.googleapis.com/auth/cloud-platform",), + quota_project_id="octopus", + ) + + +@pytest.mark.parametrize( + "transport_class", + [ + transports.DocumentProcessorServiceGrpcTransport, + transports.DocumentProcessorServiceGrpcAsyncIOTransport, + ], +) +def test_document_processor_service_grpc_transport_client_cert_source_for_mtls( + transport_class, +): + cred = credentials.AnonymousCredentials() + + # Check ssl_channel_credentials is used if provided. + with mock.patch.object(transport_class, "create_channel") as mock_create_channel: + mock_ssl_channel_creds = mock.Mock() + transport_class( + host="squid.clam.whelk", + credentials=cred, + ssl_channel_credentials=mock_ssl_channel_creds, + ) + mock_create_channel.assert_called_once_with( + "squid.clam.whelk:443", + credentials=cred, + credentials_file=None, + scopes=("https://www.googleapis.com/auth/cloud-platform",), + ssl_credentials=mock_ssl_channel_creds, + quota_project_id=None, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + + # Check if ssl_channel_credentials is not provided, then client_cert_source_for_mtls + # is used. 
+ with mock.patch.object(transport_class, "create_channel", return_value=mock.Mock()): + with mock.patch("grpc.ssl_channel_credentials") as mock_ssl_cred: + transport_class( + credentials=cred, + client_cert_source_for_mtls=client_cert_source_callback, + ) + expected_cert, expected_key = client_cert_source_callback() + mock_ssl_cred.assert_called_once_with( + certificate_chain=expected_cert, private_key=expected_key + ) + + +def test_document_processor_service_host_no_port(): + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), + client_options=client_options.ClientOptions( + api_endpoint="us-documentai.googleapis.com" + ), + ) + assert client.transport._host == "us-documentai.googleapis.com:443" + + +def test_document_processor_service_host_with_port(): + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), + client_options=client_options.ClientOptions( + api_endpoint="us-documentai.googleapis.com:8000" + ), + ) + assert client.transport._host == "us-documentai.googleapis.com:8000" + + +def test_document_processor_service_grpc_transport_channel(): + channel = grpc.secure_channel("http://localhost/", grpc.local_channel_credentials()) + + # Check that channel is used if provided. + transport = transports.DocumentProcessorServiceGrpcTransport( + host="squid.clam.whelk", channel=channel, + ) + assert transport.grpc_channel == channel + assert transport._host == "squid.clam.whelk:443" + assert transport._ssl_channel_credentials == None + + +def test_document_processor_service_grpc_asyncio_transport_channel(): + channel = aio.secure_channel("http://localhost/", grpc.local_channel_credentials()) + + # Check that channel is used if provided. 
+ transport = transports.DocumentProcessorServiceGrpcAsyncIOTransport( + host="squid.clam.whelk", channel=channel, + ) + assert transport.grpc_channel == channel + assert transport._host == "squid.clam.whelk:443" + assert transport._ssl_channel_credentials == None + + +# Remove this test when deprecated arguments (api_mtls_endpoint, client_cert_source) are +# removed from grpc/grpc_asyncio transport constructor. +@pytest.mark.parametrize( + "transport_class", + [ + transports.DocumentProcessorServiceGrpcTransport, + transports.DocumentProcessorServiceGrpcAsyncIOTransport, + ], +) +def test_document_processor_service_transport_channel_mtls_with_client_cert_source( + transport_class, +): + with mock.patch( + "grpc.ssl_channel_credentials", autospec=True + ) as grpc_ssl_channel_cred: + with mock.patch.object( + transport_class, "create_channel" + ) as grpc_create_channel: + mock_ssl_cred = mock.Mock() + grpc_ssl_channel_cred.return_value = mock_ssl_cred + + mock_grpc_channel = mock.Mock() + grpc_create_channel.return_value = mock_grpc_channel + + cred = credentials.AnonymousCredentials() + with pytest.warns(DeprecationWarning): + with mock.patch.object(auth, "default") as adc: + adc.return_value = (cred, None) + transport = transport_class( + host="squid.clam.whelk", + api_mtls_endpoint="mtls.squid.clam.whelk", + client_cert_source=client_cert_source_callback, + ) + adc.assert_called_once() + + grpc_ssl_channel_cred.assert_called_once_with( + certificate_chain=b"cert bytes", private_key=b"key bytes" + ) + grpc_create_channel.assert_called_once_with( + "mtls.squid.clam.whelk:443", + credentials=cred, + credentials_file=None, + scopes=("https://www.googleapis.com/auth/cloud-platform",), + ssl_credentials=mock_ssl_cred, + quota_project_id=None, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + assert transport.grpc_channel == mock_grpc_channel + assert transport._ssl_channel_credentials == mock_ssl_cred + + +# 
Remove this test when deprecated arguments (api_mtls_endpoint, client_cert_source) are +# removed from grpc/grpc_asyncio transport constructor. +@pytest.mark.parametrize( + "transport_class", + [ + transports.DocumentProcessorServiceGrpcTransport, + transports.DocumentProcessorServiceGrpcAsyncIOTransport, + ], +) +def test_document_processor_service_transport_channel_mtls_with_adc(transport_class): + mock_ssl_cred = mock.Mock() + with mock.patch.multiple( + "google.auth.transport.grpc.SslCredentials", + __init__=mock.Mock(return_value=None), + ssl_credentials=mock.PropertyMock(return_value=mock_ssl_cred), + ): + with mock.patch.object( + transport_class, "create_channel" + ) as grpc_create_channel: + mock_grpc_channel = mock.Mock() + grpc_create_channel.return_value = mock_grpc_channel + mock_cred = mock.Mock() + + with pytest.warns(DeprecationWarning): + transport = transport_class( + host="squid.clam.whelk", + credentials=mock_cred, + api_mtls_endpoint="mtls.squid.clam.whelk", + client_cert_source=None, + ) + + grpc_create_channel.assert_called_once_with( + "mtls.squid.clam.whelk:443", + credentials=mock_cred, + credentials_file=None, + scopes=("https://www.googleapis.com/auth/cloud-platform",), + ssl_credentials=mock_ssl_cred, + quota_project_id=None, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + assert transport.grpc_channel == mock_grpc_channel + + +def test_document_processor_service_grpc_lro_client(): + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), transport="grpc", + ) + transport = client.transport + + # Ensure that we have a api-core operations client. + assert isinstance(transport.operations_client, operations_v1.OperationsClient,) + + # Ensure that subsequent calls to the property send the exact same object. 
+ assert transport.operations_client is transport.operations_client + + +def test_document_processor_service_grpc_lro_async_client(): + client = DocumentProcessorServiceAsyncClient( + credentials=credentials.AnonymousCredentials(), transport="grpc_asyncio", + ) + transport = client.transport + + # Ensure that we have a api-core operations client. + assert isinstance(transport.operations_client, operations_v1.OperationsAsyncClient,) + + # Ensure that subsequent calls to the property send the exact same object. + assert transport.operations_client is transport.operations_client + + +def test_human_review_config_path(): + project = "squid" + location = "clam" + processor = "whelk" + + expected = "projects/{project}/locations/{location}/processors/{processor}/humanReviewConfig".format( + project=project, location=location, processor=processor, + ) + actual = DocumentProcessorServiceClient.human_review_config_path( + project, location, processor + ) + assert expected == actual + + +def test_parse_human_review_config_path(): + expected = { + "project": "octopus", + "location": "oyster", + "processor": "nudibranch", + } + path = DocumentProcessorServiceClient.human_review_config_path(**expected) + + # Check that the path construction is reversible. + actual = DocumentProcessorServiceClient.parse_human_review_config_path(path) + assert expected == actual + + +def test_processor_path(): + project = "cuttlefish" + location = "mussel" + processor = "winkle" + + expected = "projects/{project}/locations/{location}/processors/{processor}".format( + project=project, location=location, processor=processor, + ) + actual = DocumentProcessorServiceClient.processor_path(project, location, processor) + assert expected == actual + + +def test_parse_processor_path(): + expected = { + "project": "nautilus", + "location": "scallop", + "processor": "abalone", + } + path = DocumentProcessorServiceClient.processor_path(**expected) + + # Check that the path construction is reversible. 
+ actual = DocumentProcessorServiceClient.parse_processor_path(path) + assert expected == actual + + +def test_common_billing_account_path(): + billing_account = "squid" + + expected = "billingAccounts/{billing_account}".format( + billing_account=billing_account, + ) + actual = DocumentProcessorServiceClient.common_billing_account_path(billing_account) + assert expected == actual + + +def test_parse_common_billing_account_path(): + expected = { + "billing_account": "clam", + } + path = DocumentProcessorServiceClient.common_billing_account_path(**expected) + + # Check that the path construction is reversible. + actual = DocumentProcessorServiceClient.parse_common_billing_account_path(path) + assert expected == actual + + +def test_common_folder_path(): + folder = "whelk" + + expected = "folders/{folder}".format(folder=folder,) + actual = DocumentProcessorServiceClient.common_folder_path(folder) + assert expected == actual + + +def test_parse_common_folder_path(): + expected = { + "folder": "octopus", + } + path = DocumentProcessorServiceClient.common_folder_path(**expected) + + # Check that the path construction is reversible. + actual = DocumentProcessorServiceClient.parse_common_folder_path(path) + assert expected == actual + + +def test_common_organization_path(): + organization = "oyster" + + expected = "organizations/{organization}".format(organization=organization,) + actual = DocumentProcessorServiceClient.common_organization_path(organization) + assert expected == actual + + +def test_parse_common_organization_path(): + expected = { + "organization": "nudibranch", + } + path = DocumentProcessorServiceClient.common_organization_path(**expected) + + # Check that the path construction is reversible. 
+ actual = DocumentProcessorServiceClient.parse_common_organization_path(path) + assert expected == actual + + +def test_common_project_path(): + project = "cuttlefish" + + expected = "projects/{project}".format(project=project,) + actual = DocumentProcessorServiceClient.common_project_path(project) + assert expected == actual + + +def test_parse_common_project_path(): + expected = { + "project": "mussel", + } + path = DocumentProcessorServiceClient.common_project_path(**expected) + + # Check that the path construction is reversible. + actual = DocumentProcessorServiceClient.parse_common_project_path(path) + assert expected == actual + + +def test_common_location_path(): + project = "winkle" + location = "nautilus" + + expected = "projects/{project}/locations/{location}".format( + project=project, location=location, + ) + actual = DocumentProcessorServiceClient.common_location_path(project, location) + assert expected == actual + + +def test_parse_common_location_path(): + expected = { + "project": "scallop", + "location": "abalone", + } + path = DocumentProcessorServiceClient.common_location_path(**expected) + + # Check that the path construction is reversible. 
+ actual = DocumentProcessorServiceClient.parse_common_location_path(path) + assert expected == actual + + +def test_client_withDEFAULT_CLIENT_INFO(): + client_info = gapic_v1.client_info.ClientInfo() + + with mock.patch.object( + transports.DocumentProcessorServiceTransport, "_prep_wrapped_messages" + ) as prep: + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), client_info=client_info, + ) + prep.assert_called_once_with(client_info) + + with mock.patch.object( + transports.DocumentProcessorServiceTransport, "_prep_wrapped_messages" + ) as prep: + transport_class = DocumentProcessorServiceClient.get_transport_class() + transport = transport_class( + credentials=credentials.AnonymousCredentials(), client_info=client_info, + ) + prep.assert_called_once_with(client_info) diff --git a/tests/unit/gapic/documentai_v1beta2/__init__.py b/tests/unit/gapic/documentai_v1beta2/__init__.py index 8b137891..42ffdf2b 100644 --- a/tests/unit/gapic/documentai_v1beta2/__init__.py +++ b/tests/unit/gapic/documentai_v1beta2/__init__.py @@ -1 +1,16 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# diff --git a/tests/unit/gapic/documentai_v1beta2/test_document_understanding_service.py b/tests/unit/gapic/documentai_v1beta2/test_document_understanding_service.py index 290cb2f2..ffea17d6 100644 --- a/tests/unit/gapic/documentai_v1beta2/test_document_understanding_service.py +++ b/tests/unit/gapic/documentai_v1beta2/test_document_understanding_service.py @@ -101,7 +101,25 @@ def test__get_default_mtls_endpoint(): @pytest.mark.parametrize( "client_class", - [DocumentUnderstandingServiceClient, DocumentUnderstandingServiceAsyncClient], + [DocumentUnderstandingServiceClient, DocumentUnderstandingServiceAsyncClient,], +) +def test_document_understanding_service_client_from_service_account_info(client_class): + creds = credentials.AnonymousCredentials() + with mock.patch.object( + service_account.Credentials, "from_service_account_info" + ) as factory: + factory.return_value = creds + info = {"valid": True} + client = client_class.from_service_account_info(info) + assert client.transport._credentials == creds + assert isinstance(client, client_class) + + assert client.transport._host == "us-documentai.googleapis.com:443" + + +@pytest.mark.parametrize( + "client_class", + [DocumentUnderstandingServiceClient, DocumentUnderstandingServiceAsyncClient,], ) def test_document_understanding_service_client_from_service_account_file(client_class): creds = credentials.AnonymousCredentials() @@ -110,17 +128,22 @@ def test_document_understanding_service_client_from_service_account_file(client_ ) as factory: factory.return_value = creds client = client_class.from_service_account_file("dummy/file/path.json") - assert client._transport._credentials == creds + assert client.transport._credentials == creds + assert isinstance(client, client_class) client = client_class.from_service_account_json("dummy/file/path.json") - assert client._transport._credentials == creds + assert client.transport._credentials == creds + assert isinstance(client, client_class) - assert 
client._transport._host == "us-documentai.googleapis.com:443" + assert client.transport._host == "us-documentai.googleapis.com:443" def test_document_understanding_service_client_get_transport_class(): transport = DocumentUnderstandingServiceClient.get_transport_class() - assert transport == transports.DocumentUnderstandingServiceGrpcTransport + available_transports = [ + transports.DocumentUnderstandingServiceGrpcTransport, + ] + assert transport in available_transports transport = DocumentUnderstandingServiceClient.get_transport_class("grpc") assert transport == transports.DocumentUnderstandingServiceGrpcTransport @@ -179,7 +202,7 @@ def test_document_understanding_service_client_client_options( credentials_file=None, host="squid.clam.whelk", scopes=None, - ssl_channel_credentials=None, + client_cert_source_for_mtls=None, quota_project_id=None, client_info=transports.base.DEFAULT_CLIENT_INFO, ) @@ -195,7 +218,7 @@ def test_document_understanding_service_client_client_options( credentials_file=None, host=client.DEFAULT_ENDPOINT, scopes=None, - ssl_channel_credentials=None, + client_cert_source_for_mtls=None, quota_project_id=None, client_info=transports.base.DEFAULT_CLIENT_INFO, ) @@ -211,7 +234,7 @@ def test_document_understanding_service_client_client_options( credentials_file=None, host=client.DEFAULT_MTLS_ENDPOINT, scopes=None, - ssl_channel_credentials=None, + client_cert_source_for_mtls=None, quota_project_id=None, client_info=transports.base.DEFAULT_CLIENT_INFO, ) @@ -239,7 +262,7 @@ def test_document_understanding_service_client_client_options( credentials_file=None, host=client.DEFAULT_ENDPOINT, scopes=None, - ssl_channel_credentials=None, + client_cert_source_for_mtls=None, quota_project_id="octopus", client_info=transports.base.DEFAULT_CLIENT_INFO, ) @@ -300,29 +323,25 @@ def test_document_understanding_service_client_mtls_env_auto( client_cert_source=client_cert_source_callback ) with mock.patch.object(transport_class, "__init__") as patched: - 
ssl_channel_creds = mock.Mock() - with mock.patch( - "grpc.ssl_channel_credentials", return_value=ssl_channel_creds - ): - patched.return_value = None - client = client_class(client_options=options) + patched.return_value = None + client = client_class(client_options=options) - if use_client_cert_env == "false": - expected_ssl_channel_creds = None - expected_host = client.DEFAULT_ENDPOINT - else: - expected_ssl_channel_creds = ssl_channel_creds - expected_host = client.DEFAULT_MTLS_ENDPOINT + if use_client_cert_env == "false": + expected_client_cert_source = None + expected_host = client.DEFAULT_ENDPOINT + else: + expected_client_cert_source = client_cert_source_callback + expected_host = client.DEFAULT_MTLS_ENDPOINT - patched.assert_called_once_with( - credentials=None, - credentials_file=None, - host=expected_host, - scopes=None, - ssl_channel_credentials=expected_ssl_channel_creds, - quota_project_id=None, - client_info=transports.base.DEFAULT_CLIENT_INFO, - ) + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=expected_host, + scopes=None, + client_cert_source_for_mtls=expected_client_cert_source, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) # Check the case ADC client cert is provided. Whether client cert is used depends on # GOOGLE_API_USE_CLIENT_CERTIFICATE value. 
@@ -331,66 +350,53 @@ def test_document_understanding_service_client_mtls_env_auto( ): with mock.patch.object(transport_class, "__init__") as patched: with mock.patch( - "google.auth.transport.grpc.SslCredentials.__init__", return_value=None + "google.auth.transport.mtls.has_default_client_cert_source", + return_value=True, ): with mock.patch( - "google.auth.transport.grpc.SslCredentials.is_mtls", - new_callable=mock.PropertyMock, - ) as is_mtls_mock: - with mock.patch( - "google.auth.transport.grpc.SslCredentials.ssl_credentials", - new_callable=mock.PropertyMock, - ) as ssl_credentials_mock: - if use_client_cert_env == "false": - is_mtls_mock.return_value = False - ssl_credentials_mock.return_value = None - expected_host = client.DEFAULT_ENDPOINT - expected_ssl_channel_creds = None - else: - is_mtls_mock.return_value = True - ssl_credentials_mock.return_value = mock.Mock() - expected_host = client.DEFAULT_MTLS_ENDPOINT - expected_ssl_channel_creds = ( - ssl_credentials_mock.return_value - ) - - patched.return_value = None - client = client_class() - patched.assert_called_once_with( - credentials=None, - credentials_file=None, - host=expected_host, - scopes=None, - ssl_channel_credentials=expected_ssl_channel_creds, - quota_project_id=None, - client_info=transports.base.DEFAULT_CLIENT_INFO, - ) + "google.auth.transport.mtls.default_client_cert_source", + return_value=client_cert_source_callback, + ): + if use_client_cert_env == "false": + expected_host = client.DEFAULT_ENDPOINT + expected_client_cert_source = None + else: + expected_host = client.DEFAULT_MTLS_ENDPOINT + expected_client_cert_source = client_cert_source_callback - # Check the case client_cert_source and ADC client cert are not provided. 
- with mock.patch.dict( - os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": use_client_cert_env} - ): - with mock.patch.object(transport_class, "__init__") as patched: - with mock.patch( - "google.auth.transport.grpc.SslCredentials.__init__", return_value=None - ): - with mock.patch( - "google.auth.transport.grpc.SslCredentials.is_mtls", - new_callable=mock.PropertyMock, - ) as is_mtls_mock: - is_mtls_mock.return_value = False patched.return_value = None client = client_class() patched.assert_called_once_with( credentials=None, credentials_file=None, - host=client.DEFAULT_ENDPOINT, + host=expected_host, scopes=None, - ssl_channel_credentials=None, + client_cert_source_for_mtls=expected_client_cert_source, quota_project_id=None, client_info=transports.base.DEFAULT_CLIENT_INFO, ) + # Check the case client_cert_source and ADC client cert are not provided. + with mock.patch.dict( + os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": use_client_cert_env} + ): + with mock.patch.object(transport_class, "__init__") as patched: + with mock.patch( + "google.auth.transport.mtls.has_default_client_cert_source", + return_value=False, + ): + patched.return_value = None + client = client_class() + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_ENDPOINT, + scopes=None, + client_cert_source_for_mtls=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + @pytest.mark.parametrize( "client_class,transport_class,transport_name", @@ -420,7 +426,7 @@ def test_document_understanding_service_client_client_options_scopes( credentials_file=None, host=client.DEFAULT_ENDPOINT, scopes=["1", "2"], - ssl_channel_credentials=None, + client_cert_source_for_mtls=None, quota_project_id=None, client_info=transports.base.DEFAULT_CLIENT_INFO, ) @@ -454,7 +460,7 @@ def test_document_understanding_service_client_client_options_credentials_file( credentials_file="credentials.json", host=client.DEFAULT_ENDPOINT, scopes=None, 
- ssl_channel_credentials=None, + client_cert_source_for_mtls=None, quota_project_id=None, client_info=transports.base.DEFAULT_CLIENT_INFO, ) @@ -473,7 +479,7 @@ def test_document_understanding_service_client_client_options_from_dict(): credentials_file=None, host="squid.clam.whelk", scopes=None, - ssl_channel_credentials=None, + client_cert_source_for_mtls=None, quota_project_id=None, client_info=transports.base.DEFAULT_CLIENT_INFO, ) @@ -493,7 +499,7 @@ def test_batch_process_documents( # Mock the actual call within the gRPC stub, and fake the request. with mock.patch.object( - type(client._transport.batch_process_documents), "__call__" + type(client.transport.batch_process_documents), "__call__" ) as call: # Designate an appropriate return value for the call. call.return_value = operations_pb2.Operation(name="operations/spam") @@ -514,19 +520,40 @@ def test_batch_process_documents_from_dict(): test_batch_process_documents(request_type=dict) +def test_batch_process_documents_empty_call(): + # This test is a coverage failsafe to make sure that totally empty calls, + # i.e. request == None and no flattened fields passed, work. + client = DocumentUnderstandingServiceClient( + credentials=credentials.AnonymousCredentials(), transport="grpc", + ) + + # Mock the actual call within the gRPC stub, and fake the request. 
+ with mock.patch.object( + type(client.transport.batch_process_documents), "__call__" + ) as call: + client.batch_process_documents() + call.assert_called() + _, args, _ = call.mock_calls[0] + + assert args[0] == document_understanding.BatchProcessDocumentsRequest() + + @pytest.mark.asyncio -async def test_batch_process_documents_async(transport: str = "grpc_asyncio"): +async def test_batch_process_documents_async( + transport: str = "grpc_asyncio", + request_type=document_understanding.BatchProcessDocumentsRequest, +): client = DocumentUnderstandingServiceAsyncClient( credentials=credentials.AnonymousCredentials(), transport=transport, ) # Everything is optional in proto3 as far as the runtime is concerned, # and we are mocking out the actual API, so just send an empty request. - request = document_understanding.BatchProcessDocumentsRequest() + request = request_type() # Mock the actual call within the gRPC stub, and fake the request. with mock.patch.object( - type(client._client._transport.batch_process_documents), "__call__" + type(client.transport.batch_process_documents), "__call__" ) as call: # Designate an appropriate return value for the call. call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( @@ -539,12 +566,17 @@ async def test_batch_process_documents_async(transport: str = "grpc_asyncio"): assert len(call.mock_calls) _, args, _ = call.mock_calls[0] - assert args[0] == request + assert args[0] == document_understanding.BatchProcessDocumentsRequest() # Establish that the response is the type that we expect. 
assert isinstance(response, future.Future) +@pytest.mark.asyncio +async def test_batch_process_documents_async_from_dict(): + await test_batch_process_documents_async(request_type=dict) + + def test_batch_process_documents_field_headers(): client = DocumentUnderstandingServiceClient( credentials=credentials.AnonymousCredentials(), @@ -557,7 +589,7 @@ def test_batch_process_documents_field_headers(): # Mock the actual call within the gRPC stub, and fake the request. with mock.patch.object( - type(client._transport.batch_process_documents), "__call__" + type(client.transport.batch_process_documents), "__call__" ) as call: call.return_value = operations_pb2.Operation(name="operations/op") @@ -586,7 +618,7 @@ async def test_batch_process_documents_field_headers_async(): # Mock the actual call within the gRPC stub, and fake the request. with mock.patch.object( - type(client._client._transport.batch_process_documents), "__call__" + type(client.transport.batch_process_documents), "__call__" ) as call: call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( operations_pb2.Operation(name="operations/op") @@ -611,7 +643,7 @@ def test_batch_process_documents_flattened(): # Mock the actual call within the gRPC stub, and fake the request. with mock.patch.object( - type(client._transport.batch_process_documents), "__call__" + type(client.transport.batch_process_documents), "__call__" ) as call: # Designate an appropriate return value for the call. call.return_value = operations_pb2.Operation(name="operations/op") @@ -658,7 +690,7 @@ async def test_batch_process_documents_flattened_async(): # Mock the actual call within the gRPC stub, and fake the request. with mock.patch.object( - type(client._client._transport.batch_process_documents), "__call__" + type(client.transport.batch_process_documents), "__call__" ) as call: # Designate an appropriate return value for the call. 
call.return_value = operations_pb2.Operation(name="operations/op") @@ -713,9 +745,7 @@ def test_process_document( request = request_type() # Mock the actual call within the gRPC stub, and fake the request. - with mock.patch.object( - type(client._transport.process_document), "__call__" - ) as call: + with mock.patch.object(type(client.transport.process_document), "__call__") as call: # Designate an appropriate return value for the call. call.return_value = document.Document( mime_type="mime_type_value", text="text_value", uri="uri_value", @@ -730,6 +760,7 @@ def test_process_document( assert args[0] == document_understanding.ProcessDocumentRequest() # Establish that the response is the type that we expect. + assert isinstance(response, document.Document) assert response.mime_type == "mime_type_value" @@ -741,20 +772,37 @@ def test_process_document_from_dict(): test_process_document(request_type=dict) +def test_process_document_empty_call(): + # This test is a coverage failsafe to make sure that totally empty calls, + # i.e. request == None and no flattened fields passed, work. + client = DocumentUnderstandingServiceClient( + credentials=credentials.AnonymousCredentials(), transport="grpc", + ) + + # Mock the actual call within the gRPC stub, and fake the request. 
+ with mock.patch.object(type(client.transport.process_document), "__call__") as call: + client.process_document() + call.assert_called() + _, args, _ = call.mock_calls[0] + + assert args[0] == document_understanding.ProcessDocumentRequest() + + @pytest.mark.asyncio -async def test_process_document_async(transport: str = "grpc_asyncio"): +async def test_process_document_async( + transport: str = "grpc_asyncio", + request_type=document_understanding.ProcessDocumentRequest, +): client = DocumentUnderstandingServiceAsyncClient( credentials=credentials.AnonymousCredentials(), transport=transport, ) # Everything is optional in proto3 as far as the runtime is concerned, # and we are mocking out the actual API, so just send an empty request. - request = document_understanding.ProcessDocumentRequest() + request = request_type() # Mock the actual call within the gRPC stub, and fake the request. - with mock.patch.object( - type(client._client._transport.process_document), "__call__" - ) as call: + with mock.patch.object(type(client.transport.process_document), "__call__") as call: # Designate an appropriate return value for the call. call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( document.Document(mime_type="mime_type_value", text="text_value",) @@ -766,7 +814,7 @@ async def test_process_document_async(transport: str = "grpc_asyncio"): assert len(call.mock_calls) _, args, _ = call.mock_calls[0] - assert args[0] == request + assert args[0] == document_understanding.ProcessDocumentRequest() # Establish that the response is the type that we expect. 
assert isinstance(response, document.Document) @@ -776,6 +824,11 @@ async def test_process_document_async(transport: str = "grpc_asyncio"): assert response.text == "text_value" +@pytest.mark.asyncio +async def test_process_document_async_from_dict(): + await test_process_document_async(request_type=dict) + + def test_process_document_field_headers(): client = DocumentUnderstandingServiceClient( credentials=credentials.AnonymousCredentials(), @@ -787,9 +840,7 @@ def test_process_document_field_headers(): request.parent = "parent/value" # Mock the actual call within the gRPC stub, and fake the request. - with mock.patch.object( - type(client._transport.process_document), "__call__" - ) as call: + with mock.patch.object(type(client.transport.process_document), "__call__") as call: call.return_value = document.Document() client.process_document(request) @@ -816,9 +867,7 @@ async def test_process_document_field_headers_async(): request.parent = "parent/value" # Mock the actual call within the gRPC stub, and fake the request. 
- with mock.patch.object( - type(client._client._transport.process_document), "__call__" - ) as call: + with mock.patch.object(type(client.transport.process_document), "__call__") as call: call.return_value = grpc_helpers_async.FakeUnaryUnaryCall(document.Document()) await client.process_document(request) @@ -869,7 +918,7 @@ def test_transport_instance(): credentials=credentials.AnonymousCredentials(), ) client = DocumentUnderstandingServiceClient(transport=transport) - assert client._transport is transport + assert client.transport is transport def test_transport_get_channel(): @@ -908,7 +957,7 @@ def test_transport_grpc_default(): credentials=credentials.AnonymousCredentials(), ) assert isinstance( - client._transport, transports.DocumentUnderstandingServiceGrpcTransport, + client.transport, transports.DocumentUnderstandingServiceGrpcTransport, ) @@ -1002,6 +1051,53 @@ def test_document_understanding_service_transport_auth_adc(): ) +@pytest.mark.parametrize( + "transport_class", + [ + transports.DocumentUnderstandingServiceGrpcTransport, + transports.DocumentUnderstandingServiceGrpcAsyncIOTransport, + ], +) +def test_document_understanding_service_grpc_transport_client_cert_source_for_mtls( + transport_class, +): + cred = credentials.AnonymousCredentials() + + # Check ssl_channel_credentials is used if provided. 
+ with mock.patch.object(transport_class, "create_channel") as mock_create_channel: + mock_ssl_channel_creds = mock.Mock() + transport_class( + host="squid.clam.whelk", + credentials=cred, + ssl_channel_credentials=mock_ssl_channel_creds, + ) + mock_create_channel.assert_called_once_with( + "squid.clam.whelk:443", + credentials=cred, + credentials_file=None, + scopes=("https://www.googleapis.com/auth/cloud-platform",), + ssl_credentials=mock_ssl_channel_creds, + quota_project_id=None, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + + # Check if ssl_channel_credentials is not provided, then client_cert_source_for_mtls + # is used. + with mock.patch.object(transport_class, "create_channel", return_value=mock.Mock()): + with mock.patch("grpc.ssl_channel_credentials") as mock_ssl_cred: + transport_class( + credentials=cred, + client_cert_source_for_mtls=client_cert_source_callback, + ) + expected_cert, expected_key = client_cert_source_callback() + mock_ssl_cred.assert_called_once_with( + certificate_chain=expected_cert, private_key=expected_key + ) + + def test_document_understanding_service_host_no_port(): client = DocumentUnderstandingServiceClient( credentials=credentials.AnonymousCredentials(), @@ -1009,7 +1105,7 @@ def test_document_understanding_service_host_no_port(): api_endpoint="us-documentai.googleapis.com" ), ) - assert client._transport._host == "us-documentai.googleapis.com:443" + assert client.transport._host == "us-documentai.googleapis.com:443" def test_document_understanding_service_host_with_port(): @@ -1019,11 +1115,11 @@ def test_document_understanding_service_host_with_port(): api_endpoint="us-documentai.googleapis.com:8000" ), ) - assert client._transport._host == "us-documentai.googleapis.com:8000" + assert client.transport._host == "us-documentai.googleapis.com:8000" def test_document_understanding_service_grpc_transport_channel(): - channel = grpc.insecure_channel("http://localhost/") 
+ channel = grpc.secure_channel("http://localhost/", grpc.local_channel_credentials()) # Check that channel is used if provided. transport = transports.DocumentUnderstandingServiceGrpcTransport( @@ -1031,10 +1127,11 @@ def test_document_understanding_service_grpc_transport_channel(): ) assert transport.grpc_channel == channel assert transport._host == "squid.clam.whelk:443" + assert transport._ssl_channel_credentials == None def test_document_understanding_service_grpc_asyncio_transport_channel(): - channel = aio.insecure_channel("http://localhost/") + channel = aio.secure_channel("http://localhost/", grpc.local_channel_credentials()) # Check that channel is used if provided. transport = transports.DocumentUnderstandingServiceGrpcAsyncIOTransport( @@ -1042,8 +1139,11 @@ def test_document_understanding_service_grpc_asyncio_transport_channel(): ) assert transport.grpc_channel == channel assert transport._host == "squid.clam.whelk:443" + assert transport._ssl_channel_credentials == None +# Remove this test when deprecated arguments (api_mtls_endpoint, client_cert_source) are +# removed from grpc/grpc_asyncio transport constructor. 
@pytest.mark.parametrize( "transport_class", [ @@ -1058,7 +1158,7 @@ def test_document_understanding_service_transport_channel_mtls_with_client_cert_ "grpc.ssl_channel_credentials", autospec=True ) as grpc_ssl_channel_cred: with mock.patch.object( - transport_class, "create_channel", autospec=True + transport_class, "create_channel" ) as grpc_create_channel: mock_ssl_cred = mock.Mock() grpc_ssl_channel_cred.return_value = mock_ssl_cred @@ -1087,10 +1187,17 @@ def test_document_understanding_service_transport_channel_mtls_with_client_cert_ scopes=("https://www.googleapis.com/auth/cloud-platform",), ssl_credentials=mock_ssl_cred, quota_project_id=None, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], ) assert transport.grpc_channel == mock_grpc_channel + assert transport._ssl_channel_credentials == mock_ssl_cred +# Remove this test when deprecated arguments (api_mtls_endpoint, client_cert_source) are +# removed from grpc/grpc_asyncio transport constructor. 
@pytest.mark.parametrize( "transport_class", [ @@ -1108,7 +1215,7 @@ def test_document_understanding_service_transport_channel_mtls_with_adc( ssl_credentials=mock.PropertyMock(return_value=mock_ssl_cred), ): with mock.patch.object( - transport_class, "create_channel", autospec=True + transport_class, "create_channel" ) as grpc_create_channel: mock_grpc_channel = mock.Mock() grpc_create_channel.return_value = mock_grpc_channel @@ -1129,6 +1236,10 @@ def test_document_understanding_service_transport_channel_mtls_with_adc( scopes=("https://www.googleapis.com/auth/cloud-platform",), ssl_credentials=mock_ssl_cred, quota_project_id=None, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], ) assert transport.grpc_channel == mock_grpc_channel @@ -1137,7 +1248,7 @@ def test_document_understanding_service_grpc_lro_client(): client = DocumentUnderstandingServiceClient( credentials=credentials.AnonymousCredentials(), transport="grpc", ) - transport = client._transport + transport = client.transport # Ensure that we have a api-core operations client. assert isinstance(transport.operations_client, operations_v1.OperationsClient,) @@ -1150,7 +1261,7 @@ def test_document_understanding_service_grpc_lro_async_client(): client = DocumentUnderstandingServiceAsyncClient( credentials=credentials.AnonymousCredentials(), transport="grpc_asyncio", ) - transport = client._client._transport + transport = client.transport # Ensure that we have a api-core operations client. 
assert isinstance(transport.operations_client, operations_v1.OperationsAsyncClient,) @@ -1159,6 +1270,109 @@ def test_document_understanding_service_grpc_lro_async_client(): assert transport.operations_client is transport.operations_client +def test_common_billing_account_path(): + billing_account = "squid" + + expected = "billingAccounts/{billing_account}".format( + billing_account=billing_account, + ) + actual = DocumentUnderstandingServiceClient.common_billing_account_path( + billing_account + ) + assert expected == actual + + +def test_parse_common_billing_account_path(): + expected = { + "billing_account": "clam", + } + path = DocumentUnderstandingServiceClient.common_billing_account_path(**expected) + + # Check that the path construction is reversible. + actual = DocumentUnderstandingServiceClient.parse_common_billing_account_path(path) + assert expected == actual + + +def test_common_folder_path(): + folder = "whelk" + + expected = "folders/{folder}".format(folder=folder,) + actual = DocumentUnderstandingServiceClient.common_folder_path(folder) + assert expected == actual + + +def test_parse_common_folder_path(): + expected = { + "folder": "octopus", + } + path = DocumentUnderstandingServiceClient.common_folder_path(**expected) + + # Check that the path construction is reversible. + actual = DocumentUnderstandingServiceClient.parse_common_folder_path(path) + assert expected == actual + + +def test_common_organization_path(): + organization = "oyster" + + expected = "organizations/{organization}".format(organization=organization,) + actual = DocumentUnderstandingServiceClient.common_organization_path(organization) + assert expected == actual + + +def test_parse_common_organization_path(): + expected = { + "organization": "nudibranch", + } + path = DocumentUnderstandingServiceClient.common_organization_path(**expected) + + # Check that the path construction is reversible. 
+ actual = DocumentUnderstandingServiceClient.parse_common_organization_path(path) + assert expected == actual + + +def test_common_project_path(): + project = "cuttlefish" + + expected = "projects/{project}".format(project=project,) + actual = DocumentUnderstandingServiceClient.common_project_path(project) + assert expected == actual + + +def test_parse_common_project_path(): + expected = { + "project": "mussel", + } + path = DocumentUnderstandingServiceClient.common_project_path(**expected) + + # Check that the path construction is reversible. + actual = DocumentUnderstandingServiceClient.parse_common_project_path(path) + assert expected == actual + + +def test_common_location_path(): + project = "winkle" + location = "nautilus" + + expected = "projects/{project}/locations/{location}".format( + project=project, location=location, + ) + actual = DocumentUnderstandingServiceClient.common_location_path(project, location) + assert expected == actual + + +def test_parse_common_location_path(): + expected = { + "project": "scallop", + "location": "abalone", + } + path = DocumentUnderstandingServiceClient.common_location_path(**expected) + + # Check that the path construction is reversible. + actual = DocumentUnderstandingServiceClient.parse_common_location_path(path) + assert expected == actual + + def test_client_withDEFAULT_CLIENT_INFO(): client_info = gapic_v1.client_info.ClientInfo() diff --git a/tests/unit/gapic/documentai_v1beta3/__init__.py b/tests/unit/gapic/documentai_v1beta3/__init__.py index 8b137891..42ffdf2b 100644 --- a/tests/unit/gapic/documentai_v1beta3/__init__.py +++ b/tests/unit/gapic/documentai_v1beta3/__init__.py @@ -1 +1,16 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/tests/unit/gapic/documentai_v1beta3/test_document_processor_service.py b/tests/unit/gapic/documentai_v1beta3/test_document_processor_service.py index 4b17a5ed..7179689c 100644 --- a/tests/unit/gapic/documentai_v1beta3/test_document_processor_service.py +++ b/tests/unit/gapic/documentai_v1beta3/test_document_processor_service.py @@ -45,6 +45,7 @@ transports, ) from google.cloud.documentai_v1beta3.types import document +from google.cloud.documentai_v1beta3.types import document_io from google.cloud.documentai_v1beta3.types import document_processor_service from google.cloud.documentai_v1beta3.types import geometry from google.longrunning import operations_pb2 @@ -108,7 +109,25 @@ def test__get_default_mtls_endpoint(): @pytest.mark.parametrize( "client_class", - [DocumentProcessorServiceClient, DocumentProcessorServiceAsyncClient], + [DocumentProcessorServiceClient, DocumentProcessorServiceAsyncClient,], +) +def test_document_processor_service_client_from_service_account_info(client_class): + creds = credentials.AnonymousCredentials() + with mock.patch.object( + service_account.Credentials, "from_service_account_info" + ) as factory: + factory.return_value = creds + info = {"valid": True} + client = client_class.from_service_account_info(info) + assert client.transport._credentials == creds + assert isinstance(client, client_class) + + assert client.transport._host == "us-documentai.googleapis.com:443" + + +@pytest.mark.parametrize( + "client_class", + [DocumentProcessorServiceClient, DocumentProcessorServiceAsyncClient,], ) def 
test_document_processor_service_client_from_service_account_file(client_class): creds = credentials.AnonymousCredentials() @@ -117,17 +136,22 @@ def test_document_processor_service_client_from_service_account_file(client_clas ) as factory: factory.return_value = creds client = client_class.from_service_account_file("dummy/file/path.json") - assert client._transport._credentials == creds + assert client.transport._credentials == creds + assert isinstance(client, client_class) client = client_class.from_service_account_json("dummy/file/path.json") - assert client._transport._credentials == creds + assert client.transport._credentials == creds + assert isinstance(client, client_class) - assert client._transport._host == "us-documentai.googleapis.com:443" + assert client.transport._host == "us-documentai.googleapis.com:443" def test_document_processor_service_client_get_transport_class(): transport = DocumentProcessorServiceClient.get_transport_class() - assert transport == transports.DocumentProcessorServiceGrpcTransport + available_transports = [ + transports.DocumentProcessorServiceGrpcTransport, + ] + assert transport in available_transports transport = DocumentProcessorServiceClient.get_transport_class("grpc") assert transport == transports.DocumentProcessorServiceGrpcTransport @@ -186,7 +210,7 @@ def test_document_processor_service_client_client_options( credentials_file=None, host="squid.clam.whelk", scopes=None, - ssl_channel_credentials=None, + client_cert_source_for_mtls=None, quota_project_id=None, client_info=transports.base.DEFAULT_CLIENT_INFO, ) @@ -202,7 +226,7 @@ def test_document_processor_service_client_client_options( credentials_file=None, host=client.DEFAULT_ENDPOINT, scopes=None, - ssl_channel_credentials=None, + client_cert_source_for_mtls=None, quota_project_id=None, client_info=transports.base.DEFAULT_CLIENT_INFO, ) @@ -218,7 +242,7 @@ def test_document_processor_service_client_client_options( credentials_file=None, 
host=client.DEFAULT_MTLS_ENDPOINT, scopes=None, - ssl_channel_credentials=None, + client_cert_source_for_mtls=None, quota_project_id=None, client_info=transports.base.DEFAULT_CLIENT_INFO, ) @@ -246,7 +270,7 @@ def test_document_processor_service_client_client_options( credentials_file=None, host=client.DEFAULT_ENDPOINT, scopes=None, - ssl_channel_credentials=None, + client_cert_source_for_mtls=None, quota_project_id="octopus", client_info=transports.base.DEFAULT_CLIENT_INFO, ) @@ -307,29 +331,25 @@ def test_document_processor_service_client_mtls_env_auto( client_cert_source=client_cert_source_callback ) with mock.patch.object(transport_class, "__init__") as patched: - ssl_channel_creds = mock.Mock() - with mock.patch( - "grpc.ssl_channel_credentials", return_value=ssl_channel_creds - ): - patched.return_value = None - client = client_class(client_options=options) + patched.return_value = None + client = client_class(client_options=options) - if use_client_cert_env == "false": - expected_ssl_channel_creds = None - expected_host = client.DEFAULT_ENDPOINT - else: - expected_ssl_channel_creds = ssl_channel_creds - expected_host = client.DEFAULT_MTLS_ENDPOINT + if use_client_cert_env == "false": + expected_client_cert_source = None + expected_host = client.DEFAULT_ENDPOINT + else: + expected_client_cert_source = client_cert_source_callback + expected_host = client.DEFAULT_MTLS_ENDPOINT - patched.assert_called_once_with( - credentials=None, - credentials_file=None, - host=expected_host, - scopes=None, - ssl_channel_credentials=expected_ssl_channel_creds, - quota_project_id=None, - client_info=transports.base.DEFAULT_CLIENT_INFO, - ) + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=expected_host, + scopes=None, + client_cert_source_for_mtls=expected_client_cert_source, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) # Check the case ADC client cert is provided. 
Whether client cert is used depends on # GOOGLE_API_USE_CLIENT_CERTIFICATE value. @@ -338,66 +358,53 @@ def test_document_processor_service_client_mtls_env_auto( ): with mock.patch.object(transport_class, "__init__") as patched: with mock.patch( - "google.auth.transport.grpc.SslCredentials.__init__", return_value=None + "google.auth.transport.mtls.has_default_client_cert_source", + return_value=True, ): with mock.patch( - "google.auth.transport.grpc.SslCredentials.is_mtls", - new_callable=mock.PropertyMock, - ) as is_mtls_mock: - with mock.patch( - "google.auth.transport.grpc.SslCredentials.ssl_credentials", - new_callable=mock.PropertyMock, - ) as ssl_credentials_mock: - if use_client_cert_env == "false": - is_mtls_mock.return_value = False - ssl_credentials_mock.return_value = None - expected_host = client.DEFAULT_ENDPOINT - expected_ssl_channel_creds = None - else: - is_mtls_mock.return_value = True - ssl_credentials_mock.return_value = mock.Mock() - expected_host = client.DEFAULT_MTLS_ENDPOINT - expected_ssl_channel_creds = ( - ssl_credentials_mock.return_value - ) - - patched.return_value = None - client = client_class() - patched.assert_called_once_with( - credentials=None, - credentials_file=None, - host=expected_host, - scopes=None, - ssl_channel_credentials=expected_ssl_channel_creds, - quota_project_id=None, - client_info=transports.base.DEFAULT_CLIENT_INFO, - ) + "google.auth.transport.mtls.default_client_cert_source", + return_value=client_cert_source_callback, + ): + if use_client_cert_env == "false": + expected_host = client.DEFAULT_ENDPOINT + expected_client_cert_source = None + else: + expected_host = client.DEFAULT_MTLS_ENDPOINT + expected_client_cert_source = client_cert_source_callback - # Check the case client_cert_source and ADC client cert are not provided. 
- with mock.patch.dict( - os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": use_client_cert_env} - ): - with mock.patch.object(transport_class, "__init__") as patched: - with mock.patch( - "google.auth.transport.grpc.SslCredentials.__init__", return_value=None - ): - with mock.patch( - "google.auth.transport.grpc.SslCredentials.is_mtls", - new_callable=mock.PropertyMock, - ) as is_mtls_mock: - is_mtls_mock.return_value = False patched.return_value = None client = client_class() patched.assert_called_once_with( credentials=None, credentials_file=None, - host=client.DEFAULT_ENDPOINT, + host=expected_host, scopes=None, - ssl_channel_credentials=None, + client_cert_source_for_mtls=expected_client_cert_source, quota_project_id=None, client_info=transports.base.DEFAULT_CLIENT_INFO, ) + # Check the case client_cert_source and ADC client cert are not provided. + with mock.patch.dict( + os.environ, {"GOOGLE_API_USE_CLIENT_CERTIFICATE": use_client_cert_env} + ): + with mock.patch.object(transport_class, "__init__") as patched: + with mock.patch( + "google.auth.transport.mtls.has_default_client_cert_source", + return_value=False, + ): + patched.return_value = None + client = client_class() + patched.assert_called_once_with( + credentials=None, + credentials_file=None, + host=client.DEFAULT_ENDPOINT, + scopes=None, + client_cert_source_for_mtls=None, + quota_project_id=None, + client_info=transports.base.DEFAULT_CLIENT_INFO, + ) + @pytest.mark.parametrize( "client_class,transport_class,transport_name", @@ -427,7 +434,7 @@ def test_document_processor_service_client_client_options_scopes( credentials_file=None, host=client.DEFAULT_ENDPOINT, scopes=["1", "2"], - ssl_channel_credentials=None, + client_cert_source_for_mtls=None, quota_project_id=None, client_info=transports.base.DEFAULT_CLIENT_INFO, ) @@ -461,7 +468,7 @@ def test_document_processor_service_client_client_options_credentials_file( credentials_file="credentials.json", host=client.DEFAULT_ENDPOINT, scopes=None, - 
ssl_channel_credentials=None, + client_cert_source_for_mtls=None, quota_project_id=None, client_info=transports.base.DEFAULT_CLIENT_INFO, ) @@ -480,7 +487,7 @@ def test_document_processor_service_client_client_options_from_dict(): credentials_file=None, host="squid.clam.whelk", scopes=None, - ssl_channel_credentials=None, + client_cert_source_for_mtls=None, quota_project_id=None, client_info=transports.base.DEFAULT_CLIENT_INFO, ) @@ -498,9 +505,7 @@ def test_process_document( request = request_type() # Mock the actual call within the gRPC stub, and fake the request. - with mock.patch.object( - type(client._transport.process_document), "__call__" - ) as call: + with mock.patch.object(type(client.transport.process_document), "__call__") as call: # Designate an appropriate return value for the call. call.return_value = document_processor_service.ProcessResponse( human_review_operation="human_review_operation_value", @@ -515,6 +520,7 @@ def test_process_document( assert args[0] == document_processor_service.ProcessRequest() # Establish that the response is the type that we expect. + assert isinstance(response, document_processor_service.ProcessResponse) assert response.human_review_operation == "human_review_operation_value" @@ -524,20 +530,37 @@ def test_process_document_from_dict(): test_process_document(request_type=dict) +def test_process_document_empty_call(): + # This test is a coverage failsafe to make sure that totally empty calls, + # i.e. request == None and no flattened fields passed, work. + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), transport="grpc", + ) + + # Mock the actual call within the gRPC stub, and fake the request. 
+ with mock.patch.object(type(client.transport.process_document), "__call__") as call: + client.process_document() + call.assert_called() + _, args, _ = call.mock_calls[0] + + assert args[0] == document_processor_service.ProcessRequest() + + @pytest.mark.asyncio -async def test_process_document_async(transport: str = "grpc_asyncio"): +async def test_process_document_async( + transport: str = "grpc_asyncio", + request_type=document_processor_service.ProcessRequest, +): client = DocumentProcessorServiceAsyncClient( credentials=credentials.AnonymousCredentials(), transport=transport, ) # Everything is optional in proto3 as far as the runtime is concerned, # and we are mocking out the actual API, so just send an empty request. - request = document_processor_service.ProcessRequest() + request = request_type() # Mock the actual call within the gRPC stub, and fake the request. - with mock.patch.object( - type(client._client._transport.process_document), "__call__" - ) as call: + with mock.patch.object(type(client.transport.process_document), "__call__") as call: # Designate an appropriate return value for the call. call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( document_processor_service.ProcessResponse( @@ -551,7 +574,7 @@ async def test_process_document_async(transport: str = "grpc_asyncio"): assert len(call.mock_calls) _, args, _ = call.mock_calls[0] - assert args[0] == request + assert args[0] == document_processor_service.ProcessRequest() # Establish that the response is the type that we expect. 
assert isinstance(response, document_processor_service.ProcessResponse) @@ -559,6 +582,11 @@ async def test_process_document_async(transport: str = "grpc_asyncio"): assert response.human_review_operation == "human_review_operation_value" +@pytest.mark.asyncio +async def test_process_document_async_from_dict(): + await test_process_document_async(request_type=dict) + + def test_process_document_field_headers(): client = DocumentProcessorServiceClient( credentials=credentials.AnonymousCredentials(), @@ -570,9 +598,7 @@ def test_process_document_field_headers(): request.name = "name/value" # Mock the actual call within the gRPC stub, and fake the request. - with mock.patch.object( - type(client._transport.process_document), "__call__" - ) as call: + with mock.patch.object(type(client.transport.process_document), "__call__") as call: call.return_value = document_processor_service.ProcessResponse() client.process_document(request) @@ -599,9 +625,7 @@ async def test_process_document_field_headers_async(): request.name = "name/value" # Mock the actual call within the gRPC stub, and fake the request. - with mock.patch.object( - type(client._client._transport.process_document), "__call__" - ) as call: + with mock.patch.object(type(client.transport.process_document), "__call__") as call: call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( document_processor_service.ProcessResponse() ) @@ -624,9 +648,7 @@ def test_process_document_flattened(): ) # Mock the actual call within the gRPC stub, and fake the request. - with mock.patch.object( - type(client._transport.process_document), "__call__" - ) as call: + with mock.patch.object(type(client.transport.process_document), "__call__") as call: # Designate an appropriate return value for the call. call.return_value = document_processor_service.ProcessResponse() @@ -662,9 +684,7 @@ async def test_process_document_flattened_async(): ) # Mock the actual call within the gRPC stub, and fake the request. 
- with mock.patch.object( - type(client._client._transport.process_document), "__call__" - ) as call: + with mock.patch.object(type(client.transport.process_document), "__call__") as call: # Designate an appropriate return value for the call. call.return_value = document_processor_service.ProcessResponse() @@ -710,7 +730,7 @@ def test_batch_process_documents( # Mock the actual call within the gRPC stub, and fake the request. with mock.patch.object( - type(client._transport.batch_process_documents), "__call__" + type(client.transport.batch_process_documents), "__call__" ) as call: # Designate an appropriate return value for the call. call.return_value = operations_pb2.Operation(name="operations/spam") @@ -731,19 +751,40 @@ def test_batch_process_documents_from_dict(): test_batch_process_documents(request_type=dict) +def test_batch_process_documents_empty_call(): + # This test is a coverage failsafe to make sure that totally empty calls, + # i.e. request == None and no flattened fields passed, work. + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), transport="grpc", + ) + + # Mock the actual call within the gRPC stub, and fake the request. + with mock.patch.object( + type(client.transport.batch_process_documents), "__call__" + ) as call: + client.batch_process_documents() + call.assert_called() + _, args, _ = call.mock_calls[0] + + assert args[0] == document_processor_service.BatchProcessRequest() + + @pytest.mark.asyncio -async def test_batch_process_documents_async(transport: str = "grpc_asyncio"): +async def test_batch_process_documents_async( + transport: str = "grpc_asyncio", + request_type=document_processor_service.BatchProcessRequest, +): client = DocumentProcessorServiceAsyncClient( credentials=credentials.AnonymousCredentials(), transport=transport, ) # Everything is optional in proto3 as far as the runtime is concerned, # and we are mocking out the actual API, so just send an empty request. 
- request = document_processor_service.BatchProcessRequest() + request = request_type() # Mock the actual call within the gRPC stub, and fake the request. with mock.patch.object( - type(client._client._transport.batch_process_documents), "__call__" + type(client.transport.batch_process_documents), "__call__" ) as call: # Designate an appropriate return value for the call. call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( @@ -756,12 +797,17 @@ async def test_batch_process_documents_async(transport: str = "grpc_asyncio"): assert len(call.mock_calls) _, args, _ = call.mock_calls[0] - assert args[0] == request + assert args[0] == document_processor_service.BatchProcessRequest() # Establish that the response is the type that we expect. assert isinstance(response, future.Future) +@pytest.mark.asyncio +async def test_batch_process_documents_async_from_dict(): + await test_batch_process_documents_async(request_type=dict) + + def test_batch_process_documents_field_headers(): client = DocumentProcessorServiceClient( credentials=credentials.AnonymousCredentials(), @@ -774,7 +820,7 @@ def test_batch_process_documents_field_headers(): # Mock the actual call within the gRPC stub, and fake the request. with mock.patch.object( - type(client._transport.batch_process_documents), "__call__" + type(client.transport.batch_process_documents), "__call__" ) as call: call.return_value = operations_pb2.Operation(name="operations/op") @@ -803,7 +849,7 @@ async def test_batch_process_documents_field_headers_async(): # Mock the actual call within the gRPC stub, and fake the request. with mock.patch.object( - type(client._client._transport.batch_process_documents), "__call__" + type(client.transport.batch_process_documents), "__call__" ) as call: call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( operations_pb2.Operation(name="operations/op") @@ -828,7 +874,7 @@ def test_batch_process_documents_flattened(): # Mock the actual call within the gRPC stub, and fake the request. 
with mock.patch.object( - type(client._transport.batch_process_documents), "__call__" + type(client.transport.batch_process_documents), "__call__" ) as call: # Designate an appropriate return value for the call. call.return_value = operations_pb2.Operation(name="operations/op") @@ -866,7 +912,7 @@ async def test_batch_process_documents_flattened_async(): # Mock the actual call within the gRPC stub, and fake the request. with mock.patch.object( - type(client._client._transport.batch_process_documents), "__call__" + type(client.transport.batch_process_documents), "__call__" ) as call: # Designate an appropriate return value for the call. call.return_value = operations_pb2.Operation(name="operations/op") @@ -913,7 +959,7 @@ def test_review_document( request = request_type() # Mock the actual call within the gRPC stub, and fake the request. - with mock.patch.object(type(client._transport.review_document), "__call__") as call: + with mock.patch.object(type(client.transport.review_document), "__call__") as call: # Designate an appropriate return value for the call. call.return_value = operations_pb2.Operation(name="operations/spam") @@ -933,20 +979,37 @@ def test_review_document_from_dict(): test_review_document(request_type=dict) +def test_review_document_empty_call(): + # This test is a coverage failsafe to make sure that totally empty calls, + # i.e. request == None and no flattened fields passed, work. + client = DocumentProcessorServiceClient( + credentials=credentials.AnonymousCredentials(), transport="grpc", + ) + + # Mock the actual call within the gRPC stub, and fake the request. 
+ with mock.patch.object(type(client.transport.review_document), "__call__") as call: + client.review_document() + call.assert_called() + _, args, _ = call.mock_calls[0] + + assert args[0] == document_processor_service.ReviewDocumentRequest() + + @pytest.mark.asyncio -async def test_review_document_async(transport: str = "grpc_asyncio"): +async def test_review_document_async( + transport: str = "grpc_asyncio", + request_type=document_processor_service.ReviewDocumentRequest, +): client = DocumentProcessorServiceAsyncClient( credentials=credentials.AnonymousCredentials(), transport=transport, ) # Everything is optional in proto3 as far as the runtime is concerned, # and we are mocking out the actual API, so just send an empty request. - request = document_processor_service.ReviewDocumentRequest() + request = request_type() # Mock the actual call within the gRPC stub, and fake the request. - with mock.patch.object( - type(client._client._transport.review_document), "__call__" - ) as call: + with mock.patch.object(type(client.transport.review_document), "__call__") as call: # Designate an appropriate return value for the call. call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( operations_pb2.Operation(name="operations/spam") @@ -958,12 +1021,17 @@ async def test_review_document_async(transport: str = "grpc_asyncio"): assert len(call.mock_calls) _, args, _ = call.mock_calls[0] - assert args[0] == request + assert args[0] == document_processor_service.ReviewDocumentRequest() # Establish that the response is the type that we expect. 
assert isinstance(response, future.Future) +@pytest.mark.asyncio +async def test_review_document_async_from_dict(): + await test_review_document_async(request_type=dict) + + def test_review_document_field_headers(): client = DocumentProcessorServiceClient( credentials=credentials.AnonymousCredentials(), @@ -975,7 +1043,7 @@ def test_review_document_field_headers(): request.human_review_config = "human_review_config/value" # Mock the actual call within the gRPC stub, and fake the request. - with mock.patch.object(type(client._transport.review_document), "__call__") as call: + with mock.patch.object(type(client.transport.review_document), "__call__") as call: call.return_value = operations_pb2.Operation(name="operations/op") client.review_document(request) @@ -1005,9 +1073,7 @@ async def test_review_document_field_headers_async(): request.human_review_config = "human_review_config/value" # Mock the actual call within the gRPC stub, and fake the request. - with mock.patch.object( - type(client._client._transport.review_document), "__call__" - ) as call: + with mock.patch.object(type(client.transport.review_document), "__call__") as call: call.return_value = grpc_helpers_async.FakeUnaryUnaryCall( operations_pb2.Operation(name="operations/op") ) @@ -1033,7 +1099,7 @@ def test_review_document_flattened(): ) # Mock the actual call within the gRPC stub, and fake the request. - with mock.patch.object(type(client._transport.review_document), "__call__") as call: + with mock.patch.object(type(client.transport.review_document), "__call__") as call: # Designate an appropriate return value for the call. call.return_value = operations_pb2.Operation(name="operations/op") @@ -1070,9 +1136,7 @@ async def test_review_document_flattened_async(): ) # Mock the actual call within the gRPC stub, and fake the request. 
- with mock.patch.object( - type(client._client._transport.review_document), "__call__" - ) as call: + with mock.patch.object(type(client.transport.review_document), "__call__") as call: # Designate an appropriate return value for the call. call.return_value = operations_pb2.Operation(name="operations/op") @@ -1144,7 +1208,7 @@ def test_transport_instance(): credentials=credentials.AnonymousCredentials(), ) client = DocumentProcessorServiceClient(transport=transport) - assert client._transport is transport + assert client.transport is transport def test_transport_get_channel(): @@ -1183,7 +1247,7 @@ def test_transport_grpc_default(): credentials=credentials.AnonymousCredentials(), ) assert isinstance( - client._transport, transports.DocumentProcessorServiceGrpcTransport, + client.transport, transports.DocumentProcessorServiceGrpcTransport, ) @@ -1278,6 +1342,53 @@ def test_document_processor_service_transport_auth_adc(): ) +@pytest.mark.parametrize( + "transport_class", + [ + transports.DocumentProcessorServiceGrpcTransport, + transports.DocumentProcessorServiceGrpcAsyncIOTransport, + ], +) +def test_document_processor_service_grpc_transport_client_cert_source_for_mtls( + transport_class, +): + cred = credentials.AnonymousCredentials() + + # Check ssl_channel_credentials is used if provided. 
+ with mock.patch.object(transport_class, "create_channel") as mock_create_channel: + mock_ssl_channel_creds = mock.Mock() + transport_class( + host="squid.clam.whelk", + credentials=cred, + ssl_channel_credentials=mock_ssl_channel_creds, + ) + mock_create_channel.assert_called_once_with( + "squid.clam.whelk:443", + credentials=cred, + credentials_file=None, + scopes=("https://www.googleapis.com/auth/cloud-platform",), + ssl_credentials=mock_ssl_channel_creds, + quota_project_id=None, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], + ) + + # Check if ssl_channel_credentials is not provided, then client_cert_source_for_mtls + # is used. + with mock.patch.object(transport_class, "create_channel", return_value=mock.Mock()): + with mock.patch("grpc.ssl_channel_credentials") as mock_ssl_cred: + transport_class( + credentials=cred, + client_cert_source_for_mtls=client_cert_source_callback, + ) + expected_cert, expected_key = client_cert_source_callback() + mock_ssl_cred.assert_called_once_with( + certificate_chain=expected_cert, private_key=expected_key + ) + + def test_document_processor_service_host_no_port(): client = DocumentProcessorServiceClient( credentials=credentials.AnonymousCredentials(), @@ -1285,7 +1396,7 @@ def test_document_processor_service_host_no_port(): api_endpoint="us-documentai.googleapis.com" ), ) - assert client._transport._host == "us-documentai.googleapis.com:443" + assert client.transport._host == "us-documentai.googleapis.com:443" def test_document_processor_service_host_with_port(): @@ -1295,11 +1406,11 @@ def test_document_processor_service_host_with_port(): api_endpoint="us-documentai.googleapis.com:8000" ), ) - assert client._transport._host == "us-documentai.googleapis.com:8000" + assert client.transport._host == "us-documentai.googleapis.com:8000" def test_document_processor_service_grpc_transport_channel(): - channel = grpc.insecure_channel("http://localhost/") + channel = 
grpc.secure_channel("http://localhost/", grpc.local_channel_credentials()) # Check that channel is used if provided. transport = transports.DocumentProcessorServiceGrpcTransport( @@ -1307,10 +1418,11 @@ def test_document_processor_service_grpc_transport_channel(): ) assert transport.grpc_channel == channel assert transport._host == "squid.clam.whelk:443" + assert transport._ssl_channel_credentials == None def test_document_processor_service_grpc_asyncio_transport_channel(): - channel = aio.insecure_channel("http://localhost/") + channel = aio.secure_channel("http://localhost/", grpc.local_channel_credentials()) # Check that channel is used if provided. transport = transports.DocumentProcessorServiceGrpcAsyncIOTransport( @@ -1318,8 +1430,11 @@ def test_document_processor_service_grpc_asyncio_transport_channel(): ) assert transport.grpc_channel == channel assert transport._host == "squid.clam.whelk:443" + assert transport._ssl_channel_credentials == None +# Remove this test when deprecated arguments (api_mtls_endpoint, client_cert_source) are +# removed from grpc/grpc_asyncio transport constructor. 
@pytest.mark.parametrize( "transport_class", [ @@ -1334,7 +1449,7 @@ def test_document_processor_service_transport_channel_mtls_with_client_cert_sour "grpc.ssl_channel_credentials", autospec=True ) as grpc_ssl_channel_cred: with mock.patch.object( - transport_class, "create_channel", autospec=True + transport_class, "create_channel" ) as grpc_create_channel: mock_ssl_cred = mock.Mock() grpc_ssl_channel_cred.return_value = mock_ssl_cred @@ -1363,10 +1478,17 @@ def test_document_processor_service_transport_channel_mtls_with_client_cert_sour scopes=("https://www.googleapis.com/auth/cloud-platform",), ssl_credentials=mock_ssl_cred, quota_project_id=None, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], ) assert transport.grpc_channel == mock_grpc_channel + assert transport._ssl_channel_credentials == mock_ssl_cred +# Remove this test when deprecated arguments (api_mtls_endpoint, client_cert_source) are +# removed from grpc/grpc_asyncio transport constructor. 
@pytest.mark.parametrize( "transport_class", [ @@ -1382,7 +1504,7 @@ def test_document_processor_service_transport_channel_mtls_with_adc(transport_cl ssl_credentials=mock.PropertyMock(return_value=mock_ssl_cred), ): with mock.patch.object( - transport_class, "create_channel", autospec=True + transport_class, "create_channel" ) as grpc_create_channel: mock_grpc_channel = mock.Mock() grpc_create_channel.return_value = mock_grpc_channel @@ -1403,6 +1525,10 @@ def test_document_processor_service_transport_channel_mtls_with_adc(transport_cl scopes=("https://www.googleapis.com/auth/cloud-platform",), ssl_credentials=mock_ssl_cred, quota_project_id=None, + options=[ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ], ) assert transport.grpc_channel == mock_grpc_channel @@ -1411,7 +1537,7 @@ def test_document_processor_service_grpc_lro_client(): client = DocumentProcessorServiceClient( credentials=credentials.AnonymousCredentials(), transport="grpc", ) - transport = client._transport + transport = client.transport # Ensure that we have a api-core operations client. assert isinstance(transport.operations_client, operations_v1.OperationsClient,) @@ -1424,7 +1550,7 @@ def test_document_processor_service_grpc_lro_async_client(): client = DocumentProcessorServiceAsyncClient( credentials=credentials.AnonymousCredentials(), transport="grpc_asyncio", ) - transport = client._client._transport + transport = client.transport # Ensure that we have a api-core operations client. 
assert isinstance(transport.operations_client, operations_v1.OperationsAsyncClient,) @@ -1433,6 +1559,159 @@ def test_document_processor_service_grpc_lro_async_client(): assert transport.operations_client is transport.operations_client +def test_human_review_config_path(): + project = "squid" + location = "clam" + processor = "whelk" + + expected = "projects/{project}/locations/{location}/processors/{processor}/humanReviewConfig".format( + project=project, location=location, processor=processor, + ) + actual = DocumentProcessorServiceClient.human_review_config_path( + project, location, processor + ) + assert expected == actual + + +def test_parse_human_review_config_path(): + expected = { + "project": "octopus", + "location": "oyster", + "processor": "nudibranch", + } + path = DocumentProcessorServiceClient.human_review_config_path(**expected) + + # Check that the path construction is reversible. + actual = DocumentProcessorServiceClient.parse_human_review_config_path(path) + assert expected == actual + + +def test_processor_path(): + project = "cuttlefish" + location = "mussel" + processor = "winkle" + + expected = "projects/{project}/locations/{location}/processors/{processor}".format( + project=project, location=location, processor=processor, + ) + actual = DocumentProcessorServiceClient.processor_path(project, location, processor) + assert expected == actual + + +def test_parse_processor_path(): + expected = { + "project": "nautilus", + "location": "scallop", + "processor": "abalone", + } + path = DocumentProcessorServiceClient.processor_path(**expected) + + # Check that the path construction is reversible. 
+ actual = DocumentProcessorServiceClient.parse_processor_path(path) + assert expected == actual + + +def test_common_billing_account_path(): + billing_account = "squid" + + expected = "billingAccounts/{billing_account}".format( + billing_account=billing_account, + ) + actual = DocumentProcessorServiceClient.common_billing_account_path(billing_account) + assert expected == actual + + +def test_parse_common_billing_account_path(): + expected = { + "billing_account": "clam", + } + path = DocumentProcessorServiceClient.common_billing_account_path(**expected) + + # Check that the path construction is reversible. + actual = DocumentProcessorServiceClient.parse_common_billing_account_path(path) + assert expected == actual + + +def test_common_folder_path(): + folder = "whelk" + + expected = "folders/{folder}".format(folder=folder,) + actual = DocumentProcessorServiceClient.common_folder_path(folder) + assert expected == actual + + +def test_parse_common_folder_path(): + expected = { + "folder": "octopus", + } + path = DocumentProcessorServiceClient.common_folder_path(**expected) + + # Check that the path construction is reversible. + actual = DocumentProcessorServiceClient.parse_common_folder_path(path) + assert expected == actual + + +def test_common_organization_path(): + organization = "oyster" + + expected = "organizations/{organization}".format(organization=organization,) + actual = DocumentProcessorServiceClient.common_organization_path(organization) + assert expected == actual + + +def test_parse_common_organization_path(): + expected = { + "organization": "nudibranch", + } + path = DocumentProcessorServiceClient.common_organization_path(**expected) + + # Check that the path construction is reversible. 
+ actual = DocumentProcessorServiceClient.parse_common_organization_path(path) + assert expected == actual + + +def test_common_project_path(): + project = "cuttlefish" + + expected = "projects/{project}".format(project=project,) + actual = DocumentProcessorServiceClient.common_project_path(project) + assert expected == actual + + +def test_parse_common_project_path(): + expected = { + "project": "mussel", + } + path = DocumentProcessorServiceClient.common_project_path(**expected) + + # Check that the path construction is reversible. + actual = DocumentProcessorServiceClient.parse_common_project_path(path) + assert expected == actual + + +def test_common_location_path(): + project = "winkle" + location = "nautilus" + + expected = "projects/{project}/locations/{location}".format( + project=project, location=location, + ) + actual = DocumentProcessorServiceClient.common_location_path(project, location) + assert expected == actual + + +def test_parse_common_location_path(): + expected = { + "project": "scallop", + "location": "abalone", + } + path = DocumentProcessorServiceClient.common_location_path(**expected) + + # Check that the path construction is reversible. + actual = DocumentProcessorServiceClient.parse_common_location_path(path) + assert expected == actual + + def test_client_withDEFAULT_CLIENT_INFO(): client_info = gapic_v1.client_info.ClientInfo() pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy