#!/usr/bin/env python
# coding=utf8
# Copyright 2010 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Google Storage command line tool."""

import ConfigParser
import errno
import getopt
import logging
import os
import re
import socket
import signal
import stat
import sys
import traceback
import xml.sax.xmlreader

# Name of the HTTP header main() uses to send the API version with every
# command.
GOOG_API_VERSION_HDR = 'x-goog-api-version'

# Debug level, set by main() from the command line and read by
# HandleControlC(): 0 by default, 2 for -d, 3 for -D and 4 for -DD.
debug = 0


def OutputAndExit(message):
  """Writes message, followed by a newline, to stderr and exits with status 1.

  Args:
    message: text to show the user before exiting.
  """
  line = '%s\n' % message
  sys.stderr.write(line)
  raise SystemExit(1)


# Before importing boto, find where gsutil is installed and include its
# boto sub-directory at the start of the PYTHONPATH, to ensure the versions of
# gsutil and boto stay in sync after software updates. This also allows gsutil
# to be used without explicitly adding it to the PYTHONPATH.
# We use realpath() below to unwind symlinks if any were used in the gsutil
# installation.
gsutil_bin_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
if not gsutil_bin_dir:
  OutputAndExit('Unable to determine where gsutil is installed. Sorry, '
                'cannot run correctly without this.\n')
# The bundled boto library is expected in a 'boto' directory next to the
# gsutil script itself.
boto_lib_dir = os.path.join(gsutil_bin_dir, 'boto')
if not os.path.isdir(boto_lib_dir):
  OutputAndExit('There is no boto library under the gsutil install directory '
                '(%s).\nThe gsutil command cannot work properly when installed '
                'this way.\nPlease re-install gsutil per the installation '
                'instructions.' % gsutil_bin_dir)
# Must happen before the import below: putting the bundled directory first in
# sys.path makes it win over any other boto found on the path.
sys.path.insert(0, boto_lib_dir)
import boto
# Append a gsutil marker to the user agent string that boto exposes.
boto.UserAgent += '/gsutil'
from boto.exception import BotoClientError
from boto.exception import InvalidAclError
from boto.exception import InvalidUriError
from boto.exception import ResumableUploadException
from boto.exception import StorageResponseError
from boto.provider import Provider

from gslib.command import Command
from gslib.exception import CommandException
from gslib.exception import ProjectIdException
from gslib.wildcard_iterator import WildcardException

# The oauth2 authentication plugin is never referenced directly; it is imported
# here only so that it is loaded and available by default. _HAVE_OAUTH2
# records whether that import succeeded.
try:
  from oauth2_plugin import oauth2_plugin
except ImportError:
  _HAVE_OAUTH2 = False
else:
  _HAVE_OAUTH2 = True

# Usage/help text for the tool. It is handed to the Command constructor when
# command_inst is created; presumably Command.OutputUsageAndExit() is what
# prints it (confirm in gslib.command).
usage_string = """
SYNOPSIS
  gsutil [-d][-D] [-h header]... [-m] [command args...]

  -d option shows HTTP requests/headers.
  -D option includes additional debug info needed when posting support
     requests to gs-discussion@googlegroups.com.
  -DD includes HTTP upstream payload.

  -h option allows you to specify additional HTTP headers, for example:
     gsutil -h "Cache-Control:public,max-age=3600" -h "Content-Type:gzip" cp ...

  -m option causes supported operations (cp, rm) to be executed in parallel.

  Commands:
    Concatenate object content to stdout:
      cat [-h] uri...
        -h  Prints short header for each object.
    Copy objects:
      cp [-a canned_acl] [-e] [-t] [-z ext1,ext2,...] src_uri dst_uri
        - or -
      cp [-a canned_acl] [-e] [-R] [-t] [-z extensions] uri... dst_uri
        -a Sets named canned_acl when uploaded objects created (list below).
        -e Exclude symlinks. When specified, symbolic links will not be copied.
        -R Causes directories and buckets to be copied recursively.
        -t Sets MIME type based on file extension.
        -z 'txt,html' Compresses file uploads with the given extensions.
      Use '-' in place of src_uri or dst_uri to perform streaming transfer.
    Get ACL XML for a bucket or object (save and edit for "setacl" command):
      getacl uri
    List buckets or objects:
      ls [-b] [-l] [-L] [-p proj_id] uri...
         -l Prints long listing (owner, length); -L provides more detail.
         -b Prints info about the bucket when used with a bucket URI.
         -p proj_id Specifies the project ID to use for listing buckets.
    Make buckets:
      mb [-l LocationConstraint] [-p proj_id] uri...
         -l can be us or eu. Default is us
         -p proj_id Specifies the project ID under which to create the bucket.
    Move/rename objects:
      mv src_uri dst_uri
        - or -
      mv uri... dst_uri
    Remove buckets:
      rb uri...
    Remove objects:
      rm [-f] uri...
         -f Continues despite errors when removing by wildcard.
    Set ACL on buckets and/or objects:
      setacl file-or-canned_acl_name uri...
    Print version info:
      ver
    Obtain credentials and create configuration file:
      config [options] [-o <config file>]
         Run 'gsutil config -h' for detailed help on this command.

  Omitting URI scheme defaults to "file". For example, "dir/file.txt" is
  equivalent to "file://dir/file.txt"

  URIs support object name wildcards, for example:
    gsutil cp gs://mybucket/[a-f]*.doc localdir

  Source directory or bucket names are implicitly wildcarded, so
    gsutil cp localdir gs://mybucket
  will recursively copy localdir.

  canned_acl_name can be one of: "private", "project-private",
  "public-read", "public-read-write", "authenticated-read",
  "bucket-owner-read", "bucket-owner-full-control"
"""


def HaveFileUris(args):
  """Reports whether any of the given arguments is a file URI.

  An argument counts as a file URI when it starts with 'file://' (compared
  case-insensitively) or when it contains no ':' at all, i.e. it has no URI
  scheme.

  Args:
    args: command-line arguments

  Returns:
    True if at least one argument is a file URI, False otherwise.
  """
  return any(':' not in candidate or candidate.lower().startswith('file://')
             for candidate in args)


def HaveProviderUris(args):
  """Checks whether args contains any provider URIs (like 'gs://').

  Args:
    args: command-line arguments

  Returns:
    True if args contains any provider URIs.
  """
  for uri_str in args:
    if re.match('^[a-z]+://$', uri_str):
      return True
  return False


def GetBotoConfigFileList():
  """Returns list of boto config files that exist.

  The candidates are the paths in boto.pyami.config.BotoConfigLocations plus,
  when the AWS_CREDENTIAL_FILE environment variable is set, the file it names.

  Returns:
    List of the candidate paths that exist on disk, without duplicates, in the
    order the candidates are listed.
  """
  # Work on a copy: BotoConfigLocations is a module-level list inside boto, so
  # appending to it directly would change boto's own state and would add the
  # AWS_CREDENTIAL_FILE entry again on every call.
  config_paths = list(boto.pyami.config.BotoConfigLocations)
  if 'AWS_CREDENTIAL_FILE' in os.environ:
    config_paths.append(os.environ['AWS_CREDENTIAL_FILE'])
  cf_list = []
  for config_path in config_paths:
    # De-duplicate while keeping the candidate order, so the result is
    # deterministic.
    if os.path.exists(config_path) and config_path not in cf_list:
      cf_list.append(config_path)
  return cf_list

# Existing boto config files, computed once at start-up. Used when building
# the Command instance, in the "no credentials" error message and in -D debug
# output.
config_file_list = GetBotoConfigFileList()
# Single Command instance whose methods implement the individual gsutil
# commands (see the commands table).
command_inst = Command(gsutil_bin_dir, boto_lib_dir, usage_string,
                       config_file_list)

def HasConfiguredCredentials():
  """Reports whether the boto config contains any usable credentials.

  Credentials are considered configured when the boto config has at least one
  of: a Google Storage key id/secret pair, an AWS key id/secret pair, an
  OAuth2 refresh token (counted only if the oauth2 plugin was importable), or
  a plugin directory setting.

  Returns:
    True if any of the above is present, False otherwise.
  """
  cfg = boto.config

  def _HasKeyPair(id_option, secret_option):
    # A key pair only counts when both halves are present.
    return (cfg.has_option('Credentials', id_option) and
            cfg.has_option('Credentials', secret_option))

  if _HasKeyPair('gs_access_key_id', 'gs_secret_access_key'):
    return True
  if _HasKeyPair('aws_access_key_id', 'aws_secret_access_key'):
    return True
  if _HAVE_OAUTH2 and cfg.has_option('Credentials', 'gs_oauth2_refresh_token'):
    return True
  return cfg.has_option('Plugin', 'plugin_directory')

def ConfigureNoOpAuthIfNeeded():
  """Falls back to the no-op auth plugin when no credentials are configured.

  Does nothing if credentials are configured. If no boto config file exists
  at all, imports gslib's no_op_auth_plugin (presumably the import itself
  registers the no-op auth handler; see that module). If config files exist
  but hold no usable credentials, the situation is reported as an error.

  Raises:
    CommandException: if boto config files exist but contain no usable
        credentials, including the case of an OAuth2 refresh token that
        cannot be used because the oauth2 plugin could not be imported.
  """
  if HasConfiguredCredentials():
    return

  if not config_file_list:
    # With no boto config file the user can still access publicly readable
    # buckets and objects.
    from gslib import no_op_auth_plugin
    return

  token_present = boto.config.has_option('Credentials',
                                         'gs_oauth2_refresh_token')
  if token_present and not _HAVE_OAUTH2:
    raise CommandException(
        "Your configuration is set up with OAuth2 authentication "
        "credentials.\nHowever, OAuth2 is only supported when running "
        "under Python 2.6 or later\n(unless additional dependencies are "
        "installed, see README for details); you are running Python %s." %
        sys.version)
  raise CommandException('You have no storage service credentials in any '
                         'of the following boto config\nfiles. Please add '
                         'your credentials as described in the gsutil '
                         "README file, or else\nre-run 'gsutil config' "
                         'to re-create a config file:\n%s' %
                         config_file_list)

# Maps command name to
# [function, min_args, max_args, supported_sub_args, file_uri_ok,
#  provider_uri_ok, uris_start_arg], where (as used by main()):
#   function: Command method that carries out the command.
#   min_args, max_args: bounds on the number of arguments that remain after
#     the command's own options have been parsed.
#   supported_sub_args: getopt short-option string for the command's options.
#   file_uri_ok: if False, the command is rejected when given file URIs.
#   provider_uri_ok: if False, the command is rejected when given
#     provider-only URIs (like 'gs://').
#   uris_start_arg: index of the first argument that is checked as a URI.
# NO_MAX is the max_args value for commands with no upper argument limit.
NO_MAX = sys.maxint
commands = {
    'cat': [command_inst.CatCommand, 0, NO_MAX, 'h', False, True, 0],
    'config': [command_inst.CreateConfigCommand,
        0, 0, 'habfwrs:o:', False, False, 0],
    'cp': [command_inst.CopyObjsCommand, 2, NO_MAX, 'a:erRtz:', True, False, 0],
    'getacl': [command_inst.GetAclCommand, 1, 1, '', False, False, 0],
    'help': [command_inst.HelpCommand, 0, 0, '', False, False, 0],
    'ls': [command_inst.ListCommand, 0, NO_MAX, 'blLp:', False, True, 0],
    'mb': [command_inst.MakeBucketsCommand, 1, NO_MAX, 'l:p:', False, False, 0],
    'mv': [command_inst.MoveObjsCommand, 2, NO_MAX, '', True, False, 0],
    'rb': [command_inst.RemoveBucketsCommand, 1, NO_MAX, '', False, False, 0],
    'rm': [command_inst.RemoveObjsCommand, 1, NO_MAX, 'f', False, False, 0],
    'setacl': [command_inst.SetAclCommand, 2, NO_MAX, '', False, False, 1],
    'update': [command_inst.UpdateCommand, 0, 0, 'f', False, False, 0],
    'ver': [command_inst.VerCommand, 0, 0, '', False, False, 0],
    # Alias for ver command.
    'version': [command_inst.VerCommand, 0, 0, '', False, False, 0],
}

# Commands for which SanityCheckXmlParser() verifies the XML parser.
xml_parsing_commands = ['getacl', 'ls', 'setacl']
# Commands for which main() skips the credential/config check.
no_config_required_commands = ['help', 'ver', 'version', 'config']


def SanityCheckXmlParser(cmd):
  """Refuses to run an XML-parsing command with the xmlplus parser.

  The xmlplus parser works incorrectly (see
  http://code.google.com/p/gsutil/issues/detail?id=18 for details). Commands
  that are not listed in xml_parsing_commands are not checked.

  Args:
    cmd: name of the command about to be run.

  Raises:
    CommandException: if cmd needs XML parsing and xml.sax.xmlreader was
        loaded from a path containing 'xmlplus'.
  """
  if cmd in xml_parsing_commands:
    parser_path = xml.sax.xmlreader.__file__
    if 'xmlplus' in parser_path:
      raise CommandException('The "%s" command requires XML parsing, and your '
                             'Python installation includes an\nXML parser (%s)\n'
                             'that does not work correctly.' %
                             (cmd, parser_path))

def HandleUnknownFailure(debug, e):
  """Reports an exception that no specific handler recognized, then exits.

  Called if we fall through all known/handled exceptions. Allows us to print
  a stacktrace if -D option used.

  Args:
    debug: current debug level; above 2 (-D or -DD) the stack trace of the
        exception being handled is written to stderr instead of the one-line
        failure message.
    e: the exception being reported.
  """
  if debug > 2:
    stack_trace = traceback.format_exc()
    sys.stderr.write('DEBUG: Exception stack trace:\n    %s\n' %
                     stack_trace.replace('\n', '\n    '))
    # Exit with a failure status here as well. Returning instead would let
    # the caller finish normally and the process report success (status 0)
    # even though the command failed.
    sys.exit(1)
  else:
    OutputAndExit('Failure: %s.' % e)

def HandleCommandException(e):
  """Prints the reason carried by a CommandException and exits.

  Informational exceptions are printed as-is; all others get a
  'CommandException: ' prefix.

  Args:
    e: the exception to report; its informational and reason attributes are
        read.
  """
  text = e.reason if e.informational else 'CommandException: %s' % e.reason
  OutputAndExit(text)

def HandleControlC(signal_num, cur_stack_frame):
  """Signal handler that exits with a brief message when the user hits ^C.

  Replaces the normal Python stack trace with a one-line message, unless the
  -D option is in effect (module-level debug above 2), in which case the
  current stack is included in the message.

  Args:
    signal_num: number of the signal that was caught.
    cur_stack_frame: stack frame passed in by the signal module (unused).
  """
  if debug <= 2:
    OutputAndExit('Caught signal %d - exiting' % signal_num)
  else:
    frames = traceback.extract_stack()
    stack_trace = ''.join(traceback.format_list(frames))
    indented_trace = re.sub('\\n', '\n    ', stack_trace)
    OutputAndExit('DEBUG: Caught signal %d - Exception stack trace:\n'
                  '    %s' % (signal_num, indented_trace))

def main():
  if sys.version_info[:3] < (2, 5, 1):
    raise CommandException('This tool requires Python 2.5.1 or higher.')

  # If user enters no commands just print the usage info.
  if len(sys.argv) == 1:
    command_inst.OutputUsageAndExit()

  global debug
  try:
    opts, args = getopt.getopt(sys.argv[1:], 'dDvh:m',
                               ['debug', 'detailedDebug', 'version', 'help',
                                'header', 'multithreaded'])
    if not args:
      cmd = 'help'
    else:
      cmd = args[0]
    if cmd not in commands:
      raise CommandException('Invalid command "%s".' % cmd)
    SanityCheckXmlParser(cmd)
    command_info = commands[cmd]
    command_func = command_info[0]
    min_arg_count = command_info[1]
    max_arg_count = command_info[2]
    supported_sub_args = command_info[3]
    file_uri_ok = command_info[4]
    provider_uri_ok = command_info[5]
    uris_start_arg = command_info[6]
    sub_opts, args = getopt.getopt(args[1:], supported_sub_args)
    if len(args) < min_arg_count or len(args) > max_arg_count:
      raise CommandException('Wrong number of arguments for "%s" command.' %
                             cmd)
    if not file_uri_ok and HaveFileUris(args[uris_start_arg:]):
      raise CommandException('"%s" command does not support "file://" URIs. '
                             'Did you mean to use a gs:// URI?' %
                             cmd)
    if not provider_uri_ok and HaveProviderUris(args[uris_start_arg:]):
      raise CommandException('"%s" command does not support provider-only '
                             'URIs.' % cmd)

    if cmd not in no_config_required_commands:
      ConfigureNoOpAuthIfNeeded()

    # Include api_version header in all commands.
    api_version = boto.config.get_value('GSUtil', 'default_api_version', '1')
    headers = { GOOG_API_VERSION_HDR : api_version }

    # Change the default of the 'https_validate_certificates' boto option to
    # True (it is currently False in boto).
    if not boto.config.has_option('Boto', 'https_validate_certificates'):
      if not boto.config.has_section('Boto'):
        boto.config.add_section('Boto')
      boto.config.setbool('Boto', 'https_validate_certificates', True)

    for o, a in opts:
      if o in ('-d', '--debug'):
        # Passing debug=2 causes boto to include httplib header output.
        debug = 2
      if o in ('-D', '--detailedDebug'):
        # We use debug level 3 to ask gsutil code to output more detailed
        # debug output. This is a bit of a hack since it overloads the same
        # flag that was originally implemented for boto use. And we use -DD
        # to ask for really detailed debugging (i.e., including HTTP payload).
        if debug == 3:
          debug = 4
        else:
          debug = 3
      if o in ('-?', '--help'):
        command_inst.OutputUsageAndExit()
      if o in ('-h', '--header'):
        (hdr_name, unused_ptn, hdr_val) = a.partition(':')
        if not hdr_name or not hdr_val:
          command_inst.OutputUsageAndExit()
        headers[hdr_name] = hdr_val
      if o in ('-v', '--version'):
        command_inst.VerCommand([], None)
        sys.exit(0)
      if o in ('-m', '--multithreaded'):
        command_inst.SetParallelOperations(True)
    if debug > 1:
      sys.stderr.write(
          '***************************** WARNING *****************************\n'
          '*** You are running gsutil with debug output enabled.\n'
          '*** Be aware that debug output includes authentication '
          'credentials.\n'
          '*** Do not share (e.g., post to support forums) debug output\n'
          '*** unless you have sanitized authentication tokens in the\n'
          '*** output, or have revoked your credentials.\n'
          '***************************** WARNING *****************************\n')
    if debug == 2:
      logging.basicConfig(level=logging.INFO)
    elif debug > 2:
      logging.basicConfig(level=logging.DEBUG)
      command_inst.VerCommand([], None)
      config_items = []
      try:
        config_items.extend(boto.config.items('Boto'))
        config_items.extend(boto.config.items('GSUtil'))
      except ConfigParser.NoSectionError:
        pass
      sys.stderr.write('config_file_list: %s\n' % config_file_list)
      sys.stderr.write('config: %s\n' % str(config_items))
    else:
      logging.basicConfig()
    # Catch ^C so we can print a brief message instead of the normal Python
    # stack trace.
    signal.signal(signal.SIGINT, HandleControlC)
    command_func(args, sub_opts, headers, debug)
  except AttributeError, e:
    if str(e).find('secret_access_key') != -1:
      OutputAndExit('Missing credentials for the given URI(s). Does your '
                    'boto config file contain all needed credentials?')
    else:
      OutputAndExit(str(e))
  except BotoClientError, e:
    OutputAndExit('BotoClientError: %s.' % e.reason)
  except CommandException, e:
    HandleCommandException(e)
  except getopt.GetoptError, e:
    HandleCommandException(CommandException(e.msg))
  except InvalidAclError, e:
    OutputAndExit('InvalidAclError: %s.' % str(e))
  except InvalidUriError, e:
    OutputAndExit('InvalidUriError: %s.' % e.message)
  except ProjectIdException, e:
    OutputAndExit('ProjectIdException: %s.' % e.reason)
  except boto.auth_handler.NotReadyToAuthenticate:
    OutputAndExit('NotReadyToAuthenticate')
  except OSError, e:
    OutputAndExit('OSError: %s.' % e.strerror)
  except WildcardException, e:
    OutputAndExit(e.reason)
  except StorageResponseError, e:
    # Check for access denied, and provide detail to users who have no boto
    # config file (who have been using gsutil only for accessing publicly
    # readable buckets and objects).
    if e.status == 403 and not HasConfiguredCredentials():
      OutputAndExit(
          'You are attempting to access protected data with no configured '
          'credentials.\nPlease see '
          'http://code.google.com/apis/storage/docs/signup.html for\ndetails '
          'about activating the Google Storage service and then run the\n'
          '"gsutil config" command to configure gsutil to use these credentials.')
    if not e.body:
      e.body = ''
    detail_start = e.body.find('<Details>')
    detail_end = e.body.find('</Details>')
    exc_name = re.split("[\.']", str(type(e)))[-2]
    if detail_start != -1 and detail_end != -1:
      detail = e.body[detail_start+9:detail_end]
      OutputAndExit('%s: status=%d, code=%s, reason=%s, detail=%s.' %
                    (exc_name, e.status, e.code, e.reason, detail))
    else:
      OutputAndExit('%s:: status=%d, code=%s, reason=%s.' %
                    (exc_name, e.status, e.code, e.reason))
  except ResumableUploadException, e:
    OutputAndExit('ResumableUploadException: %s.' % e.message)
  except boto.exception.TooManyAuthHandlerReadyToAuthenticate, e:
    OutputAndExit(
        'You have credentials for more than one authentication handler \n'
        'configured in your boto configuration file and/or environment \n'
        'variables (for example, both an OAuth2 token and Developer Access \n'
        'keys/secret):\n%s' % e)
  except socket.error, e:
    if e.args[0] == errno.EPIPE:
      # Retrying with a smaller file (per suggestion below) works because
      # the library code send loop (in boto/s3/key.py) can get through the
      # entire file and then request the HTTP response before the socket
      # gets closed and the response lost.
      message = (
"""
Got a "Broken pipe" error. This can happen to clients using Python 2.x,
when the server sends an error response and then closes the socket (see
http://bugs.python.org/issue5542). If you are trying to upload a large
object you might retry with a small (say 200k) object, and see if you get
a more specific error code.
""")
      OutputAndExit(message)
    else:
      HandleUnknownFailure(debug, e)
  except Exception, e:
    HandleUnknownFailure(debug, e)

# Script entry point.
if __name__ == '__main__':
  main()
  # main() returned without exiting, so report success.
  sys.exit(0)
