4
HappyBase connection module.
9
from thrift.transport.TSocket import TSocket
10
from thrift.transport.TTransport import TBufferedTransport, TFramedTransport
11
from thrift.protocol import TBinaryProtocol
13
from .hbase import Hbase
14
from .hbase.ttypes import ColumnDescriptor
15
from .table import Table
16
from .util import pep8_to_camel_case
18
logger = logging.getLogger(__name__)
20
COMPAT_MODES = ('0.90', '0.92')
21
THRIFT_TRANSPORTS = dict(
22
buffered=TBufferedTransport,
23
framed=TFramedTransport,
26
DEFAULT_HOST = 'localhost'
30
class Connection(object):
31
"""Connection to an HBase Thrift server.
33
The `host` and `port` parameters specify the host name and TCP port
34
of the HBase Thrift server to connect to. If omitted or ``None``,
35
a connection to the default port on ``localhost`` is made. If
36
specifed, the `timeout` parameter specifies the socket timeout in
39
If `autoconnect` is `True` (the default) the connection is made directly,
40
otherwise :py:meth:`Connection.open` must be called explicitly before first
43
The optional `table_prefix` and `table_prefix_separator` arguments specify
44
a prefix and a separator string to be prepended to all table names, e.g.
45
when :py:meth:`Connection.table` is invoked. For example, if `table_prefix`
46
is ``myproject``, all tables tables will have names like ``myproject_XYZ``.
48
The optional `compat` parameter sets the compatibility level for this
49
connection. Older HBase versions have slightly different Thrift interfaces,
50
and using the wrong protocol can lead to crashes caused by communication
51
errors, so make sure to use the correct one. This value can be either the
52
string ``0.92`` (the default) for use with HBase 0.92.x and later versions,
53
or ``0.90`` for use with HBase 0.90.x.
55
The optional `transport` parameter specifies the Thrift transport mode to
56
use. Supported values for this parameter are ``buffered`` (the default) and
57
``framed``. Make sure to choose the right one, since otherwise you might
58
see non-obvious connection errors or program hangs when making
59
a connection. HBase versions before 0.94 always use the buffered transport.
60
Starting with HBase 0.94, the Thrift server optionally uses a framed
61
transport, depending on the parameter passed to the ``hbase-daemon.sh start
62
thrift`` command. The default ``-threadpool`` mode uses the buffered
63
transport; the ``-hsha``, ``-nonblocking``, and ``-threadedselector`` modes
64
use the framed transport.
70
`table_prefix_separator` parameter
73
support for framed Thrift transports
75
:param str host: The host to connect to
76
:param int port: The port to connect to
77
:param int timeout: The socket timeout in milliseconds (optional)
78
:param bool autoconnect: Whether the connection should be opened directly
79
:param str table_prefix: Prefix used to construct table names (optional)
80
:param str table_prefix_separator: Separator used for `table_prefix`
81
:param str compat: Compatibility mode (optional)
82
:param str transport: Thrift transport mode (optional)
84
def __init__(self, host=DEFAULT_HOST, port=DEFAULT_PORT, timeout=None,
85
autoconnect=True, table_prefix=None,
86
table_prefix_separator='_', compat='0.92',
87
transport='buffered'):
89
if transport not in THRIFT_TRANSPORTS:
90
raise ValueError("'transport' must be one of %s"
91
% ", ".join(THRIFT_TRANSPORTS.keys()))
93
if table_prefix is not None \
94
and not isinstance(table_prefix, basestring):
95
raise TypeError("'table_prefix' must be a string")
97
if not isinstance(table_prefix_separator, basestring):
98
raise TypeError("'table_prefix_separator' must be a string")
100
if compat not in COMPAT_MODES:
101
raise ValueError("'compat' must be one of %s"
102
% ", ".join(COMPAT_MODES))
104
# Allow host and port to be None, which may be easier for
105
# applications wrapping a Connection instance.
106
self.host = host or DEFAULT_HOST
107
self.port = port or DEFAULT_PORT
108
self.timeout = timeout
109
self.table_prefix = table_prefix
110
self.table_prefix_separator = table_prefix_separator
113
self._transport_class = THRIFT_TRANSPORTS[transport]
114
self._refresh_thrift_client()
119
self._initialized = True
121
def _refresh_thrift_client(self):
122
"""Refresh the Thrift socket, transport, and client."""
123
socket = TSocket(self.host, self.port)
124
if self.timeout is not None:
125
socket.setTimeout(self.timeout)
127
self.transport = self._transport_class(socket)
128
protocol = TBinaryProtocol.TBinaryProtocolAccelerated(self.transport)
129
self.client = Hbase.Client(protocol)
131
def _table_name(self, name):
132
"""Construct a table name by optionally adding a table name prefix."""
133
if self.table_prefix is None:
136
return self.table_prefix + self.table_prefix_separator + name
139
"""Open the underlying transport to the HBase instance.
141
This method opens the underlying Thrift transport (TCP connection).
143
if self.transport.isOpen():
146
logger.debug("Opening Thrift transport to %s:%d", self.host, self.port)
147
self.transport.open()
150
"""Close the underyling transport to the HBase instance.
152
This method closes the underlying Thrift transport (TCP connection).
154
if not self.transport.isOpen():
157
if logger is not None:
158
# If called from __del__(), module variables may no longer
161
"Closing Thrift transport to %s:%d",
162
self.host, self.port)
164
self.transport.close()
169
except AttributeError:
170
# Failure from constructor
175
def table(self, name, use_prefix=True):
176
"""Return a table object.
178
Returns a :py:class:`happybase.Table` instance for the table named
179
`name`. This does not result in a round-trip to the server, and the
180
table is not checked for existence.
182
The optional `use_prefix` parameter specifies whether the table prefix
183
(if any) is prepended to the specified `name`. Set this to `False` if
184
you want to use a table that resides in another ‘prefix namespace’,
185
e.g. a table from a ‘friendly’ application co-hosted on the same HBase
186
instance. See the `table_prefix` parameter to the
187
:py:class:`Connection` constructor for more information.
189
:param str name: the name of the table
190
:param bool use_prefix: whether to use the table prefix (if any)
191
:return: Table instance
192
:rtype: :py:class:`Table`
195
name = self._table_name(name)
196
return Table(name, self)
199
# Table administration and maintenance
203
"""Return a list of table names available in this HBase instance.
205
If a `table_prefix` was set for this :py:class:`Connection`, only
206
tables that have the specified prefix will be listed.
208
:return: The table names
209
:rtype: List of strings
211
names = self.client.getTableNames()
213
# Filter using prefix, and strip prefix from names
214
if self.table_prefix is not None:
215
prefix = self._table_name('')
217
names = [n[offset:] for n in names if n.startswith(prefix)]
221
def create_table(self, name, families):
224
:param str name: The table name
225
:param dict families: The name and options for each column family
227
The `families` parameter is a dictionary mapping column family
228
names to a dictionary containing the options for this column
234
'cf1': dict(max_versions=10),
235
'cf2': dict(max_versions=1, block_cache_enabled=False),
236
'cf3': dict(), # use defaults
238
connection.create_table('mytable', families)
240
These options correspond to the ColumnDescriptor structure in
241
the Thrift API, but note that the names should be provided in
242
Python style, not in camel case notation, e.g. `time_to_live`,
243
not `timeToLive`. The following options are supported:
245
* ``max_versions`` (`int`)
246
* ``compression`` (`str`)
247
* ``in_memory`` (`bool`)
248
* ``bloom_filter_type`` (`str`)
249
* ``bloom_filter_vector_size`` (`int`)
250
* ``bloom_filter_nb_hashes`` (`int`)
251
* ``block_cache_enabled`` (`bool`)
252
* ``time_to_live`` (`int`)
254
name = self._table_name(name)
255
if not isinstance(families, dict):
256
raise TypeError("'families' arg must be a dictionary")
260
"Cannot create table %r (no column families specified)"
263
column_descriptors = []
264
for cf_name, options in families.iteritems():
269
for option_name, value in options.iteritems():
270
kwargs[pep8_to_camel_case(option_name)] = value
272
if not cf_name.endswith(':'):
274
kwargs['name'] = cf_name
276
column_descriptors.append(ColumnDescriptor(**kwargs))
278
self.client.createTable(name, column_descriptors)
280
def delete_table(self, name, disable=False):
281
"""Delete the specified table.
283
.. versionadded:: 0.5
284
the `disable` parameter
286
In HBase, a table always needs to be disabled before it can be deleted.
287
If the `disable` parameter is `True`, this method first disables the
288
table if it wasn't already and then deletes it.
290
:param str name: The table name
291
:param bool disable: Whether to first disable the table if needed
293
if disable and self.is_table_enabled(name):
294
self.disable_table(name)
296
name = self._table_name(name)
297
self.client.deleteTable(name)
299
def enable_table(self, name):
300
"""Enable the specified table.
302
:param str name: The table name
304
name = self._table_name(name)
305
self.client.enableTable(name)
307
def disable_table(self, name):
308
"""Disable the specified table.
310
:param str name: The table name
312
name = self._table_name(name)
313
self.client.disableTable(name)
315
def is_table_enabled(self, name):
316
"""Return whether the specified table is enabled.
318
:param str name: The table name
320
:return: whether the table is enabled
323
name = self._table_name(name)
324
return self.client.isTableEnabled(name)
326
def compact_table(self, name, major=False):
327
"""Compact the specified table.
329
:param str name: The table name
330
:param bool major: Whether to perform a major compaction.
332
name = self._table_name(name)
334
self.client.majorCompact(name)
336
self.client.compact(name)