text.py 17.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#
#

# Copyright (C) 2006, 2007, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

"""Utility functions for manipulating or working with text.

"""


import re
import os
import time
import collections

from ganeti import errors
32
from ganeti import compat
33
34
35
36
37
38


#: Unit checker regexp
_PARSEUNIT_REGEX = re.compile(r"^([.\d]+)\s*([a-zA-Z]+)?$")

#: Characters which don't need to be quoted for shell commands; anchored with
#: \Z because "$" would also match just before a trailing newline
_SHELL_UNQUOTED_RE = re.compile(r"^[-.,=:/_+@A-Za-z0-9]+\Z")

#: Shell param checker regexp; anchored with \Z so that a value ending in a
#: newline is not considered safe
_SHELLPARAM_REGEX = re.compile(r"^[-a-zA-Z0-9._+/:%@]+\Z")

#: ASCII equivalent of unicode character 'HORIZONTAL ELLIPSIS' (U+2026)
_ASCII_ELLIPSIS = "..."

#: MAC address octet
_MAC_ADDR_OCTET_RE = r"[0-9a-f]{2}"

50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80

def MatchNameComponent(key, name_list, case_sensitive=True):
  """Try to match a name against a list.

  This function will try to match a name like test1 against a list
  like C{['test1.example.com', 'test2.example.com', ...]}. Against
  this list, I{'test1'} as well as I{'test1.example'} will match, but
  not I{'test1.ex'}. A multiple match will be considered as no match
  at all (e.g. I{'test1'} against C{['test1.example.com',
  'test1.example.org']}), except when the key fully matches an entry
  (e.g. I{'test1'} against C{['test1', 'test1.example.com']}).

  @type key: str
  @param key: the name to be searched
  @type name_list: list
  @param name_list: the list of strings against which to search the key
  @type case_sensitive: boolean
  @param case_sensitive: whether to provide a case-sensitive match

  @rtype: None or str
  @return: None if there is no match I{or} if there are multiple matches,
      otherwise the element from the list which matches

  """
  # An exact (case-sensitive) hit always wins
  if key in name_list:
    return key

  if case_sensitive:
    re_flags = 0
  else:
    re_flags = re.IGNORECASE
    key = key.upper()

  # The key must be followed by the end of the name or by a dot, i.e. it has
  # to cover whole name components
  name_re = re.compile(r"^%s(\..*)?$" % re.escape(key), re_flags)

  names_filtered = [name for name in name_list
                    if name_re.match(name) is not None]

  if case_sensitive:
    string_matches = []
  else:
    # Entries equal to the key when case is ignored
    string_matches = [name for name in names_filtered if key == name.upper()]

  if len(string_matches) == 1:
    return string_matches[0]
  if len(names_filtered) == 1:
    return names_filtered[0]

  return None


100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
def _DnsNameGlobHelper(match):
  """Helper function for L{DnsNameGlobPattern}.

  Translates one matched piece of a globbing pattern into the corresponding
  regular expression fragment.

  @param match: match object whose whole match (group 0) is translated
  @rtype: string
  @return: regular expression fragment

  """
  piece = match.group(0)

  # Wildcards never cross a dot; everything else is taken literally
  wildcards = {
    "*": "[^.]*",
    "?": "[^.]",
    }

  if piece in wildcards:
    return wildcards[piece]

  return re.escape(piece)


def DnsNameGlobPattern(pattern):
  """Generates regular expression from DNS name globbing pattern.

  The globbing pattern (e.g. C{*.site}) is translated piece by piece into a
  regular expression. Neither escape sequences nor ranges (e.g. [a-z]) are
  supported.

  The resulting expression is anchored at the leftmost part of the name. An
  asterisk (*) stands for any run of characters other than the dot (.)
  separating DNS name parts, a question mark (?) for exactly one such
  character.

  @type pattern: string
  @param pattern: DNS name globbing pattern
  @rtype: string
  @return: Regular expression

  """
  # Split the pattern into wildcards and literal runs and translate each
  translated = re.sub(r"\*|\?|[^*?]*", _DnsNameGlobHelper, pattern)

  return r"^%s(\..*)?$" % translated


135
def FormatUnit(value, units, roman=False):
  """Formats an incoming number of MiB with the appropriate unit.

  @type value: int
  @param value: integer representing the value in MiB (1048576)
  @type units: char
  @param units: the type of formatting we should do:
      - 'h' for automatic scaling
      - 'm' for MiBs
      - 'g' for GiBs
      - 't' for TiBs
  @type roman: boolean
  @param roman: passed through unchanged to L{compat.RomanOrRounded}
  @rtype: str
  @return: the formatted value (with suffix)
  @raise errors.ProgrammerError: if C{units} is not one of the values above

  """
  if units not in ("m", "g", "t", "h"):
    raise errors.ProgrammerError("Invalid unit specified '%s'" % str(units))

  # A unit letter is only appended when the scale is chosen automatically
  autoscale = (units == "h")

  if units == "m" or (autoscale and value < 1024):
    (scaled, digits, letter) = (value, 0, "M")
  elif units == "g" or (autoscale and value < (1024 * 1024)):
    (scaled, digits, letter) = (float(value) / 1024, 1, "G")
  else:
    (scaled, digits, letter) = (float(value) / 1024 / 1024, 1, "T")

  if autoscale:
    suffix = letter
  else:
    suffix = ""

  return "%s%s" % (compat.RomanOrRounded(scaled, digits, roman), suffix)
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190


def ParseUnit(input_string):
  """Tries to extract number and scale from the given string.

  Input must be in the format C{NUMBER+ [DOT NUMBER+] SPACE*
  [UNIT]}. If no unit is specified, it defaults to MiB. Return value
  is always an int in MiB, rounded up to the next multiple of 4.

  @param input_string: value to parse; it is converted using C{str()} first
  @rtype: int
  @return: the parsed size in MiB
  @raise errors.UnitParseError: if the number is malformed or the unit is
      not known

  """
  m = _PARSEUNIT_REGEX.match(str(input_string))
  if not m:
    raise errors.UnitParseError("Invalid format")

  (number, unit) = m.groups()

  try:
    value = float(number)
  except ValueError:
    # The regexp lets through any mix of digits and dots (e.g. "1.2.3" or
    # "."), which float() refuses; report it like any other format error
    raise errors.UnitParseError("Invalid format")

  if unit:
    lcunit = unit.lower()
  else:
    lcunit = "m"

  if lcunit in ("m", "mb", "mib"):
    # Value already in MiB
    pass

  elif lcunit in ("g", "gb", "gib"):
    value *= 1024

  elif lcunit in ("t", "tb", "tib"):
    value *= 1024 * 1024

  else:
    raise errors.UnitParseError("Unknown unit: %s" % unit)

  # Make sure we round up
  if int(value) < value:
    value += 1

  # Round up to the next multiple of 4
  value = int(value)
  if value % 4:
    value += 4 - value % 4

  return value


def ShellQuote(value):
  """Quotes shell argument according to POSIX.

  Values made up solely of characters accepted by C{_SHELL_UNQUOTED_RE} are
  returned unchanged, everything else is wrapped in single quotes.

  @type value: str
  @param value: the argument to be quoted
  @rtype: str
  @return: the quoted value

  """
  if not _SHELL_UNQUOTED_RE.match(value):
    # An embedded single quote closes the quoted string, adds a
    # backslash-escaped quote and then reopens the quoted string
    return "'%s'" % value.replace("'", "'\\''")

  return value


def ShellQuoteArgs(args):
  """Quotes a list of shell arguments.

  Every argument is passed through L{ShellQuote}.

  @type args: list
  @param args: list of arguments to be quoted
  @rtype: str
  @return: the quoted arguments concatenated with spaces

  """
  quoted = []
  for arg in args:
    quoted.append(ShellQuote(arg))

  return " ".join(quoted)


245
246
247
248
249
250
251
def ShellCombineCommands(cmdlist):
  """Out of a list of shell commands construct a single one.

  Each command is quoted using L{ShellQuoteArgs} and the results are chained
  with C{&&}.

  @param cmdlist: the commands, each one a list of arguments
  @rtype: list
  @return: argument list running the combined command via C{/bin/sh -c}

  """
  quoted = [ShellQuoteArgs(cmd) for cmd in cmdlist]

  return ["/bin/sh", "-c", " && ".join(quoted)]


252
253
254
255
256
257
class ShellWriter:
  """Helper class to write scripts with indentation.

  """
  #: Text written once per indentation level
  INDENT_STR = "  "

  def __init__(self, fh, indent=True):
    """Initializes this class.

    @param fh: object with a C{write} method receiving the output
    @type indent: boolean
    @param indent: whether lines are prefixed with the current indentation

    """
    self._fh = fh
    self._indent_enabled = indent
    self._indent = 0

  def IncIndent(self):
    """Increase indentation level by 1.

    """
    self._indent += 1

  def DecIndent(self):
    """Decrease indentation level by 1.

    """
    assert self._indent > 0
    self._indent -= 1

  def Write(self, txt, *args):
    """Write line to output file.

    @type txt: string
    @param txt: the line; used as a format string only if C{args} are given
    @param args: values interpolated into C{txt} using the C{%} operator

    """
    assert self._indent >= 0

    if args:
      line = txt % args
    else:
      # Written verbatim so that literal "%" characters need no escaping
      line = txt

    if line and self._indent_enabled:
      # Indent only if there's something on the line
      self._fh.write(self._indent * self.INDENT_STR)

    self._fh.write(line)

    self._fh.write("\n")


def GenerateSecret(numbytes=20):
  """Generates a random secret.

  This will generate a pseudo-random secret returning an hex string
  (so that it can be used where an ASCII string is needed).

  @param numbytes: the number of bytes which will be represented by the returned
      string (defaulting to 20, the length of a SHA1 hash)
  @rtype: str
  @return: an hex representation of the pseudo-random sequence

  """
  # The "hex" string codec only exists on Python 2; formatting the octets by
  # hand gives the same lower-case result on Python 2 and 3. bytearray yields
  # integers on both.
  return "".join(["%02x" % octet for octet in bytearray(os.urandom(numbytes))])


314
315
def _MakeMacAddrRegexp(octets):
  """Builds a regular expression for verifying MAC addresses.

  @type octets: integer
  @param octets: How many octets to expect (1-6)
  @return: Compiled regular expression

  """
  assert octets > 0
  assert octets <= 6

  # \Z instead of "$": the latter also matches just before a trailing
  # newline, which would let "aa:bb:...\n" pass as a valid address
  return re.compile(r"^%s\Z" % ":".join([_MAC_ADDR_OCTET_RE] * octets),
                    re.I)


#: Compiled regular expression for a full MAC address (six colon-separated
#: octets)
_MAC_CHECK_RE = _MakeMacAddrRegexp(6)

#: Compiled regular expression for half a MAC address (three colon-separated
#: octets)
_MAC_PREFIX_CHECK_RE = _MakeMacAddrRegexp(3)


def _MacAddressCheck(check_re, mac, msg):
  """Checks a MAC address using a regular expression.

  @param check_re: Compiled regular expression as returned by C{re.compile}
  @type mac: string
  @param mac: MAC address to be validated
  @type msg: string
  @param msg: Error message (%s will be replaced with MAC address)
  @rtype: string
  @return: the address converted to lower case
  @raise errors.OpPrereqError: if the address does not match C{check_re}

  """
  if check_re.match(mac) is None:
    raise errors.OpPrereqError(msg % mac, errors.ECODE_INVAL)

  return mac.lower()
350

351
352
353
354
355
356
357
358
359
360
361
362

def NormalizeAndValidateMac(mac):
  """Normalizes and check if a MAC address is valid and contains six octets.

  Checks whether the supplied MAC address is formally correct. Accepts
  colon-separated format only. Normalize it to all lower case.

  @type mac: string
  @param mac: MAC address to be validated
  @rtype: string
  @return: Normalized and validated MAC address
  @raise errors.OpPrereqError: If the MAC address isn't valid

  """
  return _MacAddressCheck(_MAC_CHECK_RE, mac, "Invalid MAC address '%s'")
366

367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382

def NormalizeAndValidateThreeOctetMacPrefix(mac):
  """Normalizes a potential MAC address prefix (three octets).

  The supplied string must consist of exactly three colon-separated octets;
  a valid prefix is returned converted to all lower case.

  @type mac: string
  @param mac: Prefix to be validated
  @rtype: string
  @return: Normalized and validated prefix
  @raise errors.OpPrereqError: If the MAC address prefix isn't valid

  """
  error_template = "Invalid MAC address prefix '%s'"

  return _MacAddressCheck(_MAC_PREFIX_CHECK_RE, mac, error_template)
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404


def SafeEncode(text):
  """Return a 'safe' version of a source string.

  This function mangles the input string and returns a version that
  should be safe to display/encode as ASCII. To this end, text input is
  first converted to ASCII using the 'backslashreplace' error handler,
  which gets rid of any non-ASCII chars, and then every octet is processed
  through a loop modelled on the string repr sources in python: tab,
  newline and carriage return become C{\\t}, C{\\n} and C{\\r}, other
  non-printable octets become C{\\xNN}. Single quotes and backslashes are
  deliberately left alone (unlike string_escape, which escapes them too
  and is not stable, i.e. string_escape(string_escape(x)) !=
  string_escape(x)).

  @type text: str or unicode
  @param text: input data (a byte string or a text string)
  @rtype: str
  @return: a safe version of text

  """
  if isinstance(text, bytes):
    # Byte strings (plain str on Python 2) are handled octet by octet below
    raw = text
  else:
    raw = text.encode("ascii", "backslashreplace")

  resu = []
  # bytearray yields integer octets on both Python 2 and Python 3
  for c in bytearray(raw):
    if c == 0x09:
      resu.append(r"\t")
    elif c == 0x0a:
      resu.append(r"\n")
    elif c == 0x0d:
      resu.append(r"\r")
    elif c < 32 or c >= 127: # non-printable
      resu.append("\\x%02x" % c)
    else:
      resu.append(chr(c))

  return "".join(resu)


def UnescapeAndSplit(text, sep=","):
  r"""Split and unescape a string based on a given separator.

  This function splits a string based on a separator where the
  separator itself can be escaped in order to be part of an
  element. The escaping rules are (assuming comma being the
  separator):
    - a plain , separates the elements
    - a sequence \\, (double backslash plus comma) is handled as a
      backslash plus a separator comma
    - a sequence \, (backslash plus comma) is handled as a
      non-separator comma

  @type text: string
  @param text: the string to split
  @type sep: string
  @param sep: the separator
  @rtype: list
  @return: a list of strings

  """
  rlist = []
  # Element still waiting for its continuation because it ended in an
  # escaped separator
  pending = None

  # we split the string by sep (with no escaping at this stage)
  for piece in text.split(sep):
    if pending is not None:
      piece = pending + sep + piece
      pending = None

    # An odd number of trailing backslashes means the separator which
    # followed was escaped, so the next piece belongs to this element
    num_b = len(piece) - len(piece.rstrip("\\"))
    if num_b % 2 == 1:
      pending = piece
    else:
      rlist.append(piece)

  if pending is not None:
    # Dangling backslash at the very end of the input; kept as it is
    rlist.append(pending)

  # finally, replace backslash-something with something
  return [re.sub(r"\\(.)", r"\1", v) for v in rlist]


466
467
468
469
470
471
472
473
474
475
476
477
478
def EscapeAndJoin(slist, sep=","):
  """Encode a list in a way parsable by UnescapeAndSplit.

  Backslashes within the elements are doubled and occurrences of the
  separator are prefixed with a backslash before the elements are joined.

  @type slist: list of strings
  @param slist: the strings to be encoded
  @type sep: string
  @param sep: the separator
  @rtype: string
  @return: the encoding of the list as a string

  """
  # Plain string replacement on purpose: the separator is arbitrary text and
  # must not be interpreted as (part of) a regular expression
  return sep.join([v.replace("\\", "\\\\").replace(sep, "\\" + sep)
                   for v in slist])


479
480
481
482
483
484
485
486
487
488
def CommaJoin(names):
  """Nicely join a set of identifiers.

  Each item is converted using C{str()}; the results are separated by a
  comma followed by a space.

  @param names: set, list or tuple
  @return: a string with the formatted results

  """
  return ", ".join(map(str, names))


489
def FormatTime(val, usecs=None):
  """Formats a time value.

  @type val: float or None
  @param val: Timestamp as returned by time.time() (seconds since Epoch,
    1970-01-01 00:00:00 UTC)
  @type usecs: int or None
  @param usecs: if not None, appended to the result as six-digit microseconds
  @return: a string value or N/A if we don't have a valid timestamp

  """
  if not isinstance(val, (int, float)):
    # This covers None as well
    return "N/A"

  # Spelled-out form of "%F %T"; those two shorthands work on Linux but are
  # not guaranteed on all platforms
  result = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(val))

  if usecs is not None:
    result += ".%06d" % usecs

  return result
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554


def FormatSeconds(secs):
  """Formats seconds for easier reading.

  The value is rounded to whole seconds and broken down into days, hours,
  minutes and seconds; leading units which are zero are left out.

  @type secs: number
  @param secs: Number of seconds
  @rtype: string
  @return: Formatted seconds (e.g. "2d 9h 19m 49s")

  """
  remaining = round(secs, 0)
  chunks = []

  # Zero and negative values are printed as plain seconds
  if remaining > 0:
    for (suffix, span) in (("d", 24 * 60 * 60), ("h", 60 * 60), ("m", 60)):
      (count, remaining) = divmod(remaining, span)
      # Once a larger unit was printed all smaller ones follow, even if zero
      if chunks or count:
        chunks.append("%d%s" % (count, suffix))

  chunks.append("%ds" % remaining)

  return " ".join(chunks)


class LineSplitter:
  """Splits data chunks into lines separated by newline.

  Instances provide a file-like interface.

  """
  def __init__(self, line_fn, *args):
    """Initializes this class.

    @type line_fn: callable
    @param line_fn: Function called for each line, first parameter is line
    @param args: Extra arguments for L{line_fn}

    """
    assert callable(line_fn)

    if args:
      # A closure keeps the line as first positional argument, followed by
      # the extra arguments
      self._line_fn = \
        lambda line: line_fn(line, *args) # pylint: disable=W0142
    else:
      self._line_fn = line_fn

    # Complete lines not yet handed to the callback
    self._lines = collections.deque()
    # Data received after the last newline
    self._buffer = ""

  def write(self, data):
    """Adds data, queueing all lines completed by it.

    """
    parts = (self._buffer + data).split("\n")
    # Whatever follows the last newline is an incomplete line
    self._buffer = parts.pop()
    self._lines.extend(parts)

  def flush(self):
    """Passes all queued complete lines to the callback.

    """
    while self._lines:
      self._line_fn(self._lines.popleft().rstrip("\r\n"))

  def close(self):
    """Flushes complete lines and emits a trailing partial line, if any.

    """
    self.flush()
    if self._buffer:
      # Reset the buffer first so that calling close() again doesn't report
      # the same partial line a second time
      remainder = self._buffer
      self._buffer = ""
      self._line_fn(remainder)
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615


def IsValidShellParam(word):
  """Verifies if the given word is safe from the shell's p.o.v.

  A safe word can be passed to a command via the shell with the certainty
  that it doesn't alter the command line and reaches the actual command
  unchanged.

  The check is overly restrictive on purpose, in order to be on the safe
  side.

  @type word: str
  @param word: the word to check
  @rtype: boolean
  @return: True if the word is 'safe'

  """
  return _SHELLPARAM_REGEX.match(word) is not None


def BuildShellCmd(template, *args):
  """Build a safe shell command line from the given arguments.

  Every argument is verified using L{IsValidShellParam} (i.e. it must not
  contain shell metacharacters). If all of them pass, the result of
  template % args is returned.

  @type template: str
  @param template: the string holding the template for the
      string formatting
  @param args: the values to be checked and interpolated into the template
  @rtype: str
  @return: the expanded command line
  @raise errors.ProgrammerError: for the first argument failing the check

  """
  for candidate in args:
    if IsValidShellParam(candidate):
      continue

    raise errors.ProgrammerError("Shell argument '%s' contains"
                                 " invalid characters" % candidate)

  return template % args
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641


def FormatOrdinal(value):
  """Formats a number as an ordinal in the English language.

  E.g. the number 1 becomes "1st", 22 becomes "22nd", 112 becomes "112th".

  @type value: integer
  @param value: Number
  @rtype: string
  @return: the number followed by its ordinal suffix

  """
  # Numbers ending in 11 to 19 always take "th", in every hundred (11th,
  # 111th, 212th, ...); otherwise the last digit decides
  if 10 < value % 100 < 20:
    suffix = "th"
  else:
    suffix = {1: "st", 2: "nd", 3: "rd"}.get(value % 10, "th")

  return "%s%s" % (value, suffix)
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664


def Truncate(text, length):
  """Truncate string and add ellipsis if needed.

  Values which aren't strings are converted using C{str()} first. Text
  longer than C{length} is cut so that, including the ellipsis, it is
  exactly C{length} characters long.

  @type text: string
  @param text: Text
  @type length: integer
  @param length: Desired length
  @rtype: string
  @return: Truncated text

  """
  assert length > len(_ASCII_ELLIPSIS)

  # Serialize if necessary
  if isinstance(text, basestring):
    rendered = text
  else:
    rendered = str(text)

  if len(rendered) > length:
    keep = length - len(_ASCII_ELLIPSIS)
    return rendered[:keep] + _ASCII_ELLIPSIS

  return rendered
665
666
667
668
669
670
671
672
673


def FilterEmptyLinesAndComments(text):
  """Filters empty lines and comments from a line-based string.

  Whitespace is also removed from the beginning and end of all lines.

  @type text: string
  @param text: Input string
  @rtype: list
  @return: the remaining lines, in their original order

  """
  stripped = (line.strip() for line in text.splitlines())

  # Ignore empty lines and comments; a "#" further into a line doesn't make
  # it a comment
  return [line for line in stripped
          if line and not line.startswith("#")]
680
681
682
683
684
685
686
687
688
689
690
691


def FormatKeyValue(data):
  """Formats a dictionary as "key=value" parameters.

  The items are sorted, giving the result a stable order.

  @type data: dict
  @param data: the dictionary to format
  @rtype: list of string
  @return: one "key=value" string per dictionary item

  """
  formatted = []

  for pair in sorted(data.items()):
    formatted.append("%s=%s" % pair)

  return formatted