The Assimilation Project  based on Assimilation version 1.1.7.1474836767
graphnodes.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # vim: smartindent tabstop=4 shiftwidth=4 expandtab number colorcolumn=100
3 #
4 # This file is part of the Assimilation Project.
5 #
6 # Copyright (C) 2011, 2012 - Alan Robertson <alanr@unix.sh>
7 #
8 # The Assimilation software is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # The Assimilation software is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with the Assimilation Project software. If not, see http://www.gnu.org/licenses/
20 #
21 #
22 ''' This module defines the classes for most of our CMA nodes ... '''
23 # Pylint is nuts here...
24 # pylint: disable=C0411
25 import sys, re, time, hashlib, netaddr, socket
26 from py2neo import neo4j
27 from consts import CMAconsts
28 from store import Store
29 from cmadb import CMAdb
30 from AssimCtypes import ADDR_FAMILY_IPV4, ADDR_FAMILY_IPV6, ADDR_FAMILY_802
31 from AssimCclasses import pyNetAddr, pyConfigContext
32 
def nodeconstructor(**properties):
    '''A generic class-like constructor that knows our class name is stored as nodetype
    It's a form of "factory" for our database classes

    properties - keyword arguments describing the node; must include 'nodetype',
                 which names a class present in GraphNode.classmap.
    Returns whatever Store.callconstructor returns for that class.
    Raises KeyError if 'nodetype' is missing or is not a key of GraphNode.classmap.
    '''
    realcls = GraphNode.classmap[str(properties['nodetype'])]
    # callconstructor is kinda cool - it figures out how to correctly call the constructor
    # with the values in 'properties' as arguments
    return Store.callconstructor(realcls, properties)
42 
def RegisterGraphClass(classtoregister):
    '''Register the given class as being a Graph class so we can
    map the class name to the class object.
    This is intended to be used as a decorator.

    The class is stored in GraphNode.classmap under its __name__ and is
    returned unchanged.
    '''
    GraphNode.classmap[classtoregister.__name__] = classtoregister
    return classtoregister
50 
class GraphNode(object):
    '''
    GraphNode is the base class for all our 'normal' graph nodes.

    Class attributes:
        REESC, REQUOTE - compiled patterns used by _JSONesc to escape strings
        classmap       - class name -> class object map (see RegisterGraphClass)
    '''
    REESC = re.compile(r'\\')
    REQUOTE = re.compile(r'"')
    classmap = {}

    @staticmethod
    def factory(**kwargs):
        'A factory "constructor" function - acts like a universal constructor for GraphNode types'
        return nodeconstructor(**kwargs)

    # NOTE(review): the 'def' line of this method was missing from the listing;
    # the name clean_graphnodes is a reconstruction - confirm against the real file.
    @staticmethod
    def clean_graphnodes():
        'Invalidate any persistent objects that might become invalid when resetting the database'
        pass

    def __init__(self, domain, time_create_ms=None, time_create_iso8601=None):
        '''Abstract Graph node base class

        domain              - stored as self.domain
        time_create_ms      - creation time in milliseconds; defaults to the current time
        time_create_iso8601 - creation time as 'YYYY-MM-DD HH:MM:SS'; defaults to the
                              current UTC time
        '''
        self.domain = domain
        self.nodetype = self.__class__.__name__
        self._baseinitfinished = False
        if time_create_ms is None:
            time_create_ms = int(round(time.time()*1000))
        if time_create_iso8601 is None:
            time_create_iso8601 = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
        self.time_create_iso8601 = time_create_iso8601
        self.time_create_ms = time_create_ms

    # The 'def' line was missing from the listing; the name comes from the error
    # message below and the zero-argument call in initclasstypeobj.
    @staticmethod
    def __meta_keyattrs__():
        'Return our key attributes in order of significance'
        raise NotImplementedError('Abstract base class function __meta_keyattrs__')

    @classmethod
    def __meta_labels__(cls):
        '''Return the default set of labels which should be put on our objects when created

        One 'Class_<name>' label is produced for cls and then for each of its direct
        bases, in order, stopping as soon as GraphNode itself is reached.
        '''
        classes = [cls]
        classes.extend(cls.__bases__)
        labels = []
        for c in classes:
            name = c.__name__
            if name == 'GraphNode':
                break
            labels.append('Class_' + name)
        return labels

    def post_db_init(self):
        '''Set node creation time'''
        if not self._baseinitfinished:
            self._baseinitfinished = True

    def update_attributes(self, other):
        '''Update our attributes from another node of the same type

        Every attribute in other.__dict__ that we lack, or whose value differs from
        ours, is copied onto self.  Returns self.
        Raises ValueError if the two nodes have different nodetype values.
        '''
        if other.nodetype != self.nodetype:
            raise ValueError('Cannot update attributes from incompatible nodes (%s vs %s)'
                             % (self.nodetype, other.nodetype))
        for attr in other.__dict__.keys():
            if not hasattr(self, attr) or getattr(self, attr) != getattr(other, attr):
                setattr(self, attr, getattr(other, attr))
        return self

    def __str__(self):
        'Default routine for printing GraphNodes'
        result = '%s({' % self.__class__.__name__
        comma = ''
        for attr in Store.safe_attrs(self):
            result += '%s%s = %s'% (comma, attr, str(getattr(self, attr)))
            comma = ",\n "
        if Store.has_node(self):
            if Store.is_abstract(self):
                result += comma + 'HasNode = "abstract"'
            else:
                result += (comma + 'HasNode = %d' %Store.id(self))

        result += "\n})"
        return result

    # pylint R0911: Too many return statements
    # pylint: disable=R0911
    def get(self, attrstring, valueifnotfound=None):
        '''Implement potentially deep attribute value lookups through JSON strings

        attrstring      - 'attr', 'attr[idx]', or either of those followed by
                          '.rest'; 'rest' is looked up inside the attribute value
                          via pyConfigContext.deepget
        valueifnotfound - returned when the attribute, or the array index, is
                          not usable
        '''
        try:
            (prefix, suffix) = attrstring.split('.', 1)
        except ValueError:
            # No '.' in the string: it is a plain (possibly indexed) attribute name
            suffix = None
            prefix = attrstring
        if not hasattr(self, prefix):
            if not prefix.endswith(']'):
                return valueifnotfound
            # Probably an array index
            # Note that very similar code exists in AssimCclasses for pyConfigContext
            # deepget member function
            allbutrbracket = prefix[:-1]
            try:
                (preprefix, idx) = allbutrbracket.split('[', 1)
            except ValueError:
                return valueifnotfound
            if not hasattr(self, preprefix):
                return valueifnotfound
            try:
                arraypart = getattr(self, preprefix)
                idx = int(idx) # Possible ValueError
                arrayvalue = arraypart[idx] # possible IndexError or TypeError
            except (TypeError, IndexError, ValueError):
                return valueifnotfound
            if suffix is None:
                return arrayvalue
            prefixvalue = arrayvalue
        else:
            prefixvalue = getattr(self, prefix)
            if suffix is None:
                return prefixvalue
        # OK. We're in the more complicated case...
        # Our expectation is that the prefixvalue is JSON...
        jsonstruct = pyConfigContext(init=prefixvalue)
        if jsonstruct is None:
            # Should we throw an exception instead?
            return valueifnotfound
        return jsonstruct.deepget(suffix, valueifnotfound)

    def JSON(self, includemap=None, excludemap=None):
        '''Output this object according to JSON rules. We take advantage
        of the fact that Neo4j restricts what kind of objects we can
        have as Node properties.

        includemap - if not None, only attributes contained in it are output
        excludemap - if not None, attributes contained in it are omitted
        Attributes are output in sorted name order.
        '''
        attrstodump = []
        for attr in Store.safe_attrs(self):
            if includemap is not None and attr not in includemap:
                continue
            if excludemap is not None and attr in excludemap:
                continue
            attrstodump.append(attr)
        # sorted() returns the ordered list; list.sort() sorts in place and returns
        # None, so it cannot be iterated over directly.
        pairs = ['"%s": %s' % (attr, GraphNode._JSONelem(getattr(self, attr)))
                 for attr in sorted(attrstodump)]
        return '{' + ','.join(pairs) + '}'

    @staticmethod
    def _JSONelem(value):
        '''Return the value of an element suitable for JSON output

        Strings are quoted and escaped, booleans become true/false, None becomes
        null, lists and tuples become JSON arrays; anything else is passed
        through str().
        '''
        # type(u'') is 'unicode' on Python 2 and 'str' on Python 3
        if isinstance(value, (str, type(u''))):
            return '"%s"' % GraphNode._JSONesc(value)
        if isinstance(value, bool):
            return 'true' if value else 'false'
        if value is None:
            return 'null'
        if isinstance(value, (list, tuple)):
            return '[' + ','.join([GraphNode._JSONelem(elem) for elem in value]) + ']'
        return str(value)

    @staticmethod
    def _JSONesc(stringthing):
        '''Escape this string according to JSON string escaping rules

        Only backslashes and double quotes are escaped here.
        '''
        stringthing = GraphNode.REESC.sub(r'\\\\', stringthing)
        stringthing = GraphNode.REQUOTE.sub(r'\"', stringthing)
        return stringthing

    @staticmethod
    def initclasstypeobj(store, nodetype):
        '''Initialize things for our "nodetype"
        This involves
         - Ensuring that there's an index for this class
         - Caching the class that goes with this nodetype
         - setting up all of our IS_A objects, including the root object if necessary,
         - updating the store's uniqueindexmap[nodetype]
         - updating the store's classkeymap[nodetype]
        This should eliminate the need to do any of these things for any class.
        '''
        ourclass = GraphNode.classmap[nodetype]
        if nodetype not in store.classkeymap:
            store.uniqueindexmap[nodetype] = True
            keys = ourclass.__meta_keyattrs__()
            ckm_entry = {'kattr': keys[0], 'index': nodetype}
            if len(keys) > 1:
                ckm_entry['vattr'] = keys[1]
            else:
                ckm_entry['value'] = 'None'
            store.classkeymap[nodetype] = ckm_entry
        # NOTE(review): the listing lost its indentation; this call is assumed to run
        # on every invocation (not only on first registration) - confirm.
        store.db.legacy.get_or_create_index(neo4j.Node, nodetype)
244 
245 
def add_an_array_item(currarray, itemtoadd):
    '''Function to add an item to an array of strings (like for roles)

    currarray - the current list, or None; a list equal to [''] is treated as empty
    itemtoadd - a string, or a tuple/list of items to add one by one
    Returns the resulting list. An existing list is appended to in place, and
    an item already present is not added a second time.
    '''
    if currarray is not None and len(currarray) == 1 and currarray[0] == '':
        currarray = []
    if isinstance(itemtoadd, (tuple, list)):
        for item in itemtoadd:
            currarray = add_an_array_item(currarray, item)
        return currarray
    # type(u'') is 'unicode' on Python 2 and 'str' on Python 3
    assert isinstance(itemtoadd, (str, type(u'')))
    if currarray is None:
        currarray = [itemtoadd]
    elif itemtoadd not in currarray:
        # Test the *item* for membership; testing the list against itself
        # is always true and lets duplicates in.
        currarray.append(itemtoadd)
    return currarray
260 
def delete_an_array_item(currarray, itemtodel):
    '''Function to delete an item from an array of strings (like for roles)

    currarray - the current list (None is treated as an empty list)
    itemtodel - a string, or a tuple/list of items to delete one by one
    Returns the resulting list; an empty result is returned as [''].
    An existing list is modified in place.
    '''
    if isinstance(itemtodel, (tuple, list)):
        for item in itemtodel:
            currarray = delete_an_array_item(currarray, item)
        return currarray
    # type(u'') is 'unicode' on Python 2 and 'str' on Python 3
    assert isinstance(itemtodel, (str, type(u'')))
    if currarray is None:
        currarray = []
    if itemtodel is not None and itemtodel in currarray:
        # list.remove() works in place and returns None - do not rebind to its result
        currarray.remove(itemtodel)
    if len(currarray) == 0:
        currarray = [''] # Limitation of Neo4j
    return currarray
273 
274 
# NOTE(review): the 'class' line was missing from the listing; the name BPRules is a
# reconstruction (the GraphNode base follows from the __init__ call) - confirm.
@RegisterGraphClass
class BPRules(GraphNode):
    '''Class defining best practice rules'''

    def __init__(self, bp_class, json, rulesetname):
        '''Store the rule identification and its JSON text; nodes are always created
        in the 'metadata' domain.  The JSON is also parsed into a pyConfigContext.
        '''
        GraphNode.__init__(self, domain='metadata')
        self.bp_class = bp_class
        self.rulesetname = rulesetname
        self.json = json
        self._jsonobj = pyConfigContext(json)

    def jsonobj(self):
        'Return the JSON object corresponding to our rules'
        return self._jsonobj

    # The 'def' line was missing from the listing; restored to match the
    # __meta_keyattrs__ static method declared by GraphNode.
    @staticmethod
    def __meta_keyattrs__():
        'Return our key attributes in order of significance'
        return ['bp_class', 'rulesetname']
294 
# The 'class' line was missing from the listing; the name BPRuleSet is the one
# __init__ passes to load_cypher_node below.
@RegisterGraphClass
class BPRuleSet(GraphNode):
    '''Class defining best practice rule sets'''
    def __init__(self, rulesetname, basisrules=None):
        '''Create a rule set in the 'metadata' domain.

        When basisrules is given and Store.is_abstract(self) is true, the rule set
        named by basisrules is loaded and we are related to it with REL_basedon.
        '''
        GraphNode.__init__(self, domain='metadata')
        self.rulesetname = rulesetname
        self.basisrules = basisrules
        if self.basisrules is None or not Store.is_abstract(self):
            return
        query = CMAconsts.QUERY_RULESET_RULES
        parent = CMAdb.store.load_cypher_node(query, BPRuleSet, params={'name': basisrules})
        CMAdb.store.relate_new(self, CMAconsts.REL_basedon, parent)

    # The 'def' line was missing from the listing; restored to match the
    # __meta_keyattrs__ static method declared by GraphNode.
    @staticmethod
    def __meta_keyattrs__():
        'Return our key attributes in order of significance'
        return ['rulesetname']
312 
# NOTE(review): the 'class' line was missing from the listing; the name NICNode is a
# reconstruction (the GraphNode base follows from the __init__ call) - confirm.
@RegisterGraphClass
class NICNode(GraphNode):
    'An object that represents a NIC - characterized by its MAC address'
    def __init__(self, domain, macaddr, ifname=None, json=None):
        '''Validate the MAC address and record the NIC's properties.

        macaddr - anything pyNetAddr accepts; must be of type ADDR_FAMILY_802
        ifname  - optional; stored as self.ifname when given
        json    - optional JSON text; when given, its 'carrier', 'duplex', 'MTU',
                  'operstate' and 'speed' entries are copied to like-named attributes
        Raises ValueError if macaddr is not a legal MAC address.
        '''
        GraphNode.__init__(self, domain=domain)
        mac = pyNetAddr(macaddr)
        if mac is None or mac.addrtype() != ADDR_FAMILY_802:
            raise ValueError('Not a legal MAC address [%s]' % macaddr)
        self.macaddr = str(mac)
        if ifname is not None:
            self.ifname = ifname
        if json is not None:
            self.json = json
            self._json = pyConfigContext(json)
            for attr in ('carrier', 'duplex', 'MTU', 'operstate', 'speed'):
                if attr in self._json:
                    setattr(self, attr, self._json[attr])
        if not hasattr(self, 'OUI'):
            oui = self.mac_to_oui(self.macaddr)
            if oui is not None:
                self.OUI = oui

    @staticmethod
    def mac_to_oui(macaddr):
        '''Convert a MAC address to an OUI organization string.

        The netaddr registry is tried first.  If the address is not registered there,
        the first 8 characters of the address are looked up in CMAdb.io.config['OUI'];
        None is returned when that lookup also finds nothing.
        '''
        try:
            # Pylint is confused about the netaddr.EUI.oui.registration return result...
            # pylint: disable=E1101
            return str(netaddr.EUI(macaddr).oui.registration().org)
        except netaddr.NotRegisteredError:
            prefix = str(macaddr)[0:8]
            return CMAdb.io.config['OUI'][prefix] if prefix in CMAdb.io.config['OUI'] else None

    # The 'def' line was missing from the listing; restored to match the
    # __meta_keyattrs__ static method declared by GraphNode.
    @staticmethod
    def __meta_keyattrs__():
        'Return our key attributes in decreasing order of significance'
        return ['macaddr', 'domain']
350 
351 
# The 'class' line was missing from the listing; the name IPaddrNode is the one
# used inside __init__ (IPaddrNode.StoreHostNames).
@RegisterGraphClass
class IPaddrNode(GraphNode):
    '''An object that represents a v4 or v6 IP address without a port - characterized by its
    IP address. They are always represented in the database in ipv6 format.
    '''
    # When true, the constructor tries a reverse lookup to fill in self.hostname
    StoreHostNames = True
    def __init__(self, domain, ipaddr, cidrmask='unknown'):
        '''Construct an IPaddrNode - validating our parameters

        ipaddr   - a string or a pyNetAddr; IPv4 addresses are converted to IPv6 form
                   and the port is set to zero
        cidrmask - stored as given
        Raises ValueError if ipaddr is neither an IPv4 nor an IPv6 address.
        '''
        GraphNode.__init__(self, domain=domain)
        if isinstance(ipaddr, str) or isinstance(ipaddr, unicode):
            ipaddrout = pyNetAddr(str(ipaddr))
        else:
            ipaddrout = ipaddr
        if isinstance(ipaddrout, pyNetAddr):
            addrtype = ipaddrout.addrtype()
            if addrtype == ADDR_FAMILY_IPV4:
                ipaddrout = ipaddrout.toIPv6()
            elif addrtype != ADDR_FAMILY_IPV6:
                raise ValueError('Invalid network address type for IPaddrNode constructor: %s'
                                 % str(ipaddrout))
            ipaddrout.setport(0)
        else:
            raise ValueError('Invalid address type for IPaddrNode constructor: %s type(%s)'
                             % (str(ipaddr), type(ipaddr)))
        self.ipaddr = unicode(str(ipaddrout))
        self.cidrmask = cidrmask
        if IPaddrNode.StoreHostNames and not hasattr(self, 'hostname'):
            ip = repr(pyNetAddr(ipaddr))
            try:
                self.hostname = socket.gethostbyaddr(ip)[0]
            except (socket.herror, socket.gaierror):
                # The host name is best-effort: a failed lookup just leaves it unset
                return

    # The 'def' line was missing from the listing; restored to match the
    # __meta_keyattrs__ static method declared by GraphNode.
    @staticmethod
    def __meta_keyattrs__():
        'Return our key attributes in order of significance'
        return ['ipaddr', 'domain']
389 
# The 'class' line was missing from the listing; the name IPtcpportNode is the one
# used in the constructor's docstring and error message.
@RegisterGraphClass
class IPtcpportNode(GraphNode):
    'An object that represents an IP:port combination characterized by the pair'
    def __init__(self, domain, ipaddr, port=None, protocol='tcp'):
        '''Construct an IPtcpportNode - validating our parameters

        ipaddr   - a string or a pyNetAddr; IPv4 addresses are converted to IPv6 form
        port     - if None, the port is taken from ipaddr; must end up in 1..65535
        protocol - stored as given
        Raises ValueError for a bad port, or for an address that is neither
        IPv4 nor IPv6.
        '''
        GraphNode.__init__(self, domain=domain)
        if isinstance(ipaddr, (str, unicode)):
            ipaddr = pyNetAddr(str(ipaddr))
        if isinstance(ipaddr, pyNetAddr):
            if port is None:
                port = ipaddr.port()
            else:
                ipaddr.setport(port)
            # Captured while the address still carries its port
            self._repr = repr(ipaddr)
            if port <= 0 or port >= 65536:
                raise ValueError('Invalid port for constructor: %s' % str(port))
            addrtype = ipaddr.addrtype()
            if addrtype == ADDR_FAMILY_IPV4:
                ipaddr = ipaddr.toIPv6()
            elif addrtype != ADDR_FAMILY_IPV6:
                raise ValueError('Invalid network address type [%s] for constructor: %s'
                                 % (addrtype, str(ipaddr)))
            ipaddr.setport(0)
        else:
            raise ValueError('Invalid initial value for IPtcpportNode constructor: %s type(%s)'
                             % (str(ipaddr), type(ipaddr)))
        self.ipaddr = unicode(str(ipaddr))
        self.port = port
        self.protocol = protocol
        self.ipport = self.format_ipport()

    # The 'def' line was missing from the listing; restored to match the
    # __meta_keyattrs__ static method declared by GraphNode.
    @staticmethod
    def __meta_keyattrs__():
        'Return our key attributes in order of significance'
        return ['ipport', 'domain']

    def format_ipport(self):
        '''Format the ip and port into our key field
        Note that we make the port the most significant part of the key - which
        should allow some more interesting queries.
        '''
        return '%s_%s_%s' % (self.port, self.protocol, self.ipaddr)
432 
433 
# The 'class' line was missing from the listing; the name ProcessNode is the one
# used by the self-test at the bottom of this file.
@RegisterGraphClass
class ProcessNode(GraphNode):
    'A node representing a running process in a host'
    # R0913: Too many arguments (9/7)
    # pylint: disable=R0913
    def __init__(self, domain, processname, host, pathname, argv, uid, gid, cwd, roles=None,
                 is_monitored=False):
        '''Record the description of a process.

        roles - None (stored as ['']), or a string / list of strings handed to addrole()
        All other parameters are stored unchanged in like-named attributes.
        '''
        GraphNode.__init__(self, domain=domain)
        self.host = host
        self.pathname = pathname
        self.argv = argv
        self.uid = uid
        self.gid = gid
        self.cwd = cwd
        self.is_monitored = is_monitored
        if roles is None:
            self.roles = ['']
        else:
            self.roles = None
            self.addrole(roles)
        self.processname = processname

    def addrole(self, roles):
        'Add a role to our ProcessNode'
        self.roles = add_an_array_item(self.roles, roles)
        # Make sure the Processnode 'roles' attribute gets marked as dirty...
        Store.mark_dirty(self, 'roles')
        return self.roles

    def delrole(self, roles):
        'Delete a role from our ProcessNode'
        self.roles = delete_an_array_item(self.roles, roles)
        # Mark our Processnode 'roles' attribute dirty...
        Store.mark_dirty(self, 'roles')
        return self.roles

    # The 'def' line was missing from the listing; restored to match the
    # __meta_keyattrs__ static method declared by GraphNode.
    @staticmethod
    def __meta_keyattrs__():
        'Return our key attributes in order of significance'
        return ['processname', 'domain']
484 
# NOTE(review): the 'class' line was missing from the listing; the name JSONMapNode is
# a reconstruction (the GraphNode base follows from the __init__ call) - confirm.
@RegisterGraphClass
class JSONMapNode(GraphNode):
    '''A node representing a map object encoded as a JSON string
    This has everything to do with performance in Neo4j.
    They don't support maps, and they do a poor (*very* slow) job of supporting large strings.
    The only way I know of to support our JSON-based maps in Neo4j is as large strings.
    These used to be stored in the Drone nodes themselves, but that meant that every time
    a Drone was transferred to the python code, it transferred *all* of its attributes,
    which means transferring lots and lots of very slow and rarely needed string data.

    Although these are transmitted in UDP packets, they are compressed, and JSON compresses very
    well, and in some cases extremely well. I've actually seen 3M of (unusually verbose)
    JSON discovery data compress down to less than 40K of binary.
    XML blobs are typically more compressible than the average JSON blob.
    '''

    def __init__(self, json, jhash=None):
        '''Parse 'json' into a pyConfigContext and keep its string form as self.json.

        jhash - the hash of the JSON; computed with strhash() when not supplied
        '''
        GraphNode.__init__(self, domain='metadata')
        self._map = pyConfigContext(json)
        self.json = str(self._map)
        # We use sha224 to keep the length under 60 characters (56 to be specific)
        # This is a performance consideration for the current (2.3) version of Neo4j
        if jhash is None:
            jhash = self.strhash(self.json)
        self.jhash = jhash

    @staticmethod
    def strhash(string):
        'Return our canonical hash value (< 60 chars long)'
        return hashlib.sha224(string).hexdigest()

    def __str__(self):
        'Convert to string - returning the JSON string itself'
        return self.json

    def hash(self):
        'Return the (sha224) hash of this JSON string'
        return self.jhash

    def map(self):
        'Return the map (pyConfigContext) that corresponds to our JSON string'
        return self._map

    def keys(self):
        'Return the keys that go with our map'
        return self.map().keys()

    def get(self, key, alternative=None):
        '''Return value if object contains the given *structured* key - 'alternative' if not.'''
        return self.map().deepget(key, alternative)

    def deepget(self, key, alternative=None):
        '''Return value if object contains the given *structured* key - 'alternative' if not.'''
        return self.map().deepget(key, alternative)

    def __getitem__(self, key):
        'Return the value our map holds for the given *top level* key'
        return self.map()[key]

    def __iter__(self):
        'Iterate over self.keys() - giving the names of all our *top level* attributes.'
        for key in self.keys():
            yield key

    def __contains__(self, key):
        'Return True if our map contains the given key'
        return key in self.map()

    def __len__(self):
        'Return the length of our map'
        return len(self.map())

    # The 'def' line was missing from the listing; restored to match the
    # __meta_keyattrs__ static method declared by GraphNode.
    @staticmethod
    def __meta_keyattrs__():
        'Return our key attributes in order of significance'
        return ['jhash']
558 
# pylint W0212: we need to get the value of the _id fields...
# pylint R0903: too few public methods. Not appropriate here...
# pylint: disable=W0212,R0903
class NeoRelationship(object):
    '''Our encapsulation of a Neo4j Relationship - good for displaying them '''
    def __init__(self, relationship):
        '''Constructor for our Relationship proxy
        Relationship should be a neo4j.Relationship

        We keep the relationship itself plus copies of its _id, type and
        properties.  Note that start_node and end_node hold the *ids* of the
        two end nodes, not the node objects.
        '''
        self._relationship = relationship
        self._id = relationship._id
        self.type = relationship.type
        self.start_node = relationship.start_node._id
        self.end_node = relationship.end_node._id
        self.properties = relationship.properties
574 
if __name__ == '__main__':
    def maintest():
        '''test main program

        Initializes the CMA with a cleaned-out database, reports any pending
        transaction, then prints the labels and key attributes of a few node classes.
        Returns 0, which becomes the exit status.
        '''
        from cmainit import CMAinit
        from droneinfo import Drone
        from systemnode import SystemNode

        def report(*items):
            'Write the items to stdout on one line, separated by single spaces'
            sys.stdout.write(' '.join([str(item) for item in items]) + '\n')

        sys.stderr.write('Starting\n')
        CMAinit(None, cleanoutdb=True, debug=True)
        # NOTE(review): the listing lost its indentation; the commit is assumed to
        # belong to the pending-transaction branch - confirm.
        if CMAdb.store.transaction_pending:
            report('Transaction pending in:', CMAdb.store)
            report('Results:', CMAdb.store.commit())
        report(ProcessNode.__meta_labels__())
        report(SystemNode.__meta_labels__())
        report(Drone.__meta_labels__())
        report('keys:', Drone.__meta_keyattrs__())
        sys.stderr.write('Init done\n')
        return 0

    sys.exit(maintest())
def __init__(self, domain, time_create_ms=None, time_create_iso8601=None)
Definition: graphnodes.py:69
def initclasstypeobj(store, nodetype)
Definition: graphnodes.py:223
def __init__(self, rulesetname, basisrules=None)
Definition: graphnodes.py:298
def __init__(self, domain, ipaddr, port=None, protocol='tcp')
Definition: graphnodes.py:393
def __init__(self, domain, ipaddr, cidrmask='unknown')
Definition: graphnodes.py:358
def JSON(self, includemap=None, excludemap=None)
Definition: graphnodes.py:175
def get(self, attrstring, valueifnotfound=None)
Definition: graphnodes.py:133
def get(self, key, alternative=None)
Definition: graphnodes.py:532
def delrole(self, roles)
Definition: graphnodes.py:473
def add_an_array_item(currarray, itemtoadd)
Definition: graphnodes.py:246
def __init__(self, json, jhash=None)
Definition: graphnodes.py:501
def mac_to_oui(macaddr)
Definition: graphnodes.py:336
def delete_an_array_item(currarray, itemtodel)
Definition: graphnodes.py:261
def RegisterGraphClass(classtoregister)
Definition: graphnodes.py:43
def nodeconstructor(properties)
Definition: graphnodes.py:33
def __init__(self, bp_class, json, rulesetname)
Definition: graphnodes.py:279
def __contains__(self, key)
Definition: graphnodes.py:548
def __init__(self, domain, macaddr, ifname=None, json=None)
Definition: graphnodes.py:316
def __init__(self, relationship)
Definition: graphnodes.py:564
def addrole(self, roles)
Definition: graphnodes.py:466
def update_attributes(self, other)
Definition: graphnodes.py:105
def deepget(self, key, alternative=None)
Definition: graphnodes.py:536
def __init__(self, domain, processname, host, pathname, argv, uid, gid, cwd, roles=None, is_monitored=False)
Definition: graphnodes.py:440
def __getitem__(self, key)
Definition: graphnodes.py:540