Graph.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682
  1. """
  2. altgraph.Graph - Base Graph class
  3. =================================
  4. ..
  5. #--Version 2.1
  6. #--Bob Ippolito October, 2004
  7. #--Version 2.0
  8. #--Istvan Albert June, 2004
  9. #--Version 1.0
  10. #--Nathan Denny, May 27, 1999
  11. """
  12. from collections import deque
  13. from altgraph import GraphError
  14. class Graph(object):
  15. """
  16. The Graph class represents a directed graph with *N* nodes and *E* edges.
  17. Naming conventions:
  18. - the prefixes such as *out*, *inc* and *all* will refer to methods
  19. that operate on the outgoing, incoming or all edges of that node.
  20. For example: :py:meth:`inc_degree` will refer to the degree of the node
  21. computed over the incoming edges (the number of neighbours linking to
  22. the node).
  23. - the prefixes such as *forw* and *back* will refer to the
  24. orientation of the edges used in the method with respect to the node.
  25. For example: :py:meth:`forw_bfs` will start at the node then use the
  26. outgoing edges to traverse the graph (goes forward).
  27. """
  28. def __init__(self, edges=None):
  29. """
  30. Initialization
  31. """
  32. self.next_edge = 0
  33. self.nodes, self.edges = {}, {}
  34. self.hidden_edges, self.hidden_nodes = {}, {}
  35. if edges is not None:
  36. for item in edges:
  37. if len(item) == 2:
  38. head, tail = item
  39. self.add_edge(head, tail)
  40. elif len(item) == 3:
  41. head, tail, data = item
  42. self.add_edge(head, tail, data)
  43. else:
  44. raise GraphError("Cannot create edge from %s" % (item,))
  45. def __repr__(self):
  46. return "<Graph: %d nodes, %d edges>" % (
  47. self.number_of_nodes(),
  48. self.number_of_edges(),
  49. )
  50. def add_node(self, node, node_data=None):
  51. """
  52. Adds a new node to the graph. Arbitrary data can be attached to the
  53. node via the node_data parameter. Adding the same node twice will be
  54. silently ignored.
  55. The node must be a hashable value.
  56. """
  57. #
  58. # the nodes will contain tuples that will store incoming edges,
  59. # outgoing edges and data
  60. #
  61. # index 0 -> incoming edges
  62. # index 1 -> outgoing edges
  63. if node in self.hidden_nodes:
  64. # Node is present, but hidden
  65. return
  66. if node not in self.nodes:
  67. self.nodes[node] = ([], [], node_data)
  68. def add_edge(self, head_id, tail_id, edge_data=1, create_nodes=True):
  69. """
  70. Adds a directed edge going from head_id to tail_id.
  71. Arbitrary data can be attached to the edge via edge_data.
  72. It may create the nodes if adding edges between nonexisting ones.
  73. :param head_id: head node
  74. :param tail_id: tail node
  75. :param edge_data: (optional) data attached to the edge
  76. :param create_nodes: (optional) creates the head_id or tail_id
  77. node in case they did not exist
  78. """
  79. # shorcut
  80. edge = self.next_edge
  81. # add nodes if on automatic node creation
  82. if create_nodes:
  83. self.add_node(head_id)
  84. self.add_node(tail_id)
  85. # update the corresponding incoming and outgoing lists in the nodes
  86. # index 0 -> incoming edges
  87. # index 1 -> outgoing edges
  88. try:
  89. self.nodes[tail_id][0].append(edge)
  90. self.nodes[head_id][1].append(edge)
  91. except KeyError:
  92. raise GraphError("Invalid nodes %s -> %s" % (head_id, tail_id))
  93. # store edge information
  94. self.edges[edge] = (head_id, tail_id, edge_data)
  95. self.next_edge += 1
  96. def hide_edge(self, edge):
  97. """
  98. Hides an edge from the graph. The edge may be unhidden at some later
  99. time.
  100. """
  101. try:
  102. head_id, tail_id, edge_data = self.hidden_edges[edge] = self.edges[edge]
  103. self.nodes[tail_id][0].remove(edge)
  104. self.nodes[head_id][1].remove(edge)
  105. del self.edges[edge]
  106. except KeyError:
  107. raise GraphError("Invalid edge %s" % edge)
  108. def hide_node(self, node):
  109. """
  110. Hides a node from the graph. The incoming and outgoing edges of the
  111. node will also be hidden. The node may be unhidden at some later time.
  112. """
  113. try:
  114. all_edges = self.all_edges(node)
  115. self.hidden_nodes[node] = (self.nodes[node], all_edges)
  116. for edge in all_edges:
  117. self.hide_edge(edge)
  118. del self.nodes[node]
  119. except KeyError:
  120. raise GraphError("Invalid node %s" % node)
  121. def restore_node(self, node):
  122. """
  123. Restores a previously hidden node back into the graph and restores
  124. all of its incoming and outgoing edges.
  125. """
  126. try:
  127. self.nodes[node], all_edges = self.hidden_nodes[node]
  128. for edge in all_edges:
  129. self.restore_edge(edge)
  130. del self.hidden_nodes[node]
  131. except KeyError:
  132. raise GraphError("Invalid node %s" % node)
  133. def restore_edge(self, edge):
  134. """
  135. Restores a previously hidden edge back into the graph.
  136. """
  137. try:
  138. head_id, tail_id, data = self.hidden_edges[edge]
  139. self.nodes[tail_id][0].append(edge)
  140. self.nodes[head_id][1].append(edge)
  141. self.edges[edge] = head_id, tail_id, data
  142. del self.hidden_edges[edge]
  143. except KeyError:
  144. raise GraphError("Invalid edge %s" % edge)
  145. def restore_all_edges(self):
  146. """
  147. Restores all hidden edges.
  148. """
  149. for edge in list(self.hidden_edges.keys()):
  150. try:
  151. self.restore_edge(edge)
  152. except GraphError:
  153. pass
  154. def restore_all_nodes(self):
  155. """
  156. Restores all hidden nodes.
  157. """
  158. for node in list(self.hidden_nodes.keys()):
  159. self.restore_node(node)
  160. def __contains__(self, node):
  161. """
  162. Test whether a node is in the graph
  163. """
  164. return node in self.nodes
  165. def edge_by_id(self, edge):
  166. """
  167. Returns the edge that connects the head_id and tail_id nodes
  168. """
  169. try:
  170. head, tail, data = self.edges[edge]
  171. except KeyError:
  172. head, tail = None, None
  173. raise GraphError("Invalid edge %s" % edge)
  174. return (head, tail)
  175. def edge_by_node(self, head, tail):
  176. """
  177. Returns the edge that connects the head_id and tail_id nodes
  178. """
  179. for edge in self.out_edges(head):
  180. if self.tail(edge) == tail:
  181. return edge
  182. return None
  183. def number_of_nodes(self):
  184. """
  185. Returns the number of nodes
  186. """
  187. return len(self.nodes)
  188. def number_of_edges(self):
  189. """
  190. Returns the number of edges
  191. """
  192. return len(self.edges)
  193. def __iter__(self):
  194. """
  195. Iterates over all nodes in the graph
  196. """
  197. return iter(self.nodes)
  198. def node_list(self):
  199. """
  200. Return a list of the node ids for all visible nodes in the graph.
  201. """
  202. return list(self.nodes.keys())
  203. def edge_list(self):
  204. """
  205. Returns an iterator for all visible nodes in the graph.
  206. """
  207. return list(self.edges.keys())
  208. def number_of_hidden_edges(self):
  209. """
  210. Returns the number of hidden edges
  211. """
  212. return len(self.hidden_edges)
  213. def number_of_hidden_nodes(self):
  214. """
  215. Returns the number of hidden nodes
  216. """
  217. return len(self.hidden_nodes)
  218. def hidden_node_list(self):
  219. """
  220. Returns the list with the hidden nodes
  221. """
  222. return list(self.hidden_nodes.keys())
  223. def hidden_edge_list(self):
  224. """
  225. Returns a list with the hidden edges
  226. """
  227. return list(self.hidden_edges.keys())
  228. def describe_node(self, node):
  229. """
  230. return node, node data, outgoing edges, incoming edges for node
  231. """
  232. incoming, outgoing, data = self.nodes[node]
  233. return node, data, outgoing, incoming
  234. def describe_edge(self, edge):
  235. """
  236. return edge, edge data, head, tail for edge
  237. """
  238. head, tail, data = self.edges[edge]
  239. return edge, data, head, tail
  240. def node_data(self, node):
  241. """
  242. Returns the data associated with a node
  243. """
  244. return self.nodes[node][2]
  245. def edge_data(self, edge):
  246. """
  247. Returns the data associated with an edge
  248. """
  249. return self.edges[edge][2]
  250. def update_edge_data(self, edge, edge_data):
  251. """
  252. Replace the edge data for a specific edge
  253. """
  254. self.edges[edge] = self.edges[edge][0:2] + (edge_data,)
  255. def head(self, edge):
  256. """
  257. Returns the node of the head of the edge.
  258. """
  259. return self.edges[edge][0]
  260. def tail(self, edge):
  261. """
  262. Returns node of the tail of the edge.
  263. """
  264. return self.edges[edge][1]
  265. def out_nbrs(self, node):
  266. """
  267. List of nodes connected by outgoing edges
  268. """
  269. return [self.tail(n) for n in self.out_edges(node)]
  270. def inc_nbrs(self, node):
  271. """
  272. List of nodes connected by incoming edges
  273. """
  274. return [self.head(n) for n in self.inc_edges(node)]
  275. def all_nbrs(self, node):
  276. """
  277. List of nodes connected by incoming and outgoing edges
  278. """
  279. return list(dict.fromkeys(self.inc_nbrs(node) + self.out_nbrs(node)))
  280. def out_edges(self, node):
  281. """
  282. Returns a list of the outgoing edges
  283. """
  284. try:
  285. return list(self.nodes[node][1])
  286. except KeyError:
  287. raise GraphError("Invalid node %s" % node)
  288. def inc_edges(self, node):
  289. """
  290. Returns a list of the incoming edges
  291. """
  292. try:
  293. return list(self.nodes[node][0])
  294. except KeyError:
  295. raise GraphError("Invalid node %s" % node)
  296. def all_edges(self, node):
  297. """
  298. Returns a list of incoming and outging edges.
  299. """
  300. return set(self.inc_edges(node) + self.out_edges(node))
  301. def out_degree(self, node):
  302. """
  303. Returns the number of outgoing edges
  304. """
  305. return len(self.out_edges(node))
  306. def inc_degree(self, node):
  307. """
  308. Returns the number of incoming edges
  309. """
  310. return len(self.inc_edges(node))
  311. def all_degree(self, node):
  312. """
  313. The total degree of a node
  314. """
  315. return self.inc_degree(node) + self.out_degree(node)
  316. def _topo_sort(self, forward=True):
  317. """
  318. Topological sort.
  319. Returns a list of nodes where the successors (based on outgoing and
  320. incoming edges selected by the forward parameter) of any given node
  321. appear in the sequence after that node.
  322. """
  323. topo_list = []
  324. queue = deque()
  325. indeg = {}
  326. # select the operation that will be performed
  327. if forward:
  328. get_edges = self.out_edges
  329. get_degree = self.inc_degree
  330. get_next = self.tail
  331. else:
  332. get_edges = self.inc_edges
  333. get_degree = self.out_degree
  334. get_next = self.head
  335. for node in self.node_list():
  336. degree = get_degree(node)
  337. if degree:
  338. indeg[node] = degree
  339. else:
  340. queue.append(node)
  341. while queue:
  342. curr_node = queue.popleft()
  343. topo_list.append(curr_node)
  344. for edge in get_edges(curr_node):
  345. tail_id = get_next(edge)
  346. if tail_id in indeg:
  347. indeg[tail_id] -= 1
  348. if indeg[tail_id] == 0:
  349. queue.append(tail_id)
  350. if len(topo_list) == len(self.node_list()):
  351. valid = True
  352. else:
  353. # the graph has cycles, invalid topological sort
  354. valid = False
  355. return (valid, topo_list)
  356. def forw_topo_sort(self):
  357. """
  358. Topological sort.
  359. Returns a list of nodes where the successors (based on outgoing edges)
  360. of any given node appear in the sequence after that node.
  361. """
  362. return self._topo_sort(forward=True)
  363. def back_topo_sort(self):
  364. """
  365. Reverse topological sort.
  366. Returns a list of nodes where the successors (based on incoming edges)
  367. of any given node appear in the sequence after that node.
  368. """
  369. return self._topo_sort(forward=False)
  370. def _bfs_subgraph(self, start_id, forward=True):
  371. """
  372. Private method creates a subgraph in a bfs order.
  373. The forward parameter specifies whether it is a forward or backward
  374. traversal.
  375. """
  376. if forward:
  377. get_bfs = self.forw_bfs
  378. get_nbrs = self.out_nbrs
  379. else:
  380. get_bfs = self.back_bfs
  381. get_nbrs = self.inc_nbrs
  382. g = Graph()
  383. bfs_list = get_bfs(start_id)
  384. for node in bfs_list:
  385. g.add_node(node)
  386. for node in bfs_list:
  387. for nbr_id in get_nbrs(node):
  388. if forward:
  389. g.add_edge(node, nbr_id)
  390. else:
  391. g.add_edge(nbr_id, node)
  392. return g
  393. def forw_bfs_subgraph(self, start_id):
  394. """
  395. Creates and returns a subgraph consisting of the breadth first
  396. reachable nodes based on their outgoing edges.
  397. """
  398. return self._bfs_subgraph(start_id, forward=True)
  399. def back_bfs_subgraph(self, start_id):
  400. """
  401. Creates and returns a subgraph consisting of the breadth first
  402. reachable nodes based on the incoming edges.
  403. """
  404. return self._bfs_subgraph(start_id, forward=False)
  405. def iterdfs(self, start, end=None, forward=True):
  406. """
  407. Collecting nodes in some depth first traversal.
  408. The forward parameter specifies whether it is a forward or backward
  409. traversal.
  410. """
  411. visited, stack = {start}, deque([start])
  412. if forward:
  413. get_edges = self.out_edges
  414. get_next = self.tail
  415. else:
  416. get_edges = self.inc_edges
  417. get_next = self.head
  418. while stack:
  419. curr_node = stack.pop()
  420. yield curr_node
  421. if curr_node == end:
  422. break
  423. for edge in sorted(get_edges(curr_node)):
  424. tail = get_next(edge)
  425. if tail not in visited:
  426. visited.add(tail)
  427. stack.append(tail)
  428. def iterdata(self, start, end=None, forward=True, condition=None):
  429. """
  430. Perform a depth-first walk of the graph (as ``iterdfs``)
  431. and yield the item data of every node where condition matches. The
  432. condition callback is only called when node_data is not None.
  433. """
  434. visited, stack = {start}, deque([start])
  435. if forward:
  436. get_edges = self.out_edges
  437. get_next = self.tail
  438. else:
  439. get_edges = self.inc_edges
  440. get_next = self.head
  441. get_data = self.node_data
  442. while stack:
  443. curr_node = stack.pop()
  444. curr_data = get_data(curr_node)
  445. if curr_data is not None:
  446. if condition is not None and not condition(curr_data):
  447. continue
  448. yield curr_data
  449. if curr_node == end:
  450. break
  451. for edge in get_edges(curr_node):
  452. tail = get_next(edge)
  453. if tail not in visited:
  454. visited.add(tail)
  455. stack.append(tail)
  456. def _iterbfs(self, start, end=None, forward=True):
  457. """
  458. The forward parameter specifies whether it is a forward or backward
  459. traversal. Returns a list of tuples where the first value is the hop
  460. value the second value is the node id.
  461. """
  462. queue, visited = deque([(start, 0)]), {start}
  463. # the direction of the bfs depends on the edges that are sampled
  464. if forward:
  465. get_edges = self.out_edges
  466. get_next = self.tail
  467. else:
  468. get_edges = self.inc_edges
  469. get_next = self.head
  470. while queue:
  471. curr_node, curr_step = queue.popleft()
  472. yield (curr_node, curr_step)
  473. if curr_node == end:
  474. break
  475. for edge in get_edges(curr_node):
  476. tail = get_next(edge)
  477. if tail not in visited:
  478. visited.add(tail)
  479. queue.append((tail, curr_step + 1))
  480. def forw_bfs(self, start, end=None):
  481. """
  482. Returns a list of nodes in some forward BFS order.
  483. Starting from the start node the breadth first search proceeds along
  484. outgoing edges.
  485. """
  486. return [node for node, step in self._iterbfs(start, end, forward=True)]
  487. def back_bfs(self, start, end=None):
  488. """
  489. Returns a list of nodes in some backward BFS order.
  490. Starting from the start node the breadth first search proceeds along
  491. incoming edges.
  492. """
  493. return [node for node, _ in self._iterbfs(start, end, forward=False)]
  494. def forw_dfs(self, start, end=None):
  495. """
  496. Returns a list of nodes in some forward DFS order.
  497. Starting with the start node the depth first search proceeds along
  498. outgoing edges.
  499. """
  500. return list(self.iterdfs(start, end, forward=True))
  501. def back_dfs(self, start, end=None):
  502. """
  503. Returns a list of nodes in some backward DFS order.
  504. Starting from the start node the depth first search proceeds along
  505. incoming edges.
  506. """
  507. return list(self.iterdfs(start, end, forward=False))
  508. def connected(self):
  509. """
  510. Returns :py:data:`True` if the graph's every node can be reached from
  511. every other node.
  512. """
  513. node_list = self.node_list()
  514. for node in node_list:
  515. bfs_list = self.forw_bfs(node)
  516. if len(bfs_list) != len(node_list):
  517. return False
  518. return True
  519. def clust_coef(self, node):
  520. """
  521. Computes and returns the local clustering coefficient of node.
  522. The local cluster coefficient is proportion of the actual number of
  523. edges between neighbours of node and the maximum number of edges
  524. between those neighbours.
  525. See "Local Clustering Coefficient" on
  526. <http://en.wikipedia.org/wiki/Clustering_coefficient>
  527. for a formal definition.
  528. """
  529. num = 0
  530. nbr_set = set(self.out_nbrs(node))
  531. if node in nbr_set:
  532. nbr_set.remove(node) # loop defense
  533. for nbr in nbr_set:
  534. sec_set = set(self.out_nbrs(nbr))
  535. if nbr in sec_set:
  536. sec_set.remove(nbr) # loop defense
  537. num += len(nbr_set & sec_set)
  538. nbr_num = len(nbr_set)
  539. if nbr_num:
  540. clust_coef = float(num) / (nbr_num * (nbr_num - 1))
  541. else:
  542. clust_coef = 0.0
  543. return clust_coef
  544. def get_hops(self, start, end=None, forward=True):
  545. """
  546. Computes the hop distance to all nodes centered around a node.
  547. First order neighbours are at hop 1, their neigbours are at hop 2 etc.
  548. Uses :py:meth:`forw_bfs` or :py:meth:`back_bfs` depending on the value
  549. of the forward parameter. If the distance between all neighbouring
  550. nodes is 1 the hop number corresponds to the shortest distance between
  551. the nodes.
  552. :param start: the starting node
  553. :param end: ending node (optional). When not specified will search the
  554. whole graph.
  555. :param forward: directionality parameter (optional).
  556. If C{True} (default) it uses L{forw_bfs} otherwise L{back_bfs}.
  557. :return: returns a list of tuples where each tuple contains the
  558. node and the hop.
  559. Typical usage::
  560. >>> print (graph.get_hops(1, 8))
  561. >>> [(1, 0), (2, 1), (3, 1), (4, 2), (5, 3), (7, 4), (8, 5)]
  562. # node 1 is at 0 hops
  563. # node 2 is at 1 hop
  564. # ...
  565. # node 8 is at 5 hops
  566. """
  567. if forward:
  568. return list(self._iterbfs(start=start, end=end, forward=True))
  569. else:
  570. return list(self._iterbfs(start=start, end=end, forward=False))