# bfs_kbacon.py from collections import deque import networkx as nx from matplotlib import pyplot as plt import time class Graph(object): """A graph object, stored as an adjacency dictionary. Each node in the graph is a key in the dictionary. The value of each key is a list of the corresponding node's neighbors. Attributes: dictionary: the adjacency list of the graph. """ def __init__(self, adjacency): """Store the adjacency dictionary as a class attribute.""" self.dictionary = adjacency def __str__(self): """String representation: a sorted view of the adjacency dictionary. Example: >>> test = {'A':['B'], 'B':['A', 'C',], 'C':['B']} >>> print(Graph(test)) A: B B: A; C C: B """ to_print = "" for key in sorted(self.dictionary.keys()): to_print += str(key) + ": " + "; ".join(sorted(self.dictionary[key])) +"\n" return to_print def traverse(self, start): """Begin at 'start' and perform a breadth-first search until all nodes in the graph have been visited. Return a list of values, in the order that they were visited. Inputs: start: the node to start the search at. Returns: the list of visited nodes (in order of visitation). Raises: ValueError: if 'start' is not in the adjacency dictionary. Example: >>> test = {'A':['B'], 'B':['A', 'C',], 'C':['B']} >>> Graph(test).traverse('B') ['B', 'A', 'C'] """ if start not in self.dictionary.keys(): raise ValueError("Can't start there") current = start marked = set() visited = list() visit_queue = deque() while len(visited) < len(self.dictionary.keys()): visited.append(current) marked.add(current) for neighbors in self.dictionary[current]: if neighbors not in marked: visit_queue.append(neighbors) marked.add(neighbors) if len(visit_queue) != 0: current = visit_queue.popleft() return visited def shortest_path(self, start, target): """Begin at the node containing 'start' and perform a breadth-first search until the node containing 'target' is found. Return a list containg the shortest path from 'start' to 'target'. If either of the inputs are not in the adjacency graph, raise a ValueError. Inputs: start: the node to start the search at. target: the node to search for. Returns: A list of nodes along the shortest path from start to target, including the endpoints. Example: >>> test = {'A':['B', 'F'], 'B':['A', 'C'], 'C':['B', 'D'], ... 'D':['C', 'E'], 'E':['D', 'F'], 'F':['A', 'E', 'G'], ... 'G':['A', 'F']} >>> Graph(test).shortest_path('A', 'G') ['A', 'F', 'G'] """ if start not in self.dictionary.keys() or target not in self.dictionary.keys(): raise ValueError("Elements not in the list") current = start marked = set() visited = list() visit_queue = deque() go = True di = {} path = [] while go: if current == target: go = False go_second = True back = target while go_second: path.append(back) back = di[back] if back == start: go_second = False path.append(start) return path[::-1] visited.append(current) marked.add(current) for neighbors in self.dictionary[current]: if neighbors not in marked: visit_queue.append(neighbors) di[neighbors] = current marked.add(neighbors) current = visit_queue.popleft() def convert_to_networkx(dictionary): """Convert 'dictionary' to a networkX object and return it.""" nx_graph = nx.Graph() for node in dictionary.keys(): for edge in dictionary[node]: nx_graph.add_edge(node, edge) return nx_graph def parse(filename="movieData.txt"): """Generate an adjacency dictionary where each key is a movie and each value is a list of actors in the movie. """ # open the file, read it in, and split the text by '\n' with open(filename, 'r') as movieFile: moviesList = movieFile.read().split('\n') graph = dict() # for each movie in the file, for movie in moviesList: # get movie name and list of actors names = movie.split('/') title = names[0] graph[title] = [] # add the actors to the dictionary for actor in names[1:]: graph[title].append(actor) return graph class BaconSolver(object): """Class for solving the Kevin Bacon problem.""" def __init__(self, filename="movieData.txt"): """Initialize the networkX graph and with data from the specified file. Store the graph as a class attribute. Also store the collection of actors in the file as an attribute. """ self.dictionary = parse(filename) self.networkx = convert_to_networkx(self.dictionary) def path_to_bacon(self, start, target="Bacon, Kevin"): """Find the shortest path from 'start' to 'target'.""" if start not in self.networkx: raise ValueError("I'm sorry, the person you are looking for cannot be found") elif target not in self.networkx: raise ValueError("I'm sorry, the person you are looking for cannot be found") return nx.shortest_path(self.networkx, start, target) def bacon_number(self, start, target="Bacon, Kevin"): """Return the Bacon number of 'start'.""" li = self.path_to_bacon(start, target) bacon_numbers = 1 for i in range(0, len(li)): if i % 2 == 0: bacon_numbers += 1 return bacon_numbers - 2 def average_bacon(self, target="Bacon, Kevin"): """Calculate the average Bacon number in the data set. Note that actors are not guaranteed to be connected to the target. Inputs: target (str): the node to search the graph for """ total = 0 count = 0 not_connected = 0 actors = set() for actor in self.dictionary.values(): for a in actor: actors.add(a) for actor in actors: try: total += self.bacon_number(actor, target) count += 1 except: not_connected += 1 return total/float(count), not_connected # =========================== END OF FILE =============================== # if __name__ == "__main__": movie_graph = BaconSolver("movieData.txt") print movie_graph.average_bacon() # print movie_graph.bacon_number("Jackson, Samuel L.")