当前位置:网站首页>使用NetworkX对社交网络进行系统的分析:Facebook网络分析案例
使用NetworkX对社交网络进行系统的分析:Facebook网络分析案例
2022-06-27 00:33:00 【智源社区】

import pandas as pdimport numpy as npimport networkx as nximport matplotlib.pyplot as pltfrom random import randintfacebook = pd.read_csv( "data/facebook_combined.txt.gz", compression="gzip", sep=" ", names=["start_node", "end_node"],)
G = nx.from_pandas_edgelist(facebook, "start_node", "end_node")fig, ax = plt.subplots(figsize=(15, 9))ax.axis("off")plot_options = {"node_size": 10, "with_labels": False, "width": 0.15}nx.draw_networkx(G, pos=nx.random_layout(G), ax=ax, **plot_options)pos = nx.spring_layout(G, iterations=15, seed=1721)fig, ax = plt.subplots(figsize=(15, 9))ax.axis("off")nx.draw_networkx(G, pos=pos, ax=ax, **plot_options)
G.number_of_nodes()4039# 连边数量G.number_of_edges()88234np.mean([d for _, d in G.degree()])43.69101262688784shortest_path_lengths = dict(nx.all_pairs_shortest_path_length(G))# Length of shortest path between nodes 0 and 42shortest_path_lengths[0][42] 1diameter = max(nx.eccentricity(G, sp=shortest_path_lengths).values())diameter8# Compute the average shortest path length for each nodeaverage_path_lengths = [ np.mean(list(spl.values())) for spl in shortest_path_lengths.values()]# The average over all nodesnp.mean(average_path_lengths)3.691592636562027# We know the maximum shortest path length (the diameter), so create an array# to store values from 0 up to (and including) diameterpath_lengths = np.zeros(diameter + 1, dtype=int)# Extract the frequency of shortest path lengths between two nodesfor pls in shortest_path_lengths.values(): pl, cnts = np.unique(list(pls.values()), return_counts=True) path_lengths[pl] += cnts# Express frequency distribution as a percentage (ignoring path lengths of 0)freq_percent = 100 * path_lengths[1:] / path_lengths[1:].sum()# Plot the frequency distribution (ignoring path lengths of 0) as a percentagefig, ax = plt.subplots(figsize=(15, 8))ax.bar(np.arange(1, diameter + 1), height=freq_percent)ax.set_title( "Distribution of shortest path length in G", fontdict={"size": 35}, loc="center")ax.set_xlabel("Shortest Path Length", fontdict={"size": 22})ax.set_ylabel("Frequency (%)", fontdict={"size": 22})
nx.density(G)0.010819963503439287nx.number_connected_components(G)1degree_centrality = nx.centrality.degree_centrality(G) # save results in a variable to use again(sorted(degree_centrality.items(), key=lambda item: item[1], reverse=True))[:8][(107, 0.258791480931154), (1684, 0.1961367013372957), (1912, 0.18697374938088163), (3437, 0.13546310054482416), (0, 0.08593363051015354), (2543, 0.07280832095096582), (2347, 0.07206537890044576), (1888, 0.0629024269440317)](sorted(G.degree, key=lambda item: item[1], reverse=True))[:8][(107, 1045), (1684, 792), (1912, 755), (3437, 547), (0, 347), (2543, 294), (2347, 291), (1888, 254)]plt.figure(figsize=(15, 8))plt.hist(degree_centrality.values(), bins=25)plt.xticks(ticks=[0, 0.025, 0.05, 0.1, 0.15, 0.2]) # set the x axis ticksplt.title("Degree Centrality Histogram ", fontdict={"size": 35}, loc="center")plt.xlabel("Degree Centrality", fontdict={"size": 20})plt.ylabel("Counts", fontdict={"size": 20})
node_size = [ v * 1000 for v in degree_centrality.values()] # set up nodes size for a nice graph representationplt.figure(figsize=(15, 8))nx.draw_networkx(G, pos=pos, node_size=node_size, with_labels=False, width=0.15)plt.axis("off")
betweenness_centrality = nx.centrality.betweenness_centrality( G) # save results in a variable to use again(sorted(betweenness_centrality.items(), key=lambda item: item[1], reverse=True))[:8][(107, 0.4805180785560152), (1684, 0.3377974497301992), (3437, 0.23611535735892905), (1912, 0.2292953395868782), (1085, 0.14901509211665306), (0, 0.14630592147442917), (698, 0.11533045020560802), (567, 0.09631033121856215)]plt.figure(figsize=(15, 8))plt.hist(betweenness_centrality.values(), bins=100)plt.xticks(ticks=[0, 0.02, 0.1, 0.2, 0.3, 0.4, 0.5]) # set the x axis ticksplt.title("Betweenness Centrality Histogram ", fontdict={"size": 35}, loc="center")plt.xlabel("Betweenness Centrality", fontdict={"size": 20})plt.ylabel("Counts", fontdict={"size": 20})
node_size = [ v * 1200 for v in betweenness_centrality.values()] # set up nodes size for a nice graph representationplt.figure(figsize=(15, 8))nx.draw_networkx(G, pos=pos, node_size=node_size, with_labels=False, width=0.15)plt.axis("off")
closeness_centrality = nx.centrality.closeness_centrality( G) # save results in a variable to use again(sorted(closeness_centrality.items(), key=lambda item: item[1], reverse=True))[:8][(107, 0.45969945355191255), (58, 0.3974018305284913), (428, 0.3948371956585509), (563, 0.3939127889961955), (1684, 0.39360561458231796), (171, 0.37049270575282134), (348, 0.36991572004397216), (483, 0.3698479575013739)]# 此外,一个特定节点v到任何其他节点的平均距离也可以很容易地用公式求出:1 / closeness_centrality[107]2.1753343239227343plt.figure(figsize=(15, 8))plt.hist(closeness_centrality.values(), bins=60)plt.title("Closeness Centrality Histogram ", fontdict={"size": 35}, loc="center")plt.xlabel("Closeness Centrality", fontdict={"size": 20})plt.ylabel("Counts", fontdict={"size": 20})
node_size = [ v * 50 for v in closeness_centrality.values()] # set up nodes size for a nice graph representationplt.figure(figsize=(15, 8))nx.draw_networkx(G, pos=pos, node_size=node_size, with_labels=False, width=0.15)plt.axis("off")
以及特征向量中心性等中心性指标,用类似的方式即
可获取上述图表。
nx.average_clustering(G)0.6055467186200876plt.figure(figsize=(15, 8))plt.hist(nx.clustering(G).values(), bins=50)plt.title("Clustering Coefficient Histogram ", fontdict={"size": 35}, loc="center")plt.xlabel("Clustering Coefficient", fontdict={"size": 20})plzt.ylabel("Counts", fontdict={"size": 20})
nx.has_bridges(G)True# 输出桥的数量
bridges = list(nx.bridges(G))len(bridges)75plt.figure(figsize=(15, 8))nx.draw_networkx(G, pos=pos, node_size=10, with_labels=False, width=0.15)nx.draw_networkx_edges( G, pos, edgelist=local_bridges, width=0.5, edge_color="lawngreen") # green color for local bridgesnx.draw_networkx_edges( G, pos, edgelist=bridges, width=0.5, edge_color="r") # red color for bridgesplt.axis("off")nx.degree_assortativity_coefficient(G)0.06357722918564943nx.degree_pearson_correlation_coefficient(G) 0.06357722918564918
colors = ["" for x in range(G.number_of_nodes())] # initialize colors listcounter = 0for com in nx.community.label_propagation_communities(G): color = "#%06X" % randint(0, 0xFFFFFF) # creates random RGB color counter += 1 for node in list( com ): # fill colors list with the particular color for the community nodes colors[node] = colorcounter44plt.figure(figsize=(15, 9))plt.axis("off")nx.draw_networkx( G, pos=pos, node_size=10, with_labels=False, width=0.15, node_color=colors)
colors = ["" for x in range(G.number_of_nodes())]for com in nx.community.asyn_fluidc(G, 8, seed=0): color = "#%06X" % randint(0, 0xFFFFFF) # creates random RGB color for node in list(com): colors[node] = colorplt.figure(figsize=(15, 9))plt.axis("off")nx.draw_networkx( G, pos=pos, node_size=10, with_labels=False, width=0.15, node_color=colors)
[1]https://networkx.org/nx-guides/content/exploratory_notebooks/facebook_notebook.html#id2
[2]http://snap.stanford.edu/data/ego-Facebook.html
边栏推荐
- 2022年地理信息系统与遥感专业就业前景与升学高校排名选择
- 小白看MySQL--windows环境安装MySQL
- 2022年地理信息系统与遥感专业就业前景与升学高校排名选择
- 其他服务注册与发现
- One click acceleration of Sony camera SD card file copy operation, file operation batch processing tutorial
- 简单快速的数网络(网络中的网络套娃)
- memcached基础1
- Unable to create a folder to save the sketch: MKDIR sketch
- Pet hospital management system based on SSMP
- ArcGIS 镶嵌数据集切片丢失问题处理
猜你喜欢
![Count the logarithm of points that cannot reach each other in an undirected graph [classic adjacency table building +dfs Statistics - > query set optimization] [query set manual / write details]](/img/cc/a0be58eddc72c22a9a6ee5c61eb81a.png)
Count the logarithm of points that cannot reach each other in an undirected graph [classic adjacency table building +dfs Statistics - > query set optimization] [query set manual / write details]

BootstrapBlazor + FreeSql实战 Chart 图表使用(2)

LeetCode 142. Circular linked list II

IIS 部署静态网站和 FTP 服务

Hid device descriptor and keyboard key value corresponding coding table in USB protocol

30《MySQL 教程》MySQL 存储引擎概述

05 | 规范设计(下):commit 信息风格迥异、难以阅读,如何规范?

About Random Numbers

ESP32实验-自建web服务器配网02

Flutter series: flow in flutter
随机推荐
memcached基础4
How to convert an old keyboard into a USB keyboard and program it yourself?
温故知新--常温常新
Unable to create a folder to save the sketch: MKDIR sketch
Central Limit Theorem
基于SSMP的宠物医院管理系统
Implementation of ARP module in LwIP
建模规范:环境设置
xml学习笔记
The [MySQL] time field is set to the current time by default
解决u8glib只显示一行文字或者不显示的问题
flutter系列之:flutter中的flow
JSON解析,ESP32轻松获取时间气温和天气
Law of Large Numbers
大白话高并发(一)
30《MySQL 教程》MySQL 存储引擎概述
Modeling specifications: environment settings
Timing mechanism of LwIP
These 10 copywriting artifacts help you speed up the code. Are you still worried that you can't write a copywriting for US media?
What are the skills and methods for slip ring installation
