2026
Cai, Weibin; Zafarani, Reza
When Does Demographic Information Help? Data and Modeling Regimes for Perspective-Aware Hate Speech Detection Miscellaneous
2026.
@misc{cai2026doesdemographicinformationhelp,
  author        = {Weibin Cai and Reza Zafarani},
  title         = {When Does Demographic Information Help? Data and Modeling Regimes for Perspective-Aware Hate Speech Detection},
  year          = {2026},
  date          = {2026-01-01},
  eprint        = {2605.27313},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2605.27313},
  abstract      = {Demographic information is often used to model annotator perspectives in subjective tasks such as hate speech detection, but its benefit is inconsistent: it improves performance in some settings and behaves as noise in others. This paper asks when demographic features help. We analyze demographic gain as a function of both data split properties and modeling frameworks. For data splits, we measure annotator disagreement, namely how often annotators assign different labels to the same example, along with training size and train-test demographic coverage. We find that demographic gains concentrate in regimes with low training disagreement, high test disagreement, fine-grained ambiguity measurement, sufficient training data, and greater demographic overlap. Motivated by these regimes, we introduce a gated demographic residual model that treats demographics as a selective adjustment to text-only predictions. Experiments on MHS and POPQUORN show that this design is effective, especially on high disagreement or low confidence examples. Overall, our results suggest that demographics should not be assumed useful by default; their value depends jointly on the data regime and the modeling framework.},
  keywords      = {},
  pubstate      = {published},
  tppubtype     = {misc},
}
Cai, Weibin; Zafarani, Reza
Spectral Analysis of Fake News Propagation Miscellaneous
2026.
@misc{cai2026spectralanalysisfakenews,
  author        = {Weibin Cai and Reza Zafarani},
  title         = {Spectral Analysis of Fake News Propagation},
  year          = {2026},
  date          = {2026-01-01},
  eprint        = {2605.13861},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2605.13861},
  abstract      = {The propagation structure of fake news has been shown to be an important cue for detecting it; yet, existing propagation-based fake news detection methods have mainly relied on ad hoc topological features, and a unified view of cascade patterns is still lacking. To address this, we study news propagation from a spectral view by connecting graph spectra to propagation-related structural properties through rigorous spectral bounds. In particular, we introduce several new bounds and integrate them with existing ones into a unified spectral representation of information propagation. We then use these spectral bounds for downstream classification and design a discrete structural optimization framework to interpret learned propagation patterns. For efficient optimization, we rely on a first-order perturbation approximation and consider both score-guided and bound-guided objectives. Experiments on real-world data reveal meaningful spectral differences between fake and real news, competitive classification performance from spectral bounds, and interpretable evolution trajectories from structural optimization. The findings demonstrate the value of spectral analysis for understanding and modeling news propagation.},
  keywords      = {},
  pubstate      = {published},
  tppubtype     = {misc},
}
Abdolazimi, Reyhaneh; Jin, Shengmin; Varshney, Pramod K.; Zafarani, Reza
Harnessing the Power of Noise: A Survey of Techniques and Applications Journal Article
In: ACM Computing Surveys, 2026.
@article{abdolazimi2026harnessingnoise,
  author    = {Reyhaneh Abdolazimi and Shengmin Jin and Pramod K. Varshney and Reza Zafarani},
  title     = {Harnessing the Power of Noise: A Survey of Techniques and Applications},
  journal   = {ACM Computing Surveys},
  year      = {2026},
  date      = {2026-01-01},
  doi       = {10.1145/3811813},
  url       = {https://doi.org/10.1145/3811813},
  abstract  = {Noise, traditionally considered a nuisance in computational systems, is reconsidered for its unexpected and counter-intuitive benefits across a wide spectrum of domains, including nonlinear information processing, signal processing, image processing, machine learning, network science, and natural language processing. Through a comprehensive review of both historical and contemporary research, this survey presents a dual perspective on noise, acknowledging its potential to both disrupt and enhance performance. Particularly, we highlight how noise-enhanced training strategies can lead to models that better generalize from noisy data, positioning noise not just as a challenge to overcome but as a strategic tool for improvement. This work calls for a shift in how we perceive noise, proposing that it can be a spark for innovation and advancement in the information era.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article},
}
2025
Cao, Zhaoyang; Nguyen, John; Zafarani, Reza
Is Less Really More? Fake News Detection with Limited Information Journal Article
In: ACM SIGKDD Explorations Newsletter, vol. 27, no. 1, pp. 20–31, 2025.
@article{cao2025isless,
  author    = {Zhaoyang Cao and John Nguyen and Reza Zafarani},
  title     = {Is Less Really More? Fake News Detection with Limited Information},
  journal   = {ACM SIGKDD Explorations Newsletter},
  volume    = {27},
  number    = {1},
  pages     = {20--31},
  year      = {2025},
  date      = {2025-01-01},
  doi       = {10.1145/3748239.3748243},
  url       = {https://doi.org/10.1145/3748239.3748243},
  abstract  = {The threat that online fake news and misinformation pose to democracy, justice, public confidence, and especially to vulnerable populations, has led to a sharp increase in the need for fake news detection and intervention. Whether multi-modal or pure text-based, most fake news detection methods depend on textual analysis of entire articles. However, these methods face challenges including large training data requirements, sensitivity to topic changes, and the difficulty of encoding lengthy articles. This paper investigates whether effective fake news detection is feasible with limited textual information, focusing only on article source domains rather than full content.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article},
}
Tian, Hao; Jin, Shengmin; Zafarani, Reza
Representing Higher-Order Networks with Spectral Moments Proceedings Article
In: Proceedings of the 29th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD), Sydney, Australia, 2025.
@inproceedings{tian2025representinghigherorder,
  author    = {Hao Tian and Shengmin Jin and Reza Zafarani},
  title     = {Representing Higher-Order Networks with Spectral Moments},
  booktitle = {Proceedings of the 29th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD)},
  address   = {Sydney, Australia},
  year      = {2025},
  date      = {2025-01-01},
  abstract  = {The spectral properties of traditional (dyadic) graphs, where an edge connects exactly two vertices, are widely utilized in different applications. These spectral properties are closely connected to the structural properties of dyadic graphs. We generalize such connections and characterize higher-order networks by their spectral information. We first split the higher-order graphs by their orders into several uniform hypergraphs. For each uniform hypergraph, we extract the corresponding spectral information from the transition matrices of carefully designed random walks. From each spectrum, we compute the first few spectral moments and use all such spectral moments across different orders as the higher-order graph representation. We will show that these moments not only clearly indicate the return probabilities of random walks but are also closely related to various higher-order network properties such as degree distribution and clustering coefficient. Extensive experiments show the utility of this new representation in various settings.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
Cai, Weibin; Li, Jiayu; Zafarani, Reza
Unpacking Hateful Memes: Presupposed Context and False Claims Miscellaneous
2025.
@misc{cai2025unpackinghateful,
  author        = {Weibin Cai and Jiayu Li and Reza Zafarani},
  title         = {Unpacking Hateful Memes: Presupposed Context and False Claims},
  year          = {2025},
  date          = {2025-01-01},
  eprint        = {2510.09935},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2510.09935},
  abstract      = {While memes are often humorous, they are frequently used to disseminate hate, causing serious harm to individuals and society. Current approaches to hateful meme detection mainly rely on pre-trained language models. However, less focus has been dedicated to what makes a meme hateful. Drawing on insights from philosophy and psychology, we argue that hateful memes are characterized by two essential features: a presupposed context and the expression of false claims. To capture presupposed context, we develop PCM for modeling contextual information across modalities. To detect false claims, we introduce the FACT module, which integrates external knowledge and harnesses cross-modal reference graphs.},
  keywords      = {},
  pubstate      = {published},
  tppubtype     = {misc},
}
Cai, Weibin; Zafarani, Reza
Seeing Hate Differently: Hate Subspace Modeling for Culture-Aware Hate Speech Detection Miscellaneous
2025.
@misc{cai2025seeinghate,
  author        = {Weibin Cai and Reza Zafarani},
  title         = {Seeing Hate Differently: Hate Subspace Modeling for Culture-Aware Hate Speech Detection},
  year          = {2025},
  date          = {2025-01-01},
  eprint        = {2510.13837},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2510.13837},
  abstract      = {Hate speech detection faces significant challenges when applied across cultural contexts, as expressions considered hateful in one culture may be acceptable in another. In this work, we propose a culture-aware framework for hate speech detection that models distinct "hate subspaces" reflecting how different cultural groups perceive and express hate. Our approach moves beyond one-size-fits-all classification and captures the nuanced, culturally dependent nature of harmful language online.},
  keywords      = {},
  pubstate      = {published},
  tppubtype     = {misc},
}
Cao, Zhaoyang; Schooler, Lael; Zafarani, Reza
Analyzing Memory Effects in Large Language Models through the Lens of Cognitive Psychology Miscellaneous
2025.
@misc{cao2025analyzingmemory,
  author        = {Zhaoyang Cao and Lael Schooler and Reza Zafarani},
  title         = {Analyzing Memory Effects in Large Language Models through the Lens of Cognitive Psychology},
  year          = {2025},
  date          = {2025-01-01},
  eprint        = {2509.17138},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2509.17138},
  abstract      = {Large Language Models (LLMs) exhibit memory-like behaviors that resemble aspects of human cognition. In this work, we examine such behaviors through the lens of cognitive psychology, drawing parallels between LLM outputs and classical findings on human memory including primacy/recency effects, interference, and forgetting curves. By bridging machine learning and cognitive science, we offer a framework for interpreting LLM behavior and discuss implications for evaluation, reliability, and the design of more human-aligned AI systems.},
  keywords      = {},
  pubstate      = {published},
  tppubtype     = {misc},
}
2024
Nanabala, Chiradeep; Mohan, Chilukuri K.; Zafarani, Reza
Unmasking AI-Generated Fake News Across Multiple Domains Miscellaneous
Preprints.org, 2024, (May 2024).
@misc{nanabala2024unmasking,
  author       = {Chiradeep Nanabala and Chilukuri K. Mohan and Reza Zafarani},
  title        = {Unmasking {AI}-Generated Fake News Across Multiple Domains},
  howpublished = {Preprints.org},
  year         = {2024},
  date         = {2024-01-01},
  note         = {May 2024},
  doi          = {10.20944/preprints202405.0686.v1},
  url          = {https://doi.org/10.20944/preprints202405.0686.v1},
  abstract     = {The rapid rise of large language models has enabled the automated generation of fake news at scale, with content that is increasingly difficult to distinguish from human-written text. In this work, we study AI-generated fake news across multiple domains and develop techniques to detect and characterize such content. We analyze the linguistic and stylistic signatures of machine-generated misinformation, evaluate the performance of detection methods under domain shift, and discuss the implications for combating automated disinformation campaigns.},
  keywords     = {},
  pubstate     = {published},
  tppubtype    = {misc},
}
Tian, Hao; Zafarani, Reza
Higher-Order Networks Representation and Learning: A Survey Journal Article
In: ACM SIGKDD Explorations Newsletter, vol. 26, no. 1, pp. 1–18, 2024.
@article{hao2024higherorder,
  author    = {Hao Tian and Reza Zafarani},
  title     = {Higher-Order Networks Representation and Learning: A Survey},
  journal   = {ACM SIGKDD Explorations Newsletter},
  volume    = {26},
  number    = {1},
  pages     = {1--18},
  year      = {2024},
  date      = {2024-01-01},
  doi       = {10.1145/3682112.3682114},
  url       = {https://doi.org/10.1145/3682112.3682114},
  abstract  = {Network data has become widespread, larger, and more complex over the years. Traditional network data is dyadic, capturing the relations among pairs of entities. With the need to model interactions among more than two entities, significant research has focused on higher-order networks and ways to represent, analyze, and learn from them. There are two main directions to studying higher-order networks. One direction has focused on capturing higher-order patterns in traditional (dyadic) graphs by changing the basic unit of study from nodes to small frequently observed subgraphs, called motifs. As most existing network data comes in the form of pairwise dyadic relationships, studying higher-order structures within such graphs may uncover new insights. The second direction aims to directly model higher-order interactions using new and more complex representations such as simplicial complexes or hypergraphs. Some of these models have long been proposed, but improvements in computational power and the advent of new computational techniques have increased their popularity. Our goal in this paper is to provide a succinct yet comprehensive summary of the advanced higher-order network analysis techniques. We provide a systematic review of its foundations and algorithms, along with use cases and applications of higher-order networks in various scientific domains.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article},
}
2023
Li, Jiayu; Zhang, Tianyun; Jin, Shengmin; Zafarani, Reza
Semi-Supervised Graph Ultra-Sparsifier Using Reweighted $\ell_1$ Optimization Proceedings Article
In: Proceedings of the 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2023.
@inproceedings{jiayu2023semisupervised,
  author    = {Jiayu Li and Tianyun Zhang and Shengmin Jin and Reza Zafarani},
  title     = {Semi-Supervised Graph Ultra-Sparsifier Using Reweighted {$\ell_1$} Optimization},
  booktitle = {Proceedings of the 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  year      = {2023},
  date      = {2023-01-01},
  abstract  = {Graph representation learning with the family of graph convolution networks (GCN) provides powerful tools for prediction on graphs. As graphs grow with more edges, the GCN family suffers from sub-optimal generalization performance due to task-irrelevant connections. Recent studies solve this problem by using graph sparsification in neural networks. However, graph sparsification cannot generate ultra-sparse graphs while simultaneously maintaining the performance of the GCN family. To address this problem, we propose Graph Ultra-sparsifier, a semi-supervised graph sparsifier with dynamically-updated regularization terms based on the graph convolution. The graph ultra-sparsifier can generate ultra-sparse graphs while maintaining the performance of the GCN family with the ultra-sparse graphs as inputs. In the experiments, when compared to the state-of-the-art graph sparsifiers, our graph ultra-sparsifier generates ultra-sparse graphs and these ultra-sparse graphs can be used as inputs to maintain the performance of GCN and its variants in node classification tasks.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
Abdolazimi, Reyhaneh; Zafarani, Reza
The Advantages of Adding Noise Proceedings Article
In: Companion Proceedings of The Web Conference 2023 (WWW), 2023.
@inproceedings{reyhaneh2023advantages,
  author    = {Reyhaneh Abdolazimi and Reza Zafarani},
  title     = {The Advantages of Adding Noise},
  booktitle = {Companion Proceedings of The Web Conference 2023 (WWW)},
  year      = {2023},
  date      = {2023-01-01},
  abstract  = {Noise is often seen as unwanted signal while it has been shown beneficial in many information processing systems and algorithms. Noise enhancement has been utilized in many biological and physical systems, machine learning methods, and deep learning techniques in order to improve efficiency and performance. This tutorial presents (1) the different types of noise; (2) noise applications; (3) noise-enhanced processing systems; (4) noise-enhanced learning methods; and (5) noise injection methods in network science.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
Zhou, Xinyi; Li, Jiayu; Li, Qinzhou; Zafarani, Reza
Linguistic-style-aware Neural Networks for Fake News Detection Miscellaneous
2023.
@misc{xinyi2023linguisticstyleaware,
  author    = {Xinyi Zhou and Jiayu Li and Qinzhou Li and Reza Zafarani},
  title     = {Linguistic-style-aware Neural Networks for Fake News Detection},
  year      = {2023},
  date      = {2023-01-01},
  abstract  = {We propose the hierarchical recursive neural network (HERO) to predict fake news by learning its linguistic style, which is distinguishable from the truth, as psychological theories reveal. We first generate the hierarchical linguistic tree of news documents; by doing so, we translate each news document's linguistic style into its writer's usage of words and how these words are recursively structured as phrases, sentences, paragraphs, and, ultimately, the document. By integrating the hierarchical linguistic tree with the neural network, the proposed method learns and classifies the representation of news documents by capturing their locally sequential and globally recursive structures that are linguistically meaningful. It is the first work offering the hierarchical linguistic tree and the neural network preserving the tree information to our best knowledge. Experimental results based on public real-world datasets demonstrate the proposed method's effectiveness, which can outperform state-of-the-art techniques in classifying short and long news documents. We also examine the differential linguistic style of fake news and the truth and observe some patterns of fake news. The code and data have been publicly available.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {misc},
}
2022
Jin, Shengmin; Phoha, Vir V.; Zafarani, Reza
Graph-Based Identification and Authentication: A Stochastic Kronecker Approach Journal Article
In: IEEE Transactions on Knowledge and Data Engineering, vol. 34, no. 7, pp. 3282–3294, 2022.
@article{shengmin2022graphbased,
  author    = {Shengmin Jin and Vir V. Phoha and Reza Zafarani},
  title     = {Graph-Based Identification and Authentication: A Stochastic {Kronecker} Approach},
  journal   = {IEEE Transactions on Knowledge and Data Engineering},
  volume    = {34},
  number    = {7},
  pages     = {3282--3294},
  year      = {2022},
  date      = {2022-01-01},
  doi       = {10.1109/TKDE.2020.3025989},
  url       = {https://doi.org/10.1109/TKDE.2020.3025989},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article},
}
Jin, Shengmin; Tian, Hao; Li, Jiayu; Zafarani, Reza
A Spectral Representation of Networks: The Path of Subgraphs Proceedings Article
In: Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD), 2022.
@inproceedings{shengmin2022spectral,
  author    = {Shengmin Jin and Hao Tian and Jiayu Li and Reza Zafarani},
  title     = {A Spectral Representation of Networks: The Path of Subgraphs},
  booktitle = {Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD)},
  year      = {2022},
  date      = {2022-01-01},
  abstract  = {Network representation learning has played a critical role in studying networks. One way to study a graph is to focus on its spectrum, i.e., the eigenvalue distribution of its associated matrices. Recent advancements in spectral graph theory show that spectral moments of a network can be used to capture the network structure and various graph properties. However, sometimes networks with different structures or sizes can have the same or similar spectral moments, not to mention the existence of the cospectral graphs. To address such problems, we propose a 3D network representation that relies on the spectral information of subgraphs: the Spectral Path, a path connecting the spectral moments of the network and those of its subgraphs of different sizes. We show that the spectral path is interpretable and can capture relationship between a network and its subgraphs, for which we present a theoretical foundation. We demonstrate the effectiveness of the spectral path in applications such as network visualization and network identification.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
Jin, Shengmin; Ma, Rui; Li, Jiayu; Eftekharnejad, Sara; Zafarani, Reza
A Spectral Measure for Network Robustness: Assessment, Design, and Evolution Proceedings Article
In: Proceedings of the IEEE International Conference on Knowledge Graph (ICKG), 2022.
@inproceedings{shengmin2022spectral2,
  author    = {Shengmin Jin and Rui Ma and Jiayu Li and Sara Eftekharnejad and Reza Zafarani},
  title     = {A Spectral Measure for Network Robustness: Assessment, Design, and Evolution},
  booktitle = {Proceedings of the IEEE International Conference on Knowledge Graph (ICKG)},
  year      = {2022},
  date      = {2022-01-01},
  abstract  = {A robust system should perform well under random failures or targeted attacks, and networks have been widely used to model the underlying structure of complex systems such as communication, infrastructure, and transportation networks. Hence, network robustness becomes critical to understanding system robustness. In this paper, we propose a spectral measure for network robustness: the second spectral moment m2 of the network. Our results show that a smaller second spectral moment m2 indicates a more robust network. We demonstrate both theoretically and with extensive empirical studies that the second spectral moment can help (1) capture various traditional measures of network robustness; (2) assess the robustness of networks; (3) design networks with controlled robustness; and (4) study how complex networked systems (e.g., power systems) behave under cascading failures.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
Li, Jiayu; Zhang, Tianyun; Jin, Shengmin; Fardad, Makan; Zafarani, Reza
AdverSparse: An Adversarial Attack Framework for Deep Spatial-Temporal Graph Neural Networks Proceedings Article
In: Proceedings of the 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2022.
@inproceedings{jiayu2022adversparse,
  author    = {Jiayu Li and Tianyun Zhang and Shengmin Jin and Makan Fardad and Reza Zafarani},
  title     = {{AdverSparse}: An Adversarial Attack Framework for Deep Spatial-Temporal Graph Neural Networks},
  booktitle = {Proceedings of the 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  year      = {2022},
  date      = {2022-01-01},
  abstract  = {Spatial-temporal graphs have been widely observed in various domains such as neuroscience, climate research, and transportation engineering. The state-of-the-art models of spatial-temporal graphs rely on Graph Neural Networks (GNNs) to obtain explicit representations for such networks and to discover hidden spatial dependencies in them, leading to superior performance in various tasks. In this paper, we propose a sparse adversarial attack framework AdverSparse to illustrate that when only a few key connections are removed in such graphs, hidden spatial dependencies learned by such spatial-temporal models are significantly impacted, leading to various issues such as increasing prediction errors. We formulate the adversarial attack on such models as an optimization problem and solve it by the Alternating Direction Method of Multipliers (ADMM). Experiments show that AdverSparse can find and remove key connections in these graphs, leading to malfunctioning models, even in models capable of learning hidden spatial dependencies.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
Zhou, Xinyi; Zafarani, Reza; Ferrara, Emilio
From Fake News to #FakeNews: Mining Direct and Indirect Relationships among Hashtags for Fake News Detection Miscellaneous
2022.
@misc{xinyi2022from,
  author    = {Xinyi Zhou and Reza Zafarani and Emilio Ferrara},
  title     = {From Fake News to {\#FakeNews}: Mining Direct and Indirect Relationships among Hashtags for Fake News Detection},
  year      = {2022},
  date      = {2022-01-01},
  abstract  = {The COVID-19 pandemic has gained worldwide attention and allowed fake news, such as ``COVID-19 is the flu,'' to spread quickly and widely on social media. Combating this coronavirus infodemic demands effective methods to detect fake news. To this end, we propose a method to infer news credibility from hashtags involved in news dissemination on social media, motivated by the tight connection between hashtags and news credibility observed in our empirical analyses. We first introduce a new graph that captures all (direct and \textit{indirect}) relationships among hashtags. Then, a language-independent semi-supervised algorithm is developed to predict fake news based on this constructed graph. This study first investigates the indirect relationship among hashtags; the proposed approach can be extended to any homogeneous graph to capture a comprehensive relationship among nodes. Language independence opens the proposed method to multilingual fake news detection. Experiments conducted on two real-world datasets demonstrate the effectiveness of our approach in identifying fake news, especially at an \textit{early} stage of propagation.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {misc},
}
Zhou, Xinyi; Shu, Kai; Phoha, Vir V.; Liu, Huan; Zafarani, Reza
"This is Fake! Shared it by Mistake": Assessing the Intent of Fake News Spreaders Proceedings Article
In: Proceedings of The Web Conference 2022 (WWW), 2022.
@inproceedings{xinyi2022this,
  author    = {Xinyi Zhou and Kai Shu and Vir V. Phoha and Huan Liu and Reza Zafarani},
  title     = {{``This is Fake! Shared it by Mistake''}: Assessing the Intent of Fake News Spreaders},
  booktitle = {Proceedings of The Web Conference 2022 (WWW)},
  year      = {2022},
  date      = {2022-01-01},
  abstract  = {Individuals can be misled by fake news and spread it unintentionally without knowing that it is false. This phenomenon has been frequently observed but has not been investigated. Our aim in this work is to assess the intent of fake news spreaders. To distinguish between intentional versus unintentional spreading, we study the psychological interpretations behind unintentional spreading. With this foundation, we then propose an influence graph, using which we assess the intent of fake news spreaders. Our extensive experiments show that the assessed intent can help significantly differentiate between intentional and unintentional fake news spreaders. Furthermore, the estimated intent can significantly improve the current techniques that detect fake news. To our best knowledge, this is the first work to model individuals' intent in fake news spreading.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
Jin, Shengmin; Koutra, Danai; Zafarani, Reza
Interpretable Network Representations Proceedings Article
In: Companion Proceedings of The Web Conference 2022 (WWW), 2022.
@inproceedings{shengmin2022interpretable,
  author    = {Shengmin Jin and Danai Koutra and Reza Zafarani},
  title     = {Interpretable Network Representations},
  booktitle = {Companion Proceedings of The Web Conference 2022 (WWW)},
  year      = {2022},
  date      = {2022-01-01},
  url       = {https://shengminjin.github.io/tutorials/www2022},
  abstract  = {Networks (or interchangeably graphs) have been ubiquitous across the globe and within science and engineering: social networks, collaboration networks, protein-protein interaction networks, infrastructure networks, among many others. Machine learning on graphs, especially network representation learning, has shown remarkable performance in tasks related to graphs, such as node/graph classification, graph clustering, and link prediction. These tasks are closely related to the Web applications, especially social network analysis and recommendation systems. For example, node classification and graph clustering are widely used for studies on community detection, and link prediction plays a vital role in friend or item recommendation. Like performance, it is equally crucial for individuals to understand the behavior of machine learning models and be able to explain how these models arrive at a certain decision. Such needs have motivated many studies on interpretability in machine learning. Specifically, for social network analysis, we may need to know the reasons why certain users (or groups) are classified or clustered together by the machine learning models, or why a friend recommendation system considers some users similar so that they are recommended to connect with each other. Under such circumstances, an interpretable network representation is necessary and it should carry the graph information to a level understandable by humans. In this tutorial, we will (1) define interpretability and go over its definitions within different contexts in studies of networks; (2) review and summarize various interpretable network representations; (3) discuss connections to network embedding, graph summarization, and network visualization methods; (4) discuss explainability in Graph Neural Networks, as such techniques are often perceived to have limited interpretability; and (5) highlight the open research problems and future research directions. The tutorial is designed for researchers, graduate students, and practitioners in areas such as graph mining, machine learning on graphs, and machine learning interpretability. Few prerequisites are required for The Web Conference participants to attend.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
Tian, Hao; Jin, Shengmin; Zafarani, Reza
Exploiting Cross-Order Patterns and Link Prediction in Higher-Order Networks Proceedings Article
In: Proceedings of the IEEE International Conference on Data Mining Workshops (ICDMW), 2022.
@inproceedings{hao2022exploiting,
title = {Exploiting Cross-Order Patterns and Link Prediction in Higher-Order Networks},
author = {Hao Tian and Shengmin Jin and Reza Zafarani},
year = {2022},
date = {2022-01-01},
booktitle = {Proceedings of the IEEE International Conference on Data Mining Workshops (ICDMW)},
abstract = {With the demand to model the relationships among three or more entities, higher-order networks are now more widespread across various domains. Relationships such as multi-author collaborations, co-appearance of keywords, and co-purchases can be naturally modeled as higher-order networks. However, due to (1) computational complexity and (2) insufficient higher-order data, exploring higher-order networks is often limited to order-3 motifs (or triangles). To address these problems, we explore and quantify similarities among various network orders. Our goal is to build relationships between different network orders and to solve higher-order problems using lower-order information. Similarities between different orders are not comparable directly. Hence, we introduce a set of general cross-order similarities, and a measure: subedge rate. Our experiments on multiple real-world datasets demonstrate that most higher-order networks have considerable consistency as we move from higher-orders to lower-orders. Utilizing this discovery, we develop a new cross-order framework for higher-order link prediction method. These methods can predict higher-order links from lower-order edges, which cannot be attained by current higher-order methods that rely on data from a single order.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Abdolazimi, Reyhaneh; Zafarani, Reza
Noise Enhancement: Techniques and Applications Proceedings Article
In: Proceedings of the 2022 SIAM International Conference on Data Mining (SDM), 2022.
@inproceedings{reyhaneh2022noise,
title = {Noise Enhancement: Techniques and Applications},
author = {Reyhaneh Abdolazimi and Reza Zafarani},
url = {https://reyhanehabdolazimi.com/tutorials/SDM2022},
year = {2022},
date = {2022-01-01},
booktitle = {Proceedings of the 2022 SIAM International Conference on Data Mining (SDM)},
abstract = {Noise is often seen as unwanted signal while it has been shown beneficial in many information processing systems and algorithms. Noise enhancement has been utilized in many biological and physical systems, machine learning methods, and deep learning techniques in order to improve efficiency and performance. This tutorial presents (1) the different types of noise; (2) noise applications; (3) noise-enhanced processing systems; (4) noise-enhanced learning methods; and (5) noise injection methods in network science.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2021
Abdolazimi, Reyhaneh; Zafarani, Reza
Noise-Enhanced Unsupervised Link Prediction Proceedings Article
In: Proceedings of the 25th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD), 2021.
@inproceedings{reyhaneh2021noiseenhanced,
title = {Noise-Enhanced Unsupervised Link Prediction},
author = {Reyhaneh Abdolazimi and Reza Zafarani},
year = {2021},
date = {2021-01-01},
booktitle = {Proceedings of the 25th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD)},
abstract = {Link prediction has attracted attention from multiple research areas. Although several — mostly unsupervised — link prediction methods have been proposed, improving them is still under study. In several fields of science, noise is used as an advantage to improve information processing, inspiring us to also investigate noise enhancement in link prediction. In this research, we study link prediction from a data preprocessing point of view by introducing a noise-enhanced link prediction framework that improves the links predicted by current link prediction heuristics. The framework proposes three noise methods to help predict better links. Theoretical explanation and extensive experiments on synthetic and real-world datasets show that our framework helps improve current link prediction methods.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Li, Jiayu; Zhang, Tianyun; Tian, Hao; Jin, Shengmin; Fardad, Makan; Zafarani, Reza
Graph Sparsification with Graph Convolutional Networks Journal Article
In: International Journal of Data Science and Analytics, 2021.
@article{jiayu2021graph,
title = {Graph Sparsification with Graph Convolutional Networks},
author = {Jiayu Li and Tianyun Zhang and Hao Tian and Shengmin Jin and Makan Fardad and Reza Zafarani},
year = {2021},
date = {2021-01-01},
journal = {International Journal of Data Science and Analytics},
abstract = {Graphs are ubiquitous across the globe and within science and engineering. Some powerful classifiers are proposed to classify nodes in graphs, such as Graph Convolutional Networks (GCNs). However, as graphs are growing in size, node classification on large graphs can be space and time consuming due to using whole graphs. Hence, some questions are raised, particularly, whether one can prune some of the edges of a graph while maintaining prediction performance for node classification, or train classifiers on specific subgraphs instead of a whole graph with limited performance loss in node classification. To address these questions, we propose Sparsified Graph Convolutional Network (SGCN), a neural network graph sparsifier that sparsifies a graph by pruning some edges. We formulate sparsification as an optimization problem and solve it by an Alternating Direction Method of Multipliers (ADMM). The experiment illustrates that SGCN can identify highly effective subgraphs for node classification in GCN compared to other sparsifiers such as Random Pruning, Spectral Sparsifier and DropEdge. We also show that sparsified graphs provided by SGCN can be inputs to GCN, which leads to better or comparable node classification performance with that of original graphs in GCN, DeepWalk, GraphSAGE, and GAT. We provide insights on why SGCN performs well by analyzing its performance from the view of a low-pass filter.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Yang, Chen; Zhou, Xinyi; Zafarani, Reza
CHECKED: Chinese COVID-19 Fake News Dataset Journal Article
In: Social Network Analysis and Mining, 2021.
@article{chen2021checked,
title = {CHECKED: Chinese COVID-19 Fake News Dataset},
author = {Chen Yang and Xinyi Zhou and Reza Zafarani},
year = {2021},
date = {2021-01-01},
journal = {Social Network Analysis and Mining},
abstract = {COVID-19 has impacted all lives. To maintain social distancing and avoiding exposure, works and lives have gradually moved online. Under this trend, social media usage to obtain COVID-19 news has increased. Alas, misinformation on COVID-19 is frequently spread on social media. In this work, we develop CHECKED, the first Chinese dataset on COVID-19 misinformation. CHECKED provides ground-truth on credibility, carefully obtained by ensuring the specific sources are used. CHECKED includes microblogs related to COVID-19, identified by using a specific list of keywords, covering a total 2120 microblogs published from December 2019 to August 2020. The dataset contains a rich set of multimedia information for each microblog including ground-truth label, textual, visual, response, and social network information. We hope that CHECKED can facilitate studies that target misinformation on coronavirus. The dataset is available at this https URL with measures of protecting users' privacy.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2020
Zhou, Xinyi; Zafarani, Reza
A Survey of Fake News: Fundamental Theories, Detection Methods, and Opportunities Journal Article
In: ACM Computing Surveys, vol. 53, no. 5, 2020.
@article{xinyi2020survey,
title = {A Survey of Fake News: Fundamental Theories, Detection Methods, and Opportunities},
author = {Xinyi Zhou and Reza Zafarani},
year = {2020},
date = {2020-09-01},
journal = {ACM Computing Surveys},
volume = {53},
number = {5},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Jin, Shengmin; Zafarani, Reza
The Spectral Zoo of Networks: Embedding and Visualizing Networks with Spectral Moments Proceedings Article
In: Proceedings of the 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD), 2020.
@inproceedings{shengmin2020spectral,
title = {The Spectral Zoo of Networks: Embedding and Visualizing Networks with Spectral Moments},
author = {Shengmin Jin and Reza Zafarani},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD)},
abstract = {Network embedding techniques have been widely and successfully used in network-based applications such as node classification and link prediction. However, an ideal representation of a network should be both informative for prediction and be easy to interpret by users. In this paper, we introduce a spectral embedding method for a network, its Spectral Point, which is basically the truncated spectral moments of a network. We mathematically prove that spectral moments have close relationship with network structure (e.g. number of triangles and squares) and various network properties (e.g. degree distribution, clustering coefficient and network connectivity). Using spectral points, we introduce a visualizable and bounded 3D embedding space, where user can characterize different networks such as special graphs (e.g., cycles), or real-world networks from different categories (e.g., social or biological networks). We demonstrate that spectral points can be used for network identification (i.e., what network is this subgraph sampled from?) and the truncated spectral moments do not lose much predictive power.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Tian, Hao; Zafarani, Reza
Exploiting Common Neighbor Graph for Link Prediction Proceedings Article
In: Proceedings of the 29th ACM International Conference on Information and Knowledge Management (CIKM), 2020.
@inproceedings{hao2020exploiting,
title = {Exploiting Common Neighbor Graph for Link Prediction},
author = {Hao Tian and Reza Zafarani},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the 29th ACM International Conference on Information and Knowledge Management (CIKM)},
abstract = {Link prediction aims to predict whether two nodes in a network are likely to get connected. Motivated by its applications, e.g., in friend or product recommendation, link prediction has been extensively studied over the years. Most link prediction methods are designed based on specific assumptions that may or may not hold in different networks, leading to link prediction methods that are not generalizable. Here, for the first time, we address this problem by proposing general link prediction methods that can capture network-specific patterns. Most link prediction methods rely on computing similarities between nodes. By learning a γ-decaying model, the proposed methods can measure the pairwise similarities between nodes more accurately, even when only using common neighbor information, which is often used by current techniques.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Zhou, Xinyi; Mulay, Apurva; Ferrara, Emilio; Zafarani, Reza
ReCOVery: A Multimodal Repository for COVID-19 News Credibility Research Proceedings Article
In: Proceedings of the 29th ACM International Conference on Information and Knowledge Management (CIKM), 2020.
@inproceedings{xinyi2020recovery,
title = {ReCOVery: A Multimodal Repository for COVID-19 News Credibility Research},
author = {Xinyi Zhou and Apurva Mulay and Emilio Ferrara and Reza Zafarani},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the 29th ACM International Conference on Information and Knowledge Management (CIKM)},
abstract = {First identified in Wuhan, China, in December 2019, the outbreak of COVID-19 has been declared as a global emergency in January, and a pandemic in March 2020 by the World Health Organization (WHO). Along with this pandemic, we are also experiencing an ``infodemic'' of information with low credibility such as fake news and conspiracies. In this work, we present ReCOVery, a repository designed and constructed to facilitate the studies of combating such information regarding COVID-19. We first broadly search and investigate $\sim$2,000 news publishers, from which 61 are identified with extreme [high or low] levels of credibility. By inheriting the credibility of the media on which they were published, a total of 2,029 news articles on coronavirus, published from January to May 2020, are collected in the repository, along with 140,820 tweets that reveal how these news articles are spread on the social network. The repository provides multimodal information of news articles on coronavirus, including textual, visual, temporal, and network information. The way that news credibility is obtained allows a trade-off between dataset scalability and label accuracy. Extensive experiments are conducted to present data statistics and distributions, as well as to provide baseline performances for predicting news credibility so that future methods can be directly compared. Our repository is available at https://coronavirus-fakenews.com, which will be timely updated.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Abdolazimi, Reyhaneh; Jin, Shengmin; Zafarani, Reza
Noise-Enhanced Community Detection Proceedings Article
In: Proceedings of the 31st ACM Conference on Hypertext and Social Media (HT), 2020.
@inproceedings{reyhaneh2020noiseenhanced,
title = {Noise-Enhanced Community Detection},
author = {Reyhaneh Abdolazimi and Shengmin Jin and Reza Zafarani},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the 31st ACM Conference on Hypertext and Social Media (HT)},
abstract = {Community structure plays a significant role in uncovering the structure of a network. While many community detection algorithms have been introduced, improving the quality of detected communities is still an open problem. In many areas of science, adding noise improves system performance and algorithm efficiency, motivating us to also explore the possibility of adding noise to improve community detection algorithms. We propose a noise-enhanced community detection framework that improves communities detected by existing community detection methods. The framework introduces three noise methods to help detect communities better. Theoretical justification and extensive experiments on synthetic and real-world datasets show that our framework helps community detection methods find better communities.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Ma, Rui; Jin, Shengmin; Eftekharnejad, Sara; Zafarani, Reza; Philippe, Wolf Peter Jean
A Probabilistic Cascading Failure Model for Dynamic Operating Conditions Journal Article
In: IEEE Access, 2020.
@article{rui2020probabilistic,
title = {A Probabilistic Cascading Failure Model for Dynamic Operating Conditions},
author = {Rui Ma and Shengmin Jin and Sara Eftekharnejad and Reza Zafarani and Wolf Peter Jean Philippe},
year = {2020},
date = {2020-01-01},
journal = {IEEE Access},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Li, Jiayu; Zhang, Tianyun; Tian, Hao; Jin, Shengmin; Fardad, Makan; Zafarani, Reza
SGCN: A Graph Sparsifier based on Graph Convolutional Networks Proceedings Article
In: Proceedings of the 24th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD), 2020.
@inproceedings{jiayu2020sgcn,
title = {SGCN: A Graph Sparsifier based on Graph Convolutional Networks},
author = {Jiayu Li and Tianyun Zhang and Hao Tian and Shengmin Jin and Makan Fardad and Reza Zafarani},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the 24th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD)},
abstract = {Graphs are ubiquitous across the globe and within science and engineering. With graphs growing in size, node classification on large graphs can be space and time consuming, even with powerful classifiers such as Graph Convolutional Networks (GCNs). Hence, some questions are raised, particularly, whether one can keep only some of the edges of a graph while maintaining prediction performance for node classification, or train classifiers on specific subgraphs instead of a whole graph with limited performance loss in node classification. To address these questions, we propose Sparsified Graph Convolutional Network (SGCN), a neural network graph sparsifier that sparsifies a graph by pruning some edges. We formulate sparsification as an optimization problem, which we solve by an Alternating Direction Method of Multipliers (ADMM) based solution. We show that sparsified graphs provided by SGCN can be used as inputs to GCN, leading to better or comparable node classification performance with that of original graphs in GCN, DeepWalk, and GraphSAGE. We provide insights on why SGCN performs well by analyzing its performance from the view of a low-pass filter.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Zhou, Xinyi; Wu, Jindi; Zafarani, Reza
SAFE: Similarity-Aware Multi-Modal Fake News Detection Proceedings Article
In: Proceedings of the 24th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD), 2020.
@inproceedings{xinyi2020safe,
title = {SAFE: Similarity-Aware Multi-Modal Fake News Detection},
author = {Xinyi Zhou and Jindi Wu and Reza Zafarani},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the 24th Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD)},
abstract = {Effective detection of ``fake news'' has recently attracted significant attention. Current studies have made significant contributions to predicting fake news with less focus on exploiting the relationship (similarity) between the textual and visual information in news articles. Attaching importance to such similarity helps identify fake news stories that, for example, attempt to use irrelevant images to attract readers' attention. In this work, we propose a $\mathsf{S}$imilarity-$\mathsf{A}$ware $\mathsf{F}$ak$\mathsf{E}$ news detection method ($\mathsf{SAFE}$) which investigates multi-modal (textual and visual) information of news articles. First, neural networks are adopted to separately extract textual and visual features for news representation. We further investigate the relationship between the extracted features across modalities. Such representations of news textual and visual information along with their relationship are jointly learned and used to predict fake news. The proposed method facilitates recognizing the falsity of news articles based on their text, images, or their ``mismatches.'' We conduct extensive experiments on large-scale real-world data, which demonstrate the effectiveness of the proposed method.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Zhou, Xinyi; Jain, Atishay; Phoha, Vir V.; Zafarani, Reza
Fake News Early Detection: A Theory-driven Model Journal Article
In: ACM Transactions on Digital Threats: Research and Practice, 2020.
@article{xinyi2020fake,
title = {Fake News Early Detection: A Theory-driven Model},
author = {Xinyi Zhou and Atishay Jain and Vir V. Phoha and Reza Zafarani},
year = {2020},
date = {2020-01-01},
journal = {ACM Transactions on Digital Threats: Research and Practice},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Sitaula, Niraj; Mohan, Chilukuri K.; Grygiel, Jennifer; Zhou, Xinyi; Zafarani, Reza
Credibility-based Fake News Detection Book Section
In: Shu, Kai; Wang, Suhang; Lee, Dongwon; Liu, Huan (Ed.): Disinformation, Misinformation, and Fake News in Social Media, Springer, 2020.
@incollection{niraj2020credibilitybased,
title = {Credibility-based Fake News Detection},
author = {Niraj Sitaula and Chilukuri K. Mohan and Jennifer Grygiel and Xinyi Zhou and Reza Zafarani},
editor = {Kai Shu and Suhang Wang and Dongwon Lee and Huan Liu},
year = {2020},
date = {2020-01-01},
booktitle = {Disinformation, Misinformation, and Fake News in Social Media},
publisher = {Springer},
keywords = {},
pubstate = {published},
tppubtype = {incollection}
}
Jin, Shengmin; Wituszynski, Richard; Caiello-Gingold, Max; Zafarani, Reza
WebShapes: Network Visualization with 3D Shapes Proceedings Article
In: Proceedings of the 13th ACM International Conference on Web Search and Data Mining (WSDM), 2020.
@inproceedings{shengmin2020webshapes,
title = {WebShapes: Network Visualization with 3D Shapes},
author = {Shengmin Jin and Richard Wituszynski and Max Caiello-Gingold and Reza Zafarani},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the 13th ACM International Conference on Web Search and Data Mining (WSDM)},
abstract = {Network visualization has played a critical role in graph analysis, as it not only presents a big picture of a network but also helps reveal the structural information of a network. The most popular visual representation of networks is the node-link diagram. However, visualizing a large network with the node-link diagram can be challenging due to the difficulty in obtaining an optimal graph layout. To address this challenge, a recent advancement in network representation: network shape, allows one to compactly represent a network and its subgraphs with the distribution of their embeddings. Inspired by this research, we have designed a web platform WebShapes that enables researchers and practitioners to visualize their network data as customized 3D shapes (http://b.link/webshapes). Furthermore, we provide a case study on real-world networks to explore the sensitivity of network shapes to different graph sampling, embedding, and fitting methods, and we show examples of understanding networks through their network shapes.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Zhou, Xinyi; Jin, Shengmin; Zafarani, Reza
Sentiment Paradoxes in Social Networks: Why Your Friends are More Positive Than You? Proceedings Article
In: Proceedings of the 14th International AAAI Conference on Web and Social Media (ICWSM), 2020.
@inproceedings{xinyi2020sentiment,
title = {Sentiment Paradoxes in Social Networks: Why Your Friends are More Positive Than You?},
author = {Xinyi Zhou and Shengmin Jin and Reza Zafarani},
year = {2020},
date = {2020-01-01},
booktitle = {Proceedings of the 14th International AAAI Conference on Web and Social Media (ICWSM)},
abstract = {Most individuals consider their friends to be more positive than themselves, exhibiting a sentiment paradox. Psychological research attributes this paradox to human cognition bias. With the goal to understand this phenomenon, we study sentiment paradoxes in social networks. Our work shows that social connections (friends, followees, or followers) of users are indeed generally (not illusively) more positive than the users themselves. Five existing sentiment paradoxes are identified at different network levels ranging from triads to large-scale communities. Empirical and theoretical evidence are provided to verify the observed and expected existence of such sentiment paradoxes. By investigating the relationships between the sentiment paradox and other well-developed network paradoxes, i.e., friendship paradox and activity paradox, we found that user sentiments are positively correlated to their number of social connections while hardly to their social activity. Finally, we demonstrate how the validated sentiment paradoxes can be used in turn to predict user sentiments.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2019
Zhou, Xinyi; Zafarani, Reza
Network-based Fake News Detection: A Pattern-driven Approach Journal Article
In: ACM SIGKDD Explorations Newsletter, 2019.
@article{xinyi2019networkbased,
title = {Network-based Fake News Detection: A Pattern-driven Approach},
author = {Xinyi Zhou and Reza Zafarani},
year = {2019},
date = {2019-12-01},
journal = {ACM SIGKDD Explorations Newsletter},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Jin, Shengmin; Phoha, Vir V.; Zafarani, Reza
Network Identification and Authentication Proceedings Article
In: Proceedings of the 2019 IEEE International Conference on Data Mining (ICDM), 2019.
@inproceedings{shengmin2019network,
title = {Network Identification and Authentication},
author = {Shengmin Jin and Vir V. Phoha and Reza Zafarani},
year = {2019},
date = {2019-01-01},
booktitle = {Proceedings of the 2019 IEEE International Conference on Data Mining (ICDM)},
abstract = {Research on networks is commonly performed using anonymized network data for various reasons such as protecting data privacy. Under such circumstances, it is difficult to verify the source of network data, which leads to questions such as: Given an anonymized graph, can we identify the network from which it is collected? Or if one claims the graph is sampled from a certain network, can we verify it? The intuitive approach is to check for subgraph isomorphism. However, subgraph isomorphism is NP-complete; hence, infeasible for most large networks. Inspired by biometrics studies, we address these challenges by formulating two new problems: network identification and network authentication. To tackle these problems, similar to research on human fingerprints, we introduce two versions of a network identity: (1) embedding-based identity and (2) distribution-based identity. We demonstrate the effectiveness of these network identities on various real-world networks. Using these identities, we propose two approaches for network identification. One method uses supervised learning and can achieve an identification accuracy rate of 94.7\%, and the other, which is easier to implement, relies on distances between identities and achieves an accuracy rate of 85.5\%. For network authentication, we propose two methods to build a network authentication system. The first is a supervised learner and provides a low false accept rate and the other method allows one to control the false reject rate with a reasonable false accept rate across networks. Our study can help identify or verify the source of network data, validate network-based research, and be used for network-based biometrics.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Hozhabrierdi, Pegah; Zafarani, Reza
The Impact of Graph Structure on Small-World Shortest Paths Proceedings Article
In: Proceedings of the 2019 International Conference on Social Computing, Behavioral-Cultural Modeling & Prediction and Behavior Representation in Modeling and Simulation (SBP-BRiMS), 2019.
@inproceedings{pegah2019impact,
title = {The Impact of Graph Structure on Small-World Shortest Paths},
author = {Pegah Hozhabrierdi and Reza Zafarani},
year = {2019},
date = {2019-01-01},
booktitle = {Proceedings of the 2019 International Conference on Social Computing, Behavioral-Cultural Modeling \& Prediction and Behavior Representation in Modeling and Simulation (SBP-BRiMS)},
abstract = {Numerous studies have been devoted to modeling and estimating shortest-paths in complex networks. To maintain generality, these studies have neglected a common property of complex social networks; small-world phenomenon (colloquially stated as six degrees of separation). Based on the intuition behind the flow of information in small worlds, we propose a small-world representation for social networks. In this new representation, we study the influence of different network measures on the shortest-paths. We perform a comprehensive analysis on a large set of local and global network measures and report our findings for various social networks. The results of our analyses show that: (1) shortest path lengths in small worlds are strongly correlated to the maximum degree centrality and the diameter. In fact, using these two features one can predict the average path length more accurately than using any other feature alone; (2) when nodes are ranked according to their average shortest-path lengths, we can approximate this ranking by a shifted standard normal distribution with minimum information loss. The shift can be estimated by the rank of the node with maximum local clustering coefficient, which can be computed in linear or constant time.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Zafarani, Reza; Zhou, Xinyi; Shu, Kai; Liu, Huan
Fake News Research: Theories, Detection Strategies, and Open Problems Proceedings Article
In: Proceedings of the 25th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD), 2019.
@inproceedings{reza2019fake,
title = {Fake News Research: Theories, Detection Strategies, and Open Problems},
author = {Reza Zafarani and Xinyi Zhou and Kai Shu and Huan Liu},
year = {2019},
date = {2019-01-01},
booktitle = {Proceedings of the 25th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD)},
abstract = {The explosive growth of fake news and its erosion to democracy, justice, and public trust increased the demand for fake news detection. As an interdisciplinary topic, the study of fake news encourages a concerted effort of experts in computer and information science, political science, journalism, social science, psychology, and economics. A comprehensive framework to systematically understand and detect fake news is necessary to attract and unite researchers in related areas to conduct research on fake news. This tutorial aims to clearly present (1) fake news research, its challenges, and research directions; (2) a comparison between fake news and other related concepts (e.g., rumors); (3) the fundamental theories developed across various disciplines that facilitate interdisciplinary research; (4) various detection strategies unified under a comprehensive framework for fake news detection; and (5) the state-of-the-art datasets, patterns, and models. We present fake news detection from various perspectives, which involve news content and information in social networks, and broadly adopt techniques in data mining, machine learning, natural language processing, information retrieval and social search. Facing the upcoming 2020 U.S. presidential election, challenges for automatic, effective and efficient fake news detection are also clarified in this tutorial.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Shu, Kai; Zhou, Xinyi; Wang, Suhang; Zafarani, Reza; Liu, Huan
The Role of User Profiles for Fake News Detection Proceedings Article
In: Proceedings of the IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining (ASONAM), 2019.
@inproceedings{kai2019role,
title = {The Role of User Profiles for Fake News Detection},
author = {Kai Shu and Xinyi Zhou and Suhang Wang and Reza Zafarani and Huan Liu},
year = {2019},
date = {2019-01-01},
booktitle = {Proceedings of the IEEE/ACM International Conference on Advances in Social Networks Analysis and Mining (ASONAM)},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Zhou, Xinyi; Zafarani, Reza
Fake News Detection: An Interdisciplinary Research Proceedings Article
In: Companion Proceedings of The Web Conference 2019 (WWW), 2019.
@inproceedings{xinyi2019fake,
title = {Fake News Detection: An Interdisciplinary Research},
author = {Xinyi Zhou and Reza Zafarani},
url = {https://doi.org/10.1145/3308560.3316476},
year = {2019},
date = {2019-01-01},
booktitle = {Companion Proceedings of The Web Conference 2019 (WWW)},
abstract = {The explosive growth of fake news and its erosion to democracy, journalism and economy has increased the demand for fake news detection. To achieve efficient and explainable fake news detection, an interdisciplinary approach is required, relying on scientific contributions from various disciplines, e.g., social sciences, engineering, among others. Here, we illustrate how such multidisciplinary contributions can help detect fake news by improving feature engineering, or by providing well-justified machine learning models. We demonstrate how news content, news propagation patterns, and users' engagements with news can help detect fake news.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Zhou, Xinyi; Zafarani, Reza; Shu, Kai; Liu, Huan
Fake News: Fundamental Theories, Detection Strategies and Challenges Proceedings Article
In: Proceedings of the 12th ACM International Conference on Web Search and Data Mining (WSDM), 2019.
@inproceedings{xinyi2019fake2,
  author    = {Xinyi Zhou and Reza Zafarani and Kai Shu and Huan Liu},
  title     = {Fake News: Fundamental Theories, Detection Strategies and Challenges},
  booktitle = {Proceedings of the 12th ACM International Conference on Web Search and Data Mining (WSDM)},
  year      = {2019},
  date      = {2019-01-01},
  url       = {https://dl.acm.org/citation.cfm?id=3291382},
  abstract  = {The explosive growth of fake news and its erosion to democracy, justice, and public trust increased the demand for fake news detection. As an interdisciplinary topic, the study of fake news encourages a concerted effort of experts in computer and information science, political science, journalism, social science, psychology, and economics. A comprehensive framework to systematically understand and detect fake news is necessary to attract and unite researchers in related areas to conduct research on fake news. This tutorial aims to clearly present (1) fake news research, its challenges, and research directions; (2) a comparison between fake news and other related concepts (e.g., rumors); (3) the fundamental theories developed across various disciplines that facilitate interdisciplinary research; (4) various detection strategies unified under a comprehensive framework for fake news detection; and (5) the state-of-the-art datasets, patterns, and models. We present fake news detection from various perspectives, which involve news content and information in social networks, and broadly adopt techniques in data mining, machine learning, natural language processing, information retrieval and social search. Facing the upcoming 2020 U.S. presidential election, challenges for automatic, effective and efficient fake news detection are also clarified in this tutorial.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2018
Jin, Shengmin; Zafarani, Reza
Representing Networks with 3D Shapes Proceedings Article
In: Proceedings of the 2018 IEEE International Conference on Data Mining (ICDM), 2018, (Code: https://github.com/shengminjin/KroneckerHull).
@inproceedings{shengmin2018representing,
  author    = {Shengmin Jin and Reza Zafarani},
  title     = {Representing Networks with 3D Shapes},
  booktitle = {Proceedings of the 2018 IEEE International Conference on Data Mining (ICDM)},
  year      = {2018},
  date      = {2018-01-01},
  url       = {https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=8594842},
  abstract  = {There has been a surge of interest in machine learning in graphs, as graphs and networks are ubiquitous across the globe and within science and engineering: road networks, power grids, protein-protein interaction networks, scientific collaboration networks, social networks, to name a few. Recent machine learning research has focused on efficient and effective ways to represent graph structure. Existing graph representation methods such as network embedding techniques learn to map a node (or a graph) to a vector in a low-dimensional vector space. However, the mapped values are often difficult to interpret, lacking information on the structure of the network or its subgraphs. Instead of using a low-dimensional vector to represent a graph, we propose to represent a network with a 3-dimensional shape: the network shape. We introduce the first network shape, a Kronecker hull, which represents a network as a 3D convex polyhedron using stochastic Kronecker graphs. We present a linear time algorithm to build Kronecker hulls. Network shapes provide a compact representation of networks that is easy to visualize and interpret. They capture various properties of not only the network, but also its subgraphs. For instance, they can provide the distribution of subgraphs within a network, e.g., what proportion of subgraphs are structurally similar to the whole network? Using experiments on real-world networks, we show how network shapes can be used in various applications, from computing similarity between two graphs (using the overlap between network shapes of two networks) to graph compression, where a graph with millions of nodes can be represented with a convex hull with less than 40 boundary points.},
  note      = {Code: https://github.com/shengminjin/KroneckerHull},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Jin, Shengmin; Zafarani, Reza
Sentiment Prediction in Social Networks Proceedings Article
In: Proceedings of the 2018 IEEE International Conference on Data Mining (ICDM), 2018, (Dataset: http://data.syr.edu/get/EmotionPatterns/).
@inproceedings{shengmin2018sentiment,
  author    = {Shengmin Jin and Reza Zafarani},
  title     = {Sentiment Prediction in Social Networks},
  booktitle = {Proceedings of the 2018 IEEE International Conference on Data Mining (ICDM)},
  year      = {2018},
  date      = {2018-01-01},
  url       = {https://ieeexplore.ieee.org/document/8637419},
  abstract  = {Sentiment analysis research has focused on using text for predicting sentiments without considering the unavoidable peer influence on user emotions and opinions. The lack of large-scale ground-truth data on sentiments of users in social networks has limited research on how predictable sentiments are from social ties. In this paper, using a large-scale dataset on human sentiments, we study sentiment prediction within social networks. We demonstrate that sentiments are predictable using structural properties of social networks alone. With social science and psychology literature, we provide evidence on sentiments being connected to social relationships at four different network levels, starting from the ego-network level and moving up to the whole-network level. We discuss emotional signals that can be captured at each level of social relationships and investigate the importance of structural features on each network level. We demonstrate that sentiment prediction that solely relies on social network structure can be as (or more) accurate than text-based techniques. For the situations where complete posts and friendship information are difficult to get, we analyze the trade-off between the sentiment prediction performance and the available information. When computational resources are limited, we show that using only four network properties, one can predict sentiments with competitive accuracy. Our findings can be used to (1) validate the peer influence on user sentiments, (2) improve classical text-based sentiment prediction methods, (3) enhance friend recommendation by utilizing sentiments, and (4) help identify personality traits.},
  note      = {Dataset: http://data.syr.edu/get/EmotionPatterns/},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2017
Shu, Kai; Wang, Suhang; Tang, Jiliang; Zafarani, Reza; Liu, Huan
User Identity Linkage across Online Social Networks: A Review Journal Article
In: ACM SIGKDD Explorations Newsletter, 2017.
@article{kai2017user,
  author    = {Kai Shu and Suhang Wang and Jiliang Tang and Reza Zafarani and Huan Liu},
  title     = {User Identity Linkage across Online Social Networks: A Review},
  journal   = {ACM SIGKDD Explorations Newsletter},
  year      = {2017},
  date      = {2017-03-01},
  url       = {https://dl.acm.org/citation.cfm?id=3068781},
  abstract  = {The increasing popularity and diversity of social media sites has encouraged more and more people to participate on multiple online social networks to enjoy their services. Each user may create a user identity, which can include profile, content, or network information, to represent his or her unique public figure in every social network. Thus, a fundamental question arises – can we link user identities across online social networks? User identity linkage across online social networks is an emerging task in social media and has attracted increasing attention in recent years. Advancements in user identity linkage could potentially impact various domains such as recommendation and link prediction. Due to the unique characteristics of social network data, this problem faces tremendous challenges. To tackle these challenges, recent approaches generally consist of (1) extracting features and (2) constructing predictive models from a variety of perspectives. In this paper, we review key achievements of user identity linkage across online social networks including state-of-the-art algorithms, evaluation metrics, and representative datasets. We also discuss related research areas, open problems, and future research directions for user identity linkage across online social networks.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Jin, Shengmin; Zafarani, Reza
Emotions in Social Networks: Distributions, Patterns, and Models Proceedings Article
In: Proceedings of the 2017 ACM Conference on Information and Knowledge Management (CIKM), 2017, (Dataset: http://data.syr.edu/get/EmotionPatterns/).
@inproceedings{shengmin2017emotions,
  author    = {Shengmin Jin and Reza Zafarani},
  title     = {Emotions in Social Networks: Distributions, Patterns, and Models},
  booktitle = {Proceedings of the 2017 ACM Conference on Information and Knowledge Management (CIKM)},
  year      = {2017},
  date      = {2017-01-01},
  url       = {https://dl.acm.org/citation.cfm?id=3132932},
  abstract  = {Understanding the role emotions play in social interactions has been a central research question in the social sciences. However, the challenge of obtaining large-scale data on human emotions has left the most fundamental questions on emotions less explored: How do emotions vary across individuals, evolve over time, and are connected to social ties? We address these questions using a large-scale dataset of users that contains both their emotions and social ties. Using this dataset, we identify patterns of human emotions on five different network levels, starting from the user-level and moving up to the whole-network level. At the user-level, we identify how human emotions are distributed and vary over time. At the ego-network level, we find that assortativity is only observed with respect to positive moods. This observation allows us to introduce emotional balance, the ``dual'' of structural balance theory. We show that emotional balance has a natural connection to structural balance theory. At the community-level, we find that community members are emotionally-similar and that this similarity is stronger in smaller communities. Structural properties of communities, such as their sparseness or isolatedness, are also connected to the emotions of their members. At the whole-network level, we show that there is a tight connection between the global structure of a network and the emotions of its members. As a result, we demonstrate how one can accurately predict the proportion of positive/negative users within a network by only looking at the network structure. Based on our observations, we propose the Emotional-Tie model – a network model that can simulate the formation of friendships based on emotions. This model generates graphs that exhibit both patterns of human emotions identified in this work and those observed in real-world social networks, such as having a high clustering coefficient. Our findings can help better understand the interplay between emotions and social ties.},
  note      = {Dataset: http://data.syr.edu/get/EmotionPatterns/},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2016
Zafarani, Reza; Liu, Huan
Users Joining Multiple Sites: Friendship and Popularity Variations across Sites Journal Article
In: Information Fusion, 2016.
@article{reza2016users,
  author    = {Reza Zafarani and Huan Liu},
  title     = {Users Joining Multiple Sites: Friendship and Popularity Variations across Sites},
  journal   = {Information Fusion},
  year      = {2016},
  date      = {2016-03-01},
  url       = {http://www.sciencedirect.com/science/article/pii/S1566253515000676},
  abstract  = {Our social media experience is no longer limited to a single site. We use different social media sites for different purposes and our information on each site is often partial. By collecting complementary information for the same individual across sites, one can better profile users. These profiles can help improve online services such as advertising or recommendation across sites. To combine complementary information across sites, it is critical to understand how information for the same individual varies across sites. In this study, we aim to understand how two fundamental properties of users vary across social media sites. First, we study how user friendship behavior varies across sites. Our findings show how friend distributions for individuals change as they join new sites. Next, we analyze how user popularity changes across sites as individuals join different sites. We evaluate our findings and demonstrate how our findings can be employed to predict how popular users are likely to be on new sites they join.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Liu, Huan; Morstatter, Fred; Tang, Jiliang; Zafarani, Reza
The good, the bad, and the ugly: uncovering novel research opportunities in social media mining Journal Article
In: International Journal of Data Science and Analytics, vol. 1, no. 3-4, pp. 137–143, 2016.
@article{huan2016good,
  author    = {Huan Liu and Fred Morstatter and Jiliang Tang and Reza Zafarani},
  title     = {The good, the bad, and the ugly: uncovering novel research opportunities in social media mining},
  journal   = {International Journal of Data Science and Analytics},
  volume    = {1},
  number    = {3-4},
  pages     = {137--143},
  year      = {2016},
  date      = {2016-01-01},
  doi       = {10.1007/s41060-016-0023-0},
  url       = {https://link.springer.com/article/10.1007/s41060-016-0023-0},
  abstract  = {Big data is ubiquitous and can only become bigger, which challenges traditional data mining and machine learning methods. Social media is a new source of data that is significantly different from conventional ones. Social media data are mostly user-generated, and are big, linked, and heterogeneous. We present the good, the bad and the ugly associated with the multi-faceted social media data and exemplify the importance of some original problems with real-world examples. We discuss bias in social media data, evaluation dilemma, data reduction, inferring invisible information, and big-data paradox. We illuminate new opportunities of developing novel algorithms and tools for data science. In our endeavor of employing the good to tame the bad with the help of the ugly, we deepen the understanding of ever growing and continuously evolving data and create innovative solutions with interdisciplinary and collaborative research of data science.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
