
Commit f99d9d0

more team descriptions
1 parent 79bb8f1 commit f99d9d0

26 files changed: +295, -247 lines

_bibliography/references.bib

Lines changed: 14 additions & 7 deletions
@@ -20,6 +20,7 @@ @inproceedings{
  year={2025},
  url={https://openreview.net/forum?id=g6v09VxgFw},
  pdf={https://openreview.net/pdf?id=g6v09VxgFw},
+ img={gnns-getting-comfy.png},
  abstract={Maximizing the spectral gap through graph rewiring has been proposed to enhance the performance of message-passing graph neural networks (GNNs) by addressing over-squashing. However, as we show, minimizing the spectral gap can also improve generalization. To explain this, we analyze how rewiring can benefit GNNs within the context of stochastic block models. Since spectral gap optimization primarily influences community strength, it improves performance when the community structure aligns with node labels. Building on this insight, we propose three distinct rewiring strategies that explicitly target community structure, node labels, and their alignment: (a) community structure-based rewiring (ComMa), a more computationally efficient alternative to spectral gap optimization that achieves similar goals; (b) feature similarity-based rewiring (FeaSt), which focuses on maximizing global homophily; and (c) a hybrid approach (ComFy), which enhances local feature similarity while preserving community structure to optimize label-community alignment. Extensive experiments confirm the effectiveness of these strategies and support our theoretical insights.},
  }

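Editor's note: for readers unfamiliar with the quantity that the rewiring strategies in the abstract above optimize, here is a minimal Python/numpy sketch of the spectral gap of a graph and of brute-force scoring of candidate edge additions by their effect on it. It is an illustration only, not the ComMa/FeaSt/ComFy implementation from the paper; the toy graph and the scoring loop are assumptions made for this example.

# Illustrative only: the spectral gap targeted by spectral-gap rewiring,
# computed with plain numpy on a toy graph. Not the paper's code.
import numpy as np

def spectral_gap(adj):
    """Second-smallest eigenvalue of the symmetric normalized Laplacian."""
    deg = adj.sum(axis=1)
    d_inv_sqrt = 1.0 / np.sqrt(deg)          # assumes no isolated nodes
    lap = np.eye(len(adj)) - d_inv_sqrt[:, None] * adj * d_inv_sqrt[None, :]
    return np.linalg.eigvalsh(lap)[1]        # eigvalsh returns ascending order

# Toy graph: two triangles joined by one bridge (a topological bottleneck).
adj = np.zeros((6, 6))
for u, v in [(0, 1), (1, 2), (0, 2), (2, 3), (3, 4), (4, 5), (3, 5)]:
    adj[u, v] = adj[v, u] = 1.0

base = spectral_gap(adj)
# Score every candidate edge addition by how much it changes the gap (brute force).
for u in range(6):
    for v in range(u + 1, 6):
        if adj[u, v] == 0:
            trial = adj.copy()
            trial[u, v] = trial[v, u] = 1.0
            print(f"add ({u},{v}): gap {base:.3f} -> {spectral_gap(trial):.3f}")

Edges that bypass the bridge raise the gap the most, which is the intuition behind rewiring for over-squashing.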
@@ -31,19 +32,22 @@ @inproceedings{
  year={2024},
  url={https://openreview.net/forum?id=EMkrwJY2de},
  pdf={https://openreview.net/pdf?id=EMkrwJY2de},
+ img={spectral-graph-pruning.png},
  abstract={Message Passing Graph Neural Networks are known to suffer from two problems that are sometimes believed to be diametrically opposed: over-squashing and over-smoothing. The former results from topological bottlenecks that hamper the information flow from distant nodes and are mitigated by spectral gap maximization, primarily, by means of edge additions. However, such additions often promote over-smoothing that renders nodes of different classes less distinguishable. Inspired by the Braess phenomenon, we argue that deleting edges can address over-squashing and over-smoothing simultaneously. This insight explains how edge deletions can improve generalization, thus connecting spectral gap optimization to a seemingly disconnected objective of reducing computational resources by pruning graphs for lottery tickets. To this end, we propose a computationally effective spectral gap optimization framework to add or delete edges and demonstrate its effectiveness on the long range graph benchmark and on larger heterophilous datasets.},
  code={https://github.com/RelationalML/SpectralPruningBraess}
  }

  @inproceedings{
- mustafa2024training,
- title={Training GNNs in Balance by Dynamic Rescaling},
+ mustafa2024dynamic,
+ title={Dynamic Rescaling for Training {GNN}s},
  author={Nimrah Mustafa and Rebekka Burkholz},
- booktitle={Thirty-eighth Conference on Neural Information Processing Systems},
+ booktitle={Thirty-eighth Annual Conference on Neural Information Processing Systems},
  year={2024},
  url={https://openreview.net/forum?id=IfZwSRpqHl},
  pdf={https://openreview.net/pdf?id=IfZwSRpqHl},
- abstract={Graph neural networks exhibiting a rescale invariance, like GATs, obey a conservation law of its parameters, which has been exploited to derive a balanced state that induces good initial trainability. Yet, finite learning rates as used in practice topple the network out of balance during training. This effect is even more pronounced with larger learning rates that tend to induce improved generalization but make the training dynamics less robust. To support even larger learning rates, we propose to dynamically balance the network according to a different criterion, based on relative gradients, that promotes faster and better. In combination with large learning rates and gradient clipping, dynamic rebalancing significantly improves generalization on real-world data. We observe that rescaling provides us with the flexibility to control the order in which network layers are trained. This leads to novel insights into similar phenomena as grokking, which can further boost generalization performance.}
+ abstract={Graph neural networks (GNNs) with a rescale invariance, such as GATs, can be re-parameterized during optimization through dynamic rescaling of network parameters and gradients while keeping the loss invariant. In this work, we explore dynamic rescaling as a tool to influence GNN training dynamics in two key ways: i) balancing the network with respect to various criteria, and ii) controlling the relative learning speeds of different layers. We gain novel insights, unique to GNNs, that reveal distinct training modes for different tasks. For heterophilic graphs, achieving balance based on relative gradients leads to faster training and better generalization. In contrast, homophilic graphs benefit from delaying the learning of later layers. Additionally, we show that training in balance supports larger learning rates, which can improve generalization. Moreover, controlling layer-wise training speeds is linked to grokking-like phenomena, which may be of independent interest.},
+ code={https://github.com/RelationalML/Dynamic_Rescaling_GAT},
+ img={dynamic-rescaling-training-gnns.png}
  }

  @inproceedings{
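Editor's note: the rescale invariance that the updated abstract above builds on holds for any positively homogeneous network, and a minimal Python/numpy check makes it concrete: scaling a hidden unit's incoming weights by c > 0 and its outgoing weights by 1/c leaves the function, and hence the loss, unchanged. This toy bias-free ReLU MLP is an assumption for illustration; the GAT-specific dynamic rescaling procedure is in the linked Dynamic_Rescaling_GAT repository.

# Generic rescale-invariance check for a positively homogeneous (ReLU) network.
# Illustration of the freedom that dynamic rescaling exploits, not the paper's code.
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(5, 8))                   # 5 samples, 8 input features
W1 = rng.normal(size=(8, 16))                 # layer 1
W2 = rng.normal(size=(16, 3))                 # layer 2

def forward(A, B):
    return np.maximum(x @ A, 0.0) @ B         # bias-free ReLU MLP

c = rng.uniform(0.1, 10.0, size=16)           # one positive factor per hidden unit
out_before = forward(W1, W2)
out_after = forward(W1 * c[None, :], W2 / c[:, None])
print("max deviation:", np.abs(out_before - out_after).max())   # floating-point noise only

Because the loss is unchanged, such rescalings can be applied during training to rebalance the network or to steer which layers learn faster, which is the knob the paper studies for GATs.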
@@ -77,7 +81,8 @@ @inproceedings{
  url={https://openreview.net/forum?id=Sjv5RcqfuH},
  pdf={https://openreview.net/pdf?id=Sjv5RcqfuH},
  abstract={Graph Attention Networks (GATs) are designed to provide flexible neighborhood aggregation that assigns weights to neighbors according to their importance. In practice, however, GATs are often unable to switch off task-irrelevant neighborhood aggregation, as we show experimentally and analytically. To address this challenge, we propose GATE, a GAT extension that holds three major advantages: i) It alleviates over-smoothing by addressing its root cause of unnecessary neighborhood aggregation. ii) Similarly to perceptrons, it benefits from higher depth as it can still utilize additional layers for (non-)linear feature transformations in case of (nearly) switched-off neighborhood aggregation. iii) By down-weighting connections to unrelated neighbors, it often outperforms GATs on real-world heterophilic datasets. To further validate our claims, we construct a synthetic test bed to analyze a model's ability to utilize the appropriate amount of neighborhood aggregation, which could be of independent interest.},
- code={https://github.com/RelationalML/GATE}
+ code={https://github.com/RelationalML/GATE},
+ img={gate-how-to.png}
  }

  @inproceedings{
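Editor's note: as a rough illustration of the idea in the GATE abstract above, here is a simplified Python/numpy layer with a per-node gate that interpolates between neighborhood aggregation and a plain feature transformation, so aggregation can effectively be switched off when it is task-irrelevant. The gate parameterization and toy graph are assumptions; the actual GATE layer is in the linked repository.

# Simplified gated aggregation: when the gate saturates near 0, the layer ignores
# its neighborhood and acts as a plain (nonlinear) feature transformation.
# Conceptual stand-in only, not the GATE architecture from the paper.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def gated_layer(adj, h, W_self, W_neigh, w_gate):
    deg = adj.sum(axis=1, keepdims=True).clip(min=1.0)
    neigh = (adj @ h) / deg                          # mean over neighbors
    gate = sigmoid(h @ w_gate)                       # per-node scalar in (0, 1)
    return np.maximum((1.0 - gate) * (h @ W_self) + gate * (neigh @ W_neigh), 0.0)

rng = np.random.default_rng(0)
n, d = 6, 4
adj = np.triu((rng.random((n, n)) < 0.4).astype(float), 1)
adj = adj + adj.T                                    # undirected, no self-loops
h = rng.normal(size=(n, d))
out = gated_layer(adj, h, rng.normal(size=(d, d)), rng.normal(size=(d, d)),
                  rng.normal(size=(d, 1)))
print(out.shape)                                     # (6, 4)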
@@ -89,7 +94,8 @@ @inproceedings{
  url={https://openreview.net/forum?id=qODvxQ8TXW},
  pdf={https://openreview.net/pdf?id=qODvxQ8TXW},
  abstract={Learning Rate Rewinding (LRR) has been established as a strong variant of Iterative Magnitude Pruning (IMP) to find lottery tickets in deep overparameterized neural networks. While both iterative pruning schemes couple structure and parameter learning, understanding how LRR excels in both aspects can bring us closer to the design of more flexible deep learning algorithms that can optimize diverse sets of sparse architectures. To this end, we conduct experiments that disentangle the effect of mask learning and parameter optimization and how both benefit from overparameterization. The ability of LRR to flip parameter signs early and stay robust to sign perturbations seems to make it not only more effective in mask identification but also in optimizing diverse sets of masks, including random ones. In support of this hypothesis, we prove in a simplified single hidden neuron setting that LRR succeeds in more cases than IMP, as it can escape initially problematic sign configurations.},
- spotlight={true}
+ spotlight={true},
+ img={masks-signs-lrr.png}
  }

  @inproceedings{
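Editor's note: to make the pruning terminology in the abstract above concrete, here is a minimal Python/numpy magnitude-pruning loop on a toy least-squares model. After each round the smallest-magnitude surviving weights are masked; under learning rate rewinding the trained weights are kept while only the learning-rate schedule restarts. The model, data, schedule, and pruning fraction are assumptions for this sketch, not the paper's experimental setup.

# Minimal iterative magnitude pruning with learning rate rewinding (LRR).
# Illustrative only; not the paper's experiments.
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 30))
w_true = np.zeros(30)
w_true[:5] = rng.normal(size=5)                   # sparse ground truth
y = X @ w_true + 0.01 * rng.normal(size=200)

def train(w, mask, lrs):
    """Run the full learning-rate schedule; pruned coordinates stay at zero."""
    for lr in lrs:
        grad = X.T @ (X @ (w * mask) - y) / len(y)
        w = (w - lr * grad) * mask
    return w

w = 0.1 * rng.normal(size=30)
mask = np.ones(30)
schedule = np.linspace(0.1, 0.01, 100)            # the schedule that gets rewound

for r in range(5):
    w = train(w, mask, schedule)                  # LRR: keep weights, restart schedule
    alive = np.flatnonzero(mask)
    k = max(1, int(0.2 * len(alive)))             # prune 20% of surviving weights
    mask[alive[np.argsort(np.abs(w[alive]))[:k]]] = 0.0
    loss = np.mean((X @ (w * mask) - y) ** 2)
    print(f"round {r}: {int(mask.sum())} weights kept, loss {loss:.4f}")

Classic IMP would additionally rewind the surviving weights to their (early) initial values each round; keeping the trained values and their signs is precisely what the paper argues makes LRR more robust.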
@@ -112,7 +118,8 @@ @inproceedings{
  url={https://openreview.net/forum?id=qY7UqLoora},
  pdf={https://openreview.net/pdf?id=qY7UqLoora},
  code={https://github.com/RelationalML/GAT_Balanced_Initialization},
- abstract={While the expressive power and computational capabilities of graph neural networks (GNNs) have been theoretically studied, their optimization and learning dynamics, in general, remain largely unexplored. Our study undertakes the Graph Attention Network (GAT), a popular GNN architecture in which a node's neighborhood aggregation is weighted by parameterized attention coefficients. We derive a conservation law of GAT gradient flow dynamics, which explains why a high portion of parameters in GATs with standard initialization struggle to change during training. This effect is amplified in deeper GATs, which perform significantly worse than their shallow counterparts. To alleviate this problem, we devise an initialization scheme that balances the GAT network. Our approach i) allows more effective propagation of gradients and in turn enables trainability of deeper networks, and ii) attains a considerable speedup in training and convergence time in comparison to the standard initialization. Our main theorem serves as a stepping stone to studying the learning dynamics of positive homogeneous models with attention mechanisms.}
+ abstract={While the expressive power and computational capabilities of graph neural networks (GNNs) have been theoretically studied, their optimization and learning dynamics, in general, remain largely unexplored. Our study undertakes the Graph Attention Network (GAT), a popular GNN architecture in which a node's neighborhood aggregation is weighted by parameterized attention coefficients. We derive a conservation law of GAT gradient flow dynamics, which explains why a high portion of parameters in GATs with standard initialization struggle to change during training. This effect is amplified in deeper GATs, which perform significantly worse than their shallow counterparts. To alleviate this problem, we devise an initialization scheme that balances the GAT network. Our approach i) allows more effective propagation of gradients and in turn enables trainability of deeper networks, and ii) attains a considerable speedup in training and convergence time in comparison to the standard initialization. Our main theorem serves as a stepping stone to studying the learning dynamics of positive homogeneous models with attention mechanisms.},
+ img={are-gats-out-of-balance.png}
  }

  @InProceedings{pmlr-v202-gadhikar23a,
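Editor's note: the balanced initialization in the abstract above relies on the same rescale freedom illustrated earlier. As a generic Python/numpy sketch, each hidden unit of a ReLU layer pair can be rescaled so that its incoming and outgoing weight norms match, without changing the network function. This toy layer pair is an assumption for illustration; the GAT-specific scheme is in the linked GAT_Balanced_Initialization repository.

# Generic balancing at initialization: equalize incoming and outgoing weight
# norms per hidden unit while preserving the network function. Illustration only.
import numpy as np

rng = np.random.default_rng(0)
W1 = 2.0 * rng.normal(size=(8, 16))              # deliberately out of balance
W2 = 0.1 * rng.normal(size=(16, 3))

in_norm = np.linalg.norm(W1, axis=0)             # per hidden unit
out_norm = np.linalg.norm(W2, axis=1)
c = np.sqrt(out_norm / in_norm)                  # c*in_norm equals out_norm/c afterwards
W1_bal, W2_bal = W1 * c[None, :], W2 / c[:, None]

x = rng.normal(size=(4, 8))
f = lambda A, B: np.maximum(x @ A, 0.0) @ B
print("function preserved:", np.allclose(f(W1, W2), f(W1_bal, W2_bal)))
print("balanced:", np.allclose(np.linalg.norm(W1_bal, axis=0),
                               np.linalg.norm(W2_bal, axis=1)))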

_data/alumni_members.yml

Lines changed: 7 additions & 7 deletions
@@ -1,23 +1,23 @@
- - role: Research assistants (HiWis)
+ - role: Research assistants
  members:
  - name: Adarsh Jamadandi
  last_name: Jamadandi
  photo: c01adja.jpg
- start_date: Nov 2022
- end_date: Oct 2024
+ start_date: Nov 22
+ end_date: Oct 24

  url: https://adarshmj.github.io

  - name: Harsha Nelaturu
  last_name: Nelaturu
  photo: c02hane.jpg
- start_date: Aug 2023
- end_date: Jul 2024
+ start_date: Aug 23
+ end_date: Jul 24
  url: https://nelaturuharsha.github.io/

  - role: Visiting students
  members:
  - name: Otto Piramuthu
  last_name: Piramuthu
- start_date: June 2024
- end_date: August 2024
+ start_date: Jun 24
+ end_date: Aug 24

_data/team_members.yml

Lines changed: 7 additions & 5 deletions
@@ -15,15 +15,15 @@
  start_date: May 2024

  url: https://chaoedisonzhouucl.github.io/
- description: "My recent research interests include Science4AI, representation learning and generative machine learning."
+ description: "I focus on understanding the intricate dynamics of training and fine-tuning in machine learning models, with the goal of developing more efficient and effective learning algorithms. My research explores how optimization processes evolve and how we can refine these methods to improve performance. Currently, I am particularly interested in gradient compression techniques."

  - name: Dr. Gowtham Abbavaram
  last_name: Abbavaram
  photo: c01goab.jpg
  start_date: Oct 2024

  url: https://gautam0707.github.io/
- description: "My research focuses on causality inspired machine learning and statistical causal inference."
+ description: "I work on research problems at the intersection of machine learning and causality, focusing on modeling, inference, and interpreting machine learning models from a causal perspective to enhance their robustness and trustworthiness."

  - role: PhD students
  members:
@@ -48,14 +48,15 @@
  start_date: Oct 2023

  url: https://celrm.github.io/
- description: "My research focuses on generalization challenges in graph learning, particularly how input graphs serve as both data and computational models, and the implications of modifying them under different criteria."
+ description: "My research addresses generalization challenges in graph learning, focusing on the dual role of input graphs as both data and computation structures, and the effects of modifying them under different criteria."

  - name: Tom Jacobs
  last_name: Jacobs
  photo: c01toja.jpg
  start_date: Feb 2024

- url: https://cispa.de/en/people/c01toja
+ url: https://tomjacobs05.github.io/
+ description: "My research interests lie at the intersection of understanding neural network training dynamics and designing efficient deep learning methods. Concretely, I work with theoretical tools such as mirror flow, regularization techniques, and mean field descriptions to study the effect of overparameterization and improve model efficiency."

  - name: Rahul Nittala
  last_name: Nittala
@@ -78,4 +79,5 @@
  photo: c01mive.jpg
  start_date: Dec 2024

- url: https://cispa.de/en/people/c01mive
+ url: https://cispa.de/en/people/c01mive
+ description: "I am interested in making modern AI models efficient. In particular, I work on discovering and exploiting structure in Neural Networks (sparsity, low-dimensional representations and similar) for efficient training, fine-tuning and inference. I am a former full-time core developer for [PyTorch](https://github.com/pytorch/pytorch) and [Lightning Thunder](https://github.com/Lightning-AI/lightning-thunder). Check my [GitHub](https://github.com/nikitaved) to see what I work on now."

_layouts/homelay.html

Lines changed: 2 additions & 2 deletions
@@ -12,11 +12,11 @@
  </ol>
  <div class="carousel-inner">
  <div class="item active">
- <img src="{{ site.url }}{{ site.baseurl }}/images/group/1.jpg" alt="Slide 1"
+ <img src="{{ site.url }}{{ site.baseurl }}/images/group/2.jpg" alt="03-10-2024"
  style="width: 100%; border-radius: 40px; margin-left: auto;">
  </div>
  <div class="item">
- <img src="{{ site.url }}{{ site.baseurl }}/images/group/2.jpg" alt="Slide 2"
+ <img src="{{ site.url }}{{ site.baseurl }}/images/group/1.jpg" alt="21-10-2023"
  style="width: 100%; border-radius: 40px; margin-left: auto;">
  </div>
  </div>

0 commit comments