Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add missing content to 24.04_linux-nvidia-internal-6.11-next #43

Open
wants to merge 20 commits into
base: 24.04_linux-nvidia-internal-6.11-next
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
9ee51e4
cppc_cpufreq: Use desired perf if feedback ctrs are 0 or unchanged
Sep 29, 2024
79cd5c7
cppc_cpufreq: Remove HiSilicon CPPC workaround
Sep 29, 2024
8f54d23
NVIDIA: SAUCE: ACPI/HMAT: Move HMAT messages to pr_debug()
djbw Jan 31, 2024
acf302a
perf cs-etm: Create decoders after both AUX and HW_ID search passes
James-A-Clark Jul 22, 2024
5244a6b
perf: cs-etm: Allocate queues for all CPUs
James-A-Clark Jul 22, 2024
05195a6
perf: cs-etm: Move traceid_list to each queue
James-A-Clark Jul 22, 2024
ee4f646
perf: cs-etm: Create decoders based on the trace ID mappings
James-A-Clark Jul 22, 2024
289df79
perf: cs-etm: Only save valid trace IDs into files
James-A-Clark Jul 22, 2024
179784c
perf: cs-etm: Support version 0.1 of HW_ID packets
James-A-Clark Jul 22, 2024
7c9e1fa
perf: cs-etm: Print queue number in raw trace dump
James-A-Clark Jul 22, 2024
fc0cb6b
coresight: Remove unused ETM Perf stubs
James-A-Clark Jul 22, 2024
307b282
coresight: Clarify comments around the PID of the sink owner
James-A-Clark Jul 22, 2024
27a688b
coresight: Move struct coresight_trace_id_map to common header
James-A-Clark Jul 22, 2024
9c56cbe
coresight: Expose map arguments in trace ID API
James-A-Clark Jul 22, 2024
4781120
coresight: Make CPU id map a property of a trace ID map
James-A-Clark Jul 22, 2024
5842418
coresight: Use per-sink trace ID maps for Perf sessions
James-A-Clark Jul 22, 2024
9806d89
coresight: Remove pending trace ID release mechanism
James-A-Clark Jul 22, 2024
3cc1cbc
coresight: Emit sink ID in the HW_ID packets
James-A-Clark Jul 22, 2024
30e8b85
coresight: Make trace ID map spinlock local to the map
James-A-Clark Jul 22, 2024
5c528de
NVIDIA: [Config]: Add PREEMPT and CPU_FREQ_DEFAULT annotations
jamieNguyenNVIDIA Jan 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions debian.nvidia-internal-6.11/config/annotations
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,18 @@ CONFIG_CORESIGHT_TPDM note<'Required for Grace enablem
CONFIG_CORESIGHT_TRBE policy<{'arm64': 'm'}>
CONFIG_CORESIGHT_TRBE note<'Required for Grace enablement'>

CONFIG_PREEMPT_NONE policy<{'amd64': 'n', 'arm64': 'y'}>
CONFIG_PREEMPT_NONE note<'Required for NVIDIA workloads'>

CONFIG_PREEMPT_VOLUNTARY policy<{'amd64': 'y', 'arm64': 'n'}>
CONFIG_PREEMPT_VOLUNTARY note<'Required for NVIDIA workloads'>

CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND policy<{'arm64': 'n'}>
CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND note<'Required for NVIDIA workloads'>

CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE policy<{'amd64': 'n', 'arm64': 'y'}>
CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE note<'Required for NVIDIA workloads'>

CONFIG_DRM_NOUVEAU policy<{'amd64': 'n', 'arm64': 'n'}>
CONFIG_DRM_NOUVEAU note<'Disable nouveau for NVIDIA kernels'>

Expand Down
24 changes: 12 additions & 12 deletions drivers/acpi/numa/hmat.c
Original file line number Diff line number Diff line change
Expand Up @@ -442,9 +442,9 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header,
return -EINVAL;
}

pr_info("Locality: Flags:%02x Type:%s Initiator Domains:%u Target Domains:%u Base:%lld\n",
hmat_loc->flags, hmat_data_type(type), ipds, tpds,
hmat_loc->entry_base_unit);
pr_debug("Locality: Flags:%02x Type:%s Initiator Domains:%u Target Domains:%u Base:%lld\n",
hmat_loc->flags, hmat_data_type(type), ipds, tpds,
hmat_loc->entry_base_unit);

inits = (u32 *)(hmat_loc + 1);
targs = inits + ipds;
Expand All @@ -455,9 +455,9 @@ static __init int hmat_parse_locality(union acpi_subtable_headers *header,
value = hmat_normalize(entries[init * tpds + targ],
hmat_loc->entry_base_unit,
type);
pr_info(" Initiator-Target[%u-%u]:%u%s\n",
inits[init], targs[targ], value,
hmat_data_type_suffix(type));
pr_debug(" Initiator-Target[%u-%u]:%u%s\n",
inits[init], targs[targ], value,
hmat_data_type_suffix(type));

hmat_update_target(targs[targ], inits[init],
mem_hier, type, value);
Expand Down Expand Up @@ -485,9 +485,9 @@ static __init int hmat_parse_cache(union acpi_subtable_headers *header,
}

attrs = cache->cache_attributes;
pr_info("Cache: Domain:%u Size:%llu Attrs:%08x SMBIOS Handles:%d\n",
cache->memory_PD, cache->cache_size, attrs,
cache->number_of_SMBIOShandles);
pr_debug("Cache: Domain:%u Size:%llu Attrs:%08x SMBIOS Handles:%d\n",
cache->memory_PD, cache->cache_size, attrs,
cache->number_of_SMBIOShandles);

target = find_mem_target(cache->memory_PD);
if (!target)
Expand Down Expand Up @@ -546,9 +546,9 @@ static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *heade
}

if (hmat_revision == 1)
pr_info("Memory (%#llx length %#llx) Flags:%04x Processor Domain:%u Memory Domain:%u\n",
p->reserved3, p->reserved4, p->flags, p->processor_PD,
p->memory_PD);
pr_debug("Memory (%#llx length %#llx) Flags:%04x Processor Domain:%u Memory Domain:%u\n",
p->reserved3, p->reserved4, p->flags, p->processor_PD,
p->memory_PD);
else
pr_info("Memory Flags:%04x Processor Domain:%u Memory Domain:%u\n",
p->flags, p->processor_PD, p->memory_PD);
Expand Down
130 changes: 47 additions & 83 deletions drivers/cpufreq/cppc_cpufreq.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,33 +36,15 @@ static LIST_HEAD(cpu_data_list);

static bool boost_supported;

/*
 * OEM identification of a platform that needs the CPPC workaround.
 * The three fields are compared against the oem_id, oem_table_id and
 * oem_revision of the ACPI PCCT table header in cppc_check_hisi_workaround().
 * The character arrays are one byte larger than the ACPI field sizes.
 */
struct cppc_workaround_oem_info {
char oem_id[ACPI_OEM_ID_SIZE + 1];
char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
u32 oem_revision;
};

/*
 * Platforms for which cppc_check_hisi_workaround() replaces the driver's
 * get() callback and disables FIE. An entry applies when all three OEM
 * fields match the PCCT table header.
 */
static struct cppc_workaround_oem_info wa_info[] = {
{
.oem_id = "HISI ",
.oem_table_id = "HIP07 ",
.oem_revision = 0,
}, {
.oem_id = "HISI ",
.oem_table_id = "HIP08 ",
.oem_revision = 0,
}
};

static struct cpufreq_driver cppc_cpufreq_driver;

#ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE
static enum {
FIE_UNSET = -1,
FIE_ENABLED,
FIE_DISABLED
} fie_disabled = FIE_UNSET;

#ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE
module_param(fie_disabled, int, 0444);
MODULE_PARM_DESC(fie_disabled, "Disable Frequency Invariance Engine (FIE)");

Expand All @@ -78,7 +60,6 @@ struct cppc_freq_invariance {
static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv);
static struct kthread_worker *kworker_fie;

static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu);
static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
struct cppc_perf_fb_ctrs *fb_ctrs_t0,
struct cppc_perf_fb_ctrs *fb_ctrs_t1);
Expand Down Expand Up @@ -118,6 +99,9 @@ static void cppc_scale_freq_workfn(struct kthread_work *work)

perf = cppc_perf_from_fbctrs(cpu_data, &cppc_fi->prev_perf_fb_ctrs,
&fb_ctrs);
if (!perf)
return;

cppc_fi->prev_perf_fb_ctrs = fb_ctrs;

perf <<= SCHED_CAPACITY_SHIFT;
Expand Down Expand Up @@ -724,13 +708,31 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
delta_delivered = get_delta(fb_ctrs_t1->delivered,
fb_ctrs_t0->delivered);

/* Check to avoid divide-by zero and invalid delivered_perf */
/*
* Avoid divide-by zero and unchanged feedback counters.
* Leave it for callers to handle.
*/
if (!delta_reference || !delta_delivered)
return cpu_data->perf_ctrls.desired_perf;
return 0;

return (reference_perf * delta_delivered) / delta_reference;
}

/*
 * Read the CPPC feedback counters of @cpu twice, 2 usec apart.
 *
 * @fb_ctrs_t0 receives the first reading and @fb_ctrs_t1 the second.
 * If the first read fails, its error code is returned and the second read
 * is not attempted; otherwise the result of the second read is returned.
 */
static int cppc_get_perf_ctrs_sample(int cpu,
				     struct cppc_perf_fb_ctrs *fb_ctrs_t0,
				     struct cppc_perf_fb_ctrs *fb_ctrs_t1)
{
	int err = cppc_get_perf_ctrs(cpu, fb_ctrs_t0);

	if (!err) {
		udelay(2); /* 2usec delay between sampling */
		err = cppc_get_perf_ctrs(cpu, fb_ctrs_t1);
	}

	return err;
}

static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
{
struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
Expand All @@ -746,18 +748,32 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)

cpufreq_cpu_put(policy);

ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
if (ret)
return 0;

udelay(2); /* 2usec delay between sampling */

ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
if (ret)
return 0;
ret = cppc_get_perf_ctrs_sample(cpu, &fb_ctrs_t0, &fb_ctrs_t1);
if (ret) {
if (ret == -EFAULT)
/* Any of the associated CPPC regs is 0. */
goto out_invalid_counters;
else
return 0;
}

delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
&fb_ctrs_t1);
if (!delivered_perf)
goto out_invalid_counters;

return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);

out_invalid_counters:
/*
* Feedback counters could be unchanged or 0 when a cpu enters a
* low-power idle state, e.g. clock-gated or power-gated.
* Use desired perf for reflecting frequency. Get the latest register
* value first as some platforms may update the actual delivered perf
* there; if failed, resort to the cached desired perf.
*/
if (cppc_get_desired_perf(cpu, &delivered_perf))
delivered_perf = cpu_data->perf_ctrls.desired_perf;

return cppc_perf_to_khz(&cpu_data->perf_caps, delivered_perf);
}
Expand Down Expand Up @@ -812,65 +828,13 @@ static struct cpufreq_driver cppc_cpufreq_driver = {
.name = "cppc_cpufreq",
};

/*
 * HISI platform does not support delivered performance counter and
 * reference performance counter. It can calculate the performance using the
 * platform specific mechanism. We reuse the desired performance register to
 * store the real performance calculated by the platform.
 *
 * Returns the value of the desired performance register converted with
 * cppc_perf_to_khz(), or 0 if the policy for @cpu cannot be obtained or the
 * register cannot be read. The return type is unsigned, so a negative errno
 * would reach callers as a very large bogus frequency; 0 is returned instead.
 */
static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	struct cppc_cpudata *cpu_data;
	u64 desired_perf;
	int ret;

	if (!policy)
		return 0;

	cpu_data = policy->driver_data;

	cpufreq_cpu_put(policy);

	ret = cppc_get_desired_perf(cpu, &desired_perf);
	if (ret < 0)
		return 0;

	return cppc_perf_to_khz(&cpu_data->perf_caps, desired_perf);
}

/*
 * Fetch the ACPI PCCT table and compare its OEM id, OEM table id and OEM
 * revision with each entry of wa_info[]. On the first full match, point the
 * driver's get() callback at hisi_cppc_cpufreq_get_rate() and mark FIE as
 * disabled. Does nothing if the table cannot be obtained.
 */
static void cppc_check_hisi_workaround(void)
{
	struct acpi_table_header *pcct;
	acpi_status rc;
	int idx;

	rc = acpi_get_table(ACPI_SIG_PCCT, 0, &pcct);
	if (ACPI_FAILURE(rc) || !pcct)
		return;

	for (idx = 0; idx < ARRAY_SIZE(wa_info); idx++) {
		if (memcmp(wa_info[idx].oem_id, pcct->oem_id,
			   ACPI_OEM_ID_SIZE))
			continue;
		if (memcmp(wa_info[idx].oem_table_id, pcct->oem_table_id,
			   ACPI_OEM_TABLE_ID_SIZE))
			continue;
		if (wa_info[idx].oem_revision != pcct->oem_revision)
			continue;

		/* Overwrite the get() callback */
		cppc_cpufreq_driver.get = hisi_cppc_cpufreq_get_rate;
		fie_disabled = FIE_DISABLED;
		break;
	}

	acpi_put_table(pcct);
}

static int __init cppc_cpufreq_init(void)
{
int ret;

if (!acpi_cpc_valid())
return -ENODEV;

cppc_check_hisi_workaround();
cppc_freq_invariance_init();
populate_efficiency_class();

Expand Down
37 changes: 25 additions & 12 deletions drivers/hwtracing/coresight/coresight-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -487,23 +487,25 @@ struct coresight_device *coresight_get_sink(struct list_head *path)
return csdev;
}

u32 coresight_get_sink_id(struct coresight_device *csdev)
{
if (!csdev->ea)
return 0;

/*
* See function etm_perf_add_symlink_sink() to know where
* this comes from.
*/
return (u32) (unsigned long) csdev->ea->var;
}

static int coresight_sink_by_id(struct device *dev, const void *data)
{
struct coresight_device *csdev = to_coresight_device(dev);
unsigned long hash;

if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {

if (!csdev->ea)
return 0;
/*
* See function etm_perf_add_symlink_sink() to know where
* this comes from.
*/
hash = (unsigned long)csdev->ea->var;

if ((u32)hash == *(u32 *)data)
csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
if (coresight_get_sink_id(csdev) == *(u32 *)data)
return 1;
}

Expand Down Expand Up @@ -902,6 +904,7 @@ static void coresight_device_release(struct device *dev)
struct coresight_device *csdev = to_coresight_device(dev);

fwnode_handle_put(csdev->dev.fwnode);
free_percpu(csdev->perf_sink_id_map.cpu_map);
kfree(csdev);
}

Expand Down Expand Up @@ -1159,6 +1162,16 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev));
dev_set_name(&csdev->dev, "%s", desc->name);

if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
spin_lock_init(&csdev->perf_sink_id_map.lock);
csdev->perf_sink_id_map.cpu_map = alloc_percpu(atomic_t);
if (!csdev->perf_sink_id_map.cpu_map) {
kfree(csdev);
ret = -ENOMEM;
goto err_out;
}
}
/*
* Make sure the device registration and the connection fixup
* are synchronised, so that we don't see uninitialised devices
Expand Down
3 changes: 2 additions & 1 deletion drivers/hwtracing/coresight/coresight-dummy.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ DEFINE_CORESIGHT_DEVLIST(source_devs, "dummy_source");
DEFINE_CORESIGHT_DEVLIST(sink_devs, "dummy_sink");

static int dummy_source_enable(struct coresight_device *csdev,
struct perf_event *event, enum cs_mode mode)
struct perf_event *event, enum cs_mode mode,
__maybe_unused struct coresight_trace_id_map *id_map)
{
dev_dbg(csdev->dev.parent, "Dummy source enabled\n");

Expand Down
Loading