author     Andi Kleen <ak@suse.de>                       2003-06-13 04:27:34 -0700
committer  Linus Torvalds <torvalds@home.transmeta.com>  2003-06-13 04:27:34 -0700
commit     3ef076bb685a461bbaff37a1f06010fc4d7ce733 (patch)
tree       82fd2d57a0c669cda8ed8980dd7fbd6f9f15be50
parent     ccfd4cc59696f0e9e749395cb97e1ae5d8c76ad6 (diff)
[PATCH] x86-64 merge
This brings the x86-64 port up to date. Architecture-specific changes only.

The biggest change is the forward port of the 2.4 timing code with full HPET support. This should improve timing stability on some Opteron boxes considerably.

Also add the optimized low-level functions from 2.4 (clear_page, copy_page, memcpy, csum_copy etc.). They were supposed to be merged earlier, but got dropped due to a SNAFU. The clear_page changes in particular should improve performance considerably, because the old version used write-combining stores, which pushed all of the new process's data out of the cache; the new version leaves it cache hot.

Also some other bugfixes.

Full changelog:
- Re-add some lost patches: improved copy_page, clear_page, memset, memcpy, csum_copy from 2.4.
- New timing code from 2.4 (Bryan O'Sullivan, John Stultz, Vojtech Pavlik).
- Use the correct MSR to write the northbridge MCE configuration.
- Fix and re-enable the Simics check in APIC timer calibration.
- Check whether the BIOS enabled the APIC and don't use APIC mode if not.
- Remove some obsolete code in APIC handling.
- Fix potential races in the IOMMU code.
- Don't print the backtrace twice on oops.
- Fix compilation of swsuspend (Pavel Machek).
- Add oops locking to kernel page faults.
- Use prefetcht0 for C-level kernel prefetches.
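For orientation, the scaling at the heart of the new timing code can be checked with a small standalone C sketch (not part of the patch; the CPU frequency and cycle count below are arbitrary assumptions, and a 64-bit unsigned long is assumed as on x86-64). time.c precomputes vxtime.tsc_quot = (1000 << 32) / cpu_khz and then converts a TSC delta to microseconds with one multiply and one 32-bit shift:

    #include <stdio.h>

    int main(void)
    {
            /* assumed values: a 1.4 GHz CPU, 7,000,000 cycles since the last tick */
            unsigned long cpu_khz  = 1400000;
            unsigned long last_tsc = 0, tsc = 7000000;

            /* 32.32 fixed-point quotient, as time.c computes vxtime.tsc_quot */
            unsigned long tsc_quot = (1000UL << 32) / cpu_khz;

            /* microseconds since last_tsc, as do_gettimeoffset_tsc() computes them */
            unsigned long usec = ((tsc - last_tsc) * tsc_quot) >> 32;

            printf("%lu cycles at %lu kHz -> %lu us\n", tsc - last_tsc, cpu_khz, usec);
            return 0;
    }

With these numbers it prints 4999 us, i.e. the expected ~5 ms; the truncation of the quotient costs a fraction of a microsecond.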
-rw-r--r--  arch/x86_64/Kconfig              |  12
-rw-r--r--  arch/x86_64/ia32/ia32entry.S     |   1
-rw-r--r--  arch/x86_64/kernel/acpi/boot.c   |   6
-rw-r--r--  arch/x86_64/kernel/apic.c        |   8
-rw-r--r--  arch/x86_64/kernel/bluesmoke.c   |  14
-rw-r--r--  arch/x86_64/kernel/pci-gart.c    |  15
-rw-r--r--  arch/x86_64/kernel/process.c     |   6
-rw-r--r--  arch/x86_64/kernel/smpboot.c     |  11
-rw-r--r--  arch/x86_64/kernel/time.c        | 258
-rw-r--r--  arch/x86_64/kernel/traps.c       |   2
-rw-r--r--  arch/x86_64/kernel/vsyscall.c    |  18
-rw-r--r--  arch/x86_64/lib/clear_page.S     |  32
-rw-r--r--  arch/x86_64/lib/copy_page.S      | 149
-rw-r--r--  arch/x86_64/lib/csum-copy.S      | 294
-rw-r--r--  arch/x86_64/lib/csum-partial.c   | 100
-rw-r--r--  arch/x86_64/lib/csum-wrappers.c  |  62
-rw-r--r--  arch/x86_64/lib/memcpy.S         |  95
-rw-r--r--  arch/x86_64/lib/memset.S         |  57
-rw-r--r--  arch/x86_64/vmlinux.lds.S        |   6
-rw-r--r--  include/asm-x86_64/checksum.h    |  12
-rw-r--r--  include/asm-x86_64/fixmap.h      |   2
-rw-r--r--  include/asm-x86_64/mc146818rtc.h |   5
-rw-r--r--  include/asm-x86_64/processor.h   |   2
-rw-r--r--  include/asm-x86_64/proto.h       |   2
-rw-r--r--  include/asm-x86_64/timex.h       |  30
-rw-r--r--  include/asm-x86_64/vsyscall.h    |  18
26 files changed, 735 insertions, 482 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 0bc2d8c94181..10f78014d6e6 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -52,6 +52,18 @@ config EARLY_PRINTK
klogd/syslogd or the X server. You should normally N here, unless
you want to debug such a crash.
+config HPET_TIMER
+ bool
+ default y
+ help
+ Use the IA-PC HPET (High Precision Event Timer) to manage
+ time in preference to the PIT and RTC, if a HPET is
+ present. The HPET provides a stable time base on SMP
+ systems, unlike the RTC, but it is more expensive to access,
+ as it is off-chip. You can find the HPET spec at
+ <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>.
+
+ If unsure, say Y.
config GENERIC_ISA_DMA
bool
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index a3cb3f13e9a7..4032f0720c2a 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -47,6 +47,7 @@
ENTRY(ia32_cstar_target)
swapgs
movl %esp,%r8d
+ movq %r8,%gs:pda_oldrsp
movq %gs:pda_kernelstack,%rsp
sti
SAVE_ARGS 8,1
diff --git a/arch/x86_64/kernel/acpi/boot.c b/arch/x86_64/kernel/acpi/boot.c
index e88862d9127b..d222d13acd7e 100644
--- a/arch/x86_64/kernel/acpi/boot.c
+++ b/arch/x86_64/kernel/acpi/boot.c
@@ -244,9 +244,11 @@ acpi_parse_hpet (
return -1;
}
- hpet.address = hpet_tbl->addr.addrl | ((long) hpet_tbl->addr.addrh << 32);
+ vxtime.hpet_address = hpet_tbl->addr.addrl |
+ ((long) hpet_tbl->addr.addrh << 32);
- printk(KERN_INFO "acpi: HPET id: %#x base: %#lx\n", hpet_tbl->id, hpet.address);
+ printk(KERN_INFO "acpi: HPET id: %#x base: %#lx\n",
+ hpet_tbl->id, vxtime.hpet_address);
return 0;
}
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 98440f87fc84..1addbb749ec2 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -690,7 +690,13 @@ static void setup_APIC_timer(unsigned int clocks)
}
/* wait for irq slice */
- {
+ if (vxtime.hpet_address) {
+ int trigger = hpet_readl(HPET_T0_CMP);
+ while (hpet_readl(HPET_COUNTER) >= trigger)
+ /* do nothing */ ;
+ while (hpet_readl(HPET_COUNTER) < trigger)
+ /* do nothing */ ;
+ } else {
int c1, c2;
outb_p(0x00, 0x43);
c2 = inb_p(0x40);
diff --git a/arch/x86_64/kernel/bluesmoke.c b/arch/x86_64/kernel/bluesmoke.c
index 00f2abe0ba46..392b8bab96ee 100644
--- a/arch/x86_64/kernel/bluesmoke.c
+++ b/arch/x86_64/kernel/bluesmoke.c
@@ -363,22 +363,12 @@ static void __init k8_mcheck_init(struct cpuinfo_x86 *c)
machine_check_vector = k8_machine_check;
for (i = 0; i < banks; i++) {
u64 val = ((1UL<<i) & disabled_banks) ? 0 : ~0UL;
+ if (val && i == 4)
+ val = k8_nb_flags;
wrmsrl(MSR_IA32_MC0_CTL+4*i, val);
wrmsrl(MSR_IA32_MC0_STATUS+4*i,0);
}
- nb = find_k8_nb();
- if (nb != NULL) {
- u32 reg, reg2;
- pci_read_config_dword(nb, 0x40, &reg);
- pci_write_config_dword(nb, 0x40, k8_nb_flags);
- pci_read_config_dword(nb, 0x44, &reg2);
- pci_write_config_dword(nb, 0x44, reg2);
- printk(KERN_INFO "Machine Check for K8 Northbridge %d enabled (%x,%x)\n",
- nb->devfn, reg, reg2);
- ignored_banks |= (1UL<<4);
- }
-
set_in_cr4(X86_CR4_MCE);
if (mcheck_interval && (smp_processor_id() == 0)) {
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 8e69eb12d341..12047fff5230 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -173,12 +173,10 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
if (iommu_page == -1)
goto error;
- /* Fill in the GATT, allocating pages as needed. */
+ /* Fill in the GATT */
for (i = 0; i < size; i++) {
unsigned long phys_mem;
void *mem = memory + i*PAGE_SIZE;
- if (i > 0)
- atomic_inc(&virt_to_page(mem)->count);
phys_mem = virt_to_phys(mem);
BUG_ON(phys_mem & ~PHYSICAL_PAGE_MASK);
iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
@@ -206,16 +204,14 @@ void pci_free_consistent(struct pci_dev *hwdev, size_t size,
size = round_up(size, PAGE_SIZE);
if (bus >= iommu_bus_base && bus <= iommu_bus_base + iommu_size) {
unsigned pages = size >> PAGE_SHIFT;
+ int i;
iommu_page = (bus - iommu_bus_base) >> PAGE_SHIFT;
vaddr = __va(GPTE_DECODE(iommu_gatt_base[iommu_page]));
-#ifdef CONFIG_IOMMU_DEBUG
- int i;
for (i = 0; i < pages; i++) {
u64 pte = iommu_gatt_base[iommu_page + i];
BUG_ON((pte & GPTE_VALID) == 0);
iommu_gatt_base[iommu_page + i] = 0;
}
-#endif
free_iommu(iommu_page, pages);
}
free_pages((unsigned long)vaddr, get_order(size));
@@ -319,11 +315,6 @@ dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size, int dir)
*/
iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
-#ifdef CONFIG_IOMMU_DEBUG
- /* paranoia check */
- BUG_ON(GPTE_DECODE(iommu_gatt_base[iommu_page+i]) != phys_mem);
-#endif
-
#ifdef CONFIG_IOMMU_LEAK
/* XXX need eventually caller of pci_map_sg */
if (iommu_leak_tab)
@@ -350,7 +341,6 @@ void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
return;
iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
npages = round_up(size + (dma_addr & ~PAGE_MASK), PAGE_SIZE) >> PAGE_SHIFT;
-#ifdef CONFIG_IOMMU_DEBUG
int i;
for (i = 0; i < npages; i++) {
iommu_gatt_base[iommu_page + i] = 0;
@@ -359,7 +349,6 @@ void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
iommu_leak_tab[iommu_page + i] = 0;
#endif
}
-#endif
free_iommu(iommu_page, npages);
}
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index e2c5421dcb1f..5a5b2fe81e6c 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -150,7 +150,7 @@ __setup("idle=", idle_setup);
/* Prints also some state that isn't saved in the pt_regs */
-void show_regs(struct pt_regs * regs)
+void __show_regs(struct pt_regs * regs)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
unsigned int fsindex,gsindex;
@@ -192,7 +192,11 @@ void show_regs(struct pt_regs * regs)
fs,fsindex,gs,gsindex,shadowgs);
printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
+}
+void show_regs(struct pt_regs *regs)
+{
+ __show_regs(regs);
show_trace(&regs->rsp);
}
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index d554126c5e64..00ee45c7a21b 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -67,6 +67,8 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
/* Set when the idlers are all forked */
int smp_threads_ready;
+extern void time_init_smp(void);
+
/*
* Trampoline 80x86 program as an array.
*/
@@ -760,7 +762,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
if (APIC_init_uniprocessor())
printk(KERN_NOTICE "Local APIC not detected."
" Using dummy APIC emulation.\n");
- return;
+ goto smp_done;
}
/*
@@ -784,7 +786,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
cpu_online_map = phys_cpu_present_map = 1;
phys_cpu_present_map = 1;
disable_apic = 1;
- return;
+ goto smp_done;
}
verify_local_APIC();
@@ -799,7 +801,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
cpu_online_map = phys_cpu_present_map = 1;
phys_cpu_present_map = 1;
disable_apic = 1;
- return;
+ goto smp_done;
}
connect_bsp_APIC();
@@ -883,6 +885,9 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
*/
if (cpu_has_tsc && cpucount)
synchronize_tsc_bp();
+
+ smp_done:
+ time_init_smp();
}
/* These are wrappers to interface to the new boot process. Someone
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 18c60397a8ea..aaa976327a82 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -24,6 +24,7 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/bcd.h>
+#include <asm/pgtable.h>
#include <asm/vsyscall.h>
#include <asm/timex.h>
#ifdef CONFIG_X86_LOCAL_APIC
@@ -35,41 +36,61 @@ u64 jiffies_64 = INITIAL_JIFFIES;
extern int using_apic_timer;
spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
+spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED;
extern int using_apic_timer;
extern void smp_local_timer_interrupt(struct pt_regs * regs);
+#undef HPET_HACK_ENABLE_DANGEROUS
+
unsigned int cpu_khz; /* TSC clocks / usec, not used here */
unsigned long hpet_period; /* fsecs / HPET clock */
unsigned long hpet_tick; /* HPET clocks / interrupt */
-int hpet_report_lost_ticks; /* command line option */
+unsigned long vxtime_hz = 1193182;
+int report_lost_ticks; /* command line option */
-struct hpet_data __hpet __section_hpet; /* address, quotient, trigger, hz */
+struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
struct timespec __xtime __section_xtime;
struct timezone __sys_tz __section_sys_tz;
+static inline void rdtscll_sync(unsigned long *tsc)
+{
+#ifdef CONFIG_SMP
+ sync_core();
+#endif
+ rdtscll(*tsc);
+}
+
/*
* do_gettimeoffset() returns microseconds since last timer interrupt was
* triggered by hardware. A memory read of HPET is slower than a register read
* of TSC, but much more reliable. It's also synchronized to the timer
* interrupt. Note that do_gettimeoffset() may return more than hpet_tick, if a
- * timer interrupt has happened already, but hpet.trigger wasn't updated yet.
+ * timer interrupt has happened already, but vxtime.trigger wasn't updated yet.
* This is not a problem, because jiffies hasn't updated either. They are bound
* together by xtime_lock.
*/
-inline unsigned int do_gettimeoffset(void)
+static inline unsigned int do_gettimeoffset_tsc(void)
{
unsigned long t;
- sync_core();
- rdtscll(t);
- return (t - hpet.last_tsc) * (1000000L / HZ) / hpet.ticks + hpet.offset;
+ unsigned long x;
+ rdtscll_sync(&t);
+ x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32;
+ return x;
+}
+
+static inline unsigned int do_gettimeoffset_hpet(void)
+{
+ return ((hpet_readl(HPET_COUNTER) - vxtime.last) * vxtime.quot) >> 32;
}
+unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
+
/*
* This version of gettimeofday() has microsecond resolution and better than
* microsecond precision, as we're using at least a 10 MHz (usually 14.31818
@@ -87,7 +108,8 @@ void do_gettimeofday(struct timeval *tv)
sec = xtime.tv_sec;
usec = xtime.tv_nsec / 1000;
- t = (jiffies - wall_jiffies) * (1000000L / HZ) + do_gettimeoffset();
+ t = (jiffies - wall_jiffies) * (1000000L / HZ) +
+ do_gettimeoffset();
usec += t;
} while (read_seqretry(&xtime_lock, seq));
@@ -107,7 +129,7 @@ void do_settimeofday(struct timeval *tv)
write_seqlock_irq(&xtime_lock);
tv->tv_usec -= do_gettimeoffset() +
- (jiffies - wall_jiffies) * tick_usec;
+ (jiffies - wall_jiffies) * (USEC_PER_SEC/HZ);
while (tv->tv_usec < 0) {
tv->tv_usec += 1000000;
@@ -178,8 +200,8 @@ static void set_rtc_mmss(unsigned long nowtime)
CMOS_WRITE(real_seconds, RTC_SECONDS);
CMOS_WRITE(real_minutes, RTC_MINUTES);
} else
- printk(KERN_WARNING "time.c: can't update CMOS clock from %d to %d\n",
- cmos_minutes, real_minutes);
+ printk(KERN_WARNING "time.c: can't update CMOS clock "
+ "from %d to %d\n", cmos_minutes, real_minutes);
/*
* The following flags have to be released exactly in this order, otherwise the
@@ -198,6 +220,8 @@ static void set_rtc_mmss(unsigned long nowtime)
static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
static unsigned long rtc_update = 0;
+ unsigned long tsc, lost = 0;
+ int delay, offset = 0;
/*
* Here we are in the timer irq handler. We have irqs locally disabled (so we
@@ -208,17 +232,53 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
write_seqlock(&xtime_lock);
- {
- unsigned long t;
+ if (vxtime.hpet_address) {
+ offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
+ delay = hpet_readl(HPET_COUNTER) - offset;
+ } else {
+ spin_lock(&i8253_lock);
+ outb_p(0x00, 0x43);
+ delay = inb_p(0x40);
+ delay |= inb(0x40) << 8;
+ spin_unlock(&i8253_lock);
+ delay = LATCH - 1 - delay;
+ }
- sync_core();
- rdtscll(t);
- hpet.offset = (t - hpet.last_tsc) * (1000000L / HZ) / hpet.ticks + hpet.offset - 1000000L / HZ;
- if (hpet.offset >= 1000000L / HZ)
- hpet.offset = 0;
- hpet.ticks = min_t(long, max_t(long, (t - hpet.last_tsc) * (1000000L / HZ) / (1000000L / HZ - hpet.offset),
- cpu_khz * 1000/HZ * 15 / 16), cpu_khz * 1000/HZ * 16 / 15);
- hpet.last_tsc = t;
+ rdtscll_sync(&tsc);
+
+ if (vxtime.mode == VXTIME_HPET) {
+ if (offset - vxtime.last > hpet_tick) {
+ lost = (offset - vxtime.last) / hpet_tick - 1;
+ }
+
+ vxtime.last = offset;
+ } else {
+ offset = (((tsc - vxtime.last_tsc) *
+ vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ);
+
+ if (offset < 0)
+ offset = 0;
+
+ if (offset > (USEC_PER_SEC / HZ)) {
+ lost = offset / (USEC_PER_SEC / HZ);
+ offset %= (USEC_PER_SEC / HZ);
+ }
+
+ vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
+
+ if ((((tsc - vxtime.last_tsc) *
+ vxtime.tsc_quot) >> 32) < offset)
+ vxtime.last_tsc = tsc -
+ (((long) offset << 32) / vxtime.tsc_quot) - 1;
+ }
+
+ if (lost) {
+ if (report_lost_ticks)
+ printk(KERN_WARNING "time.c: Lost %ld timer "
+ "tick(s)! (rip %016lx)\n",
+ (offset - vxtime.last) / hpet_tick - 1,
+ regs->rip);
+ jiffies += lost;
}
/*
@@ -244,7 +304,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
* If we have an externally synchronized Linux clock, then update CMOS clock
* accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy
* closest to exactly 500 ms before the next second. If the update fails, we
- * don'tcare, as it'll be updated on the next turn, and the problem (time way
+ * don't care, as it'll be updated on the next turn, and the problem (time way
* off) isn't likely to go away much sooner anyway.
*/
@@ -263,6 +323,7 @@ unsigned long get_cmos_time(void)
{
unsigned int timeout, year, mon, day, hour, min, sec;
unsigned char last, this;
+ unsigned long flags;
/*
* The Linux interpretation of the CMOS clock register contents: When the
@@ -272,7 +333,7 @@ unsigned long get_cmos_time(void)
* standard 8.3 MHz ISA bus.
*/
- spin_lock(&rtc_lock);
+ spin_lock_irqsave(&rtc_lock, flags);
timeout = 1000000;
last = this = 0;
@@ -295,7 +356,7 @@ unsigned long get_cmos_time(void)
mon = CMOS_READ(RTC_MONTH);
year = CMOS_READ(RTC_YEAR);
- spin_unlock(&rtc_lock);
+ spin_unlock_irqrestore(&rtc_lock, flags);
/*
* We know that x86-64 always uses BCD format, no need to check the config
@@ -326,6 +387,32 @@ unsigned long get_cmos_time(void)
#define TICK_COUNT 100000000
+static unsigned int __init hpet_calibrate_tsc(void)
+{
+ int tsc_start, hpet_start;
+ int tsc_now, hpet_now;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ local_irq_disable();
+
+ hpet_start = hpet_readl(HPET_COUNTER);
+ rdtscl(tsc_start);
+
+ do {
+ local_irq_disable();
+ hpet_now = hpet_readl(HPET_COUNTER);
+ sync_core();
+ rdtscl(tsc_now);
+ local_irq_restore(flags);
+ } while ((tsc_now - tsc_start) < TICK_COUNT &&
+ (hpet_now - hpet_start) < TICK_COUNT);
+
+ return (tsc_now - tsc_start) * 1000000000L
+ / ((hpet_now - hpet_start) * hpet_period / 1000);
+}
+
+
/*
* pit_calibrate_tsc() uses the speaker output (channel 2) of
* the PIT. This is better than using the timer interrupt output,
@@ -339,10 +426,9 @@ static unsigned int __init pit_calibrate_tsc(void)
unsigned long start, end;
unsigned long flags;
- outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+ spin_lock_irqsave(&i8253_lock, flags);
- local_irq_save(flags);
- local_irq_disable();
+ outb((inb(0x61) & ~0x02) | 0x01, 0x61);
outb(0xb0, 0x43);
outb((1193182 / (1000 / 50)) & 0xff, 0x42);
@@ -353,42 +439,146 @@ static unsigned int __init pit_calibrate_tsc(void)
sync_core();
rdtscll(end);
-
- local_irq_restore(flags);
+ spin_unlock_irqrestore(&i8253_lock, flags);
return (end - start) / 50;
}
+static int hpet_init(void)
+{
+ unsigned int cfg, id;
+
+ if (!vxtime.hpet_address)
+ return -1;
+ set_fixmap_nocache(FIX_HPET_BASE, vxtime.hpet_address);
+
+/*
+ * Read the period, compute tick and quotient.
+ */
+
+ id = hpet_readl(HPET_ID);
+
+ if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER) ||
+ !(id & HPET_ID_LEGSUP))
+ return -1;
+
+ hpet_period = hpet_readl(HPET_PERIOD);
+ if (hpet_period < 100000 || hpet_period > 100000000)
+ return -1;
+
+ hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) /
+ hpet_period;
+
+/*
+ * Stop the timers and reset the main counter.
+ */
+
+ cfg = hpet_readl(HPET_CFG);
+ cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
+ hpet_writel(cfg, HPET_CFG);
+ hpet_writel(0, HPET_COUNTER);
+ hpet_writel(0, HPET_COUNTER + 4);
+
+/*
+ * Set up timer 0, as periodic with first interrupt to happen at hpet_tick,
+ * and period also hpet_tick.
+ */
+
+ hpet_writel(HPET_T0_ENABLE | HPET_T0_PERIODIC | HPET_T0_SETVAL |
+ HPET_T0_32BIT, HPET_T0_CFG);
+ hpet_writel(hpet_tick, HPET_T0_CMP);
+ hpet_writel(hpet_tick, HPET_T0_CMP);
+
+/*
+ * Go!
+ */
+
+ cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY;
+ hpet_writel(cfg, HPET_CFG);
+
+ return 0;
+}
+
void __init pit_init(void)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&i8253_lock, flags);
outb_p(0x34, 0x43); /* binary, mode 2, LSB/MSB, ch 0 */
outb_p(LATCH & 0xff, 0x40); /* LSB */
outb_p(LATCH >> 8, 0x40); /* MSB */
+ spin_unlock_irqrestore(&i8253_lock, flags);
}
int __init time_setup(char *str)
{
- hpet_report_lost_ticks = 1;
+ report_lost_ticks = 1;
return 1;
}
-static struct irqaction irq0 = { timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL};
+static struct irqaction irq0 = {
+ timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL
+};
extern void __init config_acpi_tables(void);
void __init time_init(void)
{
+ char *timename;
+
+#ifdef HPET_HACK_ENABLE_DANGEROUS
+ if (!vxtime.hpet_address) {
+ printk(KERN_WARNING "time.c: WARNING: Enabling HPET base "
+ "manually!\n");
+ outl(0x800038a0, 0xcf8);
+ outl(0xff000001, 0xcfc);
+ outl(0x800038a0, 0xcf8);
+ hpet_address = inl(0xcfc) & 0xfffffffe;
+ printk(KERN_WARNING "time.c: WARNING: Enabled HPET "
+ "at %#lx.\n", hpet_address);
+ }
+#endif
+
xtime.tv_sec = get_cmos_time();
xtime.tv_nsec = 0;
+ if (!hpet_init()) {
+ vxtime_hz = (1000000000000000L + hpet_period / 2) /
+ hpet_period;
+ cpu_khz = hpet_calibrate_tsc();
+ timename = "HPET";
+ } else {
pit_init();
- printk(KERN_INFO "time.c: Using 1.1931816 MHz PIT timer.\n");
cpu_khz = pit_calibrate_tsc();
+ timename = "PIT";
+ }
+
+ printk(KERN_INFO "time.c: Using %ld.%06ld MHz %s timer.\n",
+ vxtime_hz / 1000000, vxtime_hz % 1000000, timename);
printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
- hpet.ticks = cpu_khz * (1000 / HZ);
- rdtscll(hpet.last_tsc);
+ vxtime.mode = VXTIME_TSC;
+ vxtime.quot = (1000000L << 32) / vxtime_hz;
+ vxtime.tsc_quot = (1000L << 32) / cpu_khz;
+ vxtime.hz = vxtime_hz;
+ rdtscll_sync(&vxtime.last_tsc);
setup_irq(0, &irq0);
}
+void __init time_init_smp(void)
+{
+ char *timetype;
+
+ if (vxtime.hpet_address) {
+ timetype = "HPET";
+ vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick;
+ vxtime.mode = VXTIME_HPET;
+ do_gettimeoffset = do_gettimeoffset_hpet;
+ } else {
+ timetype = "PIT/TSC";
+ vxtime.mode = VXTIME_TSC;
+ }
+ printk(KERN_INFO "time.c: Using %s based timekeeping.\n", timetype);
+}
+
__setup("report_lost_ticks", time_setup);
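As a cross-check of the new hpet_init() arithmetic (this sketch is not part of the patch): the HPET period register gives femtoseconds per counter tick, from which hpet_tick (HPET clocks per timer interrupt) and vxtime_hz follow directly. The period value below is what a 14.31818 MHz HPET would report, HZ=1000 and a 64-bit long are assumed:

    #include <stdio.h>

    int main(void)
    {
            long hpet_period   = 69841279;        /* fs per HPET clock (assumed) */
            long usec_per_tick = 1000000 / 1000;  /* USEC_PER_SEC / HZ, HZ assumed 1000 */

            /* HPET clocks per timer interrupt, rounded to nearest, as hpet_init() does */
            long hpet_tick = (1000000000L * usec_per_tick + hpet_period / 2) / hpet_period;

            /* HPET counter frequency in Hz, as time_init() computes vxtime_hz */
            long vxtime_hz = (1000000000000000L + hpet_period / 2) / hpet_period;

            printf("hpet_tick = %ld, vxtime_hz = %ld\n", hpet_tick, vxtime_hz);
            return 0;
    }

This prints hpet_tick = 14318 and vxtime_hz = 14318180, i.e. roughly one timer 0 comparator hit per millisecond.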
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index e1620dc69800..24d5862bdcd7 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -263,7 +263,7 @@ void show_registers(struct pt_regs *regs)
rsp = regs->rsp;
printk("CPU %d ", cpu);
- show_regs(regs);
+ __show_regs(regs);
printk("Process %s (pid: %d, stackpage=%08lx)\n",
cur->comm, cur->pid, 4096+(unsigned long)cur);
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index 4162dd9bf8dc..cb52162ae495 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -78,13 +78,21 @@ static force_inline void do_vgettimeofday(struct timeval * tv)
do {
sequence = read_seqbegin(&__xtime_lock);
- sync_core();
- rdtscll(t);
sec = __xtime.tv_sec;
usec = (__xtime.tv_nsec / 1000) +
- (__jiffies - __wall_jiffies) * (1000000 / HZ) +
- (t - __hpet.last_tsc) * (1000000 / HZ) / __hpet.ticks + __hpet.offset;
-
+ (__jiffies - __wall_jiffies) * (1000000 / HZ);
+
+ if (__vxtime.mode == VXTIME_TSC) {
+ sync_core();
+ rdtscll(t);
+ usec += ((t - __vxtime.last_tsc) *
+ __vxtime.tsc_quot) >> 32;
+ } else {
+#if 0
+ usec += ((readl(fix_to_virt(VSYSCALL_HPET) + 0xf0) -
+ __vxtime.last) * __vxtime.quot) >> 32;
+#endif
+ }
} while (read_seqretry(&__xtime_lock, sequence));
tv->tv_sec = sec + usec / 1000000;
diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S
index b34c34f85fc6..5867f704c0ad 100644
--- a/arch/x86_64/lib/clear_page.S
+++ b/arch/x86_64/lib/clear_page.S
@@ -1,19 +1,18 @@
-/*
- * Copyright 2002 Andi Kleen, SuSE Labs.
- */
- #include <linux/linkage.h>
/*
* Zero a page.
* rdi page
*/
-ENTRY(clear_page)
+ .globl clear_page
+ .p2align 4
+clear_page:
xorl %eax,%eax
- movl $4096/128,%ecx
- movl $128,%edx
-loop:
+ movl $4096/64,%ecx
+ .p2align 4
+.Lloop:
+ decl %ecx
#define PUT(x) movq %rax,x*8(%rdi)
- PUT(0)
+ movq %rax,(%rdi)
PUT(1)
PUT(2)
PUT(3)
@@ -21,17 +20,8 @@ loop:
PUT(5)
PUT(6)
PUT(7)
- PUT(8)
- PUT(9)
- PUT(10)
- PUT(11)
- PUT(12)
- PUT(13)
- PUT(14)
- PUT(15)
- addq %rdx,%rdi
- decl %ecx
- jnz loop
- sfence
+ leaq 64(%rdi),%rdi
+ jnz .Lloop
+ nop
ret
diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S
index 71d3c57441fa..72eb9314bd37 100644
--- a/arch/x86_64/lib/copy_page.S
+++ b/arch/x86_64/lib/copy_page.S
@@ -1,74 +1,91 @@
-/*
- * Copyright 2002 Andi Kleen, SuSE Labs.
- */
+/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
- #include <linux/linkage.h>
- #include <linux/config.h>
- #ifdef CONFIG_PREEMPT
- #warning "check your fpu context saving!"
- #endif
+/* Don't use streaming store because it's better when the target
+ ends up in cache. */
+
+/* Could vary the prefetch distance based on SMP/UP */
-/*
- * Copy a page.
- *
- * rdi destination page
- * rsi source page
- *
- * src/dst must be aligned to 16 bytes.
- *
- * Warning: in case of super lazy FP save this needs to be preempt_stop
- */
-
.globl copy_page
- .p2align
+ .p2align 4
copy_page:
- prefetchnta (%rsi)
- prefetchnta 64(%rsi)
-
- movq %rsp,%rax
- subq $16*4,%rsp
- andq $~15,%rsp
- movdqa %xmm0,(%rsp)
- movdqa %xmm1,16(%rsp)
- movdqa %xmm2,32(%rsp)
- movdqa %xmm3,48(%rsp)
-
- movl $(4096/128)-2,%ecx
- movl $128,%edx
-loop:
- prefetchnta (%rsi)
- prefetchnta 64(%rsi)
-loop_no_prefetch:
- movdqa (%rsi),%xmm0
- movdqa 16(%rsi),%xmm1
- movdqa 32(%rsi),%xmm2
- movdqa 48(%rsi),%xmm3
- movntdq %xmm0,(%rdi)
- movntdq %xmm1,16(%rdi)
- movntdq %xmm2,32(%rdi)
- movntdq %xmm3,48(%rdi)
-
- movdqa 64(%rsi),%xmm0
- movdqa 80(%rsi),%xmm1
- movdqa 96(%rsi),%xmm2
- movdqa 112(%rsi),%xmm3
- movntdq %xmm0,64(%rdi)
- movntdq %xmm1,80(%rdi)
- movntdq %xmm2,96(%rdi)
- movntdq %xmm3,112(%rdi)
+ prefetch (%rsi)
+ prefetch 1*64(%rsi)
+ prefetch 2*64(%rsi)
+ prefetch 3*64(%rsi)
+ prefetch 4*64(%rsi)
+ prefetchw (%rdi)
+ prefetchw 1*64(%rdi)
+ prefetchw 2*64(%rdi)
+ prefetchw 3*64(%rdi)
+ prefetchw 4*64(%rdi)
+
+ subq $3*8,%rsp
+ movq %rbx,(%rsp)
+ movq %r12,1*8(%rsp)
+ movq %r13,2*8(%rsp)
+
+ movl $(4096/64)-5,%ecx
+ .p2align 4
+.Loop64:
+ dec %rcx
+
+ movq (%rsi), %rax
+ movq 8 (%rsi), %rbx
+ movq 16 (%rsi), %rdx
+ movq 24 (%rsi), %r8
+ movq 32 (%rsi), %r9
+ movq 40 (%rsi), %r10
+ movq 48 (%rsi), %r11
+ movq 56 (%rsi), %r12
+
+ prefetch 5*64(%rsi)
+
+ movq %rax, (%rdi)
+ movq %rbx, 8 (%rdi)
+ movq %rdx, 16 (%rdi)
+ movq %r8, 24 (%rdi)
+ movq %r9, 32 (%rdi)
+ movq %r10, 40 (%rdi)
+ movq %r11, 48 (%rdi)
+ movq %r12, 56 (%rdi)
- addq %rdx,%rdi
- addq %rdx,%rsi
+ prefetchw 5*64(%rdi)
+
+ leaq 64 (%rsi), %rsi
+ leaq 64 (%rdi), %rdi
+
+ jnz .Loop64
+
+ movl $5,%ecx
+ .p2align 4
+.Loop2:
decl %ecx
- jns loop
- cmpl $-1,%ecx
- je loop_no_prefetch
-
- sfence
- movdqa (%rsp),%xmm0
- movdqa 16(%rsp),%xmm1
- movdqa 32(%rsp),%xmm2
- movdqa 48(%rsp),%xmm3
- movq %rax,%rsp
+ movq (%rsi), %rax
+ movq 8 (%rsi), %rbx
+ movq 16 (%rsi), %rdx
+ movq 24 (%rsi), %r8
+ movq 32 (%rsi), %r9
+ movq 40 (%rsi), %r10
+ movq 48 (%rsi), %r11
+ movq 56 (%rsi), %r12
+
+ movq %rax, (%rdi)
+ movq %rbx, 8 (%rdi)
+ movq %rdx, 16 (%rdi)
+ movq %r8, 24 (%rdi)
+ movq %r9, 32 (%rdi)
+ movq %r10, 40 (%rdi)
+ movq %r11, 48 (%rdi)
+ movq %r12, 56 (%rdi)
+
+ leaq 64(%rdi),%rdi
+ leaq 64(%rsi),%rsi
+
+ jnz .Loop2
+
+ movq (%rsp),%rbx
+ movq 1*8(%rsp),%r12
+ movq 2*8(%rsp),%r13
+ addq $3*8,%rsp
ret
diff --git a/arch/x86_64/lib/csum-copy.S b/arch/x86_64/lib/csum-copy.S
index ccd89d25fe85..7aa2cec7ab8f 100644
--- a/arch/x86_64/lib/csum-copy.S
+++ b/arch/x86_64/lib/csum-copy.S
@@ -1,5 +1,5 @@
/*
- * Copyright 2002 Andi Kleen
+ * Copyright 2002,2003 Andi Kleen, SuSE Labs.
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of this archive
@@ -8,7 +8,6 @@
#include <linux/linkage.h>
#include <asm/errno.h>
-// #define FIX_ALIGNMENT 1
/*
* Checksum copy with exception handling.
* On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
@@ -26,17 +25,14 @@
* eax 64bit sum. undefined in case of exception.
*
* Wrappers need to take care of valid exception sum and zeroing.
+ * They also should align source or destination to 8 bytes.
*/
-/* for now - should vary this based on direction */
- #define prefetch prefetcht2
- #define movnti movq
-
.macro source
10:
.section __ex_table,"a"
.align 8
- .quad 10b,bad_source
+ .quad 10b,.Lbad_source
.previous
.endm
@@ -44,57 +40,74 @@
20:
.section __ex_table,"a"
.align 8
- .quad 20b,bad_dest
+ .quad 20b,.Lbad_dest
.previous
.endm
+ .macro ignore L=.Lignore
+30:
+ .section __ex_table,"a"
+ .align 8
+ .quad 30b,\L
+ .previous
+ .endm
+
+
.globl csum_partial_copy_generic
- .p2align
+ .p2align 4
csum_partial_copy_generic:
- prefetchnta (%rdi)
+ cmpl $3*64,%edx
+ jle .Lignore
+
+ ignore
+ prefetch (%rdi)
+ ignore
+ prefetch 1*64(%rdi)
+ ignore
+ prefetch 2*64(%rdi)
+ ignore
+ prefetch 3*64(%rdi)
+ ignore
+ prefetch 4*64(%rdi)
+ ignore
+ prefetchw (%rsi)
+ ignore
+ prefetchw 1*64(%rsi)
+ ignore
+ prefetchw 2*64(%rsi)
+ ignore
+ prefetchw 3*64(%rsi)
+ ignore
+ prefetchw 4*64(%rsi)
+
+.Lignore:
+ subq $7*8,%rsp
+ movq %rbx,2*8(%rsp)
+ movq %r12,3*8(%rsp)
+ movq %r14,4*8(%rsp)
+ movq %r13,5*8(%rsp)
+ movq %rbp,6*8(%rsp)
+
+ movq %r8,(%rsp)
+ movq %r9,1*8(%rsp)
- pushq %rbx
- pushq %r12
- pushq %r14
- pushq %r15
- movq %r8,%r14
- movq %r9,%r15
movl %ecx,%eax
movl %edx,%ecx
-#ifdef FIX_ALIGNMENT
- /* align source to 8 bytes */
- movl %edi,%r8d
- andl $7,%r8d
- jnz bad_alignment
-after_bad_alignment:
-#endif
-
- movl $64,%r10d
xorl %r9d,%r9d
movq %rcx,%r12
shrq $6,%r12
- /* loopcounter is maintained as one less to test efficiently for the
- previous to last iteration. This is needed to stop the prefetching. */
- decq %r12
- js handle_tail /* < 64 */
- jz loop_no_prefetch /* = 64 + X */
+ jz .Lhandle_tail /* < 64 */
+
+ clc
/* main loop. clear in 64 byte blocks */
- /* tries hard not to prefetch over the boundary */
- /* r10: 64, r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
+ /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
/* r11: temp3, rdx: temp4, r12 loopcnt */
- .p2align
-loop:
- /* Could prefetch more than one loop, but then it would be even
- trickier to avoid prefetching over the boundary. The hardware prefetch
- should take care of this anyways. The reason for this prefetch is
- just the non temporal hint to avoid cache pollution. Hopefully this
- will be handled properly by the hardware. */
- prefetchnta 64(%rdi)
-
-loop_no_prefetch:
+ /* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */
+ .p2align 4
+.Lloop:
source
movq (%rdi),%rbx
source
@@ -104,175 +117,136 @@ loop_no_prefetch:
source
movq 24(%rdi),%rdx
- dest
- movnti %rbx,(%rsi)
- dest
- movnti %r8,8(%rsi)
- dest
- movnti %r11,16(%rsi)
- dest
- movnti %rdx,24(%rsi)
+ source
+ movq 32(%rdi),%r10
+ source
+ movq 40(%rdi),%rbp
+ source
+ movq 48(%rdi),%r14
+ source
+ movq 56(%rdi),%r13
- addq %rbx,%rax
+ ignore 2f
+ prefetch 5*64(%rdi)
+2:
+ adcq %rbx,%rax
adcq %r8,%rax
adcq %r11,%rax
adcq %rdx,%rax
+ adcq %r10,%rax
+ adcq %rbp,%rax
+ adcq %r14,%rax
+ adcq %r13,%rax
- source
- movq 32(%rdi),%rbx
- source
- movq 40(%rdi),%r8
- source
- movq 48(%rdi),%r11
- source
- movq 56(%rdi),%rdx
+ decl %r12d
dest
- movnti %rbx,32(%rsi)
+ movq %rbx,(%rsi)
dest
- movnti %r8,40(%rsi)
+ movq %r8,8(%rsi)
dest
- movnti %r11,48(%rsi)
+ movq %r11,16(%rsi)
dest
- movnti %rdx,56(%rsi)
+ movq %rdx,24(%rsi)
- adcq %rbx,%rax
- adcq %r8,%rax
- adcq %r11,%rax
- adcq %rdx,%rax
+ dest
+ movq %r10,32(%rsi)
+ dest
+ movq %rbp,40(%rsi)
+ dest
+ movq %r14,48(%rsi)
+ dest
+ movq %r13,56(%rsi)
- adcq %r9,%rax /* add in carry */
+ ignore 3f
+ prefetchw 5*64(%rsi)
+3:
- addq %r10,%rdi
- addq %r10,%rsi
+ leaq 64(%rdi),%rdi
+ leaq 64(%rsi),%rsi
- decq %r12
- jz loop_no_prefetch /* previous to last iteration? */
- jns loop
+ jnz .Lloop
+
+ adcq %r9,%rax
/* do last upto 56 bytes */
-handle_tail:
+.Lhandle_tail:
/* ecx: count */
movl %ecx,%r10d
andl $63,%ecx
shrl $3,%ecx
- jz fold
+ jz .Lfold
clc
- movl $8,%edx
-loop_8:
+ .p2align 4
+.Lloop_8:
source
movq (%rdi),%rbx
adcq %rbx,%rax
- dest
- movnti %rbx,(%rsi)
- leaq (%rsi,%rdx),%rsi /* preserve carry */
- leaq (%rdi,%rdx),%rdi
decl %ecx
- jnz loop_8
+ dest
+ movq %rbx,(%rsi)
+ leaq 8(%rsi),%rsi /* preserve carry */
+ leaq 8(%rdi),%rdi
+ jnz .Lloop_8
adcq %r9,%rax /* add in carry */
-fold:
+.Lfold:
+ /* reduce checksum to 32bits */
movl %eax,%ebx
shrq $32,%rax
- addq %rbx,%rax
+ addl %ebx,%eax
+ adcl %r9d,%eax
/* do last upto 6 bytes */
-handle_7:
+.Lhandle_7:
movl %r10d,%ecx
andl $7,%ecx
shrl $1,%ecx
- jz handle_1
+ jz .Lhandle_1
movl $2,%edx
xorl %ebx,%ebx
clc
-loop_1:
+ .p2align 4
+.Lloop_1:
source
movw (%rdi),%bx
- adcq %rbx,%rax
+ adcl %ebx,%eax
dest
- movw %bx,(%rsi)
- addq %rdx,%rdi
- addq %rdx,%rsi
decl %ecx
- jnz loop_1
- adcw %r9w,%ax /* add in carry */
+ movw %bx,(%rsi)
+ leaq 2(%rdi),%rdi
+ leaq 2(%rsi),%rsi
+ jnz .Lloop_1
+ adcl %r9d,%eax /* add in carry */
/* handle last odd byte */
-handle_1:
+.Lhandle_1:
testl $1,%r10d
- jz ende
+ jz .Lende
xorl %ebx,%ebx
source
movb (%rdi),%bl
dest
movb %bl,(%rsi)
- addw %bx,%ax
- adcw %r9w,%ax /* carry */
+ addl %ebx,%eax
+ adcl %r9d,%eax /* carry */
-ende:
- sfence
- popq %r15
- popq %r14
- popq %r12
- popq %rbx
+.Lende:
+ movq 2*8(%rsp),%rbx
+ movq 3*8(%rsp),%r12
+ movq 4*8(%rsp),%r14
+ movq 5*8(%rsp),%r13
+ movq 6*8(%rsp),%rbp
+ addq $7*8,%rsp
ret
-#ifdef FIX_ALIGNMENT
- /* align source to 8 bytes. */
- /* r8d: unalignedness, ecx len */
-bad_alignment:
- testl $1,%edi
- jnz odd_source
-
- /* compute distance to next aligned position */
- movl $8,%r8d
- xchgl %r8d,%ecx
- subl %r8d,%ecx
-
- /* handle unaligned part */
- shrl $1,%ecx
- xorl %ebx,%ebx
- movl $2,%r10d
-align_loop:
- source
- movw (%rdi),%bx
- addq %rbx,%rax /* carry cannot happen */
- dest
- movw %bx,(%rsi)
- addq %r10,%rdi
- addq %r10,%rsi
- decl %ecx
- jnz align_loop
- jmp after_bad_alignment
-
- /* weird case. need to swap the sum at the end because the spec requires
- 16 bit words of the sum to be always paired.
- handle it recursively because it should be rather rare. */
-odd_source:
- /* copy odd byte */
- xorl %ebx,%ebx
- source
- movb (%rdi),%bl
- addl %ebx,%eax /* add to old checksum */
- adcl $0,%ecx
- dest
- movb %al,(%rsi)
-
- /* fix arguments */
- movl %eax,%ecx
- incq %rsi
- incq %rdi
- decq %rdx
- call csum_partial_copy_generic
- bswap %eax /* this should work, but check */
- jmp ende
-#endif
-
/* Exception handlers. Very simple, zeroing is done in the wrappers */
-bad_source:
- movl $-EFAULT,(%r14)
- jmp ende
+.Lbad_source:
+ movq (%rsp),%rax
+ movl $-EFAULT,(%rax)
+ jmp .Lende
-bad_dest:
- movl $-EFAULT,(%r15)
- jmp ende
+.Lbad_dest:
+ movq 8(%rsp),%rax
+ movl $-EFAULT,(%rax)
+ jmp .Lende
diff --git a/arch/x86_64/lib/csum-partial.c b/arch/x86_64/lib/csum-partial.c
index 180e7a5810cb..2eb49de9b027 100644
--- a/arch/x86_64/lib/csum-partial.c
+++ b/arch/x86_64/lib/csum-partial.c
@@ -7,35 +7,39 @@
#include <linux/compiler.h>
#include <linux/module.h>
+#include <asm/checksum.h>
-/* Better way for this sought */
-static inline unsigned short from64to16(unsigned long x)
+#define __force_inline inline __attribute__((always_inline))
+
+static inline unsigned short from32to16(unsigned a)
{
- /* add up 32-bit words for 33 bits */
- x = (x & 0xffffffff) + (x >> 32);
- /* add up 16-bit and 17-bit words for 17+c bits */
- x = (x & 0xffff) + (x >> 16);
- /* add up 16-bit and 2-bit for 16+c bit */
- x = (x & 0xffff) + (x >> 16);
- /* add up carry.. */
- x = (x & 0xffff) + (x >> 16);
- return x;
+ unsigned short b = a >> 16;
+ asm("addw %w2,%w0\n\t"
+ "adcw $0,%w0\n"
+ : "=r" (b)
+ : "0" (b), "r" (a));
+ return b;
}
/*
* Do a 64-bit checksum on an arbitrary memory area.
* Returns a 32bit checksum.
*
- * This isn't a great routine, but it's not _horrible_ either.
- * We rely on the compiler to unroll.
+ * This isn't as time critical as it used to be because many NICs
+ * do hardware checksumming these days.
+ *
+ * Things tried and found to not make it faster:
+ * Manual Prefetching
+ * Unrolling to an 128 bytes inner loop.
+ * Using interleaving with more registers to break the carry chains.
*/
-static inline unsigned do_csum(const unsigned char * buff, int len)
+static __force_inline unsigned do_csum(const unsigned char *buff, unsigned len)
{
- int odd, count;
+ unsigned odd, count;
unsigned long result = 0;
- if (len <= 0)
- goto out;
+ if (unlikely(len == 0))
+ return result;
odd = 1 & (unsigned long) buff;
if (unlikely(odd)) {
result = *buff << 8;
@@ -45,7 +49,7 @@ static inline unsigned do_csum(const unsigned char * buff, int len)
count = len >> 1; /* nr of 16-bit words.. */
if (count) {
if (2 & (unsigned long) buff) {
- result += *(unsigned short *) buff;
+ result += *(unsigned short *)buff;
count--;
len -= 2;
buff += 2;
@@ -59,18 +63,41 @@ static inline unsigned do_csum(const unsigned char * buff, int len)
buff += 4;
}
count >>= 1; /* nr of 64-bit words.. */
- if (count) {
+
+ /* main loop using 64byte blocks */
unsigned long zero = 0;
- do {
- asm(" addq %1,%0\n"
- " adcq %2,%0\n"
+ unsigned count64 = count >> 3;
+ while (count64) {
+ asm("addq 0*8(%[src]),%[res]\n\t"
+ "adcq 1*8(%[src]),%[res]\n\t"
+ "adcq 2*8(%[src]),%[res]\n\t"
+ "adcq 3*8(%[src]),%[res]\n\t"
+ "adcq 4*8(%[src]),%[res]\n\t"
+ "adcq 5*8(%[src]),%[res]\n\t"
+ "adcq 6*8(%[src]),%[res]\n\t"
+ "adcq 7*8(%[src]),%[res]\n\t"
+ "adcq %[zero],%[res]"
+ : [res] "=r" (result)
+ : [src] "r" (buff), [zero] "r" (zero),
+ "[res]" (result));
+ buff += 64;
+ count64--;
+ }
+
+ /* last upto 7 8byte blocks */
+ count %= 8;
+ while (count) {
+ asm("addq %1,%0\n\t"
+ "adcq %2,%0\n"
: "=r" (result)
- : "m" (*buff), "r" (zero), "0" (result));
- count--;
+ : "m" (*(unsigned long *)buff),
+ "r" (zero), "0" (result));
+ --count;
buff += 8;
- } while (count);
- result = (result & 0xffffffff) + (result >> 32);
}
+ result = add32_with_carry(result>>32,
+ result&0xffffffff);
+
if (len & 4) {
result += *(unsigned int *) buff;
buff += 4;
@@ -83,10 +110,11 @@ static inline unsigned do_csum(const unsigned char * buff, int len)
}
if (len & 1)
result += *buff;
- result = from64to16(result);
- if (unlikely(odd))
- return ((result >> 8) & 0xff) | ((result & 0xff) << 8);
-out:
+ result = add32_with_carry(result>>32, result & 0xffffffff);
+ if (unlikely(odd)) {
+ result = from32to16(result);
+ result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+ }
return result;
}
@@ -102,18 +130,11 @@ out:
*
* it's best to have buff aligned on a 64-bit boundary
*/
-unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
+unsigned csum_partial(const unsigned char *buff, unsigned len, unsigned sum)
{
- unsigned result = do_csum(buff, len);
-
- /* add in old sum, and carry.. */
- asm("addl %1,%0\n\t"
- "adcl $0,%0" : "=r" (result) : "r" (sum), "0" (result));
- return result;
+ return add32_with_carry(do_csum(buff, len), sum);
}
-//EXPORT_SYMBOL(csum_partial);
-
/*
* this routine is used for miscellaneous IP-like checksums, mainly
* in icmp.c
@@ -123,4 +144,3 @@ unsigned short ip_compute_csum(unsigned char * buff, int len)
return ~csum_partial(buff,len,0);
}
-EXPORT_SYMBOL(ip_compute_csum);
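The unrolled adcq block above is ones'-complement accumulation of 64-bit words; a portable sketch of the same arithmetic (plain C, no inline assembly, not from the patch), with the carry folded back in per word:

    #include <stdio.h>
    #include <string.h>

    /* Sum 8-byte words with end-around carry, as the adcq chain in do_csum() does. */
    static unsigned long long csum64(const unsigned char *buf, size_t len)
    {
            unsigned long long sum = 0, word;

            while (len >= 8) {
                    memcpy(&word, buf, 8);   /* unaligned-safe load */
                    sum += word;
                    if (sum < word)          /* carry out of bit 63... */
                            sum++;           /* ...is added back in */
                    buf += 8;
                    len -= 8;
            }
            return sum;
    }

    int main(void)
    {
            unsigned char data[64];
            memset(data, 0xff, sizeof data);
            printf("sum = %#llx\n", csum64(data, sizeof data));
            return 0;
    }

The 64-bit result is then folded down to 32 and finally 16 bits by add32_with_carry() and csum_fold().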
diff --git a/arch/x86_64/lib/csum-wrappers.c b/arch/x86_64/lib/csum-wrappers.c
index a55a6e3e4136..b89aca460ec5 100644
--- a/arch/x86_64/lib/csum-wrappers.c
+++ b/arch/x86_64/lib/csum-wrappers.c
@@ -1,4 +1,4 @@
-/* Copyright 2002 Andi Kleen, SuSE Labs.
+/* Copyright 2002,2003 Andi Kleen, SuSE Labs.
* Subject to the GNU Public License v.2
*
* Wrappers of assembly checksum functions for x86-64.
@@ -7,18 +7,6 @@
#include <asm/checksum.h>
#include <linux/module.h>
-/* Better way for this sought */
-static inline unsigned from64to32(unsigned long x)
-{
- /* add up 32-bit words for 33 bits */
- x = (x & 0xffffffff) + (x >> 32);
- /* add up 16-bit and 17-bit words for 17+c bits */
- x = (x & 0xffff) + (x >> 16);
- /* add up 16-bit and 2-bit for 16+c bit */
- x = (x & 0xffff) + (x >> 16);
- return x;
-}
-
/**
* csum_partial_copy_from_user - Copy and checksum from user space.
* @src: source address (user space)
@@ -36,14 +24,32 @@ csum_partial_copy_from_user(const char *src, char *dst,
{
*errp = 0;
if (likely(access_ok(VERIFY_READ,src, len))) {
- unsigned long sum;
- sum = csum_partial_copy_generic(src,dst,len,isum,errp,NULL);
+ /* Why 6, not 7? To handle odd addresses aligned we
+ would need to do considerable complications to fix the
+ checksum which is defined as an 16bit accumulator. The
+ fix alignment code is primarily for performance
+ compatibility with 32bit and that will handle odd
+ addresses slowly too. */
+ if (unlikely((unsigned long)src & 6)) {
+ while (((unsigned long)src & 6) && len >= 2) {
+ __u16 val16;
+ *errp = __get_user(val16, (__u16 *)src);
+ if (*errp)
+ return isum;
+ *(__u16 *)dst = val16;
+ isum = add32_with_carry(isum, val16);
+ src += 2;
+ dst += 2;
+ len -= 2;
+ }
+ }
+ isum = csum_partial_copy_generic(src,dst,len,isum,errp,NULL);
if (likely(*errp == 0))
- return from64to32(sum);
+ return isum;
}
*errp = -EFAULT;
memset(dst,0,len);
- return 0;
+ return isum;
}
EXPORT_SYMBOL(csum_partial_copy_from_user);
@@ -67,8 +73,22 @@ csum_partial_copy_to_user(const char *src, char *dst,
*errp = -EFAULT;
return 0;
}
+
+ if (unlikely((unsigned long)dst & 6)) {
+ while (((unsigned long)dst & 6) && len >= 2) {
+ __u16 val16 = *(__u16 *)src;
+ isum = add32_with_carry(isum, val16);
+ *errp = __put_user(val16, (__u16 *)dst);
+ if (*errp)
+ return isum;
+ src += 2;
+ dst += 2;
+ len -= 2;
+ }
+ }
+
*errp = 0;
- return from64to32(csum_partial_copy_generic(src,dst,len,isum,NULL,errp));
+ return csum_partial_copy_generic(src,dst,len,isum,NULL,errp);
}
EXPORT_SYMBOL(csum_partial_copy_to_user);
@@ -85,11 +105,9 @@ EXPORT_SYMBOL(csum_partial_copy_to_user);
unsigned int
csum_partial_copy_nocheck(const char *src, char *dst, int len, unsigned int sum)
{
- return from64to32(csum_partial_copy_generic(src,dst,len,sum,NULL,NULL));
+ return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL);
}
-//EXPORT_SYMBOL(csum_partial_copy_nocheck);
-
unsigned short csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr,
__u32 len, unsigned short proto, unsigned int sum)
{
@@ -103,7 +121,7 @@ unsigned short csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr,
" adcq $0,%[sum]\n"
: [sum] "=r" (sum64)
: "[sum]" (rest),[saddr] "r" (saddr), [daddr] "r" (daddr));
- return csum_fold(from64to32(sum64));
+ return csum_fold(add32_with_carry(sum64 & 0xffffffff, sum64>>32));
}
EXPORT_SYMBOL(csum_ipv6_magic);
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S
index 72d13ec58732..0204140937f2 100644
--- a/arch/x86_64/lib/memcpy.S
+++ b/arch/x86_64/lib/memcpy.S
@@ -12,103 +12,78 @@
* rax original destination
*/
- // #define FIX_ALIGNMENT
.globl __memcpy
.globl memcpy
- .p2align
+ .p2align 4
__memcpy:
memcpy:
pushq %rbx
movq %rdi,%rax
-#ifdef FIX_ALIGNMENT
- movl %edi,%ecx
- andl $7,%ecx
- jnz bad_alignment
-after_bad_alignment:
-#endif
-
- movq %rdx,%rcx
- movl $64,%ebx
- shrq $6,%rcx
- jz handle_tail
+ movl %edx,%ecx
+ shrl $6,%ecx
+ jz .Lhandle_tail
+
+ .p2align 4
+.Lloop_64:
+ decl %ecx
-loop_64:
movq (%rsi),%r11
movq 8(%rsi),%r8
- movq 2*8(%rsi),%r9
- movq 3*8(%rsi),%r10
+
movq %r11,(%rdi)
movq %r8,1*8(%rdi)
+
+ movq 2*8(%rsi),%r9
+ movq 3*8(%rsi),%r10
+
movq %r9,2*8(%rdi)
movq %r10,3*8(%rdi)
movq 4*8(%rsi),%r11
movq 5*8(%rsi),%r8
- movq 6*8(%rsi),%r9
- movq 7*8(%rsi),%r10
+
movq %r11,4*8(%rdi)
movq %r8,5*8(%rdi)
+
+ movq 6*8(%rsi),%r9
+ movq 7*8(%rsi),%r10
+
movq %r9,6*8(%rdi)
movq %r10,7*8(%rdi)
- addq %rbx,%rsi
- addq %rbx,%rdi
- decl %ecx
- jnz loop_64
+ leaq 64(%rsi),%rsi
+ leaq 64(%rdi),%rdi
+ jnz .Lloop_64
-handle_tail:
+.Lhandle_tail:
movl %edx,%ecx
andl $63,%ecx
shrl $3,%ecx
- jz handle_7
- movl $8,%ebx
-loop_8:
+ jz .Lhandle_7
+ .p2align 4
+.Lloop_8:
+ decl %ecx
movq (%rsi),%r8
movq %r8,(%rdi)
- addq %rbx,%rdi
- addq %rbx,%rsi
- decl %ecx
- jnz loop_8
+ leaq 8(%rdi),%rdi
+ leaq 8(%rsi),%rsi
+ jnz .Lloop_8
-handle_7:
+.Lhandle_7:
movl %edx,%ecx
andl $7,%ecx
- jz ende
-loop_1:
+ jz .Lende
+ .p2align 4
+.Lloop_1:
movb (%rsi),%r8b
movb %r8b,(%rdi)
incq %rdi
incq %rsi
decl %ecx
- jnz loop_1
+ jnz .Lloop_1
-ende:
- sfence
+.Lende:
popq %rbx
ret
-
-#ifdef FIX_ALIGNMENT
- /* align destination */
- /* This is simpleminded. For bigger blocks it may make sense to align
- src and dst to their aligned subset and handle the rest separately */
-bad_alignment:
- movl $8,%r9d
- subl %ecx,%r9d
- movl %r9d,%ecx
- subq %r9,%rdx
- js small_alignment
- jz small_alignment
-align_1:
- movb (%rsi),%r8b
- movb %r8b,(%rdi)
- incq %rdi
- incq %rsi
- decl %ecx
- jnz align_1
- jmp after_bad_alignment
-small_alignment:
- addq %r9,%rdx
- jmp handle_7
-#endif
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S
index 64b53971fc1c..4877825dd065 100644
--- a/arch/x86_64/lib/memset.S
+++ b/arch/x86_64/lib/memset.S
@@ -11,7 +11,7 @@
*/
.globl __memset
.globl memset
- .p2align
+ .p2align 4
memset:
__memset:
movq %rdi,%r10
@@ -25,15 +25,16 @@ __memset:
/* align dst */
movl %edi,%r9d
andl $7,%r9d
- jnz bad_alignment
-after_bad_alignment:
+ jnz .Lbad_alignment
+.Lafter_bad_alignment:
- movq %r11,%rcx
- movl $64,%r8d
- shrq $6,%rcx
- jz handle_tail
+ movl %r11d,%ecx
+ shrl $6,%ecx
+ jz .Lhandle_tail
-loop_64:
+ .p2align 4
+.Lloop_64:
+ decl %ecx
movq %rax,(%rdi)
movq %rax,8(%rdi)
movq %rax,16(%rdi)
@@ -42,43 +43,45 @@ loop_64:
movq %rax,40(%rdi)
movq %rax,48(%rdi)
movq %rax,56(%rdi)
- addq %r8,%rdi
- decl %ecx
- jnz loop_64
+ leaq 64(%rdi),%rdi
+ jnz .Lloop_64
/* Handle tail in loops. The loops should be faster than hard
to predict jump tables. */
-handle_tail:
+ .p2align 4
+.Lhandle_tail:
movl %r11d,%ecx
andl $63&(~7),%ecx
- jz handle_7
+ jz .Lhandle_7
shrl $3,%ecx
-loop_8:
- movq %rax,(%rdi)
- addq $8,%rdi
+ .p2align 4
+.Lloop_8:
decl %ecx
- jnz loop_8
+ movq %rax,(%rdi)
+ leaq 8(%rdi),%rdi
+ jnz .Lloop_8
-handle_7:
+.Lhandle_7:
movl %r11d,%ecx
andl $7,%ecx
- jz ende
-loop_1:
- movb %al,(%rdi)
- addq $1,%rdi
+ jz .Lende
+ .p2align 4
+.Lloop_1:
decl %ecx
- jnz loop_1
+ movb %al,(%rdi)
+ leaq 1(%rdi),%rdi
+ jnz .Lloop_1
-ende:
+.Lende:
movq %r10,%rax
ret
-bad_alignment:
+.Lbad_alignment:
cmpq $7,%r11
- jbe handle_7
+ jbe .Lhandle_7
movq %rax,(%rdi) /* unaligned store */
movq $8,%r8
subq %r9,%r8
addq %r8,%rdi
subq %r8,%r11
- jmp after_bad_alignment
+ jmp .Lafter_bad_alignment
diff --git a/arch/x86_64/vmlinux.lds.S b/arch/x86_64/vmlinux.lds.S
index 36deddcc9b47..61e402904eec 100644
--- a/arch/x86_64/vmlinux.lds.S
+++ b/arch/x86_64/vmlinux.lds.S
@@ -50,10 +50,10 @@ SECTIONS
.xtime_lock : AT ((LOADADDR(.vsyscall_0) + SIZEOF(.vsyscall_0) + 63) & ~(63)) { *(.xtime_lock) }
xtime_lock = LOADADDR(.xtime_lock);
. = ALIGN(16);
- .hpet : AT ((LOADADDR(.xtime_lock) + SIZEOF(.xtime_lock) + 15) & ~(15)) { *(.hpet) }
- hpet = LOADADDR(.hpet);
+ .vxtime : AT ((LOADADDR(.xtime_lock) + SIZEOF(.xtime_lock) + 15) & ~(15)) { *(.vxtime) }
+ vxtime = LOADADDR(.vxtime);
. = ALIGN(16);
- .wall_jiffies : AT ((LOADADDR(.hpet) + SIZEOF(.hpet) + 15) & ~(15)) { *(.wall_jiffies) }
+ .wall_jiffies : AT ((LOADADDR(.vxtime) + SIZEOF(.vxtime) + 15) & ~(15)) { *(.wall_jiffies) }
wall_jiffies = LOADADDR(.wall_jiffies);
. = ALIGN(16);
.sys_tz : AT ((LOADADDR(.wall_jiffies) + SIZEOF(.wall_jiffies) + 15) & ~(15)) { *(.sys_tz) }
diff --git a/include/asm-x86_64/checksum.h b/include/asm-x86_64/checksum.h
index 1b84eeab1cc7..35d3e37d9378 100644
--- a/include/asm-x86_64/checksum.h
+++ b/include/asm-x86_64/checksum.h
@@ -125,7 +125,7 @@ csum_tcpudp_magic(unsigned long saddr, unsigned long daddr,
* Before filling it in it needs to be csum_fold()'ed.
* buff should be aligned to a 64bit boundary if possible.
*/
-extern unsigned int csum_partial(const unsigned char *buff, int len, unsigned int sum);
+extern unsigned int csum_partial(const unsigned char *buff, unsigned len, unsigned int sum);
#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1
#define HAVE_CSUM_COPY_USER 1
@@ -179,4 +179,14 @@ extern unsigned short
csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr,
__u32 len, unsigned short proto, unsigned int sum);
+static inline unsigned add32_with_carry(unsigned a, unsigned b)
+{
+ asm("addl %2,%0\n\t"
+ "adcl $0,%0"
+ : "=r" (a)
+ : "0" (a), "r" (b));
+ return a;
+}
+
#endif
+
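add32_with_carry() is the folding primitive the checksum code now relies on. A portable equivalent of the addl/adcl pair (plain C, not from the patch), shown together with the from32to16() step from csum-partial.c:

    #include <stdio.h>

    /* Fold two 32-bit halves with end-around carry (portable addl/adcl equivalent). */
    static unsigned add32_with_carry(unsigned a, unsigned b)
    {
            unsigned long long s = (unsigned long long)a + b;
            return (unsigned)(s + (s >> 32));
    }

    /* Fold a 32-bit value to 16 bits, again with end-around carry. */
    static unsigned short from32to16(unsigned a)
    {
            unsigned b = (a & 0xffff) + (a >> 16);
            return (unsigned short)(b + (b >> 16));
    }

    int main(void)
    {
            unsigned long long result = 0x123456789abcdef0ULL;  /* example 64-bit sum */
            unsigned folded = add32_with_carry(result >> 32, (unsigned)result);

            printf("32-bit: %#x  16-bit: %#x\n", folded, from32to16(folded));
            return 0;
    }

This is the same fold csum_ipv6_magic() above now performs before csum_fold().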
diff --git a/include/asm-x86_64/fixmap.h b/include/asm-x86_64/fixmap.h
index 1133a85d40ed..a03fc84b2b62 100644
--- a/include/asm-x86_64/fixmap.h
+++ b/include/asm-x86_64/fixmap.h
@@ -35,6 +35,8 @@
enum fixed_addresses {
VSYSCALL_LAST_PAGE,
VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
+ VSYSCALL_HPET,
+ FIX_HPET_BASE,
#ifdef CONFIG_X86_LOCAL_APIC
FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
#endif
diff --git a/include/asm-x86_64/mc146818rtc.h b/include/asm-x86_64/mc146818rtc.h
index d6e3009430c1..9333b6bfacec 100644
--- a/include/asm-x86_64/mc146818rtc.h
+++ b/include/asm-x86_64/mc146818rtc.h
@@ -24,6 +24,11 @@ outb_p((addr),RTC_PORT(0)); \
outb_p((val),RTC_PORT(1)); \
})
+#ifndef CONFIG_HPET_TIMER
#define RTC_IRQ 8
+#else
+/* Temporary workaround due to IRQ routing problem. */
+#define RTC_IRQ 0
+#endif
#endif /* _ASM_MC146818RTC_H */
diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h
index c1fbae233c35..243c83f7e222 100644
--- a/include/asm-x86_64/processor.h
+++ b/include/asm-x86_64/processor.h
@@ -313,7 +313,7 @@ extern inline void sync_core(void)
#define ARCH_HAS_PREFETCH
static inline void prefetch(void *x)
{
- asm volatile("2: prefetchnta %0\n1:\t"
+ asm volatile("2: prefetcht0 %0\n1:\t"
".section __ex_table,\"a\"\n\t"
" .align 8\n\t"
" .quad 2b,1b\n\t"
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h
index c5900a1356b3..78f6a491449d 100644
--- a/include/asm-x86_64/proto.h
+++ b/include/asm-x86_64/proto.h
@@ -54,6 +54,8 @@ extern void swap_low_mappings(void);
extern void oops_begin(void);
extern void die(const char *,struct pt_regs *,long);
extern void __die(const char * str, struct pt_regs * regs, long err);
+extern void __show_regs(struct pt_regs * regs);
+extern void show_regs(struct pt_regs * regs);
extern int map_syscall32(struct mm_struct *mm, unsigned long address);
extern char *syscall32_page;
diff --git a/include/asm-x86_64/timex.h b/include/asm-x86_64/timex.h
index 7e0a1309bfce..bc5620ace301 100644
--- a/include/asm-x86_64/timex.h
+++ b/include/asm-x86_64/timex.h
@@ -30,6 +30,34 @@ static inline cycles_t get_cycles (void)
extern unsigned int cpu_khz;
-extern struct hpet_data hpet;
+/*
+ * Documentation on HPET can be found at:
+ * http://www.intel.com/ial/home/sp/pcmmspec.htm
+ * ftp://download.intel.com/ial/home/sp/mmts098.pdf
+ */
+
+#define HPET_ID 0x000
+#define HPET_PERIOD 0x004
+#define HPET_CFG 0x010
+#define HPET_STATUS 0x020
+#define HPET_COUNTER 0x0f0
+#define HPET_T0_CFG 0x100
+#define HPET_T0_CMP 0x108
+#define HPET_T0_ROUTE 0x110
+
+#define HPET_ID_VENDOR 0xffff0000
+#define HPET_ID_LEGSUP 0x00008000
+#define HPET_ID_NUMBER 0x00000f00
+#define HPET_ID_REV 0x000000ff
+
+#define HPET_CFG_ENABLE 0x001
+#define HPET_CFG_LEGACY 0x002
+
+#define HPET_T0_ENABLE 0x004
+#define HPET_T0_PERIODIC 0x008
+#define HPET_T0_SETVAL 0x040
+#define HPET_T0_32BIT 0x100
+
+extern struct vxtime_data vxtime;
#endif
diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h
index 86947472e9fd..b78e931a39c8 100644
--- a/include/asm-x86_64/vsyscall.h
+++ b/include/asm-x86_64/vsyscall.h
@@ -15,7 +15,7 @@ enum vsyscall_num {
#ifdef __KERNEL__
-#define __section_hpet __attribute__ ((unused, __section__ (".hpet"), aligned(16)))
+#define __section_vxtime __attribute__ ((unused, __section__ (".vxtime"), aligned(16)))
#define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16)))
#define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16)))
#define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16)))
@@ -23,22 +23,24 @@ enum vsyscall_num {
#define __section_xtime __attribute__ ((unused, __section__ (".xtime"), aligned(16)))
#define __section_xtime_lock __attribute__ ((unused, __section__ (".xtime_lock"), aligned(L1_CACHE_BYTES)))
+#define VXTIME_TSC 1
+#define VXTIME_HPET 2
-struct hpet_data {
- long address; /* base address */
+struct vxtime_data {
+ long hpet_address; /* HPET base address */
unsigned long hz; /* HPET clocks / sec */
- int trigger; /* value at last interrupt */
int last;
- int offset;
unsigned long last_tsc;
- long ticks;
+ long quot;
+ long tsc_quot;
+ int mode;
};
#define hpet_readl(a) readl(fix_to_virt(FIX_HPET_BASE) + a)
#define hpet_writel(d,a) writel(d, fix_to_virt(FIX_HPET_BASE) + a)
/* vsyscall space (readonly) */
-extern struct hpet_data __hpet;
+extern struct vxtime_data __vxtime;
extern struct timespec __xtime;
extern volatile unsigned long __jiffies;
extern unsigned long __wall_jiffies;
@@ -46,7 +48,7 @@ extern struct timezone __sys_tz;
extern seqlock_t __xtime_lock;
/* kernel space (writeable) */
-extern struct hpet_data hpet;
+extern struct vxtime_data vxtime;
extern unsigned long wall_jiffies;
extern struct timezone sys_tz;
extern int sysctl_vsyscall;