mirror of
				git://git.openwrt.org/openwrt/openwrt.git
				synced 2025-10-26 03:24:26 -04:00 
			
		
		
		
	This reverts commit a03afef7f2e8ae363a97357ec75ffbfef372a9ea. Signed-off-by: Felix Fietkau <nbd@nbd.name>
		
			
				
	
	
		
			372 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
			
		
		
	
	
			372 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
| From: Felix Fietkau <nbd@nbd.name>
 | |
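| Subject: mips: replace -mlong-calls with -mno-long-calls to make function calls faster in kernel modules. To achieve this, try to allocate module memory from physically contiguous memory, falling back to vmalloc plus jump-table trampolines for calls that are out of range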
 | |
| 
 | |
| lede-commit: 3b3d64743ba2a874df9d70cd19e242205b0a788c
 | |
| Signed-off-by: Felix Fietkau <nbd@nbd.name>
 | |
| ---
 | |
|  arch/mips/Makefile             |   5 +
 | |
|  arch/mips/include/asm/module.h |   5 +
 | |
|  arch/mips/kernel/module.c      | 279 ++++++++++++++++++++++++++++++++++++++++-
 | |
|  3 files changed, 284 insertions(+), 5 deletions(-)
 | |
| 
 | |
| --- a/arch/mips/Makefile
 | |
| +++ b/arch/mips/Makefile
 | |
| @@ -93,8 +93,18 @@ all-$(CONFIG_SYS_SUPPORTS_ZBOOT)+= vmlin
 | |
|  cflags-y			+= -G 0 -mno-abicalls -fno-pic -pipe -mno-branch-likely
 | |
|  cflags-y			+= -msoft-float
 | |
|  LDFLAGS_vmlinux			+= -G 0 -static -n -nostdlib
 | |
| +ifdef CONFIG_64BIT
 | |
|  KBUILD_AFLAGS_MODULE		+= -mlong-calls
 | |
|  KBUILD_CFLAGS_MODULE		+= -mlong-calls
 | |
| +else
 | |
| +  ifdef CONFIG_DYNAMIC_FTRACE
 | |
| +    KBUILD_AFLAGS_MODULE	+= -mlong-calls
 | |
| +    KBUILD_CFLAGS_MODULE	+= -mlong-calls
 | |
| +  else
 | |
| +    KBUILD_AFLAGS_MODULE	+= -mno-long-calls
 | |
| +    KBUILD_CFLAGS_MODULE	+= -mno-long-calls
 | |
| +  endif
 | |
| +endif
 | |
|  
 | |
|  ifeq ($(CONFIG_RELOCATABLE),y)
 | |
|  LDFLAGS_vmlinux			+= --emit-relocs
 | |
| --- a/arch/mips/include/asm/module.h
 | |
| +++ b/arch/mips/include/asm/module.h
 | |
| @@ -12,6 +12,11 @@ struct mod_arch_specific {
 | |
|  	const struct exception_table_entry *dbe_start;
 | |
|  	const struct exception_table_entry *dbe_end;
 | |
|  	struct mips_hi16 *r_mips_hi16_list;
 | |
| +
 | |
| +	void *phys_plt_tbl;
 | |
| +	void *virt_plt_tbl;
 | |
| +	unsigned int phys_plt_offset;
 | |
| +	unsigned int virt_plt_offset;
 | |
|  };
 | |
|  
 | |
|  typedef uint8_t Elf64_Byte;		/* Type for a 8-bit quantity.  */
 | |
| --- a/arch/mips/kernel/module.c
 | |
| +++ b/arch/mips/kernel/module.c
 | |
| @@ -44,14 +44,221 @@ struct mips_hi16 {
 | |
|  static LIST_HEAD(dbe_list);
 | |
|  static DEFINE_SPINLOCK(dbe_lock);
 | |
|  
 | |
| -#ifdef MODULE_START
 | |
| +/*
 | |
| + * Get the maximum potential trampoline size required for the init and
 | |
| + * non-init sections. Only used if we cannot find enough contiguous
 | |
| + * physically mapped memory to put the module into.
 | |
| + */
 | |
| +static unsigned int
 | |
| +get_plt_size(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
 | |
| +             const char *secstrings, unsigned int symindex, bool is_init)
 | |
| +{
 | |
| +	unsigned long ret = 0;
 | |
| +	unsigned int i, j;
 | |
| +	Elf_Sym *syms;
 | |
| +
 | |
| +	/* Everything marked ALLOC (this includes the exported symbols) */
 | |
| +	for (i = 1; i < hdr->e_shnum; ++i) {
 | |
| +		unsigned int info = sechdrs[i].sh_info;
 | |
| +
 | |
| +		if (sechdrs[i].sh_type != SHT_REL
 | |
| +		    && sechdrs[i].sh_type != SHT_RELA)
 | |
| +			continue;
 | |
| +
 | |
| +		/* Not a valid relocation section? */
 | |
| +		if (info >= hdr->e_shnum)
 | |
| +			continue;
 | |
| +
 | |
| +		/* Don't bother with non-allocated sections */
 | |
| +		if (!(sechdrs[info].sh_flags & SHF_ALLOC))
 | |
| +			continue;
 | |
| +
 | |
| +		/* If it's called *.init*, and we're not init, we're
 | |
| +                   not interested */
 | |
| +		if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0)
 | |
| +		    != is_init)
 | |
| +			continue;
 | |
| +
 | |
| +		syms = (Elf_Sym *) sechdrs[symindex].sh_addr;
 | |
| +		if (sechdrs[i].sh_type == SHT_REL) {
 | |
| +			Elf_Mips_Rel *rel = (void *) sechdrs[i].sh_addr;
 | |
| +			unsigned int size = sechdrs[i].sh_size / sizeof(*rel);
 | |
| +
 | |
| +			for (j = 0; j < size; ++j) {
 | |
| +				Elf_Sym *sym;
 | |
| +
 | |
| +				if (ELF_MIPS_R_TYPE(rel[j]) != R_MIPS_26)
 | |
| +					continue;
 | |
| +
 | |
| +				sym = syms + ELF_MIPS_R_SYM(rel[j]);
 | |
| +				if (!is_init && sym->st_shndx != SHN_UNDEF)
 | |
| +					continue;
 | |
| +
 | |
| +				ret += 4 * sizeof(int);
 | |
| +			}
 | |
| +		} else {
 | |
| +			Elf_Mips_Rela *rela = (void *) sechdrs[i].sh_addr;
 | |
| +			unsigned int size = sechdrs[i].sh_size / sizeof(*rela);
 | |
| +
 | |
| +			for (j = 0; j < size; ++j) {
 | |
| +				Elf_Sym *sym;
 | |
| +
 | |
| +				if (ELF_MIPS_R_TYPE(rela[j]) != R_MIPS_26)
 | |
| +					continue;
 | |
| +
 | |
| +				sym = syms + ELF_MIPS_R_SYM(rela[j]);
 | |
| +				if (!is_init && sym->st_shndx != SHN_UNDEF)
 | |
| +					continue;
 | |
| +
 | |
| +				ret += 4 * sizeof(int);
 | |
| +			}
 | |
| +		}
 | |
| +	}
 | |
| +
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +#ifndef MODULE_START
 | |
| +static void *alloc_phys(unsigned long size)
 | |
| +{
 | |
| +	unsigned order;
 | |
| +	struct page *page;
 | |
| +	struct page *p;
 | |
| +
 | |
| +	size = PAGE_ALIGN(size);
 | |
| +	order = get_order(size);
 | |
| +
 | |
| +	page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN |
 | |
| +			__GFP_THISNODE, order);
 | |
| +	if (!page)
 | |
| +		return NULL;
 | |
| +
 | |
| +	split_page(page, order);
 | |
| +
 | |
| +	/* mark all pages except for the last one */
 | |
| +	for (p = page; p + 1 < page + (size >> PAGE_SHIFT); ++p)
 | |
| +		set_bit(PG_owner_priv_1, &p->flags);
 | |
| +
 | |
| +	for (p = page + (size >> PAGE_SHIFT); p < page + (1 << order); ++p)
 | |
| +		__free_page(p);
 | |
| +
 | |
| +	return page_address(page);
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +static void free_phys(void *ptr)
 | |
| +{
 | |
| +	struct page *page;
 | |
| +	bool free;
 | |
| +
 | |
| +	page = virt_to_page(ptr);
 | |
| +	do {
 | |
| +		free = test_and_clear_bit(PG_owner_priv_1, &page->flags);
 | |
| +		__free_page(page);
 | |
| +		page++;
 | |
| +	} while (free);
 | |
| +}
 | |
| +
 | |
| +
 | |
|  void *module_alloc(unsigned long size)
 | |
|  {
 | |
| +#ifdef MODULE_START
 | |
|  	return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END,
 | |
|  				GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
 | |
|  				__builtin_return_address(0));
 | |
| +#else
 | |
| +	void *ptr;
 | |
| +
 | |
| +	if (size == 0)
 | |
| +		return NULL;
 | |
| +
 | |
| +	ptr = alloc_phys(size);
 | |
| +
 | |
| +	/* If we failed to allocate physically contiguous memory,
 | |
| +	 * fall back to regular vmalloc. The module loader code will
 | |
| +	 * create jump tables to handle long jumps */
 | |
| +	if (!ptr)
 | |
| +		return vmalloc(size);
 | |
| +
 | |
| +	return ptr;
 | |
| +#endif
 | |
|  }
 | |
| +
 | |
| +static inline bool is_phys_addr(void *ptr)
 | |
| +{
 | |
| +#ifdef CONFIG_64BIT
 | |
| +	return (KSEGX((unsigned long)ptr) == CKSEG0);
 | |
| +#else
 | |
| +	return (KSEGX(ptr) == KSEG0);
 | |
|  #endif
 | |
| +}
 | |
| +
 | |
| +/* Free memory returned from module_alloc */
 | |
| +void module_memfree(void *module_region)
 | |
| +{
 | |
| +	if (is_phys_addr(module_region))
 | |
| +		free_phys(module_region);
 | |
| +	else
 | |
| +		vfree(module_region);
 | |
| +}
 | |
| +
 | |
| +static void *__module_alloc(int size, bool phys)
 | |
| +{
 | |
| +	void *ptr;
 | |
| +
 | |
| +	if (phys)
 | |
| +		ptr = kmalloc(size, GFP_KERNEL);
 | |
| +	else
 | |
| +		ptr = vmalloc(size);
 | |
| +	return ptr;
 | |
| +}
 | |
| +
 | |
| +static void __module_free(void *ptr)
 | |
| +{
 | |
| +	if (is_phys_addr(ptr))
 | |
| +		kfree(ptr);
 | |
| +	else
 | |
| +		vfree(ptr);
 | |
| +}
 | |
| +
 | |
| +int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 | |
| +			      char *secstrings, struct module *mod)
 | |
| +{
 | |
| +	unsigned int symindex = 0;
 | |
| +	unsigned int core_size, init_size;
 | |
| +	int i;
 | |
| +
 | |
| +	mod->arch.phys_plt_offset = 0;
 | |
| +	mod->arch.virt_plt_offset = 0;
 | |
| +	mod->arch.phys_plt_tbl = NULL;
 | |
| +	mod->arch.virt_plt_tbl = NULL;
 | |
| +
 | |
| +	if (IS_ENABLED(CONFIG_64BIT))
 | |
| +		return 0;
 | |
| +
 | |
| +	for (i = 1; i < hdr->e_shnum; i++)
 | |
| +		if (sechdrs[i].sh_type == SHT_SYMTAB)
 | |
| +			symindex = i;
 | |
| +
 | |
| +	core_size = get_plt_size(hdr, sechdrs, secstrings, symindex, false);
 | |
| +	init_size = get_plt_size(hdr, sechdrs, secstrings, symindex, true);
 | |
| +
 | |
| +	if ((core_size + init_size) == 0)
 | |
| +		return 0;
 | |
| +
 | |
| +	mod->arch.phys_plt_tbl = __module_alloc(core_size + init_size, 1);
 | |
| +	if (!mod->arch.phys_plt_tbl)
 | |
| +		return -ENOMEM;
 | |
| +
 | |
| +	mod->arch.virt_plt_tbl = __module_alloc(core_size + init_size, 0);
 | |
| +	if (!mod->arch.virt_plt_tbl) {
 | |
| +		__module_free(mod->arch.phys_plt_tbl);
 | |
| +		mod->arch.phys_plt_tbl = NULL;
 | |
| +		return -ENOMEM;
 | |
| +	}
 | |
| +
 | |
| +	return 0;
 | |
| +}
 | |
|  
 | |
|  static int apply_r_mips_none(struct module *me, u32 *location,
 | |
|  			     u32 base, Elf_Addr v, bool rela)
 | |
| @@ -67,9 +274,40 @@ static int apply_r_mips_32(struct module
 | |
|  	return 0;
 | |
|  }
 | |
|  
 | |
| +static Elf_Addr add_plt_entry_to(unsigned *plt_offset,
 | |
| +				 void *start, Elf_Addr v)
 | |
| +{
 | |
| +	unsigned *tramp = start + *plt_offset;
 | |
| +	*plt_offset += 4 * sizeof(int);
 | |
| +
 | |
| +	/* adjust carry for addiu */
 | |
| +	if (v & 0x00008000)
 | |
| +		v += 0x10000;
 | |
| +
 | |
| +	tramp[0] = 0x3c190000 | (v >> 16);      /* lui t9, hi16 */
 | |
| +	tramp[1] = 0x27390000 | (v & 0xffff);   /* addiu t9, t9, lo16 */
 | |
| +	tramp[2] = 0x03200008;                  /* jr t9 */
 | |
| +	tramp[3] = 0x00000000;                  /* nop */
 | |
| +
 | |
| +	return (Elf_Addr) tramp;
 | |
| +}
 | |
| +
 | |
| +static Elf_Addr add_plt_entry(struct module *me, void *location, Elf_Addr v)
 | |
| +{
 | |
| +	if (is_phys_addr(location))
 | |
| +		return add_plt_entry_to(&me->arch.phys_plt_offset,
 | |
| +				me->arch.phys_plt_tbl, v);
 | |
| +	else
 | |
| +		return add_plt_entry_to(&me->arch.virt_plt_offset,
 | |
| +				me->arch.virt_plt_tbl, v);
 | |
| +
 | |
| +}
 | |
| +
 | |
|  static int apply_r_mips_26(struct module *me, u32 *location,
 | |
|  			   u32 base, Elf_Addr v, bool rela)
 | |
|  {
 | |
| +	u32 ofs = base & 0x03ffffff;
 | |
| +
 | |
|  	if (v % 4) {
 | |
|  		pr_err("module %s: dangerous R_MIPS_26 relocation\n",
 | |
|  		       me->name);
 | |
| @@ -77,13 +315,17 @@ static int apply_r_mips_26(struct module
 | |
|  	}
 | |
|  
 | |
|  	if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
 | |
| -		pr_err("module %s: relocation overflow\n",
 | |
| -		       me->name);
 | |
| -		return -ENOEXEC;
 | |
| +		v = add_plt_entry(me, location, v + (ofs << 2));
 | |
| +		if (!v) {
 | |
| +			pr_err("module %s: relocation overflow\n",
 | |
| +			       me->name);
 | |
| +			return -ENOEXEC;
 | |
| +		}
 | |
| +		ofs = 0;
 | |
|  	}
 | |
|  
 | |
|  	*location = (*location & ~0x03ffffff) |
 | |
| -		    ((base + (v >> 2)) & 0x03ffffff);
 | |
| +		    ((ofs + (v >> 2)) & 0x03ffffff);
 | |
|  
 | |
|  	return 0;
 | |
|  }
 | |
| @@ -459,9 +701,36 @@ int module_finalize(const Elf_Ehdr *hdr,
 | |
|  		list_add(&me->arch.dbe_list, &dbe_list);
 | |
|  		spin_unlock_irq(&dbe_lock);
 | |
|  	}
 | |
| +
 | |
| +	/* Get rid of the fixup trampoline if we're running the module
 | |
| +	 * from physically mapped address space */
 | |
| +	if (me->arch.phys_plt_offset == 0) {
 | |
| +		__module_free(me->arch.phys_plt_tbl);
 | |
| +		me->arch.phys_plt_tbl = NULL;
 | |
| +	}
 | |
| +	if (me->arch.virt_plt_offset == 0) {
 | |
| +		__module_free(me->arch.virt_plt_tbl);
 | |
| +		me->arch.virt_plt_tbl = NULL;
 | |
| +	}
 | |
| +
 | |
|  	return 0;
 | |
|  }
 | |
|  
 | |
| +void module_arch_freeing_init(struct module *mod)
 | |
| +{
 | |
| +	if (mod->state == MODULE_STATE_LIVE)
 | |
| +		return;
 | |
| +
 | |
| +	if (mod->arch.phys_plt_tbl) {
 | |
| +		__module_free(mod->arch.phys_plt_tbl);
 | |
| +		mod->arch.phys_plt_tbl = NULL;
 | |
| +	}
 | |
| +	if (mod->arch.virt_plt_tbl) {
 | |
| +		__module_free(mod->arch.virt_plt_tbl);
 | |
| +		mod->arch.virt_plt_tbl = NULL;
 | |
| +	}
 | |
| +}
 | |
| +
 | |
|  void module_arch_cleanup(struct module *mod)
 | |
|  {
 | |
|  	spin_lock_irq(&dbe_lock);
 |