From: James Hogan <james.hogan@imgtec.com>
Date: Mon, 25 Jan 2016 21:30:00 +0000
Subject: [PATCH] MIPS: c-r4k: Use IPI calls for CM indexed cache ops

The Coherence Manager (CM) can propagate address-based ("hit") cache
operations to other cores in the coherent system, relieving software of
the need to use IPI calls; however, indexed cache operations are not
propagated, since an index selects a line in one particular core's
cache and is meaningless to the separate caches of other cores.

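As background (illustrative only, not part of this change), the two
kinds of operation map onto different variants of the MIPS "cache"
instruction, e.g. via the cache_op() helper from <asm/r4kcache.h>:

	/* Hit op: takes a virtual address; the CM can globalize it */
	cache_op(Hit_Writeback_Inv_D, vaddr);
	/* Index op: selects a set/way in this core's dcache only */
	cache_op(Index_Writeback_Inv_D, offset);
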
r4k_on_each_cpu() previously had a special case for CONFIG_MIPS_MT_SMP,
intended to avoid the IPIs when the only other CPUs in the system were
other VPEs in the same core, and hence sharing the same caches. This was
changed by commit cccf34e9411c ("MIPS: c-r4k: Fix cache flushing for MT
cores") to apparently handle multi-core multi-VPE systems, but it
focussed mainly on hit cache ops, so the IPI calls were still disabled
entirely for CM systems.

This doesn't normally cause problems, but tests can be written to hit
these corner cases by using multiple threads, or by changing task
affinities to force the process to migrate cores. For example, the
failure of mprotect RW->RX to globally sync icaches (via
flush_cache_range) can be detected by modifying and mprotecting a code
page on one core, then migrating to a different core to execute from it.

Most of the functions called by r4k_on_each_cpu() perform cache
operations exclusively with a single addressing type (virtual address
vs indexed), so add a type argument and modify the callers to pass in
R4K_USER (user virtual addressing), R4K_KERN (global kernel virtual
addressing) or R4K_INDEX (index into cache).

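For instance, a flush of a kernel address that the CM can globalize now
reads like this (taken from the c-r4k.c hunks below):

	r4k_on_each_cpu(R4K_KERN, local_r4k_flush_data_cache_page,
			(void *) addr);
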
local_r4k_flush_icache_range() is split up so that it can be called
from the rest of the kernel, or from r4k_flush_icache_range(), which
chooses either indexed or hit cache operations based on the size of the
range and the cache sizes.

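As a worked illustration of that heuristic (with assumed cache sizes,
not taken from real hardware): when hit ops globalize but indexed ops
don't, flushing a 24 KiB range on a core with a 32 KiB icache, a 32 KiB
dcache and !cpu_has_ic_fills_f_dc weighs 2 * 24 KiB = 48 KiB of hit ops
against a 32 KiB + 32 KiB = 64 KiB budget; since 48 KiB <= 64 KiB, the
globalized hit ops are used and the indexed ops (and hence the IPI) are
skipped.
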
local_r4k_flush_kernel_vmap_range() is split into two functions, each of
which uses cache operations with a single addressing type, with
r4k_flush_kernel_vmap_range() making the decision whether to use indexed
cache ops or not.

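That is, the SMP wrapper now dispatches as follows (quoting the final
hunk below):

	if (cpu_has_safe_index_cacheops && size >= dcache_size)
		r4k_on_each_cpu(R4K_INDEX,
				local_r4k_flush_kernel_vmap_range_index, NULL);
	else
		r4k_on_each_cpu(R4K_KERN, local_r4k_flush_kernel_vmap_range,
				&args);
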
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com>
Cc: linux-mips@linux-mips.org
---

--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -40,6 +40,50 @@
 #include <asm/mips-cm.h>
 
 /*
+ * Bits describing what cache ops an IPI callback function may perform.
+ *
+ * R4K_USER  -	Virtual user address based cache operations.
+ *		Ineffective on other CPUs.
+ * R4K_KERN  -	Virtual kernel address based cache operations (including kmap).
+ *		Effective on other CPUs.
+ * R4K_INDEX -	Index based cache operations.
+ *		Effective on other CPUs.
+ */
+
+#define R4K_USER	BIT(0)
+#define R4K_KERN	BIT(1)
+#define R4K_INDEX	BIT(2)
+
+#ifdef CONFIG_SMP
+/* The Coherence Manager propagates address-based cache ops to other cores */
+#define r4k_hit_globalized	mips_cm_present()
+#define r4k_index_globalized	0
+#else
+/* If there's only 1 CPU, then all cache ops are globalized to that 1 CPU */
+#define r4k_hit_globalized	1
+#define r4k_index_globalized	1
+#endif
+
+/**
+ * r4k_op_needs_ipi() - Decide if a cache op needs to be done on every core.
+ * @type:	Type of cache operations (R4K_USER, R4K_KERN or R4K_INDEX).
+ *
+ * Returns:	1 if the cache operation @type should be done on every core in
+ *		the system.
+ *		0 if the cache operation @type is globalized and only needs to
+ *		be performed on a single CPU.
+ */
+static inline bool r4k_op_needs_ipi(unsigned int type)
+{
+	/*
+	 * If hardware doesn't globalize the required cache ops we must use IPIs
+	 * to do so.
+	 */
+	return (type & R4K_KERN  && !r4k_hit_globalized) ||
+	       (type & R4K_INDEX && !r4k_index_globalized);
+}
+
+/*
  * Special Variant of smp_call_function for use by cache functions:
  *
  *  o No return value
@@ -48,19 +92,11 @@
  *    primary cache.
  *  o doesn't disable interrupts on the local CPU
  */
-static inline void r4k_on_each_cpu(void (*func) (void *info), void *info)
+static inline void r4k_on_each_cpu(unsigned int type,
+				   void (*func) (void *info), void *info)
 {
 	preempt_disable();
-
-	/*
-	 * The Coherent Manager propagates address-based cache ops to other
-	 * cores but not index-based ops. However, r4k_on_each_cpu is used
-	 * in both cases so there is no easy way to tell what kind of op is
-	 * executed to the other cores. The best we can probably do is
-	 * to restrict that call when a CM is not present because both
-	 * CM-based SMP protocols (CMP & CPS) restrict index-based cache ops.
-	 */
-	if (!mips_cm_present())
+	if (r4k_op_needs_ipi(type))
 		smp_call_function_many(&cpu_foreign_map, func, info, 1);
 	func(info);
 	preempt_enable();
@@ -456,7 +492,7 @@ static inline void local_r4k___flush_cac
 
 static void r4k___flush_cache_all(void)
 {
-	r4k_on_each_cpu(local_r4k___flush_cache_all, NULL);
+	r4k_on_each_cpu(R4K_INDEX, local_r4k___flush_cache_all, NULL);
 }
 
 static inline int has_valid_asid(const struct mm_struct *mm)
@@ -503,7 +539,7 @@ static void r4k_flush_cache_range(struct
 	int exec = vma->vm_flags & VM_EXEC;
 
 	if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc))
-		r4k_on_each_cpu(local_r4k_flush_cache_range, vma);
+		r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_range, vma);
 }
 
 static inline void local_r4k_flush_cache_mm(void * args)
@@ -535,7 +571,7 @@ static void r4k_flush_cache_mm(struct mm
 	if (!cpu_has_dc_aliases)
 		return;
 
-	r4k_on_each_cpu(local_r4k_flush_cache_mm, mm);
+	r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_mm, mm);
 }
 
 struct flush_cache_page_args {
@@ -629,7 +665,7 @@ static void r4k_flush_cache_page(struct
 	args.addr = addr;
 	args.pfn = pfn;
 
-	r4k_on_each_cpu(local_r4k_flush_cache_page, &args);
+	r4k_on_each_cpu(R4K_KERN, local_r4k_flush_cache_page, &args);
 }
 
 static inline void local_r4k_flush_data_cache_page(void * addr)
@@ -642,18 +678,23 @@ static void r4k_flush_data_cache_page(un
 	if (in_atomic())
 		local_r4k_flush_data_cache_page((void *)addr);
 	else
-		r4k_on_each_cpu(local_r4k_flush_data_cache_page, (void *) addr);
+		r4k_on_each_cpu(R4K_KERN, local_r4k_flush_data_cache_page,
+				(void *) addr);
 }
 
 struct flush_icache_range_args {
 	unsigned long start;
 	unsigned long end;
+	unsigned int type;
 };
 
-static inline void local_r4k_flush_icache_range(unsigned long start, unsigned long end)
+static inline void __local_r4k_flush_icache_range(unsigned long start,
+						  unsigned long end,
+						  unsigned int type)
 {
 	if (!cpu_has_ic_fills_f_dc) {
-		if (end - start >= dcache_size) {
+		if (type == R4K_INDEX ||
+		    (type & R4K_INDEX && end - start >= dcache_size)) {
 			r4k_blast_dcache();
 		} else {
 			R4600_HIT_CACHEOP_WAR_IMPL;
@@ -661,7 +702,8 @@ static inline void local_r4k_flush_icach
 		}
 	}
 
-	if (end - start > icache_size)
+	if (type == R4K_INDEX ||
+	    (type & R4K_INDEX && end - start > icache_size))
 		r4k_blast_icache();
 	else {
 		switch (boot_cpu_type()) {
@@ -687,23 +729,59 @@ static inline void local_r4k_flush_icach
 #endif
 }
 
+static inline void local_r4k_flush_icache_range(unsigned long start,
+						unsigned long end)
+{
+	__local_r4k_flush_icache_range(start, end, R4K_KERN | R4K_INDEX);
+}
+
 static inline void local_r4k_flush_icache_range_ipi(void *args)
 {
 	struct flush_icache_range_args *fir_args = args;
 	unsigned long start = fir_args->start;
 	unsigned long end = fir_args->end;
+	unsigned int type = fir_args->type;
 
-	local_r4k_flush_icache_range(start, end);
+	__local_r4k_flush_icache_range(start, end, type);
 }
 
 static void r4k_flush_icache_range(unsigned long start, unsigned long end)
 {
 	struct flush_icache_range_args args;
+	unsigned long size, cache_size;
 
 	args.start = start;
 	args.end = end;
+	args.type = R4K_KERN | R4K_INDEX;
 
-	r4k_on_each_cpu(local_r4k_flush_icache_range_ipi, &args);
+	if (in_atomic()) {
+		/*
+		 * We can't do blocking IPI calls from atomic context, so fall
+		 * back to pure address-based cache ops if they globalize.
+		 */
+		if (!r4k_index_globalized && r4k_hit_globalized) {
+			args.type &= ~R4K_INDEX;
+		} else {
+			/* Just do it locally instead. */
+			local_r4k_flush_icache_range(start, end);
+			instruction_hazard();
+			return;
+		}
+	} else if (!r4k_index_globalized && r4k_hit_globalized) {
+		/*
+		 * If address-based cache ops are globalized, then we may be
+		 * able to avoid the IPI for small flushes.
+		 */
+		size = end - start;
+		cache_size = icache_size;
+		if (!cpu_has_ic_fills_f_dc) {
+			size *= 2;
+			cache_size += dcache_size;
+		}
+		if (size <= cache_size)
+			args.type &= ~R4K_INDEX;
+	}
+	r4k_on_each_cpu(args.type, local_r4k_flush_icache_range_ipi, &args);
 	instruction_hazard();
 }
 
@@ -823,7 +901,12 @@ static void local_r4k_flush_cache_sigtra
 
 static void r4k_flush_cache_sigtramp(unsigned long addr)
 {
-	r4k_on_each_cpu(local_r4k_flush_cache_sigtramp, (void *) addr);
+	/*
+	 * FIXME this is a bit broken when !r4k_hit_globalized, since the user
+	 * code probably won't be mapped on other CPUs, so if the process is
+	 * migrated, it could end up hitting stale icache lines.
+	 */
+	r4k_on_each_cpu(R4K_USER, local_r4k_flush_cache_sigtramp, (void *)addr);
 }
 
 static void r4k_flush_icache_all(void)
@@ -837,6 +920,15 @@ struct flush_kernel_vmap_range_args {
 	int		size;
 };
 
+static inline void local_r4k_flush_kernel_vmap_range_index(void *args)
+{
+	/*
+	 * Aliases only affect the primary caches so don't bother with
+	 * S-caches or T-caches.
+	 */
+	r4k_blast_dcache();
+}
+
 static inline void local_r4k_flush_kernel_vmap_range(void *args)
 {
 	struct flush_kernel_vmap_range_args *vmra = args;
@@ -847,12 +939,8 @@ static inline void local_r4k_flush_kerne
 	 * Aliases only affect the primary caches so don't bother with
 	 * S-caches or T-caches.
 	 */
-	if (cpu_has_safe_index_cacheops && size >= dcache_size)
-		r4k_blast_dcache();
-	else {
-		R4600_HIT_CACHEOP_WAR_IMPL;
-		blast_dcache_range(vaddr, vaddr + size);
-	}
+	R4600_HIT_CACHEOP_WAR_IMPL;
+	blast_dcache_range(vaddr, vaddr + size);
 }
 
 static void r4k_flush_kernel_vmap_range(unsigned long vaddr, int size)
@@ -862,7 +950,12 @@ static void r4k_flush_kernel_vmap_range(
 	args.vaddr = (unsigned long) vaddr;
 	args.size = size;
 
-	r4k_on_each_cpu(local_r4k_flush_kernel_vmap_range, &args);
+	if (cpu_has_safe_index_cacheops && size >= dcache_size)
+		r4k_on_each_cpu(R4K_INDEX,
+				local_r4k_flush_kernel_vmap_range_index, NULL);
+	else
+		r4k_on_each_cpu(R4K_KERN, local_r4k_flush_kernel_vmap_range,
+				&args);
 }
 
 static inline void rm7k_erratum31(void)