mirror of
				https://github.com/laosb/ReadabilityFramework.git
				synced 2025-11-04 01:41:36 +00:00 
			
		
		
		
	Basics.
This commit is contained in:
		
							parent
							
								
									2640bebf90
								
							
						
					
					
						commit
						d3dcfc84de
					
				
					 13 changed files with 352 additions and 41 deletions
				
			
		
							
								
								
									
										74
									
								
								Sources/Readability/CommonMetrics.swift
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								Sources/Readability/CommonMetrics.swift
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,74 @@
 | 
			
		|||
//
 | 
			
		||||
//  CommonMetrics.swift
 | 
			
		||||
//  Readability
 | 
			
		||||
//
 | 
			
		||||
//  Created by Shibo Lyu on 2022/5/19.
 | 
			
		||||
//
 | 
			
		||||
 | 
			
		||||
import Foundation
 | 
			
		||||
import SyllableCounter
 | 
			
		||||
 | 
			
		||||
/// Text statistics that `RACommonMetricsCalculator` can be asked to compute.
enum RACommonMetric {
  // Raw token totals.
  case sentenceCount, wordCount, syllableCount

  // Ratios derived from the totals above.
  case avgWordsPerSentence, avgSyllablesPerWord
}
 | 
			
		||||
 | 
			
		||||
/// Computes the requested `RACommonMetric` values for a piece of text.
///
/// Only the tokenization passes needed by the requested metrics are run.
struct RACommonMetricsCalculator {
  /// Maps each requested metric to its computed value.
  typealias Results = [RACommonMetric: Double]

  /// The metrics that `calculate(on:)` reports.
  private var metrics: Set<RACommonMetric>

  /// - Parameter metrics: The metrics to compute; only these appear in the results.
  init(metrics: Set<RACommonMetric>) {
    self.metrics = metrics
  }

  /// Returns the syllable count that `SyllableCounter` reports for `word`.
  private func countSyllables(word: String) -> Int {
    SyllableCounter.shared.count(word: word)
  }

  /// Calculates every requested metric for `text`.
  ///
  /// The averages are plain floating-point divisions, so they are not finite
  /// (NaN or infinity) when the denominator count is zero.
  ///
  /// - Parameter text: The text to analyze.
  /// - Returns: One entry per requested metric.
  func calculate(on text: String) -> Results {
    let needsSentences = metrics.contains(.sentenceCount)
      || metrics.contains(.avgWordsPerSentence)
    let needsSyllables = metrics.contains(.syllableCount)
      || metrics.contains(.avgSyllablesPerWord)
    // Syllables are counted word by word, so any syllable metric needs the
    // word pass too. Without this, requesting only `.syllableCount` skipped
    // the pass and always reported 0.
    let needsWords = needsSyllables
      || metrics.contains(.wordCount)
      || metrics.contains(.avgWordsPerSentence)

    var sentenceCount = 0
    var wordCount = 0
    var syllableCount = 0

    if needsSentences {
      sentenceCount = RATokenizer(text, unit: .sentence).enumerateTokens { _ in }
    }

    if needsWords {
      let tokenizer = RATokenizer(text, unit: .word)
      if needsSyllables {
        wordCount = tokenizer.enumerateTokens { word in
          syllableCount += countSyllables(word: word)
        }
      } else {
        wordCount = tokenizer.enumerateTokens { _ in }
      }
    }

    // `reduce(into:)` fills one dictionary in place instead of copying it per metric.
    return metrics.reduce(into: Results()) { results, metric in
      let value: Double
      switch metric {
      case .syllableCount: value = Double(syllableCount)
      case .sentenceCount: value = Double(sentenceCount)
      case .wordCount: value = Double(wordCount)
      case .avgWordsPerSentence: value = Double(wordCount) / Double(sentenceCount)
      case .avgSyllablesPerWord: value = Double(syllableCount) / Double(wordCount)
      }
      results[metric] = value
    }
  }
}
 | 
			
		||||
							
								
								
									
										24
									
								
								Sources/Readability/Scorer.swift
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										24
									
								
								Sources/Readability/Scorer.swift
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,24 @@
 | 
			
		|||
//
 | 
			
		||||
//  Scorer.swift
 | 
			
		||||
//  Readability
 | 
			
		||||
//
 | 
			
		||||
//  Created by Shibo Lyu on 2022/5/19.
 | 
			
		||||
//
 | 
			
		||||
 | 
			
		||||
import Foundation
 | 
			
		||||
import SwiftUI
 | 
			
		||||
 | 
			
		||||
/// Descriptive information about a readability scorer.
struct RAScorerMeta {
  /// Name of the scoring formula.
  let name: String

  /// Who the formula is credited to.
  let creator: String

  /// Reference for the formula. Should be in APA format.
  let citation: String
}
 | 
			
		||||
 | 
			
		||||
/// A readability formula that turns a piece of text into a single score.
protocol RAScorer {
  /// Descriptive information (name, creator, citation) for this scorer.
  static var meta: RAScorerMeta { get }

  /// The common metrics this scorer wants pre-computed, or `nil` if it needs none.
  static var requiresCommonMetrics: Set<RACommonMetric>? { get }

  /// Scorers must be constructible without arguments.
  init()

  /// Computes the score for `text`.
  ///
  /// - Parameters:
  ///   - text: The text being scored.
  ///   - metrics: Pre-computed common metrics, or `nil` when none were calculated.
  /// - Returns: The readability score.
  func score(_ text: String, metrics: RACommonMetricsCalculator.Results?) -> Double
}
 | 
			
		||||
							
								
								
									
										28
									
								
								Sources/Readability/Scorers/FleschKincaidGrade.swift
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								Sources/Readability/Scorers/FleschKincaidGrade.swift
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,28 @@
 | 
			
		|||
//
 | 
			
		||||
//  FleschKincaidGrade.swift
 | 
			
		||||
//  Readability
 | 
			
		||||
//
 | 
			
		||||
//  Created by Shibo Lyu on 2022/5/20.
 | 
			
		||||
//
 | 
			
		||||
 | 
			
		||||
import Foundation
 | 
			
		||||
 | 
			
		||||
/// Scores text with the Flesch-Kincaid Grade formula.
struct RAFleschKincaidGradeScorer: RAScorer {
  /// The two averages that the formula in `score(_:metrics:)` consumes.
  static let requiresCommonMetrics: Set<RACommonMetric>? = [
    .avgWordsPerSentence,
    .avgSyllablesPerWord
  ]

  static let meta = RAScorerMeta(
    name: "Flesch-Kincaid Grade",
    creator: "John P. Kincaid",
    citation: "Kincaid, J. P., Fishburne Jr, R. P., Rogers, R. L., & Chissom, B. S. (1975). Derivation of new readability formulas (automated readability index, fog count and flesch reading ease formula) for navy enlisted personnel. Naval Technical Training Command Millington TN Research Branch."
  )

  /// Computes `0.39 * (words per sentence) + 11.8 * (syllables per word) - 15.59`.
  ///
  /// - Parameters:
  ///   - text: The text being scored; unused because the formula relies on `metrics` only.
  ///   - metrics: Must contain `.avgWordsPerSentence` and `.avgSyllablesPerWord`.
  /// - Returns: The Flesch-Kincaid grade.
  /// - Precondition: Both required metrics are present. A missing one is a
  ///   caller error and traps with a message naming what is required.
  func score(_ text: String, metrics: RACommonMetricsCalculator.Results?) -> Double {
    guard
      let wordsPerSentence = metrics?[.avgWordsPerSentence],
      let syllablesPerWord = metrics?[.avgSyllablesPerWord]
    else {
      preconditionFailure(
        "RAFleschKincaidGradeScorer requires the avgWordsPerSentence and avgSyllablesPerWord metrics"
      )
    }

    return (0.39 * wordsPerSentence) + (11.8 * syllablesPerWord) - 15.59
  }
}
 | 
			
		||||
							
								
								
									
										28
									
								
								Sources/Readability/Scorers/FleschReadingEase.swift
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								Sources/Readability/Scorers/FleschReadingEase.swift
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,28 @@
 | 
			
		|||
//
 | 
			
		||||
//  FleschReadingEase.swift
 | 
			
		||||
//  Readability
 | 
			
		||||
//
 | 
			
		||||
//  Created by Shibo Lyu on 2022/5/19.
 | 
			
		||||
//
 | 
			
		||||
 | 
			
		||||
import Foundation
 | 
			
		||||
 | 
			
		||||
/// Scores text with the Flesch Reading Ease formula.
struct RAFleschReadingEaseScorer: RAScorer {
  /// The two averages that the formula in `score(_:metrics:)` consumes.
  static let requiresCommonMetrics: Set<RACommonMetric>? = [
    .avgWordsPerSentence,
    .avgSyllablesPerWord
  ]

  static let meta = RAScorerMeta(
    name: "Flesch Reading Ease",
    creator: "Rudolf Flesch",
    citation: "Flesch, R. (1948). A new readability yardstick. Journal of applied psychology, 32(3), 221."
  )

  /// Computes `206.835 - 1.015 * (words per sentence) - 84.6 * (syllables per word)`.
  ///
  /// - Parameters:
  ///   - text: The text being scored; unused because the formula relies on `metrics` only.
  ///   - metrics: Must contain `.avgWordsPerSentence` and `.avgSyllablesPerWord`.
  /// - Returns: The Flesch Reading Ease score.
  /// - Precondition: Both required metrics are present. A missing one is a
  ///   caller error and traps with a message naming what is required.
  func score(_ text: String, metrics: RACommonMetricsCalculator.Results?) -> Double {
    guard
      let wordsPerSentence = metrics?[.avgWordsPerSentence],
      let syllablesPerWord = metrics?[.avgSyllablesPerWord]
    else {
      preconditionFailure(
        "RAFleschReadingEaseScorer requires the avgWordsPerSentence and avgSyllablesPerWord metrics"
      )
    }

    return 206.835 - (1.015 * wordsPerSentence) - (84.6 * syllablesPerWord)
  }
}
 | 
			
		||||
							
								
								
									
										57
									
								
								Sources/Readability/ScoringTask.swift
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								Sources/Readability/ScoringTask.swift
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,57 @@
 | 
			
		|||
//
 | 
			
		||||
//  ScoringTask.swift
 | 
			
		||||
//  Readability
 | 
			
		||||
//
 | 
			
		||||
//  Created by Shibo Lyu on 2022/5/19.
 | 
			
		||||
//
 | 
			
		||||
 | 
			
		||||
import Foundation
 | 
			
		||||
 | 
			
		||||
/// Runs a selectable set of readability scorers over a piece of text.
class RAScoringTask {
  /// The score produced by each scorer that was run.
  typealias Results = [Scorer: Double]

  /// Identifiers for the scorers this task knows about.
  enum Scorer: Hashable, CaseIterable, Comparable {
    case fleschReadingEase
    case fleschKincaidGrade
  }

  /// Registry mapping each identifier to the type that implements it.
  static let availableScorers: [Scorer: RAScorer.Type] = [
    .fleschReadingEase: RAFleschReadingEaseScorer.self,
    .fleschKincaidGrade: RAFleschKincaidGradeScorer.self
  ]

  /// Which scorers `run(on:)` should execute; defaults to all of them.
  var scorersToRun: Set<Scorer> = Set(Scorer.allCases)

  /// The implementing types for the scorers selected in `scorersToRun`.
  ///
  /// Filtering the registry avoids force-unwrapping a lookup; an identifier
  /// with no registered type is simply left out.
  var scorers: [Scorer: RAScorer.Type] {
    Self.availableScorers.filter { scorersToRun.contains($0.key) }
  }

  /// The union of the common metrics required by the selected scorers.
  var commonMetricsToGet: Set<RACommonMetric> {
    scorers.values.reduce(into: Set<RACommonMetric>()) { required, scorerType in
      if let metrics = scorerType.requiresCommonMetrics {
        required.formUnion(metrics)
      }
    }
  }

  /// Scores `text` with every selected scorer.
  ///
  /// Common metrics are calculated once and shared by all scorers; they are
  /// `nil` when no selected scorer asks for any.
  ///
  /// - Parameter text: The text to score.
  /// - Returns: One score per selected scorer.
  func run(on text: String) -> Results {
    // Evaluate the computed properties once; each access rebuilds its value.
    let selectedScorers = scorers
    let requiredMetrics = commonMetricsToGet

    var commonMetrics: RACommonMetricsCalculator.Results? = nil
    if !requiredMetrics.isEmpty {
      commonMetrics = RACommonMetricsCalculator(metrics: requiredMetrics)
        .calculate(on: text)
    }

    return selectedScorers.reduce(into: Results()) { results, entry in
      let (identifier, scorerType) = entry
      results[identifier] = scorerType.init().score(text, metrics: commonMetrics)
    }
  }
}
 | 
			
		||||
							
								
								
									
										39
									
								
								Sources/Readability/Tokenization.swift
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								Sources/Readability/Tokenization.swift
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,39 @@
 | 
			
		|||
//
 | 
			
		||||
//  Tokenization.swift
 | 
			
		||||
//  Readability
 | 
			
		||||
//
 | 
			
		||||
//  Created by Shibo Lyu on 2022/5/19.
 | 
			
		||||
//
 | 
			
		||||
 | 
			
		||||
import Foundation
 | 
			
		||||
import NaturalLanguage
 | 
			
		||||
 | 
			
		||||
/// Wraps an `NLTokenizer` bound to one string and one token unit.
struct RATokenizer {
  private var unit: NLTokenUnit
  private var text: String

  private var tokenizer: NLTokenizer

  /// Creates a tokenizer over `text`.
  ///
  /// - Parameters:
  ///   - text: The string to tokenize.
  ///   - unit: The token granularity handed to `NLTokenizer`.
  ///   - language: If given, set on the underlying tokenizer; otherwise no language is set.
  init(_ text: String, unit: NLTokenUnit, language: NLLanguage? = nil) {
    let configured = NLTokenizer(unit: unit)
    if let language = language {
      configured.setLanguage(language)
    }
    configured.string = text

    self.text = text
    self.unit = unit
    self.tokenizer = configured
  }

  /// Calls `callBack` with each token's text, in enumeration order.
  ///
  /// - Parameter callBack: Receives every token as a `String`.
  /// - Returns: The number of tokens enumerated.
  func enumerateTokens(using callBack: (String) -> Void) -> Int {
    var seen = 0
    let wholeText = text.startIndex..<text.endIndex

    tokenizer.enumerateTokens(in: wholeText) { tokenRange, _ in
      seen += 1
      callBack(String(text[tokenRange]))
      // Returning true keeps the enumeration going to the end of the text.
      return true
    }

    return seen
  }
}
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue