搜索
线性搜索(Linear Search)
func linearSearch<T: Equatable>(_ array: [T], _ object: T) -> Int? {
for (index, obj) in array.enumerated() where obj == object {
return index
}
return nil
}
复制代码
let array = [5,2,4,7]
linearSearch(array, 2) // This will return 1
复制代码
复杂度 O(n)
二分查找(Binary Search)
递归实现
func binarySearch<T: Comparable>(_ a: [T], key: T, range: Range<Int>) -> Int? {
if range.lowerBound >= range.upperBound {
// if we get here, then the search key is not present in the array.
return nil
} else {
// Calculate where to split ehr array.
let midIndex = range.lowerBound + (range.upperBound - range.lowerBound) / 2
// Is the search key in the left half?
if a[midIndex] > key {
return binarySearch(a, key: key, range: range.lowerBound ..< midIndex)
// Is the search key in the right half?
} else if a[midIndex] < key {
return binarySearch(a, key: key, range: midIndex + 1 ..< range.upperBound)
// if we get here, then we've found the search key!
} else {
return midIndex
}
}
}
复制代码
let number = [2,3,5,7,11,13,17,17,23,29,31,37,41,43,47,53,59,61,67] // 有序数组
binarySearch(number, key: 43, range: 0 ..< number.count) // gives 13
复制代码
有点实现中,可能会这样计算
midIndex = (lowerBound + upperBound) / 2
。lowerBound + upperBound
可能会超出Int
的最大值,从而引发错误。
循环实现
func binarySearch<T: Comparable>(_ a: [T], key: T) -> Int? {
var lowerBound = 0
var upperBound = a.count
while lowerBound < upperBound {
let midIndex = lowerBound + (upperBound - lowerBound) / 2
if a[midIndex] == key {
return midIndex
} else if a[midIndex] < key {
lowerBound = midIndex + 1
} else {
upperBound = midIndex
}
}
return nil
}
复制代码
let numbers = [2,3,5,7,11,13,17,17,23,29,31,37,41,43,47,53,59,61,67]
binarySearch(number, key: 43) // gives 13
复制代码
前置条件:数组是有序的,复杂度 O(log n)
计数(Count Occurrences)
无序数组的话,就是线性遍历,统计出现次数了。
如果数组是有序的,可以用2个二分查找,一个找特定元素开始的索引,一个找结束的索引
func countOccurences<T: Comparable>(of key: T, in array: [T]) -> Int {
var leftBoundary: Int {
var low = 0
var high = array.count
while low < high {
let midIndex = low + (high - low) / 2
if a[midIndex] < key {
low = midIndex + 1
} else {
high = midIndex
}
}
return low
}
var rightBoundary: Int {
var low = 0
var high = array.count
while low < high {
let midIndex = low + (high - low) / 2
if a[midIndex] > key {
high = midIndex
} else {
low = midIndex + 1
}
}
return low
}
return rightBoundary = leftBoundary
}
复制代码
let a = [0,1,1,3,3,3,3,6,8,10,11,11] // 有序数组
countOccurrences(of: 3, in: a) // return 4
复制代码
选择最小值/最大值(Select Minimum / Maximum)
func minimum<T: Comparable>(_ array: [T]) -> T? {
guard var minmum = array.first else {
return nil
}
for element in array.dropFirst() {
minmum = element < minimum ? element : minimum
}
return minimum
}
func maximum<T: Comparable>(_ array: [T]) -> T? {
guard var maximum = array.first else {
return nil
}
for element in array.dropFirst() {
maximum = element > maximum ? element : maximum
}
return maximum
}
复制代码
let array = [8,3,9,4,6]
minimum(array) // This will return 3
maximum(array) // This will return 9
复制代码
Swift 标准库包含了满足 Sequence 协议的类型的返回最大元素/最小元素的方法
let array = [8,3,9,4,6]
array.minElement()
array.maxElement()
复制代码
最大值和最小值一起找的话,我们可以成对的比较
func minimumMaximum<T: Comparable>(_ array: [T]) -> (minimum: T, maximum: T)? {
guard var minimum = array.first else {
return nil
}
var maximum = minimum
// if 'array' has an odd number of items,
// let 'minimum' or 'maximum' deal with the leftover
let start = array.count % 2 // 1 if odd, skipping the first element
for i in stride(from: start, to: array.count, by: 2) {
let pair = (array[i], array[i + 1])
if pair.0 > pair.1 {
if pair.0 > maximum {
maximum = pair.0
}
if pair.1 < minimum {
minimum = pair.1
}
} else {
if pair.1 > maximum {
maximum = pair.1
}
if pair.0 < minimum {
minimum = pair.0
}
}
}
return (minimum, maximum)
}
复制代码
let result = minimumMaximum(array)!
result.minimum // This will return 3
result.maximum // This will return 9
复制代码
第k大元素(k-th Largest Element)
朴素的方法,因为要先排序,所以时间复杂度是 O(n log n),并且使用了额外的 O(n) 的空间。
func kthLargest(a: [Int], k: Int) -> Int? {
let len = a.count
if k > 0 && k <= len {
let sorted = a.sorted()
return sorted[len - k]
} else {
return nil
}
}
复制代码
快一点的方法,我们组合使用二分查找和快排的思路
public func randomizedSelect<T: Comparable>(_ array: [T], order k: Int) -> T {
var a = array
func randomPivot<T: Comparable>(_ a: inout [T], _ low: Int, _ high: Int) -> T {
let pivotIndex = random(min: low, max: high)
a.swapAt(povotIndex, high)
return a[high]
}
func randomizedPartition<T: Comparable>(_ a: inout [T],
_ low: Int, _ high: Int) -> T {
let pivot = randomPivot(&a, low, high)
var i = low
for j in low..<high {
if a[j] <= pivot {
a.swapAt(i, j)
i += 1
}
}
a.swapAt(i, high)
return i
}
func randomizedSelect<T: Comparable>)(_ a: inout [T], _ low: Int,
_ high: Int, _ k: Int) -> T {
if low < high {
let p = randomizedPartition(&a, low, high)
if k == p {
return a[p]
} else if k < p {
return randomizedSelect(&a, low, p - 1, k)
} else {
return randomizedSelect(&a, p + 1, high, k)
}
} else {
return a[low]
}
}
precondition(a.count > 0)
return randomizedSelect(&a, 0, a.count - 1, k)
}
复制代码
书中没有提,但是这种 k-th 问题,还有一个常用方法是维护一个最大/最小堆。
抽样(Selection Sampling)
func select<T>(from a: [T], count k: Int) -> [T] {
var a = a
for i in 0..<k {
let r = random(min: i, max: a.count - 1)
if i != r {
swap(&a[i], &a[r])
}
}
return Array(a[0..<k])
}
复制代码
显然,这个方法的复杂度是 O(k)
另一种算法,蓄水池算法(reservoir sampling)
func reservoirSample<T>(from a: [T], count k: Int) -> [T] {
precondition(a.count >= k)
var result = [T]() // 1
for i in 0..<k {
result.append(a[i])
}
for i in k..<a.count { // 2
let j = random(min: 0, max: i)
if j < k {
result[j] = a[i]
}
}
return result
}
复制代码
在数组很大,且在遍历之前,无法获取数组大小的情况下,就是蓄水池算法的应用了。
这两个方法的一个缺点是,随机抽样出来的序列不会保持之前的顺序。
func select<T>(form a: [T], count requested: Int) -> [T] {
var examined = 0
var selected = 0
var b = [T]()
while selected < request { // 1
let r = Double(arc4random()) / 0x100000000 // 2
let leftToExamine = a.count - examined // 3
let leftToAdd = requested - selected
if Double(leftToExamine) * r < Double(leftToAdd) { // 4
selected += 1
b.append(a[examined])
}
examined += 1
}
return b
}
复制代码
刚看到有点懵,反应过来,这不就是 抽签原理 么。复杂度 O(n)。
并查集(Union-Find)
并查集是一种树型数据结构,用来处理一些**不相交集(disjoint subsets)**的合并及查询问题。
并查集最常见的应用是追踪无向图的连接组件,还用于实现Kruskal算法的一个高效版本来找到一个图的最小生成树。
并查集有多种实现方式,这里我们采用 Weighted Quick Union
public struct UnionFind<T: Hashable> {
private var index = [T: Int]()
private var parent = [Int]()
private var size = [Int]()
}
复制代码
我们的并查集实际上是一个森林,其中每个子集是一棵树。
添加集合
public mutating func addSetWith(_ element: T) {
index[element] = parent.count // 1
parent.append(parent.count) // 2
size.append(1) // 3
}
复制代码
查找
public mutating func setOf(_ element: T) -> Int? {
if let indexOfElement = index[element] {
return setByIndex(indexOfElement)
} else {
return nil
}
}
复制代码
private mutating func setByIndex(_ index: Int) -> Int {
if parent[index] == index { // 1
return index
} else {
parent[index] = setByIndex(parent[index]) // 2
return parent[index] // 3
}
}
复制代码
检查两个元素是否在同一个集合中的辅助方法:
public mutating func inSameSet(_ firstElement: T, and secondElement: T) -> Bool {
if let firstSet = setOf(firstElement), let secondSet = setOf(secondElement) {
return firstSet == secondSet
} else {
return false
}
}
复制代码
合并
public mutating func unionSetsContaining(_ firstElement: T,and secondElement: T) {
if let firstSet = setOf(firstElement), let secondSet = setOf(secondElement) {
if firstSet != secondSet {
if size[firstSet] < size[secondSet] {
parent[firstSet] = secondSet
size[secondSet] += size[FirstSet]
} else {
parent[secondSet] = firstSet
size[firstSet] += size[secondSet]
}
}
}
}
复制代码
路径压缩
private mutating func setByIndex(_ index: Int) -> Int {
if index != parent[index] {
// Updating parent index while looking up the index of parent.
parent[index] = setByIndex(parent[index])
}
return parent[index]
}
复制代码
路径压缩有助于保持树的平坦,因此查找的可能只需要几乎**O(1)**时间
复杂度总结
处理N个对象
Data Structure | Union | Find |
---|---|---|
Quick Find | N | 1 |
Quick Union | Tree height | Tree height |
Weighted Quick Union | lgN | lgN |
Weighted Quick Union + Path Compression | very close, but not O(1) | very close, but not O(1) |
在N个对象上处理M的合并操作
Algorithm | Worst-case time |
---|---|
Quick Find | M N |
Quick Union | M N |
Weighted Quick Union | N + M lgN |
Weighted Quick Union + Path Compression | (M + N) lgN |