リスト要素の合計がしきい値を下回る様グルーピング

group list into lists when sum of list under threshold – StackOverflow

def grouping_lst(lst, t=50):
    out, temp, v = [], [], 0
    for tpl in lst:
        x = sum(tpl)
        if v + x > 50:
            out.append(temp)
            temp, v = [x], x
        else:
            temp.append(x)
            v += x
    if v != x: out.append(temp)
    return out


lst = [
    (14, 2), (14, 2), (16, 2), (14, 2), (15, 2), (15, 2), (21, 2), (15, 2), 
    (18, 2), (15, 2), (19, 2), (25, 2), (22, 2), (17, 2), (31, 2), (26, 2), 
    (21, 2), (25, 2), (29, 2), (33, 2), (25, 2), (23, 2), (25, 2), (19, 2), 
    (12, 2), (29, 2), (18, 2), (21, 2), (13, 2), (13, 2), (18, 2), (11, 2), 
    (12, 2), (20, 2), (23, 2), (17, 2), (14, 2), (17, 2), (12, 2), (13, 2), 
    (15, 2), (21, 2), (15, 2), (19, 2), (22, 2), (16, 2), (16, 2), (13, 2), 
    (17, 2), (18, 2), (20, 2), (18, 2), (13, 2), (13, 2), (18, 2), (14, 2), 
    (13, 2), (22, 2), (14, 2), (25, 2), (22, 2), (9, 2), (18, 2), (22, 2), 
    (19, 2), (13, 2), (14, 2), (15, 2), (13, 2), (17, 2), (21, 2), (18, 2), 
    (21, 2), (18, 2), (15, 2), (16, 2), (13, 2), (16, 2), (16, 2), (15, 2), 
    (11, 2), (24, 2), (15, 2), (12, 2), (20, 2), (21, 2), (21, 2), (14, 2), 
    (11, 2), (26, 2), (17, 2), (21, 2), (16, 2), (13, 2), (15, 2), (13, 2), 
    (12, 2), (22, 2), (16, 2), (13, 2), (13, 2), (22, 2), (12, 2), (16, 2), 
    (16, 2), (21, 2), (19, 2), (15, 2), (16, 2), (16, 2), (13, 2), (14, 2), 
    (14, 2), (20, 2), (14, 2), (20, 2), (13, 2), (19, 2), (20, 2), (17, 2), 
    (17, 2), (25, 2), (22, 2), (22, 2), (22, 2), (14, 2), (19, 2), (20, 2), 
    (16, 2), (13, 2), (19, 2), (16, 2), (12, 2), (18, 2), (20, 2), (19, 2), 
    (18, 2), (15, 2), (22, 2), (18, 2), (20, 2), (14, 2), (19, 2), (16, 2), 
    (18, 2), (28, 2), (14, 2), (17, 2), (17, 2), (23, 2), (18, 2), (24, 2), 
    (17, 2), (18, 2), (18, 2), (22, 2)
]

print(grouping_lst(lst), end='\n\n')
[[16, 16, 18], [16, 17, 17], [23, 17], [20, 17], [21, 27], [24, 19], [33], [28], [23, 27], [31], [35], [27], [25], [27, 21], [14, 31], [20, 23], [15, 15, 20], [13, 14, 22], [25, 19], [16, 19, 14], [15, 17], [23, 17], [21, 24], [18, 18], [15, 19], [20, 22], [20, 15, 15], [20, 16], [15, 24], [16, 27], [24, 11], [20, 24], [21, 15], [16, 17, 15], [19, 23], [20, 23], [20, 17], [18, 15], [18, 18], [17, 13], [26, 17], [14, 22], [23, 23], [16, 13], [28, 19], [23, 18], [15, 17, 15], [14, 24], [18, 15, 15], [24, 14], [18, 18], [23, 21], [17, 18], [18, 15, 16], [16, 22], [16, 22], [15, 21], [22, 19], [19, 27], [24, 24], [24, 16], [21, 22], [18, 15], [21, 18], [14, 20], [22, 21], [20, 17], [24, 20], [22, 16], [21, 18], [20, 30], [16, 19], [19, 25], [20, 26], [19, 20], [20, 24]]

広告
カテゴリー: 未分類 | コメントをどうぞ

配列を生成するための最速の方法

Fastest way to populate an array? – StackOverflow

Fastest wayを考えるなら,普通にループさせてCythonizeする場合,Numbaを用いる場合も検討するべきだが,非常に単純なのでNumpyでも十分に高速.最初にゼロ埋めのアレイを作れば,「N//2:」の処理は不要.

def populate_lst(N):
    return [1 if i in (0, N//2) else 2 if 0<i<N//2 else 0 for i in range(N)]


def populate_arr(N, dtype='u1'):
    out = np.zeros(N, dtype=dtype)
    out[[0, N//2]] = 1
    out[1:N//2] = 2
    return out


N = 10
populate_lst(N), populate_arr(N)
([1, 2, 2, 2, 2, 1, 0, 0, 0, 0],
 array([1, 2, 2, 2, 2, 1, 0, 0, 0, 0], dtype=uint8))
%timeit populate_lst(1_000_000)
%timeit populate_arr(1_000_000)
1 loop, best of 3: 274 ms per loop
10000 loops, best of 3: 185 µs per loop
カテゴリー: 未分類 | コメントをどうぞ

データフレーム列内の連続した値の和(任意長の範囲で合計)

extracting data from panda series as per the index of another panda series – StackOverflow

Numpyを用いるのがリーズナブルチョイス.

import pandas as pd
import numpy as np


def seq_sum_1d(arr, ind=None, lens=None):
    if ind is None:
        if lens is None:
            ind = np.concatenate(([0], np.flatnonzero(np.diff(arr))+1))
        else:
            ind = np.concatenate(([0], lens)).cumsum()[:-1]
    return np.add.reduceat(arr, ind)


def seq_cumsum_1d(a, ind=None, lens=None):
    arr = a.copy()
    if ind is None:
        if lens is None:
            ind = np.concatenate(([0], np.flatnonzero(np.diff(arr))+1))
        else:
            ind = np.concatenate(([0], lens)).cumsum()[:-1]
    arr[ind[1:]] -= np.add.reduceat(arr, ind)[:-1]
    return arr.cumsum(0)


x = pd.Series([220,340,500,600,700,900,540,60])
y = pd.Series([2,1,2,2,1])
res = pd.Series(seq_sum_1d(x.to_numpy(), lens=y.to_numpy()))
res
0     560
1     500
2    1300
3    1440
4      60
dtype: int64

 
 
関連:
データフレームの任意列でグループ化された年数を比較

pandasデータフレーム列内のNaNに基づいて累積和をリセット – 任意の条件に基づいた範囲で累積和を求める

しきい値に基づいて累積和を計算

データフレームの任意列における条件から降順の累積和を求める

データフレーム各行の連続したゼロをカウント

pandas.DataFrame.applyは(時空間効率の観点からは)使用するべきではない – 使用すべき明確な理由がない限りpandas.DataFrame.applyはリーズナブルチョイスになり得ない

特定の値のギャップを数える – 任意の要素間のカウント

シーケンシャルな値の累積和(cumsum)

ある配列から別の1次元numpyアレイの要素の出現数を調べる – 配列aについて配列bの要素をカウント

2D Numpy配列で最初の要素が重複している行の平均 – 2Dなnumpy.ndarrayの最初の要素(1列目)で各行をグルーピングして集計(平均を算出)

一貫性のある最大領域(最大ブロブ)を取得

Counting Sort(Python/Numpy)

カテゴリー: 未分類 | コメントをどうぞ

時系列データを結合(測定値の位置合わせ)

Numpy: How to best align two sorted arrays? – StackOverflow

データ長を合わせて,欠損値を前の値で補間する様な処理.

import numpy as np


def align_arr(*arr):
    sarr = np.union1d(*arr)
    return [np.maximum.accumulate(np.isin(sarr, a)*sarr) for a in arr]


a = np.array([0, 10, 12, 16, 25, 29])
b = np.array([0,  5, 10, 15, 20, 25, 30])
# [0,0,10,12,12,16,16,25,29,29]
# [0,5,10,10,15,15,20,25,25,30]
align_arr(a, b)
[array([ 0,  0, 10, 12, 12, 16, 16, 25, 29, 29]),
 array([ 0,  5, 10, 10, 15, 15, 20, 25, 25, 30])]
カテゴリー: 未分類 | コメントをどうぞ

合計が任意の値となる整数の組み合わせを求める(部分和問題)

How many ways can you make the sum of a number in javascript. How to code lt in javascript? [on hold] – StackOverflow

def comb(t, tpl=(), s=None):
    if s is None: s = set()
    for i in range(1, t+1):
        res = tuple(sorted((*tpl, i)))
        if res in s:
            break
        elif t - i == 0:
            yield res
            s.add(res)
        else:
            yield from comb(t-i, (*tpl, i), s)            


for x in comb(4):
    print('+'.join(map(str, x)))
1+1+1+1
1+1+2
1+3
2+2
4
def count_dp(S, t):
    dp = [1] + [0]*t
    for i in S:
        for j in range(t-i+1):
            dp[i+j] += dp[j]
    return dp[t]
 

def count_dfs(lst: sorted, t: int) -> int:
    if t == 0:
        return 1
    elif len(lst) == 0:
        return 0
    else:
        return sum(count_dfs(lst[1:], t-lst[0]*i) for i in range(t//lst[0]+1))


lst, t = list(range(1, 5)), 4
count_dp(lst, t), count_dfs(lst, t)
(5, 5)
import numpy as np
 

def print_allcomb(S, t, repeated=True, file=None):
    elm = [np.arange(0, t+1, s).reshape(-1, *[1]*(len(S)-1-i)) for i, s in enumerate(S)]
    idx = np.argwhere(np.sum(elm)==t)
    if not repeated:
        idx = idx[~(idx>1).any(1)]
        out = set()
    for subl in idx.tolist():
        temp = [S[i] for i, n in enumerate(subl) for _ in range(n)]
        if repeated:
            print('+'.join(map(str, temp)), file=file)
        else:
            s = '+'.join(map(str, temp))
            if s not in out:
                print(s, file=file)
            out.add(s)


print_allcomb(lst, t)
4
2+2
1+3
1+1+2
1+1+1+1
%timeit list(comb(4))
%timeit list(comb(10))

f = open('/dev/null', 'w')
%timeit print_allcomb(list(range(1, 5)), 4, file=f)
%timeit print_allcomb(list(range(1, 11)), 10, file=f)
100000 loops, best of 3: 18 µs per loop
1000 loops, best of 3: 1.36 ms per loop

10000 loops, best of 3: 53.1 µs per loop
1000 loops, best of 3: 1.65 ms per loop

 
 
関連:
硬貨の和(任意の合計値を満たす組み合わせの総数)

すべての部分集合和を求める(部分和問題)

カテゴリー: 未分類 | コメントをどうぞ

numpy.ndarrayの各行要素をデカルト積を求める様に結合

Elementwise concatenation in numpy – StackOverflow

[0, 1]
[2, 3]
[4, 5]

↓

[0, 1, 0, 1]
[0, 1, 2, 3]
[0, 1, 4, 5]
[2, 3, 0, 1]
[2, 3, 2, 3]
[2, 3, 4, 5]
[4, 5, 0, 1]
[4, 5, 2, 3]
[4, 5, 4, 5] 

numpy.repeatとnumpy.tileを用いるのがリーズナブルチョイス.

回答でデカルト積云々と書いてあるのをみて,例えば,Numpyでデカルト積(cartesian product)を求めるのに,sklearn.utils.extmath.cartesianがリーズナブルだが,これはそのままだと要素のデカルト積しか求められない.今のように,row-wiseに処理しようと思うと,各行のviewを作成して,デカルト積を求めた後で,元のviewに戻すという処理が考えられる.このケースだと,別に効率的ではないけど,他の場面でcartesian productを求めたい時に,欲しい形状に合わせながら処理するには,numpy.ndarrayのメモリ配列の理解という意味では,良い練習になるかなと.

import numpy as np
from sklearn.utils.extmath import cartesian


def np_concat(a):
    n = len(a)
    return np.concatenate((a.repeat(n, 0), np.tile(a, (n, 1))), 1)


def view1D(a):
    a = np.ascontiguousarray(a)
    void_dt = np.dtype((np.void, a.itemsize*a.shape[1]))
    return a.view(void_dt).ravel()


def np_product(a):
    A = view1D(a)
    v = np.stack((A, A))
    return cartesian(v).view('(2,1)i8').reshape(-1, 4)


a = np.arange(6).reshape(3, 2)
print(np_concat(a), end='\n\n')
print(np_product(a), end='\n\n')
[[0 1 0 1]
 [0 1 2 3]
 [0 1 4 5]
 [2 3 0 1]
 [2 3 2 3]
 [2 3 4 5]
 [4 5 0 1]
 [4 5 2 3]
 [4 5 4 5]]

[[0 1 0 1]
 [0 1 2 3]
 [0 1 4 5]
 [2 3 0 1]
 [2 3 2 3]
 [2 3 4 5]
 [4 5 0 1]
 [4 5 2 3]
 [4 5 4 5]]
def cartesian_product(*arrays):
    la = len(arrays)
    dtype = np.result_type(*arrays)
    arr = np.empty([len(a) for a in arrays] + [la], dtype=dtype)
    for i, a in enumerate(np.ix_(*arrays)):
        arr[...,i] = a
    return arr.reshape(-1, la)


a = np.arange(20000).reshape(10000, 2)
%timeit np_concat(a)
%timeit np_product(a)
%timeit idxs = cartesian_product(*[np.arange(len(a))] * 2);a[idxs].reshape(idxs.shape[0], -1)   
1 loop, best of 3: 2.79 s per loop
1 loop, best of 3: 5.13 s per loop
1 loop, best of 3: 5.21 s per loop
カテゴリー: 未分類 | コメントをどうぞ

リスト内連続要素の合計値が任意値となる数

Subarray Sum Equals K – StackOverflow

多分,O(n^2)にしかならないので,普通にループを回すのが最善.

def count(lst, k=0, c=0):
    n = len(lst)
    for i in range(n):
        v = lst[i]
        if v == k:
            c += 1
        elif v > k:
            continue
        for j in range(i+1, n):
            v += lst[j]
            if v == k:
                c += 1
            elif v > k:
                break
    return c


test_case = (([0]*10, 0), ([1, 1, 1], 2))
for l, t in test_case:
    print(count(l, t))
55
2
カテゴリー: 未分類 | コメントをどうぞ