Kmeans as mixed integer programming

Good day, I need help with getting getting the results as shown here: (http://yetanothermathprogrammingconsultant.blogspot.com/2021/05/clustering-models.html). Here is the code I tried but the distances from the optimised solution are zeros when I expect only the one's not assigned to any clusters to be zero. My question is why are distances from assigned clusters also zero?

dataArray = np.array(data, dtype=float)
clusters = 3


def getM(dataArray):
    nrOfPoints = len(dataArray)

    MList = [0] * nrOfPoints

    for i in range(0, nrOfPoints):
        for j in range(0, nrOfPoints):
            M = (np.linalg.norm(dataArray[i] - dataArray[j])) ** 2

            if M > MList[i]:
                MList[i] = M

    return MList


# solve problem with gurobi
# def solveProblem(dataArray, dimension, clusters, n):


# calculate M
MList = getM(dataArray)


# MAKE GUROBI MODEL
model = gp.Model()


# ADD VARIABLES
# store centroids coordinates in dictionary
centroids = {}

for category in range(clusters):
    centroids[category] = {}
    for dim in range(dimension):
        centroids[category][dim] = model.addVar(
            lb=0, ub=gp.GRB.INFINITY, name="c_" + str(category) + "_p" + str(dim)
        )


# store binary variables of every data point in dictionary
b = {}
for data in range(n):
    b[data] = {}

    for category in range(clusters):
        b[data][category] = model.addVar(
            0, 1, vtype=gp.GRB.BINARY, name="b_" + str(category) + "_d" + str(data)
        )


# store distances between cluster center and data points in dictionary
r = {}
for data in range(n):
    r[data] = model.addVar(0, MList[data], name="r_" + str(data))


# UPDATE MODEL to include variables
model.update()


# ADD CONSTRAINTS
for data in range(n):
    # every data point must be assigned to one center
    # sum of binary variables must be eaqual to one
    model.addConstr(sum(b[data][category] for category in range(clusters)) == 1)


# ADD CONSTRAINT: Each cluster must have at least one point assigned to it
for category in range(clusters):
    model.addConstr(sum(b[data][category] for data in range(n)) >= 1)


# Quadratic constraints for the distances (big M)
for category in range(clusters):
    model.addQConstr(
        sum(
            (dataArray[data][dim] - centroids[category][dim])
            * (dataArray[data][dim] - centroids[category][dim])
            for dim in range(dimension)
        )
        <= r[data] + MList[data] * (1 - b[data][category])
    )


# ADD SYMMETRY BREAKING CONSTRAINT
# The following constraints removes some symmetries, by imposing an ordering of the centroids along the first axis
# The constraints are of the type: the first coordinate of the first center must be smaller than that of the second
# second center and so on. Without these constraints we can create two different equally good solutions by switching
# the location of two centroids.
for category in range(clusters - 1):
    model.addConstr(centroids[category][0] <= centroids[category + 1][0])


# DEFINE OBJECTIVE FUNCTION
model.setObjective(sum(r[data] for data in range(n)))

# SET TIME LIMIT
model.setParam(
    "TimeLimit", 3600
)  # you don't want to wait much longer! Consider test that are faster

# UPDATE MODEL
model.update()

# SOLVE
model.optimize()

Kmeans as mixed integer programming

コメント

お探しのものを見つけられませんでしたか？