第11章 PMML モデルの例

PMML は、XML スキーマ を定義しており、このスキーマを使用することで、PMML モデルが異なる PMML 準拠のプラットフォーム間で使用できるようになります。PMML 仕様では、プロデューサーとコンシューマー適合を満たしていることが前提で、複数のソフトウェアプラットフォームが同じファイルを使用してオーサリング、テスト、および実稼働環境での実行を可能にします。

以下は、PMML 回帰、スコアカード、ツリー、マイニング、およびクラスターリングモデルの例です。これらの例では、Red Hat Process Automation Manager のデシジョンサービスと統合可能なモデルでサポートされているタイプを紹介します。

PMML の詳細例は、DMG の PMML Sample Files ページを参照してください。

PMML 回帰モデルの例

<PMML version="4.2" xsi:schemaLocation="http://www.dmg.org/PMML-4_2 http://www.dmg.org/v4-2-1/pmml-4-2.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.dmg.org/PMML-4_2">
  <Header copyright="JBoss"/>
  <DataDictionary numberOfFields="5">
    <DataField dataType="double" name="fld1" optype="continuous"/>
    <DataField dataType="double" name="fld2" optype="continuous"/>
    <DataField dataType="string" name="fld3" optype="categorical">
      <Value value="x"/>
      <Value value="y"/>
    </DataField>
    <DataField dataType="double" name="fld4" optype="continuous"/>
    <DataField dataType="double" name="fld5" optype="continuous"/>
  </DataDictionary>
  <RegressionModel algorithmName="linearRegression" functionName="regression" modelName="LinReg" normalizationMethod="logit" targetFieldName="fld4">
    <MiningSchema>
      <MiningField name="fld1"/>
      <MiningField name="fld2"/>
      <MiningField name="fld3"/>
      <MiningField name="fld4" usageType="predicted"/>
      <MiningField name="fld5" usageType="target"/>
    </MiningSchema>
    <RegressionTable intercept="0.5">
      <NumericPredictor coefficient="5" exponent="2" name="fld1"/>
      <NumericPredictor coefficient="2" exponent="1" name="fld2"/>
      <CategoricalPredictor coefficient="-3" name="fld3" value="x"/>
      <CategoricalPredictor coefficient="3" name="fld3" value="y"/>
      <PredictorTerm coefficient="0.4">
        <FieldRef field="fld1"/>
        <FieldRef field="fld2"/>
      </PredictorTerm>
    </RegressionTable>
  </RegressionModel>
</PMML>

PMML スコアカードモデルの例

<PMML version="4.2" xsi:schemaLocation="http://www.dmg.org/PMML-4_2 http://www.dmg.org/v4-2-1/pmml-4-2.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.dmg.org/PMML-4_2">
  <Header copyright="JBoss"/>
  <DataDictionary numberOfFields="4">
    <DataField name="param1" optype="continuous" dataType="double"/>
    <DataField name="param2" optype="continuous" dataType="double"/>
    <DataField name="overallScore" optype="continuous" dataType="double" />
    <DataField name="finalscore" optype="continuous" dataType="double" />
  </DataDictionary>
  <Scorecard modelName="ScorecardCompoundPredicate" useReasonCodes="true" isScorable="true" functionName="regression"    baselineScore="15" initialScore="0.8" reasonCodeAlgorithm="pointsAbove">
    <MiningSchema>
      <MiningField name="param1" usageType="active" invalidValueTreatment="asMissing">
      </MiningField>
      <MiningField name="param2" usageType="active" invalidValueTreatment="asMissing">
      </MiningField>
      <MiningField name="overallScore" usageType="target"/>
      <MiningField name="finalscore" usageType="predicted"/>
    </MiningSchema>
    <Characteristics>
      <Characteristic name="ch1" baselineScore="50" reasonCode="reasonCh1">
        <Attribute partialScore="20">
          <SimplePredicate field="param1" operator="lessThan" value="20"/>
        </Attribute>
        <Attribute partialScore="100">
          <CompoundPredicate booleanOperator="and">
            <SimplePredicate field="param1" operator="greaterOrEqual" value="20"/>
            <SimplePredicate field="param2" operator="lessOrEqual" value="25"/>
          </CompoundPredicate>
        </Attribute>
        <Attribute partialScore="200">
          <CompoundPredicate booleanOperator="and">
            <SimplePredicate field="param1" operator="greaterOrEqual" value="20"/>
            <SimplePredicate field="param2" operator="greaterThan" value="25"/>
          </CompoundPredicate>
        </Attribute>
      </Characteristic>
      <Characteristic name="ch2" reasonCode="reasonCh2">
        <Attribute partialScore="10">
          <CompoundPredicate booleanOperator="or">
            <SimplePredicate field="param2" operator="lessOrEqual" value="-5"/>
            <SimplePredicate field="param2" operator="greaterOrEqual" value="50"/>
          </CompoundPredicate>
        </Attribute>
        <Attribute partialScore="20">
          <CompoundPredicate booleanOperator="and">
            <SimplePredicate field="param2" operator="greaterThan" value="-5"/>
            <SimplePredicate field="param2" operator="lessThan" value="50"/>
          </CompoundPredicate>
        </Attribute>
      </Characteristic>
    </Characteristics>
  </Scorecard>
</PMML>

PMML ツリーモデルの例

<PMML version="4.2" xsi:schemaLocation="http://www.dmg.org/PMML-4_2 http://www.dmg.org/v4-2-1/pmml-4-2.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.dmg.org/PMML-4_2">
  <Header copyright="JBOSS"/>
  <DataDictionary numberOfFields="5">
    <DataField dataType="double" name="fld1" optype="continuous"/>
    <DataField dataType="double" name="fld2" optype="continuous"/>
    <DataField dataType="string" name="fld3" optype="categorical">
      <Value value="true"/>
      <Value value="false"/>
    </DataField>
    <DataField dataType="string" name="fld4" optype="categorical">
      <Value value="optA"/>
      <Value value="optB"/>
      <Value value="optC"/>
    </DataField>
    <DataField dataType="string" name="fld5" optype="categorical">
      <Value value="tgtX"/>
      <Value value="tgtY"/>
      <Value value="tgtZ"/>
    </DataField>
  </DataDictionary>
  <TreeModel functionName="classification" modelName="TreeTest">
    <MiningSchema>
      <MiningField name="fld1"/>
      <MiningField name="fld2"/>
      <MiningField name="fld3"/>
      <MiningField name="fld4"/>
      <MiningField name="fld5" usageType="predicted"/>
    </MiningSchema>
    <Node score="tgtX">
      <True/>
      <Node score="tgtX">
        <SimplePredicate field="fld4" operator="equal" value="optA"/>
        <Node score="tgtX">
          <CompoundPredicate booleanOperator="surrogate">
            <SimplePredicate field="fld1" operator="lessThan" value="30.0"/>
            <SimplePredicate field="fld2" operator="greaterThan" value="20.0"/>
          </CompoundPredicate>
          <Node score="tgtX">
            <SimplePredicate field="fld2" operator="lessThan" value="40.0"/>
          </Node>
          <Node score="tgtZ">
            <SimplePredicate field="fld2" operator="greaterOrEqual" value="10.0"/>
          </Node>
        </Node>
        <Node score="tgtZ">
          <CompoundPredicate booleanOperator="or">
            <SimplePredicate field="fld1" operator="greaterOrEqual" value="60.0"/>
            <SimplePredicate field="fld1" operator="lessOrEqual" value="70.0"/>
          </CompoundPredicate>
          <Node score="tgtZ">
            <SimpleSetPredicate booleanOperator="isNotIn" field="fld4">
              <Array type="string">optA optB</Array>
            </SimpleSetPredicate>
          </Node>
        </Node>
      </Node>
      <Node score="tgtY">
        <CompoundPredicate booleanOperator="or">
          <SimplePredicate field="fld4" operator="equal" value="optA"/>
          <SimplePredicate field="fld4" operator="equal" value="optC"/>
        </CompoundPredicate>
        <Node score="tgtY">
          <CompoundPredicate booleanOperator="and">
            <SimplePredicate field="fld1" operator="greaterThan" value="10.0"/>
            <SimplePredicate field="fld1" operator="lessThan" value="50.0"/>
            <SimplePredicate field="fld4" operator="equal" value="optA"/>
            <SimplePredicate field="fld2" operator="lessThan" value="100.0"/>
            <SimplePredicate field="fld3" operator="equal" value="false"/>
          </CompoundPredicate>
        </Node>
        <Node score="tgtZ">
          <CompoundPredicate booleanOperator="and">
            <SimplePredicate field="fld4" operator="equal" value="optC"/>
            <SimplePredicate field="fld2" operator="lessThan" value="30.0"/>
          </CompoundPredicate>
        </Node>
      </Node>
    </Node>
  </TreeModel>
</PMML>

PMML マイニングモデルの例 (modelChain)

<PMML version="4.2" xsi:schemaLocation="http://www.dmg.org/PMML-4_2 http://www.dmg.org/v4-2-1/pmml-4-2.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"   xmlns="http://www.dmg.org/PMML-4_2">
  <Header>
    <Application name="Drools-PMML" version="7.0.0-SNAPSHOT" />
  </Header>
  <DataDictionary numberOfFields="7">
    <DataField name="age" optype="continuous" dataType="double" />
    <DataField name="occupation" optype="categorical" dataType="string">
      <Value value="SKYDIVER" />
      <Value value="ASTRONAUT" />
      <Value value="PROGRAMMER" />
      <Value value="TEACHER" />
      <Value value="INSTRUCTOR" />
    </DataField>
    <DataField name="residenceState" optype="categorical" dataType="string">
      <Value value="AP" />
      <Value value="KN" />
      <Value value="TN" />
    </DataField>
    <DataField name="validLicense" optype="categorical" dataType="boolean" />
    <DataField name="overallScore" optype="continuous" dataType="double" />
    <DataField name="grade" optype="categorical" dataType="string">
      <Value value="A" />
      <Value value="B" />
      <Value value="C" />
      <Value value="D" />
      <Value value="F" />
    </DataField>
    <DataField name="qualificationLevel" optype="categorical" dataType="string">
      <Value value="Unqualified" />
      <Value value="Barely" />
      <Value value="Well" />
      <Value value="Over" />
    </DataField>
  </DataDictionary>
  <MiningModel modelName="SampleModelChainMine" functionName="classification">
    <MiningSchema>
      <MiningField name="age" />
      <MiningField name="occupation" />
      <MiningField name="residenceState" />
      <MiningField name="validLicense" />
      <MiningField name="overallScore" />
      <MiningField name="qualificationLevel" usageType="target"/>
    </MiningSchema>
    <Segmentation multipleModelMethod="modelChain">
      <Segment id="1">
        <True />
        <Scorecard modelName="Sample Score 1" useReasonCodes="true" isScorable="true" functionName="regression"               baselineScore="0.0" initialScore="0.345">
          <MiningSchema>
            <MiningField name="age" usageType="active" invalidValueTreatment="asMissing" />
            <MiningField name="occupation" usageType="active" invalidValueTreatment="asMissing" />
            <MiningField name="residenceState" usageType="active" invalidValueTreatment="asMissing" />
            <MiningField name="validLicense" usageType="active" invalidValueTreatment="asMissing" />
            <MiningField name="overallScore" usageType="predicted" />
          </MiningSchema>
          <Output>
            <OutputField name="calculatedScore" displayName="Final Score" dataType="double" feature="predictedValue"                     targetField="overallScore" />
          </Output>
          <Characteristics>
            <Characteristic name="AgeScore" baselineScore="0.0" reasonCode="ABZ">
              <Extension name="cellRef" value="$B$8" />
              <Attribute partialScore="10.0">
                <Extension name="cellRef" value="$C$10" />
                <SimplePredicate field="age" operator="lessOrEqual" value="5" />
              </Attribute>
              <Attribute partialScore="30.0" reasonCode="CX1">
                <Extension name="cellRef" value="$C$11" />
                <CompoundPredicate booleanOperator="and">
                  <SimplePredicate field="age" operator="greaterOrEqual" value="5" />
                  <SimplePredicate field="age" operator="lessThan" value="12" />
                </CompoundPredicate>
              </Attribute>
              <Attribute partialScore="40.0" reasonCode="CX2">
                <Extension name="cellRef" value="$C$12" />
                <CompoundPredicate booleanOperator="and">
                  <SimplePredicate field="age" operator="greaterOrEqual" value="13" />
                  <SimplePredicate field="age" operator="lessThan" value="44" />
                </CompoundPredicate>
              </Attribute>
              <Attribute partialScore="25.0">
                <Extension name="cellRef" value="$C$13" />
                <SimplePredicate field="age" operator="greaterOrEqual" value="45" />
              </Attribute>
            </Characteristic>
            <Characteristic name="OccupationScore" baselineScore="0.0">
              <Extension name="cellRef" value="$B$16" />
              <Attribute partialScore="-10.0" reasonCode="CX2">
                <Extension name="description" value="skydiving is a risky occupation" />
                <Extension name="cellRef" value="$C$18" />
                <SimpleSetPredicate field="occupation" booleanOperator="isIn">
                  <Array n="2" type="string">SKYDIVER ASTRONAUT</Array>
                </SimpleSetPredicate>
              </Attribute>
              <Attribute partialScore="10.0">
                <Extension name="cellRef" value="$C$19" />
                <SimpleSetPredicate field="occupation" booleanOperator="isIn">
                  <Array n="2" type="string">TEACHER INSTRUCTOR</Array>
                </SimpleSetPredicate>
              </Attribute>
              <Attribute partialScore="5.0">
                <Extension name="cellRef" value="$C$20" />
                <SimplePredicate field="occupation" operator="equal" value="PROGRAMMER" />
              </Attribute>
            </Characteristic>
            <Characteristic name="ResidenceStateScore" baselineScore="0.0" reasonCode="RES">
              <Extension name="cellRef" value="$B$22" />
              <Attribute partialScore="-10.0">
                <Extension name="cellRef" value="$C$24" />
                <SimplePredicate field="residenceState" operator="equal" value="AP" />
              </Attribute>
              <Attribute partialScore="10.0">
                <Extension name="cellRef" value="$C$25" />
                <SimplePredicate field="residenceState" operator="equal" value="KN" />
              </Attribute>
              <Attribute partialScore="5.0">
                <Extension name="cellRef" value="$C$26" />
                <SimplePredicate field="residenceState" operator="equal" value="TN" />
              </Attribute>
            </Characteristic>
            <Characteristic name="ValidLicenseScore" baselineScore="0.0">
              <Extension name="cellRef" value="$B$28" />
              <Attribute partialScore="1.0" reasonCode="LX00">
                <Extension name="cellRef" value="$C$30" />
                <SimplePredicate field="validLicense" operator="equal" value="true" />
              </Attribute>
              <Attribute partialScore="-1.0" reasonCode="LX00">
                <Extension name="cellRef" value="$C$31" />
                <SimplePredicate field="validLicense" operator="equal" value="false" />
              </Attribute>
            </Characteristic>
          </Characteristics>
        </Scorecard>
      </Segment>
      <Segment id="2">
        <True />
        <TreeModel modelName="SampleTree" functionName="classification" missingValueStrategy="lastPrediction" noTrueChildStrategy="returnLastPrediction">
          <MiningSchema>
            <MiningField name="age" usageType="active" />
            <MiningField name="validLicense" usageType="active" />
            <MiningField name="calculatedScore" usageType="active" />
            <MiningField name="qualificationLevel" usageType="predicted" />
          </MiningSchema>
          <Output>
            <OutputField name="qualification" displayName="Qualification Level" dataType="string" feature="predictedValue"                     targetField="qualificationLevel" />
          </Output>
          <Node score="Well" id="1">
            <True/>
            <Node score="Barely" id="2">
              <CompoundPredicate booleanOperator="and">
                <SimplePredicate field="age" operator="greaterOrEqual" value="16" />
                <SimplePredicate field="validLicense" operator="equal" value="true" />
              </CompoundPredicate>
              <Node score="Barely" id="3">
                <SimplePredicate field="calculatedScore" operator="lessOrEqual" value="50.0" />
              </Node>
              <Node score="Well" id="4">
                <CompoundPredicate booleanOperator="and">
                  <SimplePredicate field="calculatedScore" operator="greaterThan" value="50.0" />
                  <SimplePredicate field="calculatedScore" operator="lessOrEqual" value="60.0" />
                </CompoundPredicate>
              </Node>
              <Node score="Over" id="5">
                <SimplePredicate field="calculatedScore" operator="greaterThan" value="60.0" />
              </Node>
            </Node>
            <Node score="Unqualified" id="6">
              <CompoundPredicate booleanOperator="surrogate">
                <SimplePredicate field="age" operator="lessThan" value="16" />
                <SimplePredicate field="calculatedScore" operator="lessOrEqual" value="40.0" />
                <True />
              </CompoundPredicate>
            </Node>
          </Node>
        </TreeModel>
      </Segment>
    </Segmentation>
  </MiningModel>
</PMML>

PMML クラスターリングモデルの例

<?xml version="1.0" encoding="UTF-8"?>
<PMML version="4.1" xmlns="http://www.dmg.org/PMML-4_1">
  <Header>
    <Application name="KNIME" version="2.8.0"/>
  </Header>
  <DataDictionary numberOfFields="5">
    <DataField name="sepal_length" optype="continuous" dataType="double">
      <Interval closure="closedClosed" leftMargin="4.3" rightMargin="7.9"/>
    </DataField>
    <DataField name="sepal_width" optype="continuous" dataType="double">
      <Interval closure="closedClosed" leftMargin="2.0" rightMargin="4.4"/>
    </DataField>
    <DataField name="petal_length" optype="continuous" dataType="double">
      <Interval closure="closedClosed" leftMargin="1.0" rightMargin="6.9"/>
    </DataField>
    <DataField name="petal_width" optype="continuous" dataType="double">
      <Interval closure="closedClosed" leftMargin="0.1" rightMargin="2.5"/>
    </DataField>
    <DataField name="class" optype="categorical" dataType="string"/>
  </DataDictionary>
  <ClusteringModel modelName="SingleIrisKMeansClustering" functionName="clustering" modelClass="centerBased" numberOfClusters="4">
    <MiningSchema>
      <MiningField name="sepal_length" invalidValueTreatment="asIs"/>
      <MiningField name="sepal_width" invalidValueTreatment="asIs"/>
      <MiningField name="petal_length" invalidValueTreatment="asIs"/>
      <MiningField name="petal_width" invalidValueTreatment="asIs"/>
      <MiningField name="class" usageType="predicted"/>
    </MiningSchema>
    <ComparisonMeasure kind="distance">
      <squaredEuclidean/>
    </ComparisonMeasure>
    <ClusteringField field="sepal_length" compareFunction="absDiff"/>
    <ClusteringField field="sepal_width" compareFunction="absDiff"/>
    <ClusteringField field="petal_length" compareFunction="absDiff"/>
    <ClusteringField field="petal_width" compareFunction="absDiff"/>
    <Cluster name="virginica" size="32">
      <Array n="4" type="real">6.9125000000000005 3.099999999999999 5.846874999999999 2.1312499999999996</Array>
    </Cluster>
    <Cluster name="versicolor" size="41">
      <Array n="4" type="real">6.23658536585366 2.8585365853658535 4.807317073170731 1.6219512195121943</Array>
    </Cluster>
    <Cluster name="setosa" size="50">
      <Array n="4" type="real">5.005999999999999 3.4180000000000006 1.464 0.2439999999999999</Array>
    </Cluster>
    <Cluster name="unknown" size="27">
      <Array n="4" type="real">5.529629629629629 2.6222222222222222 3.940740740740741 1.2185185185185188</Array>
    </Cluster>
  </ClusteringModel>
</PMML>